Merge git://www.linux-watchdog.org/linux-watchdog

author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 19 Dec 2014 04:21:32 +0000 (20:21 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 19 Dec 2014 04:21:32 +0000 (20:21 -0800)

diff --git a/.mailmap b/.mailmap

index 1ad68731fb47a9658733520c1982038cf6aaaa00..ada8ad696b2e902489c6e8a8f713f1285bf5a9c5 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -17,7 +17,7 @@ Aleksey Gorelov <aleksey_gorelov@phoenix.com>
  Al Viro <viro@ftp.linux.org.uk>
  Al Viro <viro@zenIV.linux.org.uk>
  Andreas Herrmann <aherrman@de.ibm.com>
-Andrew Morton <akpm@osdl.org>
+Andrew Morton <akpm@linux-foundation.org>
  Andrew Vasquez <andrew.vasquez@qlogic.com>
  Andy Adamson <andros@citi.umich.edu>
  Archit Taneja <archit@ti.com>
@@ -102,6 +102,8 @@ Rudolf Marek <R.Marek@sh.cvut.cz>
  Rui Saraiva <rmps@joel.ist.utl.pt>
  Sachin P Sant <ssant@in.ibm.com>
  Sam Ravnborg <sam@mars.ravnborg.org>
+Santosh Shilimkar <ssantosh@kernel.org>
+Santosh Shilimkar <santosh.shilimkar@oracle.org>
  Sascha Hauer <s.hauer@pengutronix.de>
  S.Çağlar Onur <caglar@pardus.org.tr>
  Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com>
diff --git a/Documentation/DocBook/media/v4l/compat.xml b/Documentation/DocBook/media/v4l/compat.xml

index 0a2debfa68f616fa0aa15afc53687edfe1484263..350dfb3d71ea736aeeb379dcb69256b561c8a734 100644 (file)
--- a/Documentation/DocBook/media/v4l/compat.xml
+++ b/Documentation/DocBook/media/v4l/compat.xml
@@ -2579,6 +2579,18 @@ fields changed from _s32 to _u32.
        </orderedlist>
      </section>
  
+    <section>
+      <title>V4L2 in Linux 3.19</title>
+      <orderedlist>
+       <listitem>
+         <para>Rewrote Colorspace chapter, added new &v4l2-ycbcr-encoding;
+and &v4l2-quantization; fields to &v4l2-pix-format;, &v4l2-pix-format-mplane;
+and &v4l2-mbus-framefmt;.
+         </para>
+       </listitem>
+      </orderedlist>
+    </section>
+
      <section id="other">
        <title>Relation of V4L2 to other Linux multimedia APIs</title>
  
diff --git a/Documentation/DocBook/media/v4l/pixfmt.xml b/Documentation/DocBook/media/v4l/pixfmt.xml

index ccf6053c1ae4563fa113ef02c34502a015d612be..d5eca4b8f74b41f8635c596d4747295213d0169f 100644 (file)
--- a/Documentation/DocBook/media/v4l/pixfmt.xml
+++ b/Documentation/DocBook/media/v4l/pixfmt.xml
@@ -138,9 +138,25 @@ applicable values.</para></entry>
         <row>
           <entry>__u32</entry>
           <entry><structfield>flags</structfield></entry>
-           <entry>Flags set by the application or driver, see <xref
+         <entry>Flags set by the application or driver, see <xref
  linkend="format-flags" />.</entry>
         </row>
+       <row>
+         <entry>&v4l2-ycbcr-encoding;</entry>
+         <entry><structfield>ycbcr_enc</structfield></entry>
+         <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+       </row>
+       <row>
+         <entry>&v4l2-quantization;</entry>
+         <entry><structfield>quantization</structfield></entry>
+         <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+       </row>
        </tbody>
      </tgroup>
    </table>
@@ -231,10 +247,26 @@ codes can be used.</entry>
           <entry><structfield>flags</structfield></entry>
           <entry>Flags set by the application or driver, see <xref
  linkend="format-flags" />.</entry>
+       </row>
+       <row>
+         <entry>&v4l2-ycbcr-encoding;</entry>
+         <entry><structfield>ycbcr_enc</structfield></entry>
+         <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+       </row>
+       <row>
+         <entry>&v4l2-quantization;</entry>
+         <entry><structfield>quantization</structfield></entry>
+         <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
         </row>
          <row>
            <entry>__u8</entry>
-          <entry><structfield>reserved[10]</structfield></entry>
+          <entry><structfield>reserved[8]</structfield></entry>
            <entry>Reserved for future extensions. Should be zeroed by the
             application.</entry>
          </row>
diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml

index 18730b96e1e6453519af2e869113958c16aad7f9..c5ea868e390958d499681d1b63570c11c9933721 100644 (file)
--- a/Documentation/DocBook/media/v4l/subdev-formats.xml
+++ b/Documentation/DocBook/media/v4l/subdev-formats.xml
@@ -33,9 +33,25 @@
           <entry>Image colorspace, from &v4l2-colorspace;. See
           <xref linkend="colorspaces" /> for details.</entry>
         </row>
+       <row>
+         <entry>&v4l2-ycbcr-encoding;</entry>
+         <entry><structfield>ycbcr_enc</structfield></entry>
+         <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+       </row>
+       <row>
+         <entry>&v4l2-quantization;</entry>
+         <entry><structfield>quantization</structfield></entry>
+         <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+       </row>
         <row>
           <entry>__u32</entry>
-         <entry><structfield>reserved</structfield>[7]</entry>
+         <entry><structfield>reserved</structfield>[6]</entry>
           <entry>Reserved for future extensions. Applications and drivers must
           set the array to zero.</entry>
         </row>
diff --git a/Documentation/DocBook/media/v4l/v4l2.xml b/Documentation/DocBook/media/v4l/v4l2.xml

index 7cfe618f754d35fecfb9c55e7ef3efad72c539f4..ac0f8d9d2a4976a59f6c02b44de1b6859e768134 100644 (file)
--- a/Documentation/DocBook/media/v4l/v4l2.xml
+++ b/Documentation/DocBook/media/v4l/v4l2.xml
@@ -151,6 +151,15 @@ structs, ioctls) must be noted in more detail in the history chapter
  (compat.xml), along with the possible impact on existing drivers and
  applications. -->
  
+      <revision>
+       <revnumber>3.19</revnumber>
+       <date>2014-12-05</date>
+       <authorinitials>hv</authorinitials>
+       <revremark>Rewrote Colorspace chapter, added new &v4l2-ycbcr-encoding; and &v4l2-quantization; fields
+to &v4l2-pix-format;, &v4l2-pix-format-mplane; and &v4l2-mbus-framefmt;.
+       </revremark>
+      </revision>
+
        <revision>
         <revnumber>3.17</revnumber>
         <date>2014-08-04</date>
@@ -539,7 +548,7 @@ and discussions on the V4L mailing list.</revremark>
  </partinfo>
  
  <title>Video for Linux Two API Specification</title>
- <subtitle>Revision 3.17</subtitle>
+ <subtitle>Revision 3.19</subtitle>
  
    <chapter id="common">
      &sub-common;
diff --git a/Documentation/devicetree/bindings/media/rcar_vin.txt b/Documentation/devicetree/bindings/media/rcar_vin.txt

index ba61782c2af94d01c82d3c7e6fb0c8f3083c0084..9dafe6b06cd286a053a5db66cbbf8a5288faba2a 100644 (file)
--- a/Documentation/devicetree/bindings/media/rcar_vin.txt
+++ b/Documentation/devicetree/bindings/media/rcar_vin.txt
@@ -6,6 +6,8 @@ family of devices. The current blocks are always slaves and suppot one input
  channel which can be either RGB, YUYV or BT656.
  
   - compatible: Must be one of the following
+   - "renesas,vin-r8a7794" for the R8A7794 device
+   - "renesas,vin-r8a7793" for the R8A7793 device
     - "renesas,vin-r8a7791" for the R8A7791 device
     - "renesas,vin-r8a7790" for the R8A7790 device
     - "renesas,vin-r8a7779" for the R8A7779 device
diff --git a/Documentation/ia64/kvm.txt b/Documentation/ia64/kvm.txt

deleted file mode 100644 (file)

index ffb5c80..0000000
--- a/Documentation/ia64/kvm.txt
+++ /dev/null
@@ -1,83 +0,0 @@
-Currently, kvm module is in EXPERIMENTAL stage on IA64. This means that
-interfaces are not stable enough to use. So, please don't run critical
-applications in virtual machine.
-We will try our best to improve it in future versions!
-
-                               Guide: How to boot up guests on kvm/ia64
-
-This guide is to describe how to enable kvm support for IA-64 systems.
-
-1. Get the kvm source from git.kernel.org.
-       Userspace source:
-               git clone git://git.kernel.org/pub/scm/virt/kvm/kvm-userspace.git
-       Kernel Source:
-               git clone git://git.kernel.org/pub/scm/linux/kernel/git/xiantao/kvm-ia64.git
-
-2. Compile the source code.
-       2.1 Compile userspace code:
-               (1)cd ./kvm-userspace
-               (2)./configure
-               (3)cd kernel
-               (4)make sync LINUX= $kernel_dir (kernel_dir is the directory of kernel source.)
-               (5)cd ..
-               (6)make qemu
-               (7)cd qemu; make install
-
-       2.2 Compile kernel source code:
-               (1) cd ./$kernel_dir
-               (2) Make menuconfig
-               (3) Enter into virtualization option, and choose kvm.
-               (4) make
-               (5) Once (4) done, make modules_install
-               (6) Make initrd, and use new kernel to reboot up host machine.
-               (7) Once (6) done, cd $kernel_dir/arch/ia64/kvm
-               (8) insmod kvm.ko; insmod kvm-intel.ko
-
-Note: For step 2, please make sure that host page size == TARGET_PAGE_SIZE of qemu, otherwise, may fail.
-
-3. Get Guest Firmware named as Flash.fd, and put it under right place:
-       (1) If you have the guest firmware (binary) released by Intel Corp for Xen, use it directly.
-
-       (2) If you have no firmware at hand, Please download its source from
-               hg clone http://xenbits.xensource.com/ext/efi-vfirmware.hg
-           you can get the firmware's binary in the directory of efi-vfirmware.hg/binaries.
-
-       (3) Rename the firmware you owned to Flash.fd, and copy it to /usr/local/share/qemu
-
-4. Boot up Linux or Windows guests:
-       4.1 Create or install a image for guest boot. If you have xen experience, it should be easy.
-
-       4.2 Boot up guests use the following command.
-               /usr/local/bin/qemu-system-ia64 -smp xx -m 512 -hda $your_image
-               (xx is the number of virtual processors for the guest, now the maximum value is 4)
-
-5. Known possible issue on some platforms with old Firmware.
-
-In the event of strange host crash issues, try to solve it through either of the following ways:
-
-(1): Upgrade your Firmware to the latest one.
-
-(2): Applying the below patch to kernel source.
-diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S
-index 0b53344..f02b0f7 100644
---- a/arch/ia64/kernel/pal.S
-+++ b/arch/ia64/kernel/pal.S
-@@ -84,7 +84,8 @@ GLOBAL_ENTRY(ia64_pal_call_static)
-       mov ar.pfs = loc1
-       mov rp = loc0
-       ;;
--      srlz.d                          // serialize restoration of psr.l
-+      srlz.i                  // serialize restoration of psr.l
-+      ;;
-       br.ret.sptk.many b0
- END(ia64_pal_call_static)
-
-6. Bug report:
-       If you found any issues when use kvm/ia64, Please post the bug info to kvm-ia64-devel mailing list.
-       https://lists.sourceforge.net/lists/listinfo/kvm-ia64-devel/
-
-Thanks for your interest! Let's work together, and make kvm/ia64 stronger and stronger!
-
-
-                                                               Xiantao Zhang <xiantao.zhang@intel.com>
-                                                                                       2008.3.10
diff --git a/Documentation/networking/fib_trie.txt b/Documentation/networking/fib_trie.txt

index 0723db7f8495a1a0466ae06cd311c3e72ce24337..fe719388518ba1b31f1ecbad07db3f616d066237 100644 (file)
--- a/Documentation/networking/fib_trie.txt
+++ b/Documentation/networking/fib_trie.txt
@@ -73,8 +73,8 @@ trie_leaf_remove()
  
  trie_rebalance()
         The key function for the dynamic trie after any change in the trie
-       it is run to optimize and reorganize. Tt will walk the trie upwards 
-       towards the root from a given tnode, doing a resize() at each step 
+       it is run to optimize and reorganize. It will walk the trie upwards
+       towards the root from a given tnode, doing a resize() at each step
         to implement level compression.
  
  resize()
diff --git a/Documentation/video4linux/vivid.txt b/Documentation/video4linux/vivid.txt

index e5a940e3d304dbccbc673bfa990f76919a56bb8a..6cfc8541a362ccb4871259f01c91bdaadb3a8aba 100644 (file)
--- a/Documentation/video4linux/vivid.txt
+++ b/Documentation/video4linux/vivid.txt
@@ -640,6 +640,21 @@ Colorspace: selects which colorspace should be used when generating the image.
         Changing the colorspace will result in the V4L2_EVENT_SOURCE_CHANGE
         to be sent since it emulates a detected colorspace change.
  
+Y'CbCr Encoding: selects which Y'CbCr encoding should be used when generating
+       a Y'CbCr image. This only applies if the CSC Colorbar test pattern is
+       selected, and if the format is set to a Y'CbCr format as opposed to an
+       RGB format.
+
+       Changing the Y'CbCr encoding will result in the V4L2_EVENT_SOURCE_CHANGE
+       to be sent since it emulates a detected colorspace change.
+
+Quantization: selects which quantization should be used for the RGB or Y'CbCr
+       encoding when generating the test pattern. This only applies if the CSC
+       Colorbar test pattern is selected.
+
+       Changing the quantization will result in the V4L2_EVENT_SOURCE_CHANGE
+       to be sent since it emulates a detected colorspace change.
+
  Limited RGB Range (16-235): selects if the RGB range of the HDMI source should
         be limited or full range. This combines with the Digital Video 'Rx RGB
         Quantization Range' control and can be used to test what happens if
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt

index 7610eaa4d4917537e6db8668eb812cf72cd900b7..0007fef4ed8141390f01bef4d1fef63ec2596723 100644 (file)
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -68,9 +68,12 @@ description:
  
    Capability: which KVM extension provides this ioctl.  Can be 'basic',
        which means that is will be provided by any kernel that supports
-      API version 12 (see section 4.1), or a KVM_CAP_xyz constant, which
+      API version 12 (see section 4.1), a KVM_CAP_xyz constant, which
        means availability needs to be checked with KVM_CHECK_EXTENSION
-      (see section 4.4).
+      (see section 4.4), or 'none' which means that while not all kernels
+      support this ioctl, there's no capability bit to check its
+      availability: for kernels that don't support the ioctl,
+      the ioctl returns -ENOTTY.
  
    Architectures: which instruction set architectures provide this ioctl.
        x86 includes both i386 and x86_64.
@@ -604,7 +607,7 @@ struct kvm_fpu {
  4.24 KVM_CREATE_IRQCHIP
  
  Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390)
-Architectures: x86, ia64, ARM, arm64, s390
+Architectures: x86, ARM, arm64, s390
  Type: vm ioctl
  Parameters: none
  Returns: 0 on success, -1 on error
@@ -612,7 +615,7 @@ Returns: 0 on success, -1 on error
  Creates an interrupt controller model in the kernel.  On x86, creates a virtual
  ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
  local APIC.  IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
-only go to the IOAPIC.  On ia64, a IOSAPIC is created. On ARM/arm64, a GIC is
+only go to the IOAPIC.  On ARM/arm64, a GIC is
  created. On s390, a dummy irq routing table is created.
  
  Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled
@@ -622,7 +625,7 @@ before KVM_CREATE_IRQCHIP can be used.
  4.25 KVM_IRQ_LINE
  
  Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64, arm, arm64
+Architectures: x86, arm, arm64
  Type: vm ioctl
  Parameters: struct kvm_irq_level
  Returns: 0 on success, -1 on error
@@ -676,7 +679,7 @@ struct kvm_irq_level {
  4.26 KVM_GET_IRQCHIP
  
  Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64
+Architectures: x86
  Type: vm ioctl
  Parameters: struct kvm_irqchip (in/out)
  Returns: 0 on success, -1 on error
@@ -698,7 +701,7 @@ struct kvm_irqchip {
  4.27 KVM_SET_IRQCHIP
  
  Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64
+Architectures: x86
  Type: vm ioctl
  Parameters: struct kvm_irqchip (in)
  Returns: 0 on success, -1 on error
@@ -991,7 +994,7 @@ for vm-wide capabilities.
  4.38 KVM_GET_MP_STATE
  
  Capability: KVM_CAP_MP_STATE
-Architectures: x86, ia64, s390
+Architectures: x86, s390
  Type: vcpu ioctl
  Parameters: struct kvm_mp_state (out)
  Returns: 0 on success; -1 on error
@@ -1005,16 +1008,15 @@ uniprocessor guests).
  
  Possible values are:
  
- - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86, ia64]
+ - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86]
   - KVM_MP_STATE_UNINITIALIZED:   the vcpu is an application processor (AP)
-                                 which has not yet received an INIT signal [x86,
-                                 ia64]
+                                 which has not yet received an INIT signal [x86]
   - KVM_MP_STATE_INIT_RECEIVED:   the vcpu has received an INIT signal, and is
-                                 now ready for a SIPI [x86, ia64]
+                                 now ready for a SIPI [x86]
   - KVM_MP_STATE_HALTED:          the vcpu has executed a HLT instruction and
-                                 is waiting for an interrupt [x86, ia64]
+                                 is waiting for an interrupt [x86]
   - KVM_MP_STATE_SIPI_RECEIVED:   the vcpu has just received a SIPI (vector
-                                 accessible via KVM_GET_VCPU_EVENTS) [x86, ia64]
+                                 accessible via KVM_GET_VCPU_EVENTS) [x86]
   - KVM_MP_STATE_STOPPED:         the vcpu is stopped [s390]
   - KVM_MP_STATE_CHECK_STOP:      the vcpu is in a special error state [s390]
   - KVM_MP_STATE_OPERATING:       the vcpu is operating (running or halted)
@@ -1022,7 +1024,7 @@ Possible values are:
   - KVM_MP_STATE_LOAD:            the vcpu is in a special load/startup state
                                   [s390]
  
-On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
  in-kernel irqchip, the multiprocessing state must be maintained by userspace on
  these architectures.
  
@@ -1030,7 +1032,7 @@ these architectures.
  4.39 KVM_SET_MP_STATE
  
  Capability: KVM_CAP_MP_STATE
-Architectures: x86, ia64, s390
+Architectures: x86, s390
  Type: vcpu ioctl
  Parameters: struct kvm_mp_state (in)
  Returns: 0 on success; -1 on error
@@ -1038,7 +1040,7 @@ Returns: 0 on success; -1 on error
  Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for
  arguments.
  
-On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
  in-kernel irqchip, the multiprocessing state must be maintained by userspace on
  these architectures.
  
@@ -1065,7 +1067,7 @@ documentation when it pops into existence).
  4.41 KVM_SET_BOOT_CPU_ID
  
  Capability: KVM_CAP_SET_BOOT_CPU_ID
-Architectures: x86, ia64
+Architectures: x86
  Type: vm ioctl
  Parameters: unsigned long vcpu_id
  Returns: 0 on success, -1 on error
@@ -1257,8 +1259,8 @@ The flags bitmap is defined as:
  
  4.48 KVM_ASSIGN_PCI_DEVICE
  
-Capability: KVM_CAP_DEVICE_ASSIGNMENT
-Architectures: x86 ia64
+Capability: none
+Architectures: x86
  Type: vm ioctl
  Parameters: struct kvm_assigned_pci_dev (in)
  Returns: 0 on success, -1 on error
@@ -1298,25 +1300,36 @@ Only PCI header type 0 devices with PCI BAR resources are supported by
  device assignment.  The user requesting this ioctl must have read/write
  access to the PCI sysfs resource files associated with the device.
  
+Errors:
+  ENOTTY: kernel does not support this ioctl
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
+
  
  4.49 KVM_DEASSIGN_PCI_DEVICE
  
-Capability: KVM_CAP_DEVICE_DEASSIGNMENT
-Architectures: x86 ia64
+Capability: none
+Architectures: x86
  Type: vm ioctl
  Parameters: struct kvm_assigned_pci_dev (in)
  Returns: 0 on success, -1 on error
  
  Ends PCI device assignment, releasing all associated resources.
  
-See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. Only assigned_dev_id is
+See KVM_ASSIGN_PCI_DEVICE for the data structure. Only assigned_dev_id is
  used in kvm_assigned_pci_dev to identify the device.
  
+Errors:
+  ENOTTY: kernel does not support this ioctl
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
  
  4.50 KVM_ASSIGN_DEV_IRQ
  
  Capability: KVM_CAP_ASSIGN_DEV_IRQ
-Architectures: x86 ia64
+Architectures: x86
  Type: vm ioctl
  Parameters: struct kvm_assigned_irq (in)
  Returns: 0 on success, -1 on error
@@ -1346,11 +1359,17 @@ The following flags are defined:
  It is not valid to specify multiple types per host or guest IRQ. However, the
  IRQ type of host and guest can differ or can even be null.
  
+Errors:
+  ENOTTY: kernel does not support this ioctl
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
+
  
  4.51 KVM_DEASSIGN_DEV_IRQ
  
  Capability: KVM_CAP_ASSIGN_DEV_IRQ
-Architectures: x86 ia64
+Architectures: x86
  Type: vm ioctl
  Parameters: struct kvm_assigned_irq (in)
  Returns: 0 on success, -1 on error
@@ -1365,7 +1384,7 @@ KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed.
  4.52 KVM_SET_GSI_ROUTING
  
  Capability: KVM_CAP_IRQ_ROUTING
-Architectures: x86 ia64 s390
+Architectures: x86 s390
  Type: vm ioctl
  Parameters: struct kvm_irq_routing (in)
  Returns: 0 on success, -1 on error
@@ -1423,8 +1442,8 @@ struct kvm_irq_routing_s390_adapter {
  
  4.53 KVM_ASSIGN_SET_MSIX_NR
  
-Capability: KVM_CAP_DEVICE_MSIX
-Architectures: x86 ia64
+Capability: none
+Architectures: x86
  Type: vm ioctl
  Parameters: struct kvm_assigned_msix_nr (in)
  Returns: 0 on success, -1 on error
@@ -1445,8 +1464,8 @@ struct kvm_assigned_msix_nr {
  
  4.54 KVM_ASSIGN_SET_MSIX_ENTRY
  
-Capability: KVM_CAP_DEVICE_MSIX
-Architectures: x86 ia64
+Capability: none
+Architectures: x86
  Type: vm ioctl
  Parameters: struct kvm_assigned_msix_entry (in)
  Returns: 0 on success, -1 on error
@@ -1461,6 +1480,12 @@ struct kvm_assigned_msix_entry {
         __u16 padding[3];
  };
  
+Errors:
+  ENOTTY: kernel does not support this ioctl
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
+
  
  4.55 KVM_SET_TSC_KHZ
  
@@ -2453,9 +2478,15 @@ return ENOEXEC for that vcpu.
  Note that because some registers reflect machine topology, all vcpus
  should be created before this ioctl is invoked.
  
+Userspace can call this function multiple times for a given vcpu, including
+after the vcpu has been run. This will reset the vcpu to its initial
+state. All calls to this function after the initial call must use the same
+target and same set of feature flags, otherwise EINVAL will be returned.
+
  Possible features:
         - KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state.
-         Depends on KVM_CAP_ARM_PSCI.
+         Depends on KVM_CAP_ARM_PSCI.  If not set, the CPU will be powered on
+         and execute guest code when KVM_RUN is called.
         - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode.
           Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
         - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU.
@@ -2951,6 +2982,15 @@ HVC instruction based PSCI call from the vcpu. The 'type' field describes
  the system-level event type. The 'flags' field describes architecture
  specific flags for the system-level event.
  
+Valid values for 'type' are:
+  KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the
+   VM. Userspace is not obliged to honour this, and if it does honour
+   this does not need to destroy the VM synchronously (ie it may call
+   KVM_RUN again before shutdown finally occurs).
+  KVM_SYSTEM_EVENT_RESET -- the guest has requested a reset of the VM.
+   As with SHUTDOWN, userspace can choose to ignore the request, or
+   to schedule the reset to occur in the future and may call KVM_RUN again.
+
                 /* Fix the size of the union. */
                 char padding[256];
         };
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt

index 0d16f96c0eac9672a540498bbefe773dc55f03a0..d426fc87fe938c40a5fa87f55bed4c5edde0e26c 100644 (file)
--- a/Documentation/virtual/kvm/devices/vm.txt
+++ b/Documentation/virtual/kvm/devices/vm.txt
@@ -12,14 +12,14 @@ specific.
  1. GROUP: KVM_S390_VM_MEM_CTRL
  Architectures: s390
  
-1.1. ATTRIBUTE: KVM_S390_VM_MEM_CTRL
+1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
  Parameters: none
-Returns: -EBUSY if already a vcpus is defined, otherwise 0
+Returns: -EBUSY if a vcpu is already defined, otherwise 0
  
-Enables CMMA for the virtual machine
+Enables Collaborative Memory Management Assist (CMMA) for the virtual machine.
  
-1.2. ATTRIBUTE: KVM_S390_VM_CLR_CMMA
-Parameteres: none
+1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
+Parameters: none
  Returns: 0
  
  Clear the CMMA status for all guest pages, so any pages the guest marked
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt

index 6d470ae7b073a2fa13e2f28d741166cbc346e42f..2a71c8f29f68eccf0727932015c52066834d2952 100644 (file)
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -168,7 +168,7 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02
         64 byte memory area which must be in guest RAM and must be
         zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1
         when asynchronous page faults are enabled on the vcpu 0 when
-       disabled. Bit 2 is 1 if asynchronous page faults can be injected
+       disabled. Bit 1 is 1 if asynchronous page faults can be injected
         when vcpu is in cpl == 0.
  
         First 4 byte of 64 byte memory location will be written to by
diff --git a/MAINTAINERS b/MAINTAINERS

index 4507a7e87c009892ebe7635f21022d9a889be210..7675425cabb86637a8b87790f5540534df38c593 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5495,15 +5495,6 @@ S:       Supported
  F:     arch/powerpc/include/asm/kvm*
  F:     arch/powerpc/kvm/
  
-KERNEL VIRTUAL MACHINE For Itanium (KVM/IA64)
-M:     Xiantao Zhang <xiantao.zhang@intel.com>
-L:     kvm-ia64@vger.kernel.org
-W:     http://kvm.qumranet.com
-S:     Supported
-F:     Documentation/ia64/kvm.txt
-F:     arch/ia64/include/asm/kvm*
-F:     arch/ia64/kvm/
-
  KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
  M:     Christian Borntraeger <borntraeger@de.ibm.com>
  M:     Cornelia Huck <cornelia.huck@de.ibm.com>
@@ -6619,19 +6610,8 @@ L:       netdev@vger.kernel.org
  S:     Maintained
  
  NETWORKING [WIRELESS]
-M:     "John W. Linville" <linville@tuxdriver.com>
  L:     linux-wireless@vger.kernel.org
  Q:     http://patchwork.kernel.org/project/linux-wireless/list/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless.git
-S:     Maintained
-F:     net/mac80211/
-F:     net/rfkill/
-F:     net/wireless/
-F:     include/net/ieee80211*
-F:     include/linux/wireless.h
-F:     include/uapi/linux/wireless.h
-F:     include/net/iw_handler.h
-F:     drivers/net/wireless/
  
  NETWORKING DRIVERS
  L:     netdev@vger.kernel.org
@@ -6652,6 +6632,14 @@ F:       include/linux/inetdevice.h
  F:     include/uapi/linux/if_*
  F:     include/uapi/linux/netdevice.h
  
+NETWORKING DRIVERS (WIRELESS)
+M:     Kalle Valo <kvalo@codeaurora.org>
+L:     linux-wireless@vger.kernel.org
+Q:     http://patchwork.kernel.org/project/linux-wireless/list/
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers.git/
+S:     Maintained
+F:     drivers/net/wireless/
+
  NETXEN (1/10) GbE SUPPORT
  M:     Manish Chopra <manish.chopra@qlogic.com>
  M:     Sony Chacko <sony.chacko@qlogic.com>
@@ -10249,13 +10237,13 @@ L:    netdev@vger.kernel.org
  S:     Maintained
  F:     drivers/net/ethernet/via/via-velocity.*
  
-VIVI VIRTUAL VIDEO DRIVER
+VIVID VIRTUAL VIDEO DRIVER
  M:     Hans Verkuil <hverkuil@xs4all.nl>
  L:     linux-media@vger.kernel.org
  T:     git git://linuxtv.org/media_tree.git
  W:     http://linuxtv.org
  S:     Maintained
-F:     drivers/media/platform/vivi*
+F:     drivers/media/platform/vivid/*
  
  VLAN (802.1Q)
  M:     Patrick McHardy <kaber@trash.net>
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig

index fe44b24946094d7ee7b0ea3add74c61c73f3a15a..df94ac1f75b6ac517784364cdeff6781eead9a6b 100644 (file)
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -428,3 +428,4 @@ source "arch/arc/Kconfig.debug"
  source "security/Kconfig"
  source "crypto/Kconfig"
  source "lib/Kconfig"
+source "kernel/power/Kconfig"
diff --git a/arch/arc/Makefile b/arch/arc/Makefile

index 10bc3d4e8a443b891007f9d10b01bb2072413342..db72fec0e160fc8e67fe17c5faf9c6ef54c8bcdb 100644 (file)
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -12,7 +12,7 @@ ifeq ($(CROSS_COMPILE),)
  CROSS_COMPILE := arc-linux-uclibc-
  endif
  
-KBUILD_DEFCONFIG := fpga_defconfig
+KBUILD_DEFCONFIG := nsim_700_defconfig
  
  cflags-y       += -mA7 -fno-common -pipe -fno-builtin -D__linux__
  
diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts

index cfaedd9c61c99d89f42c50ea694cfda48f2a2b18..1c169dc74ad1399685e0eb965bb94da5562327d2 100644 (file)
--- a/arch/arc/boot/dts/nsimosci.dts
+++ b/arch/arc/boot/dts/nsimosci.dts
@@ -20,7 +20,7 @@
                 /* this is for console on PGU */
                 /* bootargs = "console=tty0 consoleblank=0"; */
                 /* this is for console on serial */
-               bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
+               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
         };
  
         aliases {
@@ -41,9 +41,9 @@
                         #interrupt-cells = <1>;
                 };
  
-               uart0: serial@c0000000 {
+               uart0: serial@f0000000 {
                         compatible = "ns8250";
-                       reg = <0xc0000000 0x2000>;
+                       reg = <0xf0000000 0x2000>;
                         interrupts = <11>;
                         clock-frequency = <3686400>;
                         baud = <115200>;
@@ -52,21 +52,21 @@
                         no-loopback-test = <1>;
                 };
  
-               pgu0: pgu@c9000000 {
+               pgu0: pgu@f9000000 {
                         compatible = "snps,arcpgufb";
-                       reg = <0xc9000000 0x400>;
+                       reg = <0xf9000000 0x400>;
                 };
  
-               ps2: ps2@c9001000 {
+               ps2: ps2@f9001000 {
                         compatible = "snps,arc_ps2";
-                       reg = <0xc9000400 0x14>;
+                       reg = <0xf9000400 0x14>;
                         interrupts = <13>;
                         interrupt-names = "arc_ps2_irq";
                 };
  
-               eth0: ethernet@c0003000 {
+               eth0: ethernet@f0003000 {
                         compatible = "snps,oscilan";
-                       reg = <0xc0003000 0x44>;
+                       reg = <0xf0003000 0x44>;
                         interrupts = <7>, <8>;
                         interrupt-names = "rx", "tx";
                 };
diff --git a/arch/arc/configs/fpga_noramfs_defconfig b/arch/arc/configs/fpga_noramfs_defconfig

deleted file mode 100644 (file)

index 49c9301..0000000
--- a/arch/arc/configs/fpga_noramfs_defconfig
+++ /dev/null
@@ -1,63 +0,0 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
-# CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_DEFAULT_HOSTNAME="ARCLinux"
-# CONFIG_SWAP is not set
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_NAMESPACES=y
-# CONFIG_UTS_NS is not set
-# CONFIG_PID_NS is not set
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_EMBEDDED=y
-# CONFIG_SLUB_DEBUG is not set
-# CONFIG_COMPAT_BRK is not set
-CONFIG_KPROBES=y
-CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_FPGA_LEGACY=y
-# CONFIG_ARC_HAS_RTSC is not set
-CONFIG_ARC_BUILTIN_DTB_NAME="angel4"
-CONFIG_PREEMPT=y
-# CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_UNIX_DIAG=y
-CONFIG_NET_KEY=y
-CONFIG_INET=y
-# CONFIG_IPV6 is not set
-# CONFIG_STANDALONE is not set
-# CONFIG_PREVENT_FIRMWARE_BUILD is not set
-# CONFIG_FIRMWARE_IN_KERNEL is not set
-# CONFIG_BLK_DEV is not set
-CONFIG_NETDEVICES=y
-CONFIG_ARC_EMAC=y
-CONFIG_LXT_PHY=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
-CONFIG_SERIAL_ARC=y
-CONFIG_SERIAL_ARC_CONSOLE=y
-# CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
-# CONFIG_HID is not set
-# CONFIG_USB_SUPPORT is not set
-# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_TMPFS=y
-# CONFIG_MISC_FILESYSTEMS is not set
-CONFIG_NFS_FS=y
-# CONFIG_ENABLE_WARN_DEPRECATED is not set
-# CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_XZ_DEC=y
diff --git a/arch/arc/configs/fpga_defconfig b/arch/arc/configs/nsim_700_defconfig

similarity index 100%

rename from arch/arc/configs/fpga_defconfig

rename to arch/arc/configs/nsim_700_defconfig
diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h

index 742816f1b2101e2e8ef090ea9d14d444ae97aec8..27ecc6975a5845dee5960197bded8edeff261cbc 100644 (file)
--- a/arch/arc/include/asm/irqflags.h
+++ b/arch/arc/include/asm/irqflags.h
@@ -41,6 +41,15 @@
  
  /******************************************************************
   * IRQ Control Macros
+ *
+ * All of them have "memory" clobber (compiler barrier) which is needed to
+ * ensure that LD/ST requiring irq safety (R-M-W when LLSC is not available)
+ * are redone after IRQs are re-enabled (and gcc doesn't reuse stale register)
+ *
+ * Noted at the time of Abilis Timer List corruption
+ *     Orig Bug + Rejected solution    : https://lkml.org/lkml/2013/3/29/67
+ *     Reasoning                       : https://lkml.org/lkml/2013/4/8/15
+ *
   ******************************************************************/
  
  /*
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c

index d01df0c517a2044ddc15f5fdb895303aa3de56bb..20ebb602ea2f3a4f9721fff8f4458fcf4c0e3ec5 100644 (file)
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -26,8 +26,10 @@
  #include <asm/setup.h>
  #include <asm/mach_desc.h>
  
+#ifndef CONFIG_ARC_HAS_LLSC
  arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
  arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+#endif
  
  struct plat_smp_ops  plat_smp_ops;
  
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h

index b9db269c6e6155bbd1e60f2d7f8bad077b503913..66ce17655bb9e29a73f58ebd9a4716735fcf9739 100644 (file)
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -33,6 +33,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
  void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
  void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
  
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.hcr = HCR_GUEST_MASK;
+}
+
  static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
  {
         return 1;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h

index 53036e21756b6b1efaef6e9c34e64ae3117f30d2..254e0650e48bbc1d07a81d091c8e33570f983851 100644 (file)
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -150,8 +150,6 @@ struct kvm_vcpu_stat {
         u32 halt_wakeup;
  };
  
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-                       const struct kvm_vcpu_init *init);
  int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
  unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
  int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h

index acb0d5712716050139f83145fc75a7fe21c0b476..63e0ecc0490180e8b8b5e548b6b6a17b95b725e1 100644 (file)
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -52,6 +52,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
  void free_boot_hyp_pgd(void);
  void free_hyp_pgds(void);
  
+void stage2_unmap_vm(struct kvm *kvm);
  int kvm_alloc_stage2_pgd(struct kvm *kvm);
  void kvm_free_stage2_pgd(struct kvm *kvm);
  int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -161,9 +162,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
  }
  
  static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
-                                            unsigned long size)
+                                            unsigned long size,
+                                            bool ipa_uncached)
  {
-       if (!vcpu_has_cache_enabled(vcpu))
+       if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
                 kvm_flush_dcache_to_poc((void *)hva, size);
         
         /*
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c

index 9e193c8a959eaa1492838ae2913548aa7c6a80de..2d6d91001062f975981dd9beed2b43815f73c238 100644 (file)
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -213,6 +213,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
         int err;
         struct kvm_vcpu *vcpu;
  
+       if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
+               err = -EBUSY;
+               goto out;
+       }
+
         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
         if (!vcpu) {
                 err = -ENOMEM;
@@ -263,6 +268,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
  {
         /* Force users to call KVM_ARM_VCPU_INIT */
         vcpu->arch.target = -1;
+       bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
  
         /* Set up the timer */
         kvm_timer_vcpu_init(vcpu);
@@ -419,6 +425,7 @@ static void update_vttbr(struct kvm *kvm)
  
  static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
  {
+       struct kvm *kvm = vcpu->kvm;
         int ret;
  
         if (likely(vcpu->arch.has_run_once))
@@ -427,15 +434,23 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
         vcpu->arch.has_run_once = true;
  
         /*
-        * Initialize the VGIC before running a vcpu the first time on
-        * this VM.
+        * Map the VGIC hardware resources before running a vcpu the first
+        * time on this VM.
          */
-       if (unlikely(!vgic_initialized(vcpu->kvm))) {
-               ret = kvm_vgic_init(vcpu->kvm);
+       if (unlikely(!vgic_ready(kvm))) {
+               ret = kvm_vgic_map_resources(kvm);
                 if (ret)
                         return ret;
         }
  
+       /*
+        * Enable the arch timers only if we have an in-kernel VGIC
+        * and it has been properly initialized, since we cannot handle
+        * interrupts from the virtual timer with a userspace gic.
+        */
+       if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
+               kvm_timer_enable(kvm);
+
         return 0;
  }
  
@@ -649,6 +664,48 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
         return -EINVAL;
  }
  
+static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+                              const struct kvm_vcpu_init *init)
+{
+       unsigned int i;
+       int phys_target = kvm_target_cpu();
+
+       if (init->target != phys_target)
+               return -EINVAL;
+
+       /*
+        * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+        * use the same target.
+        */
+       if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
+               return -EINVAL;
+
+       /* -ENOENT for unknown features, -EINVAL for invalid combinations. */
+       for (i = 0; i < sizeof(init->features) * 8; i++) {
+               bool set = (init->features[i / 32] & (1 << (i % 32)));
+
+               if (set && i >= KVM_VCPU_MAX_FEATURES)
+                       return -ENOENT;
+
+               /*
+                * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+                * use the same feature set.
+                */
+               if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
+                   test_bit(i, vcpu->arch.features) != set)
+                       return -EINVAL;
+
+               if (set)
+                       set_bit(i, vcpu->arch.features);
+       }
+
+       vcpu->arch.target = phys_target;
+
+       /* Now we know what it is, we can reset it. */
+       return kvm_reset_vcpu(vcpu);
+}
+
+
  static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
                                          struct kvm_vcpu_init *init)
  {
@@ -658,11 +715,22 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
         if (ret)
                 return ret;
  
+       /*
+        * Ensure a rebooted VM will fault in RAM pages and detect if the
+        * guest MMU is turned off and flush the caches as needed.
+        */
+       if (vcpu->arch.has_run_once)
+               stage2_unmap_vm(vcpu->kvm);
+
+       vcpu_reset_hcr(vcpu);
+
         /*
          * Handle the "start in power-off" case by marking the VCPU as paused.
          */
-       if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+       if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
                 vcpu->arch.pause = true;
+       else
+               vcpu->arch.pause = false;
  
         return 0;
  }
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c

index cc0b78769bd8ab40e5237b88aaedb14068f767e0..384bab67c4629a9bece251d5577c9bf908f8348f 100644 (file)
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
  
  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
  {
-       vcpu->arch.hcr = HCR_GUEST_MASK;
         return 0;
  }
  
@@ -274,31 +273,6 @@ int __attribute_const__ kvm_target_cpu(void)
         }
  }
  
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-                       const struct kvm_vcpu_init *init)
-{
-       unsigned int i;
-
-       /* We can only cope with guest==host and only on A15/A7 (for now). */
-       if (init->target != kvm_target_cpu())
-               return -EINVAL;
-
-       vcpu->arch.target = init->target;
-       bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
-
-       /* -ENOENT for unknown features, -EINVAL for invalid combinations. */
-       for (i = 0; i < sizeof(init->features) * 8; i++) {
-               if (test_bit(i, (void *)init->features)) {
-                       if (i >= KVM_VCPU_MAX_FEATURES)
-                               return -ENOENT;
-                       set_bit(i, vcpu->arch.features);
-               }
-       }
-
-       /* Now we know what it is, we can reset it. */
-       return kvm_reset_vcpu(vcpu);
-}
-
  int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
  {
         int target = kvm_target_cpu();
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c

index 4cb5a93182e9283f78f5ddd7c6b51c38a51c783b..5d3bfc0eb3f000cb41cb217eb7fdc2611da85fc9 100644 (file)
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -187,15 +187,18 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
         }
  
         rt = vcpu->arch.mmio_decode.rt;
-       data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len);
  
-       trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :
-                                        KVM_TRACE_MMIO_READ_UNSATISFIED,
-                       mmio.len, fault_ipa,
-                       (mmio.is_write) ? data : 0);
+       if (mmio.is_write) {
+               data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt),
+                                              mmio.len);
  
-       if (mmio.is_write)
+               trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len,
+                              fault_ipa, data);
                 mmio_write_buf(mmio.data, mmio.len, data);
+       } else {
+               trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len,
+                              fault_ipa, 0);
+       }
  
         if (vgic_handle_mmio(vcpu, run, &mmio))
                 return 1;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c

index 8664ff17cbbeaf531b03174e1524cc00a6e86849..1dc9778a00af358431bbed021ba2d5244642debb 100644 (file)
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -612,6 +612,71 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
         unmap_range(kvm, kvm->arch.pgd, start, size);
  }
  
+static void stage2_unmap_memslot(struct kvm *kvm,
+                                struct kvm_memory_slot *memslot)
+{
+       hva_t hva = memslot->userspace_addr;
+       phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+       phys_addr_t size = PAGE_SIZE * memslot->npages;
+       hva_t reg_end = hva + size;
+
+       /*
+        * A memory region could potentially cover multiple VMAs, and any holes
+        * between them, so iterate over all of them to find out if we should
+        * unmap any of them.
+        *
+        *     +--------------------------------------------+
+        * +---------------+----------------+   +----------------+
+        * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
+        * +---------------+----------------+   +----------------+
+        *     |               memory region                |
+        *     +--------------------------------------------+
+        */
+       do {
+               struct vm_area_struct *vma = find_vma(current->mm, hva);
+               hva_t vm_start, vm_end;
+
+               if (!vma || vma->vm_start >= reg_end)
+                       break;
+
+               /*
+                * Take the intersection of this VMA with the memory region
+                */
+               vm_start = max(hva, vma->vm_start);
+               vm_end = min(reg_end, vma->vm_end);
+
+               if (!(vma->vm_flags & VM_PFNMAP)) {
+                       gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
+                       unmap_stage2_range(kvm, gpa, vm_end - vm_start);
+               }
+               hva = vm_end;
+       } while (hva < reg_end);
+}
+
+/**
+ * stage2_unmap_vm - Unmap Stage-2 RAM mappings
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the memregions and unmap any regular RAM
+ * backing memory already mapped to the VM.
+ */
+void stage2_unmap_vm(struct kvm *kvm)
+{
+       struct kvm_memslots *slots;
+       struct kvm_memory_slot *memslot;
+       int idx;
+
+       idx = srcu_read_lock(&kvm->srcu);
+       spin_lock(&kvm->mmu_lock);
+
+       slots = kvm_memslots(kvm);
+       kvm_for_each_memslot(memslot, slots)
+               stage2_unmap_memslot(kvm, memslot);
+
+       spin_unlock(&kvm->mmu_lock);
+       srcu_read_unlock(&kvm->srcu, idx);
+}
+
  /**
   * kvm_free_stage2_pgd - free all stage-2 tables
   * @kvm:       The KVM struct pointer for the VM.
@@ -853,6 +918,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         struct vm_area_struct *vma;
         pfn_t pfn;
         pgprot_t mem_type = PAGE_S2;
+       bool fault_ipa_uncached;
  
         write_fault = kvm_is_write_fault(vcpu);
         if (fault_status == FSC_PERM && !write_fault) {
@@ -919,6 +985,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         if (!hugetlb && !force_pte)
                 hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
  
+       fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT;
+
         if (hugetlb) {
                 pmd_t new_pmd = pfn_pmd(pfn, mem_type);
                 new_pmd = pmd_mkhuge(new_pmd);
@@ -926,7 +994,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                         kvm_set_s2pmd_writable(&new_pmd);
                         kvm_set_pfn_dirty(pfn);
                 }
-               coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
+               coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE,
+                                         fault_ipa_uncached);
                 ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
         } else {
                 pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -934,7 +1003,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                         kvm_set_s2pte_writable(&new_pte);
                         kvm_set_pfn_dirty(pfn);
                 }
-               coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
+               coherent_cache_guest_page(vcpu, hva, PAGE_SIZE,
+                                         fault_ipa_uncached);
                 ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
                         pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
         }
@@ -1294,11 +1364,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                 hva = vm_end;
         } while (hva < reg_end);
  
-       if (ret) {
-               spin_lock(&kvm->mmu_lock);
+       spin_lock(&kvm->mmu_lock);
+       if (ret)
                 unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
-               spin_unlock(&kvm->mmu_lock);
-       }
+       else
+               stage2_flush_memslot(kvm, memslot);
+       spin_unlock(&kvm->mmu_lock);
         return ret;
  }
  
@@ -1310,6 +1381,15 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
  int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
                             unsigned long npages)
  {
+       /*
+        * Readonly memslots are not incoherent with the caches by definition,
+        * but in practice, they are used mostly to emulate ROMs or NOR flashes
+        * that the guest may consider devices and hence map as uncached.
+        * To prevent incoherency issues in these cases, tag all readonly
+        * regions as incoherent.
+        */
+       if (slot->flags & KVM_MEM_READONLY)
+               slot->flags |= KVM_MEMSLOT_INCOHERENT;
         return 0;
  }
  
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c

index 09cf37737ee2ad24bda1251541689ea2f8bdb535..58cb3248d277bc03a6b38850a558186a34606205 100644 (file)
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -15,6 +15,7 @@
   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
   */
  
+#include <linux/preempt.h>
  #include <linux/kvm_host.h>
  #include <linux/wait.h>
  
@@ -166,6 +167,23 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
  
  static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
  {
+       int i;
+       struct kvm_vcpu *tmp;
+
+       /*
+        * The KVM ABI specifies that a system event exit may call KVM_RUN
+        * again and may perform shutdown/reboot at a later time than when the
+        * actual request is made.  Since we are implementing PSCI and a
+        * caller of PSCI reboot and shutdown expects that the system shuts
+        * down or reboots immediately, let's make sure that VCPUs are not run
+        * after this call is handled and before the VCPUs have been
+        * re-initialized.
+        */
+       kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+               tmp->arch.pause = true;
+               kvm_vcpu_kick(tmp);
+       }
+
         memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
         vcpu->run->system_event.type = type;
         vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h

index 5674a55b551824bcdb4dfd7f26ae45cb3b6c8836..8127e45e263752821c833d1c354a8033372b2a47 100644 (file)
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -38,6 +38,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
  void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
  void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
  
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+}
+
  static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
  {
         return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h

index 2012c4ba8d67bf15c484048b4a0fe5b68f8e1b94..0b7dfdb931dff6f9610df181015ce7ecc1a7f16a 100644 (file)
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -165,8 +165,6 @@ struct kvm_vcpu_stat {
         u32 halt_wakeup;
  };
  
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-                       const struct kvm_vcpu_init *init);
  int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
  unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
  int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
@@ -200,6 +198,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
  struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
  
  u64 kvm_call_hyp(void *hypfn, ...);
+void force_vm_exit(const cpumask_t *mask);
  
  int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
                 int exception_index);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h

index 0caf7a59f6a191327f9f662a67cb94e638426e25..14a74f136272b94852d86901ae75c4f329ee2ee3 100644 (file)
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -83,6 +83,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
  void free_boot_hyp_pgd(void);
  void free_hyp_pgds(void);
  
+void stage2_unmap_vm(struct kvm *kvm);
  int kvm_alloc_stage2_pgd(struct kvm *kvm);
  void kvm_free_stage2_pgd(struct kvm *kvm);
  int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -243,9 +244,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
  }
  
  static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
-                                            unsigned long size)
+                                            unsigned long size,
+                                            bool ipa_uncached)
  {
-       if (!vcpu_has_cache_enabled(vcpu))
+       if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
                 kvm_flush_dcache_to_poc((void *)hva, size);
  
         if (!icache_is_aliasing()) {            /* PIPT */
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c

index 76794692c20b9463717430f01b66b2e73f36bc23..9535bd555d1d47cf190f78306eb879484d70a261 100644 (file)
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
  
  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
  {
-       vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
         return 0;
  }
  
@@ -297,31 +296,6 @@ int __attribute_const__ kvm_target_cpu(void)
         return -EINVAL;
  }
  
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-                       const struct kvm_vcpu_init *init)
-{
-       unsigned int i;
-       int phys_target = kvm_target_cpu();
-
-       if (init->target != phys_target)
-               return -EINVAL;
-
-       vcpu->arch.target = phys_target;
-       bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
-
-       /* -ENOENT for unknown features, -EINVAL for invalid combinations. */
-       for (i = 0; i < sizeof(init->features) * 8; i++) {
-               if (init->features[i / 32] & (1 << (i % 32))) {
-                       if (i >= KVM_VCPU_MAX_FEATURES)
-                               return -ENOENT;
-                       set_bit(i, vcpu->arch.features);
-               }
-       }
-
-       /* Now we know what it is, we can reset it. */
-       return kvm_reset_vcpu(vcpu);
-}
-
  int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
  {
         int target = kvm_target_cpu();
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig

index 536d13b0bea69d7dc1465ce6c5d75766ef4cefa9..371b55bc5a6ed9c07b582dd124f8d3195bfd9814 100644 (file)
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -20,7 +20,6 @@ config IA64
         select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
         select HAVE_FUNCTION_TRACER
         select HAVE_DMA_ATTRS
-       select HAVE_KVM
         select TTY
         select HAVE_ARCH_TRACEHOOK
         select HAVE_DMA_API_DEBUG
@@ -640,8 +639,6 @@ source "security/Kconfig"
  
  source "crypto/Kconfig"
  
-source "arch/ia64/kvm/Kconfig"
-
  source "lib/Kconfig"
  
  config IOMMU_HELPER
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile

index 5441b14994fccf46ebb6d6926186800b5ae11272..970d0bd99621b32eef163debccd8c2d41a970250 100644 (file)
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -53,7 +53,6 @@ core-$(CONFIG_IA64_HP_ZX1)    += arch/ia64/dig/
  core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
  core-$(CONFIG_IA64_SGI_SN2)    += arch/ia64/sn/
  core-$(CONFIG_IA64_SGI_UV)     += arch/ia64/uv/
-core-$(CONFIG_KVM)             += arch/ia64/kvm/
  
  drivers-$(CONFIG_PCI)          += arch/ia64/pci/
  drivers-$(CONFIG_IA64_HP_SIM)  += arch/ia64/hp/sim/
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h

deleted file mode 100644 (file)

index 4729752..0000000
--- a/arch/ia64/include/asm/kvm_host.h
+++ /dev/null
@@ -1,609 +0,0 @@
-/*
- * kvm_host.h: used for kvm module, and hold ia64-specific sections.
- *
- * Copyright (C) 2007, Intel Corporation.
- *
- * Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#ifndef __ASM_KVM_HOST_H
-#define __ASM_KVM_HOST_H
-
-#define KVM_USER_MEM_SLOTS 32
-
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
-#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
-
-/* define exit reasons from vmm to kvm*/
-#define EXIT_REASON_VM_PANIC           0
-#define EXIT_REASON_MMIO_INSTRUCTION   1
-#define EXIT_REASON_PAL_CALL           2
-#define EXIT_REASON_SAL_CALL           3
-#define EXIT_REASON_SWITCH_RR6         4
-#define EXIT_REASON_VM_DESTROY         5
-#define EXIT_REASON_EXTERNAL_INTERRUPT 6
-#define EXIT_REASON_IPI                        7
-#define EXIT_REASON_PTC_G              8
-#define EXIT_REASON_DEBUG              20
-
-/*Define vmm address space and vm data space.*/
-#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
-#define KVM_VMM_SHIFT 24
-#define KVM_VMM_BASE 0xD000000000000000
-#define VMM_SIZE (__IA64_UL_CONST(8)<<20)
-
-/*
- * Define vm_buffer, used by PAL Services, base address.
- * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M
- */
-#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
-#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20)
-
-/*
- * kvm guest's data area looks as follow:
- *
- *            +----------------------+ ------- KVM_VM_DATA_SIZE
- *           |     vcpu[n]'s data   |   |     ___________________KVM_STK_OFFSET
- *                   |                      |   |    /                   |
- *                   |        ..........    |   |   /vcpu's struct&stack |
- *                   |        ..........    |   |  /---------------------|---- 0
- *           |     vcpu[5]'s data   |   | /       vpd            |
- *           |     vcpu[4]'s data   |   |/-----------------------|
- *           |     vcpu[3]'s data   |   /         vtlb           |
- *           |     vcpu[2]'s data   |  /|------------------------|
- *           |     vcpu[1]'s data   |/  |         vhpt           |
- *           |     vcpu[0]'s data   |____________________________|
- *            +----------------------+  |
- *           |    memory dirty log  |   |
- *            +----------------------+  |
- *           |    vm's data struct  |   |
- *            +----------------------+  |
- *           |                      |   |
- *           |                      |   |
- *           |                      |   |
- *           |                      |   |
- *           |                      |   |
- *           |                      |   |
- *           |                      |   |
- *           |   vm's p2m table  |      |
- *           |                      |   |
- *            |                             |   |
- *           |                      |   |  |
- * vm's data->|                             |   |  |
- *           +----------------------+ ------- 0
- * To support large memory, needs to increase the size of p2m.
- * To support more vcpus, needs to ensure it has enough space to
- * hold vcpus' data.
- */
-
-#define KVM_VM_DATA_SHIFT      26
-#define KVM_VM_DATA_SIZE       (__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT)
-#define KVM_VM_DATA_BASE       (KVM_VMM_BASE + KVM_VM_DATA_SIZE)
-
-#define KVM_P2M_BASE           KVM_VM_DATA_BASE
-#define KVM_P2M_SIZE           (__IA64_UL_CONST(24) << 20)
-
-#define VHPT_SHIFT             16
-#define VHPT_SIZE              (__IA64_UL_CONST(1) << VHPT_SHIFT)
-#define VHPT_NUM_ENTRIES       (__IA64_UL_CONST(1) << (VHPT_SHIFT-5))
-
-#define VTLB_SHIFT             16
-#define VTLB_SIZE              (__IA64_UL_CONST(1) << VTLB_SHIFT)
-#define VTLB_NUM_ENTRIES       (1UL << (VHPT_SHIFT-5))
-
-#define VPD_SHIFT              16
-#define VPD_SIZE               (__IA64_UL_CONST(1) << VPD_SHIFT)
-
-#define VCPU_STRUCT_SHIFT      16
-#define VCPU_STRUCT_SIZE       (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
-
-/*
- * This must match KVM_IA64_VCPU_STACK_{SHIFT,SIZE} arch/ia64/include/asm/kvm.h
- */
-#define KVM_STK_SHIFT          16
-#define KVM_STK_OFFSET         (__IA64_UL_CONST(1)<< KVM_STK_SHIFT)
-
-#define KVM_VM_STRUCT_SHIFT    19
-#define KVM_VM_STRUCT_SIZE     (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
-
-#define KVM_MEM_DIRY_LOG_SHIFT 19
-#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT)
-
-#ifndef __ASSEMBLY__
-
-/*Define the max vcpus and memory for Guests.*/
-#define KVM_MAX_VCPUS  (KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\
-                       KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
-#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
-
-#define VMM_LOG_LEN 256
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/kvm.h>
-#include <linux/kvm_para.h>
-#include <linux/kvm_types.h>
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-#include <asm/page.h>
-
-struct kvm_vcpu_data {
-       char vcpu_vhpt[VHPT_SIZE];
-       char vcpu_vtlb[VTLB_SIZE];
-       char vcpu_vpd[VPD_SIZE];
-       char vcpu_struct[VCPU_STRUCT_SIZE];
-};
-
-struct kvm_vm_data {
-       char kvm_p2m[KVM_P2M_SIZE];
-       char kvm_vm_struct[KVM_VM_STRUCT_SIZE];
-       char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE];
-       struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
-};
-
-#define VCPU_BASE(n)   (KVM_VM_DATA_BASE + \
-                               offsetof(struct kvm_vm_data, vcpu_data[n]))
-#define KVM_VM_BASE    (KVM_VM_DATA_BASE + \
-                               offsetof(struct kvm_vm_data, kvm_vm_struct))
-#define KVM_MEM_DIRTY_LOG_BASE KVM_VM_DATA_BASE + \
-                               offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
-
-#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt))
-#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb))
-#define VPD_BASE(n)  (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd))
-#define VCPU_STRUCT_BASE(n)    (VCPU_BASE(n) + \
-                               offsetof(struct kvm_vcpu_data, vcpu_struct))
-
-/*IO section definitions*/
-#define IOREQ_READ      1
-#define IOREQ_WRITE     0
-
-#define STATE_IOREQ_NONE        0
-#define STATE_IOREQ_READY       1
-#define STATE_IOREQ_INPROCESS   2
-#define STATE_IORESP_READY      3
-
-/*Guest Physical address layout.*/
-#define GPFN_MEM        (0UL << 60) /* Guest pfn is normal mem */
-#define GPFN_FRAME_BUFFER   (1UL << 60) /* VGA framebuffer */
-#define GPFN_LOW_MMIO       (2UL << 60) /* Low MMIO range */
-#define GPFN_PIB        (3UL << 60) /* PIB base */
-#define GPFN_IOSAPIC        (4UL << 60) /* IOSAPIC base */
-#define GPFN_LEGACY_IO      (5UL << 60) /* Legacy I/O base */
-#define GPFN_GFW        (6UL << 60) /* Guest Firmware */
-#define GPFN_PHYS_MMIO      (7UL << 60) /* Directed MMIO Range */
-
-#define GPFN_IO_MASK        (7UL << 60) /* Guest pfn is I/O type */
-#define GPFN_INV_MASK       (1UL << 63) /* Guest pfn is invalid */
-#define INVALID_MFN       (~0UL)
-#define MEM_G   (1UL << 30)
-#define MEM_M   (1UL << 20)
-#define MMIO_START       (3 * MEM_G)
-#define MMIO_SIZE        (512 * MEM_M)
-#define VGA_IO_START     0xA0000UL
-#define VGA_IO_SIZE      0x20000
-#define LEGACY_IO_START  (MMIO_START + MMIO_SIZE)
-#define LEGACY_IO_SIZE   (64 * MEM_M)
-#define IO_SAPIC_START   0xfec00000UL
-#define IO_SAPIC_SIZE    0x100000
-#define PIB_START 0xfee00000UL
-#define PIB_SIZE 0x200000
-#define GFW_START        (4 * MEM_G - 16 * MEM_M)
-#define GFW_SIZE         (16 * MEM_M)
-
-/*Deliver mode, defined for ioapic.c*/
-#define dest_Fixed IOSAPIC_FIXED
-#define dest_LowestPrio IOSAPIC_LOWEST_PRIORITY
-
-#define NMI_VECTOR                     2
-#define ExtINT_VECTOR                  0
-#define NULL_VECTOR                    (-1)
-#define IA64_SPURIOUS_INT_VECTOR       0x0f
-
-#define VCPU_LID(v) (((u64)(v)->vcpu_id) << 24)
-
-/*
- *Delivery mode
- */
-#define SAPIC_DELIV_SHIFT      8
-#define SAPIC_FIXED            0x0
-#define SAPIC_LOWEST_PRIORITY  0x1
-#define SAPIC_PMI              0x2
-#define SAPIC_NMI              0x4
-#define SAPIC_INIT             0x5
-#define SAPIC_EXTINT           0x7
-
-/*
- * vcpu->requests bit members for arch
- */
-#define KVM_REQ_PTC_G          32
-#define KVM_REQ_RESUME         33
-
-struct kvm_mmio_req {
-       uint64_t addr;          /*  physical address            */
-       uint64_t size;          /*  size in bytes               */
-       uint64_t data;          /*  data (or paddr of data)     */
-       uint8_t state:4;
-       uint8_t dir:1;          /*  1=read, 0=write             */
-};
-
-/*Pal data struct */
-struct kvm_pal_call{
-       /*In area*/
-       uint64_t gr28;
-       uint64_t gr29;
-       uint64_t gr30;
-       uint64_t gr31;
-       /*Out area*/
-       struct ia64_pal_retval ret;
-};
-
-/* Sal data structure */
-struct kvm_sal_call{
-       /*In area*/
-       uint64_t in0;
-       uint64_t in1;
-       uint64_t in2;
-       uint64_t in3;
-       uint64_t in4;
-       uint64_t in5;
-       uint64_t in6;
-       uint64_t in7;
-       struct sal_ret_values ret;
-};
-
-/*Guest change rr6*/
-struct kvm_switch_rr6 {
-       uint64_t old_rr;
-       uint64_t new_rr;
-};
-
-union ia64_ipi_a{
-       unsigned long val;
-       struct {
-               unsigned long rv  : 3;
-               unsigned long ir  : 1;
-               unsigned long eid : 8;
-               unsigned long id  : 8;
-               unsigned long ib_base : 44;
-       };
-};
-
-union ia64_ipi_d {
-       unsigned long val;
-       struct {
-               unsigned long vector : 8;
-               unsigned long dm  : 3;
-               unsigned long ig  : 53;
-       };
-};
-
-/*ipi check exit data*/
-struct kvm_ipi_data{
-       union ia64_ipi_a addr;
-       union ia64_ipi_d data;
-};
-
-/*global purge data*/
-struct kvm_ptc_g {
-       unsigned long vaddr;
-       unsigned long rr;
-       unsigned long ps;
-       struct kvm_vcpu *vcpu;
-};
-
-/*Exit control data */
-struct exit_ctl_data{
-       uint32_t exit_reason;
-       uint32_t vm_status;
-       union {
-               struct kvm_mmio_req     ioreq;
-               struct kvm_pal_call     pal_data;
-               struct kvm_sal_call     sal_data;
-               struct kvm_switch_rr6   rr_data;
-               struct kvm_ipi_data     ipi_data;
-               struct kvm_ptc_g        ptc_g_data;
-       } u;
-};
-
-union pte_flags {
-       unsigned long val;
-       struct {
-               unsigned long p    :  1; /*0      */
-               unsigned long      :  1; /* 1     */
-               unsigned long ma   :  3; /* 2-4   */
-               unsigned long a    :  1; /* 5     */
-               unsigned long d    :  1; /* 6     */
-               unsigned long pl   :  2; /* 7-8   */
-               unsigned long ar   :  3; /* 9-11  */
-               unsigned long ppn  : 38; /* 12-49 */
-               unsigned long      :  2; /* 50-51 */
-               unsigned long ed   :  1; /* 52    */
-       };
-};
-
-union ia64_pta {
-       unsigned long val;
-       struct {
-               unsigned long ve : 1;
-               unsigned long reserved0 : 1;
-               unsigned long size : 6;
-               unsigned long vf : 1;
-               unsigned long reserved1 : 6;
-               unsigned long base : 49;
-       };
-};
-
-struct thash_cb {
-       /* THASH base information */
-       struct thash_data       *hash; /* hash table pointer */
-       union ia64_pta          pta;
-       int           num;
-};
-
-struct kvm_vcpu_stat {
-       u32 halt_wakeup;
-};
-
-struct kvm_vcpu_arch {
-       int launched;
-       int last_exit;
-       int last_run_cpu;
-       int vmm_tr_slot;
-       int vm_tr_slot;
-       int sn_rtc_tr_slot;
-
-#define KVM_MP_STATE_RUNNABLE          0
-#define KVM_MP_STATE_UNINITIALIZED     1
-#define KVM_MP_STATE_INIT_RECEIVED     2
-#define KVM_MP_STATE_HALTED            3
-       int mp_state;
-
-#define MAX_PTC_G_NUM                  3
-       int ptc_g_count;
-       struct kvm_ptc_g ptc_g_data[MAX_PTC_G_NUM];
-
-       /*halt timer to wake up sleepy vcpus*/
-       struct hrtimer hlt_timer;
-       long ht_active;
-
-       struct kvm_lapic *apic;    /* kernel irqchip context */
-       struct vpd *vpd;
-
-       /* Exit data for vmm_transition*/
-       struct exit_ctl_data exit_data;
-
-       cpumask_t cache_coherent_map;
-
-       unsigned long vmm_rr;
-       unsigned long host_rr6;
-       unsigned long psbits[8];
-       unsigned long cr_iipa;
-       unsigned long cr_isr;
-       unsigned long vsa_base;
-       unsigned long dirty_log_lock_pa;
-       unsigned long __gp;
-       /* TR and TC.  */
-       struct thash_data itrs[NITRS];
-       struct thash_data dtrs[NDTRS];
-       /* Bit is set if there is a tr/tc for the region.  */
-       unsigned char itr_regions;
-       unsigned char dtr_regions;
-       unsigned char tc_regions;
-       /* purge all */
-       unsigned long ptce_base;
-       unsigned long ptce_count[2];
-       unsigned long ptce_stride[2];
-       /* itc/itm */
-       unsigned long last_itc;
-       long itc_offset;
-       unsigned long itc_check;
-       unsigned long timer_check;
-       unsigned int timer_pending;
-       unsigned int timer_fired;
-
-       unsigned long vrr[8];
-       unsigned long ibr[8];
-       unsigned long dbr[8];
-       unsigned long insvc[4];         /* Interrupt in service.  */
-       unsigned long xtp;
-
-       unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */
-       unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */
-       unsigned long metaphysical_saved_rr0; /* from kvm_arch          */
-       unsigned long metaphysical_saved_rr4; /* from kvm_arch          */
-       unsigned long fp_psr;       /*used for lazy float register */
-       unsigned long saved_gp;
-       /*for phycial  emulation */
-       int mode_flags;
-       struct thash_cb vtlb;
-       struct thash_cb vhpt;
-       char irq_check;
-       char irq_new_pending;
-
-       unsigned long opcode;
-       unsigned long cause;
-       char log_buf[VMM_LOG_LEN];
-       union context host;
-       union context guest;
-
-       char mmio_data[8];
-};
-
-struct kvm_vm_stat {
-       u64 remote_tlb_flush;
-};
-
-struct kvm_sal_data {
-       unsigned long boot_ip;
-       unsigned long boot_gp;
-};
-
-struct kvm_arch_memory_slot {
-};
-
-struct kvm_arch {
-       spinlock_t dirty_log_lock;
-
-       unsigned long   vm_base;
-       unsigned long   metaphysical_rr0;
-       unsigned long   metaphysical_rr4;
-       unsigned long   vmm_init_rr;
-
-       int             is_sn2;
-
-       struct kvm_ioapic *vioapic;
-       struct kvm_vm_stat stat;
-       struct kvm_sal_data rdv_sal_data;
-
-       struct list_head assigned_dev_head;
-       struct iommu_domain *iommu_domain;
-       bool iommu_noncoherent;
-
-       unsigned long irq_sources_bitmap;
-       unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
-};
-
-union cpuid3_t {
-       u64 value;
-       struct {
-               u64 number : 8;
-               u64 revision : 8;
-               u64 model : 8;
-               u64 family : 8;
-               u64 archrev : 8;
-               u64 rv : 24;
-       };
-};
-
-struct kvm_pt_regs {
-       /* The following registers are saved by SAVE_MIN: */
-       unsigned long b6;  /* scratch */
-       unsigned long b7;  /* scratch */
-
-       unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */
-       unsigned long ar_ssd; /* reserved for future use (scratch) */
-
-       unsigned long r8;  /* scratch (return value register 0) */
-       unsigned long r9;  /* scratch (return value register 1) */
-       unsigned long r10; /* scratch (return value register 2) */
-       unsigned long r11; /* scratch (return value register 3) */
-
-       unsigned long cr_ipsr; /* interrupted task's psr */
-       unsigned long cr_iip;  /* interrupted task's instruction pointer */
-       unsigned long cr_ifs;  /* interrupted task's function state */
-
-       unsigned long ar_unat; /* interrupted task's NaT register (preserved) */
-       unsigned long ar_pfs;  /* prev function state  */
-       unsigned long ar_rsc;  /* RSE configuration */
-       /* The following two are valid only if cr_ipsr.cpl > 0: */
-       unsigned long ar_rnat;  /* RSE NaT */
-       unsigned long ar_bspstore; /* RSE bspstore */
-
-       unsigned long pr;  /* 64 predicate registers (1 bit each) */
-       unsigned long b0;  /* return pointer (bp) */
-       unsigned long loadrs;  /* size of dirty partition << 16 */
-
-       unsigned long r1;  /* the gp pointer */
-       unsigned long r12; /* interrupted task's memory stack pointer */
-       unsigned long r13; /* thread pointer */
-
-       unsigned long ar_fpsr;  /* floating point status (preserved) */
-       unsigned long r15;  /* scratch */
-
-       /* The remaining registers are NOT saved for system calls.  */
-       unsigned long r14;  /* scratch */
-       unsigned long r2;  /* scratch */
-       unsigned long r3;  /* scratch */
-       unsigned long r16;  /* scratch */
-       unsigned long r17;  /* scratch */
-       unsigned long r18;  /* scratch */
-       unsigned long r19;  /* scratch */
-       unsigned long r20;  /* scratch */
-       unsigned long r21;  /* scratch */
-       unsigned long r22;  /* scratch */
-       unsigned long r23;  /* scratch */
-       unsigned long r24;  /* scratch */
-       unsigned long r25;  /* scratch */
-       unsigned long r26;  /* scratch */
-       unsigned long r27;  /* scratch */
-       unsigned long r28;  /* scratch */
-       unsigned long r29;  /* scratch */
-       unsigned long r30;  /* scratch */
-       unsigned long r31;  /* scratch */
-       unsigned long ar_ccv;  /* compare/exchange value (scratch) */
-
-       /*
-        * Floating point registers that the kernel considers scratch:
-        */
-       struct ia64_fpreg f6;  /* scratch */
-       struct ia64_fpreg f7;  /* scratch */
-       struct ia64_fpreg f8;  /* scratch */
-       struct ia64_fpreg f9;  /* scratch */
-       struct ia64_fpreg f10;  /* scratch */
-       struct ia64_fpreg f11;  /* scratch */
-
-       unsigned long r4;  /* preserved */
-       unsigned long r5;  /* preserved */
-       unsigned long r6;  /* preserved */
-       unsigned long r7;  /* preserved */
-       unsigned long eml_unat;    /* used for emulating instruction */
-       unsigned long pad0;     /* alignment pad */
-};
-
-static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v)
-{
-       return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1;
-}
-
-typedef int kvm_vmm_entry(void);
-typedef void kvm_tramp_entry(union context *host, union context *guest);
-
-struct kvm_vmm_info{
-       struct module   *module;
-       kvm_vmm_entry   *vmm_entry;
-       kvm_tramp_entry *tramp_entry;
-       unsigned long   vmm_ivt;
-       unsigned long   patch_mov_ar;
-       unsigned long   patch_mov_ar_sn2;
-};
-
-int kvm_highest_pending_irq(struct kvm_vcpu *vcpu);
-int kvm_emulate_halt(struct kvm_vcpu *vcpu);
-int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
-void kvm_sal_emul(struct kvm_vcpu *vcpu);
-
-#define __KVM_HAVE_ARCH_VM_ALLOC 1
-struct kvm *kvm_arch_alloc_vm(void);
-void kvm_arch_free_vm(struct kvm *kvm);
-
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_free_memslot(struct kvm *kvm,
-               struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
-static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
-static inline void kvm_arch_commit_memory_region(struct kvm *kvm,
-               struct kvm_userspace_memory_region *mem,
-               const struct kvm_memory_slot *old,
-               enum kvm_mr_change change) {}
-static inline void kvm_arch_hardware_unsetup(void) {}
-
-#endif /* __ASSEMBLY__*/
-
-#endif
diff --git a/arch/ia64/include/asm/pvclock-abi.h b/arch/ia64/include/asm/pvclock-abi.h

deleted file mode 100644 (file)

index 42b233b..0000000
--- a/arch/ia64/include/asm/pvclock-abi.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * same structure to x86's
- * Hopefully asm-x86/pvclock-abi.h would be moved to somewhere more generic.
- * For now, define same duplicated definitions.
- */
-
-#ifndef _ASM_IA64__PVCLOCK_ABI_H
-#define _ASM_IA64__PVCLOCK_ABI_H
-#ifndef __ASSEMBLY__
-
-/*
- * These structs MUST NOT be changed.
- * They are the ABI between hypervisor and guest OS.
- * KVM is using this.
- *
- * pvclock_vcpu_time_info holds the system time and the tsc timestamp
- * of the last update. So the guest can use the tsc delta to get a
- * more precise system time.  There is one per virtual cpu.
- *
- * pvclock_wall_clock references the point in time when the system
- * time was zero (usually boot time), thus the guest calculates the
- * current wall clock by adding the system time.
- *
- * Protocol for the "version" fields is: hypervisor raises it (making
- * it uneven) before it starts updating the fields and raises it again
- * (making it even) when it is done.  Thus the guest can make sure the
- * time values it got are consistent by checking the version before
- * and after reading them.
- */
-
-struct pvclock_vcpu_time_info {
-       u32   version;
-       u32   pad0;
-       u64   tsc_timestamp;
-       u64   system_time;
-       u32   tsc_to_system_mul;
-       s8    tsc_shift;
-       u8    pad[3];
-} __attribute__((__packed__)); /* 32 bytes */
-
-struct pvclock_wall_clock {
-       u32   version;
-       u32   sec;
-       u32   nsec;
-} __attribute__((__packed__));
-
-#endif /* __ASSEMBLY__ */
-#endif /* _ASM_IA64__PVCLOCK_ABI_H */
diff --git a/arch/ia64/include/uapi/asm/kvm.h b/arch/ia64/include/uapi/asm/kvm.h

deleted file mode 100644 (file)

index 99503c2..0000000
--- a/arch/ia64/include/uapi/asm/kvm.h
+++ /dev/null
@@ -1,268 +0,0 @@
-#ifndef __ASM_IA64_KVM_H
-#define __ASM_IA64_KVM_H
-
-/*
- * kvm structure definitions  for ia64
- *
- * Copyright (C) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-
-/* Select x86 specific features in <linux/kvm.h> */
-#define __KVM_HAVE_IOAPIC
-#define __KVM_HAVE_IRQ_LINE
-
-/* Architectural interrupt line count. */
-#define KVM_NR_INTERRUPTS 256
-
-#define KVM_IOAPIC_NUM_PINS  48
-
-struct kvm_ioapic_state {
-       __u64 base_address;
-       __u32 ioregsel;
-       __u32 id;
-       __u32 irr;
-       __u32 pad;
-       union {
-               __u64 bits;
-               struct {
-                       __u8 vector;
-                       __u8 delivery_mode:3;
-                       __u8 dest_mode:1;
-                       __u8 delivery_status:1;
-                       __u8 polarity:1;
-                       __u8 remote_irr:1;
-                       __u8 trig_mode:1;
-                       __u8 mask:1;
-                       __u8 reserve:7;
-                       __u8 reserved[4];
-                       __u8 dest_id;
-               } fields;
-       } redirtbl[KVM_IOAPIC_NUM_PINS];
-};
-
-#define KVM_IRQCHIP_PIC_MASTER   0
-#define KVM_IRQCHIP_PIC_SLAVE    1
-#define KVM_IRQCHIP_IOAPIC       2
-#define KVM_NR_IRQCHIPS          3
-
-#define KVM_CONTEXT_SIZE       8*1024
-
-struct kvm_fpreg {
-       union {
-               unsigned long bits[2];
-               long double __dummy;    /* force 16-byte alignment */
-       } u;
-};
-
-union context {
-       /* 8K size */
-       char    dummy[KVM_CONTEXT_SIZE];
-       struct {
-               unsigned long       psr;
-               unsigned long       pr;
-               unsigned long       caller_unat;
-               unsigned long       pad;
-               unsigned long       gr[32];
-               unsigned long       ar[128];
-               unsigned long       br[8];
-               unsigned long       cr[128];
-               unsigned long       rr[8];
-               unsigned long       ibr[8];
-               unsigned long       dbr[8];
-               unsigned long       pkr[8];
-               struct kvm_fpreg   fr[128];
-       };
-};
-
-struct thash_data {
-       union {
-               struct {
-                       unsigned long p    :  1; /* 0 */
-                       unsigned long rv1  :  1; /* 1 */
-                       unsigned long ma   :  3; /* 2-4 */
-                       unsigned long a    :  1; /* 5 */
-                       unsigned long d    :  1; /* 6 */
-                       unsigned long pl   :  2; /* 7-8 */
-                       unsigned long ar   :  3; /* 9-11 */
-                       unsigned long ppn  : 38; /* 12-49 */
-                       unsigned long rv2  :  2; /* 50-51 */
-                       unsigned long ed   :  1; /* 52 */
-                       unsigned long ig1  : 11; /* 53-63 */
-               };
-               struct {
-                       unsigned long __rv1 : 53;     /* 0-52 */
-                       unsigned long contiguous : 1; /*53 */
-                       unsigned long tc : 1;         /* 54 TR or TC */
-                       unsigned long cl : 1;
-                       /* 55 I side or D side cache line */
-                       unsigned long len  :  4;      /* 56-59 */
-                       unsigned long io  : 1;  /* 60 entry is for io or not */
-                       unsigned long nomap : 1;
-                       /* 61 entry cann't be inserted into machine TLB.*/
-                       unsigned long checked : 1;
-                       /* 62 for VTLB/VHPT sanity check */
-                       unsigned long invalid : 1;
-                       /* 63 invalid entry */
-               };
-               unsigned long page_flags;
-       };                  /* same for VHPT and TLB */
-
-       union {
-               struct {
-                       unsigned long rv3  :  2;
-                       unsigned long ps   :  6;
-                       unsigned long key  : 24;
-                       unsigned long rv4  : 32;
-               };
-               unsigned long itir;
-       };
-       union {
-               struct {
-                       unsigned long ig2  :  12;
-                       unsigned long vpn  :  49;
-                       unsigned long vrn  :   3;
-               };
-               unsigned long ifa;
-               unsigned long vadr;
-               struct {
-                       unsigned long tag  :  63;
-                       unsigned long ti   :  1;
-               };
-               unsigned long etag;
-       };
-       union {
-               struct thash_data *next;
-               unsigned long rid;
-               unsigned long gpaddr;
-       };
-};
-
-#define        NITRS   8
-#define NDTRS  8
-
-struct saved_vpd {
-       unsigned long  vhpi;
-       unsigned long  vgr[16];
-       unsigned long  vbgr[16];
-       unsigned long  vnat;
-       unsigned long  vbnat;
-       unsigned long  vcpuid[5];
-       unsigned long  vpsr;
-       unsigned long  vpr;
-       union {
-               unsigned long  vcr[128];
-               struct {
-                       unsigned long dcr;
-                       unsigned long itm;
-                       unsigned long iva;
-                       unsigned long rsv1[5];
-                       unsigned long pta;
-                       unsigned long rsv2[7];
-                       unsigned long ipsr;
-                       unsigned long isr;
-                       unsigned long rsv3;
-                       unsigned long iip;
-                       unsigned long ifa;
-                       unsigned long itir;
-                       unsigned long iipa;
-                       unsigned long ifs;
-                       unsigned long iim;
-                       unsigned long iha;
-                       unsigned long rsv4[38];
-                       unsigned long lid;
-                       unsigned long ivr;
-                       unsigned long tpr;
-                       unsigned long eoi;
-                       unsigned long irr[4];
-                       unsigned long itv;
-                       unsigned long pmv;
-                       unsigned long cmcv;
-                       unsigned long rsv5[5];
-                       unsigned long lrr0;
-                       unsigned long lrr1;
-                       unsigned long rsv6[46];
-               };
-       };
-};
-
-struct kvm_regs {
-       struct saved_vpd vpd;
-       /*Arch-regs*/
-       int mp_state;
-       unsigned long vmm_rr;
-       /* TR and TC.  */
-       struct thash_data itrs[NITRS];
-       struct thash_data dtrs[NDTRS];
-       /* Bit is set if there is a tr/tc for the region.  */
-       unsigned char itr_regions;
-       unsigned char dtr_regions;
-       unsigned char tc_regions;
-
-       char irq_check;
-       unsigned long saved_itc;
-       unsigned long itc_check;
-       unsigned long timer_check;
-       unsigned long timer_pending;
-       unsigned long last_itc;
-
-       unsigned long vrr[8];
-       unsigned long ibr[8];
-       unsigned long dbr[8];
-       unsigned long insvc[4];         /* Interrupt in service.  */
-       unsigned long xtp;
-
-       unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */
-       unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */
-       unsigned long metaphysical_saved_rr0; /* from kvm_arch          */
-       unsigned long metaphysical_saved_rr4; /* from kvm_arch          */
-       unsigned long fp_psr;       /*used for lazy float register */
-       unsigned long saved_gp;
-       /*for phycial  emulation */
-
-       union context saved_guest;
-
-       unsigned long reserved[64];     /* for future use */
-};
-
-struct kvm_sregs {
-};
-
-struct kvm_fpu {
-};
-
-#define KVM_IA64_VCPU_STACK_SHIFT      16
-#define KVM_IA64_VCPU_STACK_SIZE       (1UL << KVM_IA64_VCPU_STACK_SHIFT)
-
-struct kvm_ia64_vcpu_stack {
-       unsigned char stack[KVM_IA64_VCPU_STACK_SIZE];
-};
-
-struct kvm_debug_exit_arch {
-};
-
-/* for KVM_SET_GUEST_DEBUG */
-struct kvm_guest_debug_arch {
-};
-
-/* definition of registers in kvm_run */
-struct kvm_sync_regs {
-};
-
-#endif
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig

deleted file mode 100644 (file)

index 3d50ea9..0000000
--- a/arch/ia64/kvm/Kconfig
+++ /dev/null
@@ -1,66 +0,0 @@
-#
-# KVM configuration
-#
-
-source "virt/kvm/Kconfig"
-
-menuconfig VIRTUALIZATION
-       bool "Virtualization"
-       depends on HAVE_KVM || IA64
-       default y
-       ---help---
-         Say Y here to get to see options for using your Linux host to run other
-         operating systems inside virtual machines (guests).
-         This option alone does not add any kernel code.
-
-         If you say N, all options in this submenu will be skipped and disabled.
-
-if VIRTUALIZATION
-
-config KVM
-       tristate "Kernel-based Virtual Machine (KVM) support"
-       depends on BROKEN
-       depends on HAVE_KVM && MODULES
-       depends on BROKEN
-       select PREEMPT_NOTIFIERS
-       select ANON_INODES
-       select HAVE_KVM_IRQCHIP
-       select HAVE_KVM_IRQFD
-       select HAVE_KVM_IRQ_ROUTING
-       select KVM_APIC_ARCHITECTURE
-       select KVM_MMIO
-       ---help---
-         Support hosting fully virtualized guest machines using hardware
-         virtualization extensions.  You will need a fairly recent
-         processor equipped with virtualization extensions. You will also
-         need to select one or more of the processor modules below.
-
-         This module provides access to the hardware capabilities through
-         a character device node named /dev/kvm.
-
-         To compile this as a module, choose M here: the module
-         will be called kvm.
-
-         If unsure, say N.
-
-config KVM_INTEL
-       tristate "KVM for Intel Itanium 2 processors support"
-       depends on KVM && m
-       ---help---
-         Provides support for KVM on Itanium 2 processors equipped with the VT
-         extensions.
-
-config KVM_DEVICE_ASSIGNMENT
-       bool "KVM legacy PCI device assignment support"
-       depends on KVM && PCI && IOMMU_API
-       default y
-       ---help---
-         Provide support for legacy PCI device assignment through KVM.  The
-         kernel now also supports a full featured userspace device driver
-         framework through VFIO, which supersedes much of this support.
-
-         If unsure, say Y.
-
-source drivers/vhost/Kconfig
-
-endif # VIRTUALIZATION
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile

deleted file mode 100644 (file)

index 18e45ec..0000000
--- a/arch/ia64/kvm/Makefile
+++ /dev/null
@@ -1,67 +0,0 @@
-#This Make file is to generate asm-offsets.h and build source.
-#
-
-#Generate asm-offsets.h for vmm module build
-offsets-file := asm-offsets.h
-
-always  := $(offsets-file)
-targets := $(offsets-file)
-targets += arch/ia64/kvm/asm-offsets.s
-
-# Default sed regexp - multiline due to syntax constraints
-define sed-y
-       "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
-endef
-
-quiet_cmd_offsets = GEN     $@
-define cmd_offsets
-       (set -e; \
-        echo "#ifndef __ASM_KVM_OFFSETS_H__"; \
-        echo "#define __ASM_KVM_OFFSETS_H__"; \
-        echo "/*"; \
-        echo " * DO NOT MODIFY."; \
-        echo " *"; \
-        echo " * This file was generated by Makefile"; \
-        echo " *"; \
-        echo " */"; \
-        echo ""; \
-        sed -ne $(sed-y) $<; \
-        echo ""; \
-        echo "#endif" ) > $@
-endef
-
-# We use internal rules to avoid the "is up to date" message from make
-arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c \
-                       $(wildcard $(srctree)/arch/ia64/include/asm/*.h)\
-                       $(wildcard $(srctree)/include/linux/*.h)
-       $(call if_changed_dep,cc_s_c)
-
-$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s
-       $(call cmd,offsets)
-
-FORCE : $(obj)/$(offsets-file)
-
-#
-# Makefile for Kernel-based Virtual Machine module
-#
-
-ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
-asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
-KVM := ../../../virt/kvm
-
-common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \
-               $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o
-
-ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y)
-common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o
-endif
-
-kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
-obj-$(CONFIG_KVM) += kvm.o
-
-CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
-kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
-       vtlb.o process.o kvm_lib.o
-#Add link memcpy and memset to avoid possible structure assignment error
-kvm-intel-objs += memcpy.o memset.o
-obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c

deleted file mode 100644 (file)

index 9324c87..0000000
--- a/arch/ia64/kvm/asm-offsets.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * asm-offsets.c Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed
- * to extract and format the required data.
- *
- * Anthony Xu    <anthony.xu@intel.com>
- * Xiantao Zhang <xiantao.zhang@intel.com>
- * Copyright (c) 2007 Intel Corporation  KVM support.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/kvm_host.h>
-#include <linux/kbuild.h>
-
-#include "vcpu.h"
-
-void foo(void)
-{
-       DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
-       DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs));
-
-       BLANK();
-
-       DEFINE(VMM_VCPU_META_RR0_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.metaphysical_rr0));
-       DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
-                       offsetof(struct kvm_vcpu,
-                               arch.metaphysical_saved_rr0));
-       DEFINE(VMM_VCPU_VRR0_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.vrr[0]));
-       DEFINE(VMM_VPD_IRR0_OFFSET,
-                       offsetof(struct vpd, irr[0]));
-       DEFINE(VMM_VCPU_ITC_CHECK_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.itc_check));
-       DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.irq_check));
-       DEFINE(VMM_VPD_VHPI_OFFSET,
-                       offsetof(struct vpd, vhpi));
-       DEFINE(VMM_VCPU_VSA_BASE_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.vsa_base));
-       DEFINE(VMM_VCPU_VPD_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.vpd));
-       DEFINE(VMM_VCPU_IRQ_CHECK,
-                       offsetof(struct kvm_vcpu, arch.irq_check));
-       DEFINE(VMM_VCPU_TIMER_PENDING,
-                       offsetof(struct kvm_vcpu, arch.timer_pending));
-       DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0));
-       DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.mode_flags));
-       DEFINE(VMM_VCPU_ITC_OFS_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.itc_offset));
-       DEFINE(VMM_VCPU_LAST_ITC_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.last_itc));
-       DEFINE(VMM_VCPU_SAVED_GP_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.saved_gp));
-
-       BLANK();
-
-       DEFINE(VMM_PT_REGS_B6_OFFSET,
-                               offsetof(struct kvm_pt_regs, b6));
-       DEFINE(VMM_PT_REGS_B7_OFFSET,
-                               offsetof(struct kvm_pt_regs, b7));
-       DEFINE(VMM_PT_REGS_AR_CSD_OFFSET,
-                               offsetof(struct kvm_pt_regs, ar_csd));
-       DEFINE(VMM_PT_REGS_AR_SSD_OFFSET,
-                               offsetof(struct kvm_pt_regs, ar_ssd));
-       DEFINE(VMM_PT_REGS_R8_OFFSET,
-                               offsetof(struct kvm_pt_regs, r8));
-       DEFINE(VMM_PT_REGS_R9_OFFSET,
-                               offsetof(struct kvm_pt_regs, r9));
-       DEFINE(VMM_PT_REGS_R10_OFFSET,
-                               offsetof(struct kvm_pt_regs, r10));
-       DEFINE(VMM_PT_REGS_R11_OFFSET,
-                               offsetof(struct kvm_pt_regs, r11));
-       DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET,
-                               offsetof(struct kvm_pt_regs, cr_ipsr));
-       DEFINE(VMM_PT_REGS_CR_IIP_OFFSET,
-                               offsetof(struct kvm_pt_regs, cr_iip));
-       DEFINE(VMM_PT_REGS_CR_IFS_OFFSET,
-                               offsetof(struct kvm_pt_regs, cr_ifs));
-       DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET,
-                               offsetof(struct kvm_pt_regs, ar_unat));
-       DEFINE(VMM_PT_REGS_AR_PFS_OFFSET,
-                               offsetof(struct kvm_pt_regs, ar_pfs));
-       DEFINE(VMM_PT_REGS_AR_RSC_OFFSET,
-                               offsetof(struct kvm_pt_regs, ar_rsc));
-       DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET,
-                               offsetof(struct kvm_pt_regs, ar_rnat));
-
-       DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET,
-                               offsetof(struct kvm_pt_regs, ar_bspstore));
-       DEFINE(VMM_PT_REGS_PR_OFFSET,
-                               offsetof(struct kvm_pt_regs, pr));
-       DEFINE(VMM_PT_REGS_B0_OFFSET,
-                               offsetof(struct kvm_pt_regs, b0));
-       DEFINE(VMM_PT_REGS_LOADRS_OFFSET,
-                               offsetof(struct kvm_pt_regs, loadrs));
-       DEFINE(VMM_PT_REGS_R1_OFFSET,
-                               offsetof(struct kvm_pt_regs, r1));
-       DEFINE(VMM_PT_REGS_R12_OFFSET,
-                               offsetof(struct kvm_pt_regs, r12));
-       DEFINE(VMM_PT_REGS_R13_OFFSET,
-                               offsetof(struct kvm_pt_regs, r13));
-       DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET,
-                               offsetof(struct kvm_pt_regs, ar_fpsr));
-       DEFINE(VMM_PT_REGS_R15_OFFSET,
-                               offsetof(struct kvm_pt_regs, r15));
-       DEFINE(VMM_PT_REGS_R14_OFFSET,
-                               offsetof(struct kvm_pt_regs, r14));
-       DEFINE(VMM_PT_REGS_R2_OFFSET,
-                               offsetof(struct kvm_pt_regs, r2));
-       DEFINE(VMM_PT_REGS_R3_OFFSET,
-                               offsetof(struct kvm_pt_regs, r3));
-       DEFINE(VMM_PT_REGS_R16_OFFSET,
-                               offsetof(struct kvm_pt_regs, r16));
-       DEFINE(VMM_PT_REGS_R17_OFFSET,
-                               offsetof(struct kvm_pt_regs, r17));
-       DEFINE(VMM_PT_REGS_R18_OFFSET,
-                               offsetof(struct kvm_pt_regs, r18));
-       DEFINE(VMM_PT_REGS_R19_OFFSET,
-                               offsetof(struct kvm_pt_regs, r19));
-       DEFINE(VMM_PT_REGS_R20_OFFSET,
-                               offsetof(struct kvm_pt_regs, r20));
-       DEFINE(VMM_PT_REGS_R21_OFFSET,
-                               offsetof(struct kvm_pt_regs, r21));
-       DEFINE(VMM_PT_REGS_R22_OFFSET,
-                               offsetof(struct kvm_pt_regs, r22));
-       DEFINE(VMM_PT_REGS_R23_OFFSET,
-                               offsetof(struct kvm_pt_regs, r23));
-       DEFINE(VMM_PT_REGS_R24_OFFSET,
-                               offsetof(struct kvm_pt_regs, r24));
-       DEFINE(VMM_PT_REGS_R25_OFFSET,
-                               offsetof(struct kvm_pt_regs, r25));
-       DEFINE(VMM_PT_REGS_R26_OFFSET,
-                               offsetof(struct kvm_pt_regs, r26));
-       DEFINE(VMM_PT_REGS_R27_OFFSET,
-                               offsetof(struct kvm_pt_regs, r27));
-       DEFINE(VMM_PT_REGS_R28_OFFSET,
-                               offsetof(struct kvm_pt_regs, r28));
-       DEFINE(VMM_PT_REGS_R29_OFFSET,
-                               offsetof(struct kvm_pt_regs, r29));
-       DEFINE(VMM_PT_REGS_R30_OFFSET,
-                               offsetof(struct kvm_pt_regs, r30));
-       DEFINE(VMM_PT_REGS_R31_OFFSET,
-                               offsetof(struct kvm_pt_regs, r31));
-       DEFINE(VMM_PT_REGS_AR_CCV_OFFSET,
-                               offsetof(struct kvm_pt_regs, ar_ccv));
-       DEFINE(VMM_PT_REGS_F6_OFFSET,
-                               offsetof(struct kvm_pt_regs, f6));
-       DEFINE(VMM_PT_REGS_F7_OFFSET,
-                               offsetof(struct kvm_pt_regs, f7));
-       DEFINE(VMM_PT_REGS_F8_OFFSET,
-                               offsetof(struct kvm_pt_regs, f8));
-       DEFINE(VMM_PT_REGS_F9_OFFSET,
-                               offsetof(struct kvm_pt_regs, f9));
-       DEFINE(VMM_PT_REGS_F10_OFFSET,
-                               offsetof(struct kvm_pt_regs, f10));
-       DEFINE(VMM_PT_REGS_F11_OFFSET,
-                               offsetof(struct kvm_pt_regs, f11));
-       DEFINE(VMM_PT_REGS_R4_OFFSET,
-                               offsetof(struct kvm_pt_regs, r4));
-       DEFINE(VMM_PT_REGS_R5_OFFSET,
-                               offsetof(struct kvm_pt_regs, r5));
-       DEFINE(VMM_PT_REGS_R6_OFFSET,
-                               offsetof(struct kvm_pt_regs, r6));
-       DEFINE(VMM_PT_REGS_R7_OFFSET,
-                               offsetof(struct kvm_pt_regs, r7));
-       DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET,
-                               offsetof(struct kvm_pt_regs, eml_unat));
-       DEFINE(VMM_VCPU_IIPA_OFFSET,
-                               offsetof(struct kvm_vcpu, arch.cr_iipa));
-       DEFINE(VMM_VCPU_OPCODE_OFFSET,
-                               offsetof(struct kvm_vcpu, arch.opcode));
-       DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause));
-       DEFINE(VMM_VCPU_ISR_OFFSET,
-                               offsetof(struct kvm_vcpu, arch.cr_isr));
-       DEFINE(VMM_PT_REGS_R16_SLOT,
-                               (((offsetof(struct kvm_pt_regs, r16)
-                               - sizeof(struct kvm_pt_regs)) >> 3) & 0x3f));
-       DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
-                               offsetof(struct kvm_vcpu, arch.mode_flags));
-       DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp));
-       BLANK();
-
-       DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd));
-       DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs));
-       DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.insvc[0]));
-       DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta));
-       DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr));
-
-       DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4]));
-       DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5]));
-       DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12]));
-       DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13]));
-       DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0]));
-       DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1]));
-       DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0]));
-       DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1]));
-       DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2]));
-       DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0]));
-       DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16]));
-       DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18]));
-       DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19]));
-       DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21]));
-       DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24]));
-       DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27]));
-       DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28]));
-       DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29]));
-       DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30]));
-       DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36]));
-       DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40]));
-       DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64]));
-       DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65]));
-       DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0]));
-       DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2]));
-       DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8]));
-       DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0]));
-       DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0]));
-       DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2]));
-       DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3]));
-       DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32]));
-       DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33]));
-       DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0]));
-       DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr));
-       BLANK();
-}
diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h

deleted file mode 100644 (file)

index c0785a7..0000000
--- a/arch/ia64/kvm/irq.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * irq.h: In-kernel interrupt controller related definitions
- * Copyright (c) 2008, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Authors:
- *   Xiantao Zhang <xiantao.zhang@intel.com>
- *
- */
-
-#ifndef __IRQ_H
-#define __IRQ_H
-
-#include "lapic.h"
-
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
-       return 1;
-}
-
-#endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c

deleted file mode 100644 (file)

index dbe46f4..0000000
--- a/arch/ia64/kvm/kvm-ia64.c
+++ /dev/null
@@ -1,1942 +0,0 @@
-/*
- * kvm_ia64.c: Basic KVM support On Itanium series processors
- *
- *
- *     Copyright (C) 2007, Intel Corporation.
- *     Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/percpu.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/bitops.h>
-#include <linux/hrtimer.h>
-#include <linux/uaccess.h>
-#include <linux/iommu.h>
-#include <linux/intel-iommu.h>
-#include <linux/pci.h>
-
-#include <asm/pgtable.h>
-#include <asm/gcc_intrin.h>
-#include <asm/pal.h>
-#include <asm/cacheflush.h>
-#include <asm/div64.h>
-#include <asm/tlb.h>
-#include <asm/elf.h>
-#include <asm/sn/addrs.h>
-#include <asm/sn/clksupport.h>
-#include <asm/sn/shub_mmr.h>
-
-#include "misc.h"
-#include "vti.h"
-#include "iodev.h"
-#include "ioapic.h"
-#include "lapic.h"
-#include "irq.h"
-
-static unsigned long kvm_vmm_base;
-static unsigned long kvm_vsa_base;
-static unsigned long kvm_vm_buffer;
-static unsigned long kvm_vm_buffer_size;
-unsigned long kvm_vmm_gp;
-
-static long vp_env_info;
-
-static struct kvm_vmm_info *kvm_vmm_info;
-
-static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu);
-
-struct kvm_stats_debugfs_item debugfs_entries[] = {
-       { NULL }
-};
-
-static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu)
-{
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-       if (vcpu->kvm->arch.is_sn2)
-               return rtc_time();
-       else
-#endif
-               return ia64_getreg(_IA64_REG_AR_ITC);
-}
-
-static void kvm_flush_icache(unsigned long start, unsigned long len)
-{
-       int l;
-
-       for (l = 0; l < (len + 32); l += 32)
-               ia64_fc((void *)(start + l));
-
-       ia64_sync_i();
-       ia64_srlz_i();
-}
-
-static void kvm_flush_tlb_all(void)
-{
-       unsigned long i, j, count0, count1, stride0, stride1, addr;
-       long flags;
-
-       addr    = local_cpu_data->ptce_base;
-       count0  = local_cpu_data->ptce_count[0];
-       count1  = local_cpu_data->ptce_count[1];
-       stride0 = local_cpu_data->ptce_stride[0];
-       stride1 = local_cpu_data->ptce_stride[1];
-
-       local_irq_save(flags);
-       for (i = 0; i < count0; ++i) {
-               for (j = 0; j < count1; ++j) {
-                       ia64_ptce(addr);
-                       addr += stride1;
-               }
-               addr += stride0;
-       }
-       local_irq_restore(flags);
-       ia64_srlz_i();                  /* srlz.i implies srlz.d */
-}
-
-long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
-{
-       struct ia64_pal_retval iprv;
-
-       PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva,
-                       (u64)opt_handler);
-
-       return iprv.status;
-}
-
-static  DEFINE_SPINLOCK(vp_lock);
-
-int kvm_arch_hardware_enable(void)
-{
-       long  status;
-       long  tmp_base;
-       unsigned long pte;
-       unsigned long saved_psr;
-       int slot;
-
-       pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
-       local_irq_save(saved_psr);
-       slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
-       local_irq_restore(saved_psr);
-       if (slot < 0)
-               return -EINVAL;
-
-       spin_lock(&vp_lock);
-       status = ia64_pal_vp_init_env(kvm_vsa_base ?
-                               VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
-                       __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
-       if (status != 0) {
-               spin_unlock(&vp_lock);
-               printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
-               return -EINVAL;
-       }
-
-       if (!kvm_vsa_base) {
-               kvm_vsa_base = tmp_base;
-               printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base);
-       }
-       spin_unlock(&vp_lock);
-       ia64_ptr_entry(0x3, slot);
-
-       return 0;
-}
-
-void kvm_arch_hardware_disable(void)
-{
-
-       long status;
-       int slot;
-       unsigned long pte;
-       unsigned long saved_psr;
-       unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA);
-
-       pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
-                               PAGE_KERNEL));
-
-       local_irq_save(saved_psr);
-       slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
-       local_irq_restore(saved_psr);
-       if (slot < 0)
-               return;
-
-       status = ia64_pal_vp_exit_env(host_iva);
-       if (status)
-               printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n",
-                               status);
-       ia64_ptr_entry(0x3, slot);
-}
-
-void kvm_arch_check_processor_compat(void *rtn)
-{
-       *(int *)rtn = 0;
-}
-
-int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
-{
-
-       int r;
-
-       switch (ext) {
-       case KVM_CAP_IRQCHIP:
-       case KVM_CAP_MP_STATE:
-       case KVM_CAP_IRQ_INJECT_STATUS:
-       case KVM_CAP_IOAPIC_POLARITY_IGNORED:
-               r = 1;
-               break;
-       case KVM_CAP_COALESCED_MMIO:
-               r = KVM_COALESCED_MMIO_PAGE_OFFSET;
-               break;
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
-       case KVM_CAP_IOMMU:
-               r = iommu_present(&pci_bus_type);
-               break;
-#endif
-       default:
-               r = 0;
-       }
-       return r;
-
-}
-
-static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-       kvm_run->hw.hardware_exit_reason = 1;
-       return 0;
-}
-
-static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       struct kvm_mmio_req *p;
-       struct kvm_io_device *mmio_dev;
-       int r;
-
-       p = kvm_get_vcpu_ioreq(vcpu);
-
-       if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
-               goto mmio;
-       vcpu->mmio_needed = 1;
-       vcpu->mmio_fragments[0].gpa = kvm_run->mmio.phys_addr = p->addr;
-       vcpu->mmio_fragments[0].len = kvm_run->mmio.len = p->size;
-       vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
-
-       if (vcpu->mmio_is_write)
-               memcpy(vcpu->arch.mmio_data, &p->data, p->size);
-       memcpy(kvm_run->mmio.data, &p->data, p->size);
-       kvm_run->exit_reason = KVM_EXIT_MMIO;
-       return 0;
-mmio:
-       if (p->dir)
-               r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
-                                   p->size, &p->data);
-       else
-               r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
-                                    p->size, &p->data);
-       if (r)
-               printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
-       p->state = STATE_IORESP_READY;
-
-       return 1;
-}
-
-static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       struct exit_ctl_data *p;
-
-       p = kvm_get_exit_data(vcpu);
-
-       if (p->exit_reason == EXIT_REASON_PAL_CALL)
-               return kvm_pal_emul(vcpu, kvm_run);
-       else {
-               kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-               kvm_run->hw.hardware_exit_reason = 2;
-               return 0;
-       }
-}
-
-static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       struct exit_ctl_data *p;
-
-       p = kvm_get_exit_data(vcpu);
-
-       if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-               kvm_sal_emul(vcpu);
-               return 1;
-       } else {
-               kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-               kvm_run->hw.hardware_exit_reason = 3;
-               return 0;
-       }
-
-}
-
-static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector)
-{
-       struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-       if (!test_and_set_bit(vector, &vpd->irr[0])) {
-               vcpu->arch.irq_new_pending = 1;
-               kvm_vcpu_kick(vcpu);
-               return 1;
-       }
-       return 0;
-}
-
-/*
- *  offset: address offset to IPI space.
- *  value:  deliver value.
- */
-static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
-                               uint64_t vector)
-{
-       switch (dm) {
-       case SAPIC_FIXED:
-               break;
-       case SAPIC_NMI:
-               vector = 2;
-               break;
-       case SAPIC_EXTINT:
-               vector = 0;
-               break;
-       case SAPIC_INIT:
-       case SAPIC_PMI:
-       default:
-               printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
-               return;
-       }
-       __apic_accept_irq(vcpu, vector);
-}
-
-static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
-                       unsigned long eid)
-{
-       union ia64_lid lid;
-       int i;
-       struct kvm_vcpu *vcpu;
-
-       kvm_for_each_vcpu(i, vcpu, kvm) {
-               lid.val = VCPU_LID(vcpu);
-               if (lid.id == id && lid.eid == eid)
-                       return vcpu;
-       }
-
-       return NULL;
-}
-
-static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
-       struct kvm_vcpu *target_vcpu;
-       struct kvm_pt_regs *regs;
-       union ia64_ipi_a addr = p->u.ipi_data.addr;
-       union ia64_ipi_d data = p->u.ipi_data.data;
-
-       target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid);
-       if (!target_vcpu)
-               return handle_vm_error(vcpu, kvm_run);
-
-       if (!target_vcpu->arch.launched) {
-               regs = vcpu_regs(target_vcpu);
-
-               regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip;
-               regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp;
-
-               target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-               if (waitqueue_active(&target_vcpu->wq))
-                       wake_up_interruptible(&target_vcpu->wq);
-       } else {
-               vcpu_deliver_ipi(target_vcpu, data.dm, data.vector);
-               if (target_vcpu != vcpu)
-                       kvm_vcpu_kick(target_vcpu);
-       }
-
-       return 1;
-}
-
-struct call_data {
-       struct kvm_ptc_g ptc_g_data;
-       struct kvm_vcpu *vcpu;
-};
-
-static void vcpu_global_purge(void *info)
-{
-       struct call_data *p = (struct call_data *)info;
-       struct kvm_vcpu *vcpu = p->vcpu;
-
-       if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
-               return;
-
-       set_bit(KVM_REQ_PTC_G, &vcpu->requests);
-       if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) {
-               vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] =
-                                                       p->ptc_g_data;
-       } else {
-               clear_bit(KVM_REQ_PTC_G, &vcpu->requests);
-               vcpu->arch.ptc_g_count = 0;
-               set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
-       }
-}
-
-static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
-       struct kvm *kvm = vcpu->kvm;
-       struct call_data call_data;
-       int i;
-       struct kvm_vcpu *vcpui;
-
-       call_data.ptc_g_data = p->u.ptc_g_data;
-
-       kvm_for_each_vcpu(i, vcpui, kvm) {
-               if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED ||
-                               vcpu == vcpui)
-                       continue;
-
-               if (waitqueue_active(&vcpui->wq))
-                       wake_up_interruptible(&vcpui->wq);
-
-               if (vcpui->cpu != -1) {
-                       call_data.vcpu = vcpui;
-                       smp_call_function_single(vcpui->cpu,
-                                       vcpu_global_purge, &call_data, 1);
-               } else
-                       printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
-
-       }
-       return 1;
-}
-
-static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       return 1;
-}
-
-static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu)
-{
-       unsigned long pte, rtc_phys_addr, map_addr;
-       int slot;
-
-       map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT);
-       rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC;
-       pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC));
-       slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT);
-       vcpu->arch.sn_rtc_tr_slot = slot;
-       if (slot < 0) {
-               printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n");
-               slot = 0;
-       }
-       return slot;
-}
-
-int kvm_emulate_halt(struct kvm_vcpu *vcpu)
-{
-
-       ktime_t kt;
-       long itc_diff;
-       unsigned long vcpu_now_itc;
-       unsigned long expires;
-       struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
-       unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec;
-       struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-       if (irqchip_in_kernel(vcpu->kvm)) {
-
-               vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset;
-
-               if (time_after(vcpu_now_itc, vpd->itm)) {
-                       vcpu->arch.timer_check = 1;
-                       return 1;
-               }
-               itc_diff = vpd->itm - vcpu_now_itc;
-               if (itc_diff < 0)
-                       itc_diff = -itc_diff;
-
-               expires = div64_u64(itc_diff, cyc_per_usec);
-               kt = ktime_set(0, 1000 * expires);
-
-               vcpu->arch.ht_active = 1;
-               hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
-
-               vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
-               kvm_vcpu_block(vcpu);
-               hrtimer_cancel(p_ht);
-               vcpu->arch.ht_active = 0;
-
-               if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) ||
-                               kvm_cpu_has_pending_timer(vcpu))
-                       if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
-                               vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-
-               if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
-                       return -EINTR;
-               return 1;
-       } else {
-               printk(KERN_ERR"kvm: Unsupported userspace halt!");
-               return 0;
-       }
-}
-
-static int handle_vm_shutdown(struct kvm_vcpu *vcpu,
-               struct kvm_run *kvm_run)
-{
-       kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
-       return 0;
-}
-
-static int handle_external_interrupt(struct kvm_vcpu *vcpu,
-               struct kvm_run *kvm_run)
-{
-       return 1;
-}
-
-static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
-                               struct kvm_run *kvm_run)
-{
-       printk("VMM: %s", vcpu->arch.log_buf);
-       return 1;
-}
-
-static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
-               struct kvm_run *kvm_run) = {
-       [EXIT_REASON_VM_PANIC]              = handle_vm_error,
-       [EXIT_REASON_MMIO_INSTRUCTION]      = handle_mmio,
-       [EXIT_REASON_PAL_CALL]              = handle_pal_call,
-       [EXIT_REASON_SAL_CALL]              = handle_sal_call,
-       [EXIT_REASON_SWITCH_RR6]            = handle_switch_rr6,
-       [EXIT_REASON_VM_DESTROY]            = handle_vm_shutdown,
-       [EXIT_REASON_EXTERNAL_INTERRUPT]    = handle_external_interrupt,
-       [EXIT_REASON_IPI]                   = handle_ipi,
-       [EXIT_REASON_PTC_G]                 = handle_global_purge,
-       [EXIT_REASON_DEBUG]                 = handle_vcpu_debug,
-
-};
-
-static const int kvm_vti_max_exit_handlers =
-               sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers);
-
-static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu)
-{
-       struct exit_ctl_data *p_exit_data;
-
-       p_exit_data = kvm_get_exit_data(vcpu);
-       return p_exit_data->exit_reason;
-}
-
-/*
- * The guest has exited.  See if we can fix it or if we need userspace
- * assistance.
- */
-static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
-{
-       u32 exit_reason = kvm_get_exit_reason(vcpu);
-       vcpu->arch.last_exit = exit_reason;
-
-       if (exit_reason < kvm_vti_max_exit_handlers
-                       && kvm_vti_exit_handlers[exit_reason])
-               return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run);
-       else {
-               kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-               kvm_run->hw.hardware_exit_reason = exit_reason;
-       }
-       return 0;
-}
-
-static inline void vti_set_rr6(unsigned long rr6)
-{
-       ia64_set_rr(RR6, rr6);
-       ia64_srlz_i();
-}
-
-static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
-{
-       unsigned long pte;
-       struct kvm *kvm = vcpu->kvm;
-       int r;
-
-       /*Insert a pair of tr to map vmm*/
-       pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
-       r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
-       if (r < 0)
-               goto out;
-       vcpu->arch.vmm_tr_slot = r;
-       /*Insert a pairt of tr to map data of vm*/
-       pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL));
-       r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE,
-                                       pte, KVM_VM_DATA_SHIFT);
-       if (r < 0)
-               goto out;
-       vcpu->arch.vm_tr_slot = r;
-
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-       if (kvm->arch.is_sn2) {
-               r = kvm_sn2_setup_mappings(vcpu);
-               if (r < 0)
-                       goto out;
-       }
-#endif
-
-       r = 0;
-out:
-       return r;
-}
-
-static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
-{
-       struct kvm *kvm = vcpu->kvm;
-       ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
-       ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-       if (kvm->arch.is_sn2)
-               ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot);
-#endif
-}
-
-static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
-{
-       unsigned long psr;
-       int r;
-       int cpu = smp_processor_id();
-
-       if (vcpu->arch.last_run_cpu != cpu ||
-                       per_cpu(last_vcpu, cpu) != vcpu) {
-               per_cpu(last_vcpu, cpu) = vcpu;
-               vcpu->arch.last_run_cpu = cpu;
-               kvm_flush_tlb_all();
-       }
-
-       vcpu->arch.host_rr6 = ia64_get_rr(RR6);
-       vti_set_rr6(vcpu->arch.vmm_rr);
-       local_irq_save(psr);
-       r = kvm_insert_vmm_mapping(vcpu);
-       local_irq_restore(psr);
-       return r;
-}
-
-static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
-{
-       kvm_purge_vmm_mapping(vcpu);
-       vti_set_rr6(vcpu->arch.host_rr6);
-}
-
-static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       union context *host_ctx, *guest_ctx;
-       int r, idx;
-
-       idx = srcu_read_lock(&vcpu->kvm->srcu);
-
-again:
-       if (signal_pending(current)) {
-               r = -EINTR;
-               kvm_run->exit_reason = KVM_EXIT_INTR;
-               goto out;
-       }
-
-       preempt_disable();
-       local_irq_disable();
-
-       /*Get host and guest context with guest address space.*/
-       host_ctx = kvm_get_host_context(vcpu);
-       guest_ctx = kvm_get_guest_context(vcpu);
-
-       clear_bit(KVM_REQ_KICK, &vcpu->requests);
-
-       r = kvm_vcpu_pre_transition(vcpu);
-       if (r < 0)
-               goto vcpu_run_fail;
-
-       srcu_read_unlock(&vcpu->kvm->srcu, idx);
-       vcpu->mode = IN_GUEST_MODE;
-       kvm_guest_enter();
-
-       /*
-        * Transition to the guest
-        */
-       kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
-
-       kvm_vcpu_post_transition(vcpu);
-
-       vcpu->arch.launched = 1;
-       set_bit(KVM_REQ_KICK, &vcpu->requests);
-       local_irq_enable();
-
-       /*
-        * We must have an instruction between local_irq_enable() and
-        * kvm_guest_exit(), so the timer interrupt isn't delayed by
-        * the interrupt shadow.  The stat.exits increment will do nicely.
-        * But we need to prevent reordering, hence this barrier():
-        */
-       barrier();
-       kvm_guest_exit();
-       vcpu->mode = OUTSIDE_GUEST_MODE;
-       preempt_enable();
-
-       idx = srcu_read_lock(&vcpu->kvm->srcu);
-
-       r = kvm_handle_exit(kvm_run, vcpu);
-
-       if (r > 0) {
-               if (!need_resched())
-                       goto again;
-       }
-
-out:
-       srcu_read_unlock(&vcpu->kvm->srcu, idx);
-       if (r > 0) {
-               cond_resched();
-               idx = srcu_read_lock(&vcpu->kvm->srcu);
-               goto again;
-       }
-
-       return r;
-
-vcpu_run_fail:
-       local_irq_enable();
-       preempt_enable();
-       kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
-       goto out;
-}
-
-static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
-{
-       struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
-
-       if (!vcpu->mmio_is_write)
-               memcpy(&p->data, vcpu->arch.mmio_data, 8);
-       p->state = STATE_IORESP_READY;
-}
-
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       int r;
-       sigset_t sigsaved;
-
-       if (vcpu->sigset_active)
-               sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
-
-       if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
-               kvm_vcpu_block(vcpu);
-               clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
-               r = -EAGAIN;
-               goto out;
-       }
-
-       if (vcpu->mmio_needed) {
-               memcpy(vcpu->arch.mmio_data, kvm_run->mmio.data, 8);
-               kvm_set_mmio_data(vcpu);
-               vcpu->mmio_read_completed = 1;
-               vcpu->mmio_needed = 0;
-       }
-       r = __vcpu_run(vcpu, kvm_run);
-out:
-       if (vcpu->sigset_active)
-               sigprocmask(SIG_SETMASK, &sigsaved, NULL);
-
-       return r;
-}
-
-struct kvm *kvm_arch_alloc_vm(void)
-{
-
-       struct kvm *kvm;
-       uint64_t  vm_base;
-
-       BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
-
-       vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
-
-       if (!vm_base)
-               return NULL;
-
-       memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
-       kvm = (struct kvm *)(vm_base +
-                       offsetof(struct kvm_vm_data, kvm_vm_struct));
-       kvm->arch.vm_base = vm_base;
-       printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
-
-       return kvm;
-}
-
-struct kvm_ia64_io_range {
-       unsigned long start;
-       unsigned long size;
-       unsigned long type;
-};
-
-static const struct kvm_ia64_io_range io_ranges[] = {
-       {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER},
-       {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO},
-       {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO},
-       {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC},
-       {PIB_START, PIB_SIZE, GPFN_PIB},
-};
-
-static void kvm_build_io_pmt(struct kvm *kvm)
-{
-       unsigned long i, j;
-
-       /* Mark I/O ranges */
-       for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range));
-                                                       i++) {
-               for (j = io_ranges[i].start;
-                               j < io_ranges[i].start + io_ranges[i].size;
-                               j += PAGE_SIZE)
-                       kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT,
-                                       io_ranges[i].type, 0);
-       }
-
-}
-
-/*Use unused rids to virtualize guest rid.*/
-#define GUEST_PHYSICAL_RR0     0x1739
-#define GUEST_PHYSICAL_RR4     0x2739
-#define VMM_INIT_RR            0x1660
-
-int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
-{
-       BUG_ON(!kvm);
-
-       if (type)
-               return -EINVAL;
-
-       kvm->arch.is_sn2 = ia64_platform_is("sn2");
-
-       kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
-       kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
-       kvm->arch.vmm_init_rr = VMM_INIT_RR;
-
-       /*
-        *Fill P2M entries for MMIO/IO ranges
-        */
-       kvm_build_io_pmt(kvm);
-
-       INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
-
-       /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
-       set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
-
-       return 0;
-}
-
-static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
-                                       struct kvm_irqchip *chip)
-{
-       int r;
-
-       r = 0;
-       switch (chip->chip_id) {
-       case KVM_IRQCHIP_IOAPIC:
-               r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
-               break;
-       default:
-               r = -EINVAL;
-               break;
-       }
-       return r;
-}
-
-static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
-{
-       int r;
-
-       r = 0;
-       switch (chip->chip_id) {
-       case KVM_IRQCHIP_IOAPIC:
-               r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
-               break;
-       default:
-               r = -EINVAL;
-               break;
-       }
-       return r;
-}
-
-#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x
-
-int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
-       struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-       int i;
-
-       for (i = 0; i < 16; i++) {
-               vpd->vgr[i] = regs->vpd.vgr[i];
-               vpd->vbgr[i] = regs->vpd.vbgr[i];
-       }
-       for (i = 0; i < 128; i++)
-               vpd->vcr[i] = regs->vpd.vcr[i];
-       vpd->vhpi = regs->vpd.vhpi;
-       vpd->vnat = regs->vpd.vnat;
-       vpd->vbnat = regs->vpd.vbnat;
-       vpd->vpsr = regs->vpd.vpsr;
-
-       vpd->vpr = regs->vpd.vpr;
-
-       memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
-
-       RESTORE_REGS(mp_state);
-       RESTORE_REGS(vmm_rr);
-       memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS);
-       memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS);
-       RESTORE_REGS(itr_regions);
-       RESTORE_REGS(dtr_regions);
-       RESTORE_REGS(tc_regions);
-       RESTORE_REGS(irq_check);
-       RESTORE_REGS(itc_check);
-       RESTORE_REGS(timer_check);
-       RESTORE_REGS(timer_pending);
-       RESTORE_REGS(last_itc);
-       for (i = 0; i < 8; i++) {
-               vcpu->arch.vrr[i] = regs->vrr[i];
-               vcpu->arch.ibr[i] = regs->ibr[i];
-               vcpu->arch.dbr[i] = regs->dbr[i];
-       }
-       for (i = 0; i < 4; i++)
-               vcpu->arch.insvc[i] = regs->insvc[i];
-       RESTORE_REGS(xtp);
-       RESTORE_REGS(metaphysical_rr0);
-       RESTORE_REGS(metaphysical_rr4);
-       RESTORE_REGS(metaphysical_saved_rr0);
-       RESTORE_REGS(metaphysical_saved_rr4);
-       RESTORE_REGS(fp_psr);
-       RESTORE_REGS(saved_gp);
-
-       vcpu->arch.irq_new_pending = 1;
-       vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu);
-       set_bit(KVM_REQ_RESUME, &vcpu->requests);
-
-       return 0;
-}
-
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
-               bool line_status)
-{
-       if (!irqchip_in_kernel(kvm))
-               return -ENXIO;
-
-       irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-                                       irq_event->irq, irq_event->level,
-                                       line_status);
-       return 0;
-}
-
-long kvm_arch_vm_ioctl(struct file *filp,
-               unsigned int ioctl, unsigned long arg)
-{
-       struct kvm *kvm = filp->private_data;
-       void __user *argp = (void __user *)arg;
-       int r = -ENOTTY;
-
-       switch (ioctl) {
-       case KVM_CREATE_IRQCHIP:
-               r = -EFAULT;
-               r = kvm_ioapic_init(kvm);
-               if (r)
-                       goto out;
-               r = kvm_setup_default_irq_routing(kvm);
-               if (r) {
-                       mutex_lock(&kvm->slots_lock);
-                       kvm_ioapic_destroy(kvm);
-                       mutex_unlock(&kvm->slots_lock);
-                       goto out;
-               }
-               break;
-       case KVM_GET_IRQCHIP: {
-               /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
-               struct kvm_irqchip chip;
-
-               r = -EFAULT;
-               if (copy_from_user(&chip, argp, sizeof chip))
-                               goto out;
-               r = -ENXIO;
-               if (!irqchip_in_kernel(kvm))
-                       goto out;
-               r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
-               if (r)
-                       goto out;
-               r = -EFAULT;
-               if (copy_to_user(argp, &chip, sizeof chip))
-                               goto out;
-               r = 0;
-               break;
-               }
-       case KVM_SET_IRQCHIP: {
-               /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
-               struct kvm_irqchip chip;
-
-               r = -EFAULT;
-               if (copy_from_user(&chip, argp, sizeof chip))
-                               goto out;
-               r = -ENXIO;
-               if (!irqchip_in_kernel(kvm))
-                       goto out;
-               r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
-               if (r)
-                       goto out;
-               r = 0;
-               break;
-               }
-       default:
-               ;
-       }
-out:
-       return r;
-}
-
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
-               struct kvm_sregs *sregs)
-{
-       return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
-               struct kvm_sregs *sregs)
-{
-       return -EINVAL;
-
-}
-int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
-               struct kvm_translation *tr)
-{
-
-       return -EINVAL;
-}
-
-static int kvm_alloc_vmm_area(void)
-{
-       if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) {
-               kvm_vmm_base = __get_free_pages(GFP_KERNEL,
-                               get_order(KVM_VMM_SIZE));
-               if (!kvm_vmm_base)
-                       return -ENOMEM;
-
-               memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
-               kvm_vm_buffer = kvm_vmm_base + VMM_SIZE;
-
-               printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n",
-                               kvm_vmm_base, kvm_vm_buffer);
-       }
-
-       return 0;
-}
-
-static void kvm_free_vmm_area(void)
-{
-       if (kvm_vmm_base) {
-               /*Zero this area before free to avoid bits leak!!*/
-               memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
-               free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE));
-               kvm_vmm_base  = 0;
-               kvm_vm_buffer = 0;
-               kvm_vsa_base = 0;
-       }
-}
-
-static int vti_init_vpd(struct kvm_vcpu *vcpu)
-{
-       int i;
-       union cpuid3_t cpuid3;
-       struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-       if (IS_ERR(vpd))
-               return PTR_ERR(vpd);
-
-       /* CPUID init */
-       for (i = 0; i < 5; i++)
-               vpd->vcpuid[i] = ia64_get_cpuid(i);
-
-       /* Limit the CPUID number to 5 */
-       cpuid3.value = vpd->vcpuid[3];
-       cpuid3.number = 4;      /* 5 - 1 */
-       vpd->vcpuid[3] = cpuid3.value;
-
-       /*Set vac and vdc fields*/
-       vpd->vac.a_from_int_cr = 1;
-       vpd->vac.a_to_int_cr = 1;
-       vpd->vac.a_from_psr = 1;
-       vpd->vac.a_from_cpuid = 1;
-       vpd->vac.a_cover = 1;
-       vpd->vac.a_bsw = 1;
-       vpd->vac.a_int = 1;
-       vpd->vdc.d_vmsw = 1;
-
-       /*Set virtual buffer*/
-       vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE;
-
-       return 0;
-}
-
-static int vti_create_vp(struct kvm_vcpu *vcpu)
-{
-       long ret;
-       struct vpd *vpd = vcpu->arch.vpd;
-       unsigned long  vmm_ivt;
-
-       vmm_ivt = kvm_vmm_info->vmm_ivt;
-
-       printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt);
-
-       ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0);
-
-       if (ret) {
-               printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n");
-               return -EINVAL;
-       }
-       return 0;
-}
-
-static void init_ptce_info(struct kvm_vcpu *vcpu)
-{
-       ia64_ptce_info_t ptce = {0};
-
-       ia64_get_ptce(&ptce);
-       vcpu->arch.ptce_base = ptce.base;
-       vcpu->arch.ptce_count[0] = ptce.count[0];
-       vcpu->arch.ptce_count[1] = ptce.count[1];
-       vcpu->arch.ptce_stride[0] = ptce.stride[0];
-       vcpu->arch.ptce_stride[1] = ptce.stride[1];
-}
-
-static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu)
-{
-       struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
-
-       if (hrtimer_cancel(p_ht))
-               hrtimer_start_expires(p_ht, HRTIMER_MODE_ABS);
-}
-
-static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
-{
-       struct kvm_vcpu *vcpu;
-       wait_queue_head_t *q;
-
-       vcpu  = container_of(data, struct kvm_vcpu, arch.hlt_timer);
-       q = &vcpu->wq;
-
-       if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
-               goto out;
-
-       if (waitqueue_active(q))
-               wake_up_interruptible(q);
-
-out:
-       vcpu->arch.timer_fired = 1;
-       vcpu->arch.timer_check = 1;
-       return HRTIMER_NORESTART;
-}
-
-#define PALE_RESET_ENTRY    0x80000000ffffffb0UL
-
-bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
-{
-       return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
-}
-
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
-{
-       struct kvm_vcpu *v;
-       int r;
-       int i;
-       long itc_offset;
-       struct kvm *kvm = vcpu->kvm;
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-       union context *p_ctx = &vcpu->arch.guest;
-       struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu);
-
-       /*Init vcpu context for first run.*/
-       if (IS_ERR(vmm_vcpu))
-               return PTR_ERR(vmm_vcpu);
-
-       if (kvm_vcpu_is_bsp(vcpu)) {
-               vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-
-               /*Set entry address for first run.*/
-               regs->cr_iip = PALE_RESET_ENTRY;
-
-               /*Initialize itc offset for vcpus*/
-               itc_offset = 0UL - kvm_get_itc(vcpu);
-               for (i = 0; i < KVM_MAX_VCPUS; i++) {
-                       v = (struct kvm_vcpu *)((char *)vcpu +
-                                       sizeof(struct kvm_vcpu_data) * i);
-                       v->arch.itc_offset = itc_offset;
-                       v->arch.last_itc = 0;
-               }
-       } else
-               vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
-
-       r = -ENOMEM;
-       vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL);
-       if (!vcpu->arch.apic)
-               goto out;
-       vcpu->arch.apic->vcpu = vcpu;
-
-       p_ctx->gr[1] = 0;
-       p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
-       p_ctx->gr[13] = (unsigned long)vmm_vcpu;
-       p_ctx->psr = 0x1008522000UL;
-       p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
-       p_ctx->caller_unat = 0;
-       p_ctx->pr = 0x0;
-       p_ctx->ar[36] = 0x0; /*unat*/
-       p_ctx->ar[19] = 0x0; /*rnat*/
-       p_ctx->ar[18] = (unsigned long)vmm_vcpu +
-                               ((sizeof(struct kvm_vcpu)+15) & ~15);
-       p_ctx->ar[64] = 0x0; /*pfs*/
-       p_ctx->cr[0] = 0x7e04UL;
-       p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt;
-       p_ctx->cr[8] = 0x3c;
-
-       /*Initialize region register*/
-       p_ctx->rr[0] = 0x30;
-       p_ctx->rr[1] = 0x30;
-       p_ctx->rr[2] = 0x30;
-       p_ctx->rr[3] = 0x30;
-       p_ctx->rr[4] = 0x30;
-       p_ctx->rr[5] = 0x30;
-       p_ctx->rr[7] = 0x30;
-
-       /*Initialize branch register 0*/
-       p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry;
-
-       vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr;
-       vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0;
-       vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4;
-
-       hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-       vcpu->arch.hlt_timer.function = hlt_timer_fn;
-
-       vcpu->arch.last_run_cpu = -1;
-       vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
-       vcpu->arch.vsa_base = kvm_vsa_base;
-       vcpu->arch.__gp = kvm_vmm_gp;
-       vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
-       vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
-       vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
-       init_ptce_info(vcpu);
-
-       r = 0;
-out:
-       return r;
-}
-
-static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
-{
-       unsigned long psr;
-       int r;
-
-       local_irq_save(psr);
-       r = kvm_insert_vmm_mapping(vcpu);
-       local_irq_restore(psr);
-       if (r)
-               goto fail;
-       r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
-       if (r)
-               goto fail;
-
-       r = vti_init_vpd(vcpu);
-       if (r) {
-               printk(KERN_DEBUG"kvm: vpd init error!!\n");
-               goto uninit;
-       }
-
-       r = vti_create_vp(vcpu);
-       if (r)
-               goto uninit;
-
-       kvm_purge_vmm_mapping(vcpu);
-
-       return 0;
-uninit:
-       kvm_vcpu_uninit(vcpu);
-fail:
-       return r;
-}
-
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
-               unsigned int id)
-{
-       struct kvm_vcpu *vcpu;
-       unsigned long vm_base = kvm->arch.vm_base;
-       int r;
-       int cpu;
-
-       BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
-
-       r = -EINVAL;
-       if (id >= KVM_MAX_VCPUS) {
-               printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
-                               KVM_MAX_VCPUS);
-               goto fail;
-       }
-
-       r = -ENOMEM;
-       if (!vm_base) {
-               printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
-               goto fail;
-       }
-       vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
-                                       vcpu_data[id].vcpu_struct));
-       vcpu->kvm = kvm;
-
-       cpu = get_cpu();
-       r = vti_vcpu_setup(vcpu, id);
-       put_cpu();
-
-       if (r) {
-               printk(KERN_DEBUG"kvm: vcpu_setup error!!\n");
-               goto fail;
-       }
-
-       return vcpu;
-fail:
-       return ERR_PTR(r);
-}
-
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
-       return 0;
-}
-
-int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
-{
-       return 0;
-}
-
-int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
-       return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
-       return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-                                       struct kvm_guest_debug *dbg)
-{
-       return -EINVAL;
-}
-
-void kvm_arch_free_vm(struct kvm *kvm)
-{
-       unsigned long vm_base = kvm->arch.vm_base;
-
-       if (vm_base) {
-               memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
-               free_pages(vm_base, get_order(KVM_VM_DATA_SIZE));
-       }
-
-}
-
-static void kvm_release_vm_pages(struct kvm *kvm)
-{
-       struct kvm_memslots *slots;
-       struct kvm_memory_slot *memslot;
-       int j;
-
-       slots = kvm_memslots(kvm);
-       kvm_for_each_memslot(memslot, slots) {
-               for (j = 0; j < memslot->npages; j++) {
-                       if (memslot->rmap[j])
-                               put_page((struct page *)memslot->rmap[j]);
-               }
-       }
-}
-
-void kvm_arch_destroy_vm(struct kvm *kvm)
-{
-       kvm_iommu_unmap_guest(kvm);
-       kvm_free_all_assigned_devices(kvm);
-       kfree(kvm->arch.vioapic);
-       kvm_release_vm_pages(kvm);
-}
-
-void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-{
-       if (cpu != vcpu->cpu) {
-               vcpu->cpu = cpu;
-               if (vcpu->arch.ht_active)
-                       kvm_migrate_hlt_timer(vcpu);
-       }
-}
-
-#define SAVE_REGS(_x)  regs->_x = vcpu->arch._x
-
-int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
-       struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-       int i;
-
-       vcpu_load(vcpu);
-
-       for (i = 0; i < 16; i++) {
-               regs->vpd.vgr[i] = vpd->vgr[i];
-               regs->vpd.vbgr[i] = vpd->vbgr[i];
-       }
-       for (i = 0; i < 128; i++)
-               regs->vpd.vcr[i] = vpd->vcr[i];
-       regs->vpd.vhpi = vpd->vhpi;
-       regs->vpd.vnat = vpd->vnat;
-       regs->vpd.vbnat = vpd->vbnat;
-       regs->vpd.vpsr = vpd->vpsr;
-       regs->vpd.vpr = vpd->vpr;
-
-       memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
-
-       SAVE_REGS(mp_state);
-       SAVE_REGS(vmm_rr);
-       memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
-       memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS);
-       SAVE_REGS(itr_regions);
-       SAVE_REGS(dtr_regions);
-       SAVE_REGS(tc_regions);
-       SAVE_REGS(irq_check);
-       SAVE_REGS(itc_check);
-       SAVE_REGS(timer_check);
-       SAVE_REGS(timer_pending);
-       SAVE_REGS(last_itc);
-       for (i = 0; i < 8; i++) {
-               regs->vrr[i] = vcpu->arch.vrr[i];
-               regs->ibr[i] = vcpu->arch.ibr[i];
-               regs->dbr[i] = vcpu->arch.dbr[i];
-       }
-       for (i = 0; i < 4; i++)
-               regs->insvc[i] = vcpu->arch.insvc[i];
-       regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu);
-       SAVE_REGS(xtp);
-       SAVE_REGS(metaphysical_rr0);
-       SAVE_REGS(metaphysical_rr4);
-       SAVE_REGS(metaphysical_saved_rr0);
-       SAVE_REGS(metaphysical_saved_rr4);
-       SAVE_REGS(fp_psr);
-       SAVE_REGS(saved_gp);
-
-       vcpu_put(vcpu);
-       return 0;
-}
-
-int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu,
-                                 struct kvm_ia64_vcpu_stack *stack)
-{
-       memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack));
-       return 0;
-}
-
-int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu,
-                                 struct kvm_ia64_vcpu_stack *stack)
-{
-       memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu),
-              sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu));
-
-       vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data;
-       return 0;
-}
-
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
-
-       hrtimer_cancel(&vcpu->arch.hlt_timer);
-       kfree(vcpu->arch.apic);
-}
-
-long kvm_arch_vcpu_ioctl(struct file *filp,
-                        unsigned int ioctl, unsigned long arg)
-{
-       struct kvm_vcpu *vcpu = filp->private_data;
-       void __user *argp = (void __user *)arg;
-       struct kvm_ia64_vcpu_stack *stack = NULL;
-       long r;
-
-       switch (ioctl) {
-       case KVM_IA64_VCPU_GET_STACK: {
-               struct kvm_ia64_vcpu_stack __user *user_stack;
-               void __user *first_p = argp;
-
-               r = -EFAULT;
-               if (copy_from_user(&user_stack, first_p, sizeof(void *)))
-                       goto out;
-
-               if (!access_ok(VERIFY_WRITE, user_stack,
-                              sizeof(struct kvm_ia64_vcpu_stack))) {
-                       printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: "
-                              "Illegal user destination address for stack\n");
-                       goto out;
-               }
-               stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
-               if (!stack) {
-                       r = -ENOMEM;
-                       goto out;
-               }
-
-               r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack);
-               if (r)
-                       goto out;
-
-               if (copy_to_user(user_stack, stack,
-                                sizeof(struct kvm_ia64_vcpu_stack))) {
-                       r = -EFAULT;
-                       goto out;
-               }
-
-               break;
-       }
-       case KVM_IA64_VCPU_SET_STACK: {
-               struct kvm_ia64_vcpu_stack __user *user_stack;
-               void __user *first_p = argp;
-
-               r = -EFAULT;
-               if (copy_from_user(&user_stack, first_p, sizeof(void *)))
-                       goto out;
-
-               if (!access_ok(VERIFY_READ, user_stack,
-                           sizeof(struct kvm_ia64_vcpu_stack))) {
-                       printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: "
-                              "Illegal user address for stack\n");
-                       goto out;
-               }
-               stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
-               if (!stack) {
-                       r = -ENOMEM;
-                       goto out;
-               }
-               if (copy_from_user(stack, user_stack,
-                                  sizeof(struct kvm_ia64_vcpu_stack)))
-                       goto out;
-
-               r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack);
-               break;
-       }
-
-       default:
-               r = -EINVAL;
-       }
-
-out:
-       kfree(stack);
-       return r;
-}
-
-int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
-{
-       return VM_FAULT_SIGBUS;
-}
-
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
-                           unsigned long npages)
-{
-       return 0;
-}
-
-int kvm_arch_prepare_memory_region(struct kvm *kvm,
-               struct kvm_memory_slot *memslot,
-               struct kvm_userspace_memory_region *mem,
-               enum kvm_mr_change change)
-{
-       unsigned long i;
-       unsigned long pfn;
-       int npages = memslot->npages;
-       unsigned long base_gfn = memslot->base_gfn;
-
-       if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
-               return -ENOMEM;
-
-       for (i = 0; i < npages; i++) {
-               pfn = gfn_to_pfn(kvm, base_gfn + i);
-               if (!kvm_is_reserved_pfn(pfn)) {
-                       kvm_set_pmt_entry(kvm, base_gfn + i,
-                                       pfn << PAGE_SHIFT,
-                               _PAGE_AR_RWX | _PAGE_MA_WB);
-                       memslot->rmap[i] = (unsigned long)pfn_to_page(pfn);
-               } else {
-                       kvm_set_pmt_entry(kvm, base_gfn + i,
-                                       GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT),
-                                       _PAGE_MA_UC);
-                       memslot->rmap[i] = 0;
-                       }
-       }
-
-       return 0;
-}
-
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-       kvm_flush_remote_tlbs(kvm);
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
-                                  struct kvm_memory_slot *slot)
-{
-       kvm_arch_flush_shadow_all();
-}
-
-long kvm_arch_dev_ioctl(struct file *filp,
-                       unsigned int ioctl, unsigned long arg)
-{
-       return -EINVAL;
-}
-
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
-       kvm_vcpu_uninit(vcpu);
-}
-
-static int vti_cpu_has_kvm_support(void)
-{
-       long  avail = 1, status = 1, control = 1;
-       long ret;
-
-       ret = ia64_pal_proc_get_features(&avail, &status, &control, 0);
-       if (ret)
-               goto out;
-
-       if (!(avail & PAL_PROC_VM_BIT))
-               goto out;
-
-       printk(KERN_DEBUG"kvm: Hardware Supports VT\n");
-
-       ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info);
-       if (ret)
-               goto out;
-       printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size);
-
-       if (!(vp_env_info & VP_OPCODE)) {
-               printk(KERN_WARNING"kvm: No opcode ability on hardware, "
-                               "vm_env_info:0x%lx\n", vp_env_info);
-       }
-
-       return 1;
-out:
-       return 0;
-}
-
-
-/*
- * On SN2, the ITC isn't stable, so copy in fast path code to use the
- * SN2 RTC, replacing the ITC based default verion.
- */
-static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info,
-                         struct module *module)
-{
-       unsigned long new_ar, new_ar_sn2;
-       unsigned long module_base;
-
-       if (!ia64_platform_is("sn2"))
-               return;
-
-       module_base = (unsigned long)module->module_core;
-
-       new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base;
-       new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base;
-
-       printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC "
-              "as source\n");
-
-       /*
-        * Copy the SN2 version of mov_ar into place. They are both
-        * the same size, so 6 bundles is sufficient (6 * 0x10).
-        */
-       memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60);
-}
-
-static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
-                           struct module *module)
-{
-       unsigned long module_base;
-       unsigned long vmm_size;
-
-       unsigned long vmm_offset, func_offset, fdesc_offset;
-       struct fdesc *p_fdesc;
-
-       BUG_ON(!module);
-
-       if (!kvm_vmm_base) {
-               printk("kvm: kvm area hasn't been initialized yet!!\n");
-               return -EFAULT;
-       }
-
-       /*Calculate new position of relocated vmm module.*/
-       module_base = (unsigned long)module->module_core;
-       vmm_size = module->core_size;
-       if (unlikely(vmm_size > KVM_VMM_SIZE))
-               return -EFAULT;
-
-       memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
-       kvm_patch_vmm(vmm_info, module);
-       kvm_flush_icache(kvm_vmm_base, vmm_size);
-
-       /*Recalculate kvm_vmm_info based on new VMM*/
-       vmm_offset = vmm_info->vmm_ivt - module_base;
-       kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset;
-       printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n",
-                       kvm_vmm_info->vmm_ivt);
-
-       fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base;
-       kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE +
-                                                       fdesc_offset);
-       func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base;
-       p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
-       p_fdesc->ip = KVM_VMM_BASE + func_offset;
-       p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base);
-
-       printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n",
-                       KVM_VMM_BASE+func_offset);
-
-       fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base;
-       kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE +
-                       fdesc_offset);
-       func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base;
-       p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
-       p_fdesc->ip = KVM_VMM_BASE + func_offset;
-       p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base);
-
-       kvm_vmm_gp = p_fdesc->gp;
-
-       printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n",
-                                               kvm_vmm_info->vmm_entry);
-       printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n",
-                                               KVM_VMM_BASE + func_offset);
-
-       return 0;
-}
-
-int kvm_arch_init(void *opaque)
-{
-       int r;
-       struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque;
-
-       if (!vti_cpu_has_kvm_support()) {
-               printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n");
-               r = -EOPNOTSUPP;
-               goto out;
-       }
-
-       if (kvm_vmm_info) {
-               printk(KERN_ERR "kvm: Already loaded VMM module!\n");
-               r = -EEXIST;
-               goto out;
-       }
-
-       r = -ENOMEM;
-       kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL);
-       if (!kvm_vmm_info)
-               goto out;
-
-       if (kvm_alloc_vmm_area())
-               goto out_free0;
-
-       r = kvm_relocate_vmm(vmm_info, vmm_info->module);
-       if (r)
-               goto out_free1;
-
-       return 0;
-
-out_free1:
-       kvm_free_vmm_area();
-out_free0:
-       kfree(kvm_vmm_info);
-out:
-       return r;
-}
-
-void kvm_arch_exit(void)
-{
-       kvm_free_vmm_area();
-       kfree(kvm_vmm_info);
-       kvm_vmm_info = NULL;
-}
-
-static void kvm_ia64_sync_dirty_log(struct kvm *kvm,
-                                   struct kvm_memory_slot *memslot)
-{
-       int i;
-       long base;
-       unsigned long n;
-       unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
-                       offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
-
-       n = kvm_dirty_bitmap_bytes(memslot);
-       base = memslot->base_gfn / BITS_PER_LONG;
-
-       spin_lock(&kvm->arch.dirty_log_lock);
-       for (i = 0; i < n/sizeof(long); ++i) {
-               memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
-               dirty_bitmap[base + i] = 0;
-       }
-       spin_unlock(&kvm->arch.dirty_log_lock);
-}
-
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
-               struct kvm_dirty_log *log)
-{
-       int r;
-       unsigned long n;
-       struct kvm_memory_slot *memslot;
-       int is_dirty = 0;
-
-       mutex_lock(&kvm->slots_lock);
-
-       r = -EINVAL;
-       if (log->slot >= KVM_USER_MEM_SLOTS)
-               goto out;
-
-       memslot = id_to_memslot(kvm->memslots, log->slot);
-       r = -ENOENT;
-       if (!memslot->dirty_bitmap)
-               goto out;
-
-       kvm_ia64_sync_dirty_log(kvm, memslot);
-       r = kvm_get_dirty_log(kvm, log, &is_dirty);
-       if (r)
-               goto out;
-
-       /* If nothing is dirty, don't bother messing with page tables. */
-       if (is_dirty) {
-               kvm_flush_remote_tlbs(kvm);
-               n = kvm_dirty_bitmap_bytes(memslot);
-               memset(memslot->dirty_bitmap, 0, n);
-       }
-       r = 0;
-out:
-       mutex_unlock(&kvm->slots_lock);
-       return r;
-}
-
-int kvm_arch_hardware_setup(void)
-{
-       return 0;
-}
-
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
-{
-       return __apic_accept_irq(vcpu, irq->vector);
-}
-
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
-{
-       return apic->vcpu->vcpu_id == dest;
-}
-
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
-{
-       return 0;
-}
-
-int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
-{
-       return vcpu1->arch.xtp - vcpu2->arch.xtp;
-}
-
-int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-               int short_hand, int dest, int dest_mode)
-{
-       struct kvm_lapic *target = vcpu->arch.apic;
-       return (dest_mode == 0) ?
-               kvm_apic_match_physical_addr(target, dest) :
-               kvm_apic_match_logical_addr(target, dest);
-}
-
-static int find_highest_bits(int *dat)
-{
-       u32  bits, bitnum;
-       int i;
-
-       /* loop for all 256 bits */
-       for (i = 7; i >= 0 ; i--) {
-               bits = dat[i];
-               if (bits) {
-                       bitnum = fls(bits);
-                       return i * 32 + bitnum - 1;
-               }
-       }
-
-       return -1;
-}
-
-int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
-{
-    struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-    if (vpd->irr[0] & (1UL << NMI_VECTOR))
-               return NMI_VECTOR;
-    if (vpd->irr[0] & (1UL << ExtINT_VECTOR))
-               return ExtINT_VECTOR;
-
-    return find_highest_bits((int *)&vpd->irr[0]);
-}
-
-int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
-{
-       return vcpu->arch.timer_fired;
-}
-
-int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
-{
-       return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) ||
-               (kvm_highest_pending_irq(vcpu) != -1);
-}
-
-int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
-{
-       return (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests));
-}
-
-int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
-                                   struct kvm_mp_state *mp_state)
-{
-       mp_state->mp_state = vcpu->arch.mp_state;
-       return 0;
-}
-
-static int vcpu_reset(struct kvm_vcpu *vcpu)
-{
-       int r;
-       long psr;
-       local_irq_save(psr);
-       r = kvm_insert_vmm_mapping(vcpu);
-       local_irq_restore(psr);
-       if (r)
-               goto fail;
-
-       vcpu->arch.launched = 0;
-       kvm_arch_vcpu_uninit(vcpu);
-       r = kvm_arch_vcpu_init(vcpu);
-       if (r)
-               goto fail;
-
-       kvm_purge_vmm_mapping(vcpu);
-       r = 0;
-fail:
-       return r;
-}
-
-int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
-                                   struct kvm_mp_state *mp_state)
-{
-       int r = 0;
-
-       vcpu->arch.mp_state = mp_state->mp_state;
-       if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)
-               r = vcpu_reset(vcpu);
-       return r;
-}
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c

deleted file mode 100644 (file)

index cb548ee..0000000
--- a/arch/ia64/kvm/kvm_fw.c
+++ /dev/null
@@ -1,674 +0,0 @@
-/*
- * PAL/SAL call delegation
- *
- * Copyright (c) 2004 Li Susie <susie.li@intel.com>
- * Copyright (c) 2005 Yu Ke <ke.yu@intel.com>
- * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-#include <linux/kvm_host.h>
-#include <linux/smp.h>
-#include <asm/sn/addrs.h>
-#include <asm/sn/clksupport.h>
-#include <asm/sn/shub_mmr.h>
-
-#include "vti.h"
-#include "misc.h"
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-#include <asm/tlb.h>
-
-/*
- * Handy macros to make sure that the PAL return values start out
- * as something meaningful.
- */
-#define INIT_PAL_STATUS_UNIMPLEMENTED(x)               \
-       {                                               \
-               x.status = PAL_STATUS_UNIMPLEMENTED;    \
-               x.v0 = 0;                               \
-               x.v1 = 0;                               \
-               x.v2 = 0;                               \
-       }
-
-#define INIT_PAL_STATUS_SUCCESS(x)                     \
-       {                                               \
-               x.status = PAL_STATUS_SUCCESS;          \
-               x.v0 = 0;                               \
-               x.v1 = 0;                               \
-               x.v2 = 0;                               \
-    }
-
-static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu,
-               u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) {
-       struct exit_ctl_data *p;
-
-       if (vcpu) {
-               p = &vcpu->arch.exit_data;
-               if (p->exit_reason == EXIT_REASON_PAL_CALL) {
-                       *gr28 = p->u.pal_data.gr28;
-                       *gr29 = p->u.pal_data.gr29;
-                       *gr30 = p->u.pal_data.gr30;
-                       *gr31 = p->u.pal_data.gr31;
-                       return ;
-               }
-       }
-       printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n");
-}
-
-static void set_pal_result(struct kvm_vcpu *vcpu,
-               struct ia64_pal_retval result) {
-
-       struct exit_ctl_data *p;
-
-       p = kvm_get_exit_data(vcpu);
-       if (p->exit_reason == EXIT_REASON_PAL_CALL) {
-               p->u.pal_data.ret = result;
-               return ;
-       }
-       INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret);
-}
-
-static void set_sal_result(struct kvm_vcpu *vcpu,
-               struct sal_ret_values result) {
-       struct exit_ctl_data *p;
-
-       p = kvm_get_exit_data(vcpu);
-       if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-               p->u.sal_data.ret = result;
-               return ;
-       }
-       printk(KERN_WARNING"Failed to set sal result!!\n");
-}
-
-struct cache_flush_args {
-       u64 cache_type;
-       u64 operation;
-       u64 progress;
-       long status;
-};
-
-cpumask_t cpu_cache_coherent_map;
-
-static void remote_pal_cache_flush(void *data)
-{
-       struct cache_flush_args *args = data;
-       long status;
-       u64 progress = args->progress;
-
-       status = ia64_pal_cache_flush(args->cache_type, args->operation,
-                                       &progress, NULL);
-       if (status != 0)
-       args->status = status;
-}
-
-static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu)
-{
-       u64 gr28, gr29, gr30, gr31;
-       struct ia64_pal_retval result = {0, 0, 0, 0};
-       struct cache_flush_args args = {0, 0, 0, 0};
-       long psr;
-
-       gr28 = gr29 = gr30 = gr31 = 0;
-       kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31);
-
-       if (gr31 != 0)
-               printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu);
-
-       /* Always call Host Pal in int=1 */
-       gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS;
-       args.cache_type = gr29;
-       args.operation = gr30;
-       smp_call_function(remote_pal_cache_flush,
-                               (void *)&args, 1);
-       if (args.status != 0)
-               printk(KERN_ERR"pal_cache_flush error!,"
-                               "status:0x%lx\n", args.status);
-       /*
-        * Call Host PAL cache flush
-        * Clear psr.ic when call PAL_CACHE_FLUSH
-        */
-       local_irq_save(psr);
-       result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1,
-                                               &result.v0);
-       local_irq_restore(psr);
-       if (result.status != 0)
-               printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld"
-                               "in1:%lx,in2:%lx\n",
-                               vcpu, result.status, gr29, gr30);
-
-#if 0
-       if (gr29 == PAL_CACHE_TYPE_COHERENT) {
-               cpus_setall(vcpu->arch.cache_coherent_map);
-               cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map);
-               cpus_setall(cpu_cache_coherent_map);
-               cpu_clear(vcpu->cpu, cpu_cache_coherent_map);
-       }
-#endif
-       return result;
-}
-
-struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu)
-{
-
-       struct ia64_pal_retval result;
-
-       PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0);
-       return result;
-}
-
-static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu)
-{
-
-       struct ia64_pal_retval result;
-
-       PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0);
-
-       /*
-        * PAL_FREQ_BASE may not be implemented in some platforms,
-        * call SAL instead.
-        */
-       if (result.v0 == 0) {
-               result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
-                                                       &result.v0,
-                                                       &result.v1);
-               result.v2 = 0;
-       }
-
-       return result;
-}
-
-/*
- * On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2
- * RTC is used instead. This function patches the ratios from SAL
- * to match the RTC before providing them to the guest.
- */
-static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result)
-{
-       struct pal_freq_ratio *ratio;
-       unsigned long sal_freq, sal_drift, factor;
-
-       result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
-                                           &sal_freq, &sal_drift);
-       ratio = (struct pal_freq_ratio *)&result->v2;
-       factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) /
-               sn_rtc_cycles_per_second;
-
-       ratio->num = 3;
-       ratio->den = factor;
-}
-
-static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
-{
-       struct ia64_pal_retval result;
-
-       PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
-
-       if (vcpu->kvm->arch.is_sn2)
-               sn2_patch_itc_freq_ratios(&result);
-
-       return result;
-}
-
-static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu)
-{
-       struct ia64_pal_retval result;
-
-       INIT_PAL_STATUS_UNIMPLEMENTED(result);
-       return result;
-}
-
-static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu)
-{
-
-       struct ia64_pal_retval result;
-
-       INIT_PAL_STATUS_SUCCESS(result);
-       return result;
-}
-
-static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu)
-{
-
-       struct ia64_pal_retval result = {0, 0, 0, 0};
-       long in0, in1, in2, in3;
-
-       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-       result.status = ia64_pal_proc_get_features(&result.v0, &result.v1,
-                       &result.v2, in2);
-
-       return result;
-}
-
-static struct ia64_pal_retval pal_register_info(struct kvm_vcpu *vcpu)
-{
-
-       struct ia64_pal_retval result = {0, 0, 0, 0};
-       long in0, in1, in2, in3;
-
-       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-       result.status = ia64_pal_register_info(in1, &result.v1, &result.v2);
-
-       return result;
-}
-
-static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu)
-{
-
-       pal_cache_config_info_t ci;
-       long status;
-       unsigned long in0, in1, in2, in3, r9, r10;
-
-       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-       status = ia64_pal_cache_config_info(in1, in2, &ci);
-       r9 = ci.pcci_info_1.pcci1_data;
-       r10 = ci.pcci_info_2.pcci2_data;
-       return ((struct ia64_pal_retval){status, r9, r10, 0});
-}
-
-#define GUEST_IMPL_VA_MSB      59
-#define GUEST_RID_BITS         18
-
-static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu)
-{
-
-       pal_vm_info_1_u_t vminfo1;
-       pal_vm_info_2_u_t vminfo2;
-       struct ia64_pal_retval result;
-
-       PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0);
-       if (!result.status) {
-               vminfo1.pvi1_val = result.v0;
-               vminfo1.pal_vm_info_1_s.max_itr_entry = 8;
-               vminfo1.pal_vm_info_1_s.max_dtr_entry = 8;
-               result.v0 = vminfo1.pvi1_val;
-               vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB;
-               vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS;
-               result.v1 = vminfo2.pvi2_val;
-       }
-
-       return result;
-}
-
-static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu)
-{
-       struct ia64_pal_retval result;
-       unsigned long in0, in1, in2, in3;
-
-       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-
-       result.status = ia64_pal_vm_info(in1, in2,
-                       (pal_tc_info_u_t *)&result.v1, &result.v2);
-
-       return result;
-}
-
-static  u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu)
-{
-       u64 index = 0;
-       struct exit_ctl_data *p;
-
-       p = kvm_get_exit_data(vcpu);
-       if (p->exit_reason == EXIT_REASON_PAL_CALL)
-               index = p->u.pal_data.gr28;
-
-       return index;
-}
-
-static void prepare_for_halt(struct kvm_vcpu *vcpu)
-{
-       vcpu->arch.timer_pending = 1;
-       vcpu->arch.timer_fired = 0;
-}
-
-static struct ia64_pal_retval pal_perf_mon_info(struct kvm_vcpu *vcpu)
-{
-       long status;
-       unsigned long in0, in1, in2, in3, r9;
-       unsigned long pm_buffer[16];
-
-       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-       status = ia64_pal_perf_mon_info(pm_buffer,
-                               (pal_perf_mon_info_u_t *) &r9);
-       if (status != 0) {
-               printk(KERN_DEBUG"PAL_PERF_MON_INFO fails ret=%ld\n", status);
-       } else {
-               if (in1)
-                       memcpy((void *)in1, pm_buffer, sizeof(pm_buffer));
-               else {
-                       status = PAL_STATUS_EINVAL;
-                       printk(KERN_WARNING"Invalid parameters "
-                                               "for PAL call:0x%lx!\n", in0);
-               }
-       }
-       return (struct ia64_pal_retval){status, r9, 0, 0};
-}
-
-static struct ia64_pal_retval pal_halt_info(struct kvm_vcpu *vcpu)
-{
-       unsigned long in0, in1, in2, in3;
-       long status;
-       unsigned long res = 1000UL | (1000UL << 16) | (10UL << 32)
-                                       | (1UL << 61) | (1UL << 60);
-
-       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-       if (in1) {
-               memcpy((void *)in1, &res, sizeof(res));
-               status = 0;
-       } else{
-               status = PAL_STATUS_EINVAL;
-               printk(KERN_WARNING"Invalid parameters "
-                                       "for PAL call:0x%lx!\n", in0);
-       }
-
-       return (struct ia64_pal_retval){status, 0, 0, 0};
-}
-
-static struct ia64_pal_retval pal_mem_attrib(struct kvm_vcpu *vcpu)
-{
-       unsigned long r9;
-       long status;
-
-       status = ia64_pal_mem_attrib(&r9);
-
-       return (struct ia64_pal_retval){status, r9, 0, 0};
-}
-
-static void remote_pal_prefetch_visibility(void *v)
-{
-       s64 trans_type = (s64)v;
-       ia64_pal_prefetch_visibility(trans_type);
-}
-
-static struct ia64_pal_retval pal_prefetch_visibility(struct kvm_vcpu *vcpu)
-{
-       struct ia64_pal_retval result = {0, 0, 0, 0};
-       unsigned long in0, in1, in2, in3;
-       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-       result.status = ia64_pal_prefetch_visibility(in1);
-       if (result.status == 0) {
-               /* Must be performed on all remote processors
-               in the coherence domain. */
-               smp_call_function(remote_pal_prefetch_visibility,
-                                       (void *)in1, 1);
-               /* Unnecessary on remote processor for other vcpus!*/
-               result.status = 1;
-       }
-       return result;
-}
-
-static void remote_pal_mc_drain(void *v)
-{
-       ia64_pal_mc_drain();
-}
-
-static struct ia64_pal_retval pal_get_brand_info(struct kvm_vcpu *vcpu)
-{
-       struct ia64_pal_retval result = {0, 0, 0, 0};
-       unsigned long in0, in1, in2, in3;
-
-       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-
-       if (in1 == 0 && in2) {
-               char brand_info[128];
-               result.status = ia64_pal_get_brand_info(brand_info);
-               if (result.status == PAL_STATUS_SUCCESS)
-                       memcpy((void *)in2, brand_info, 128);
-       } else {
-               result.status = PAL_STATUS_REQUIRES_MEMORY;
-               printk(KERN_WARNING"Invalid parameters for "
-                                       "PAL call:0x%lx!\n", in0);
-       }
-
-       return result;
-}
-
-int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-
-       u64 gr28;
-       struct ia64_pal_retval result;
-       int ret = 1;
-
-       gr28 = kvm_get_pal_call_index(vcpu);
-       switch (gr28) {
-       case PAL_CACHE_FLUSH:
-               result = pal_cache_flush(vcpu);
-               break;
-       case PAL_MEM_ATTRIB:
-               result = pal_mem_attrib(vcpu);
-               break;
-       case PAL_CACHE_SUMMARY:
-               result = pal_cache_summary(vcpu);
-               break;
-       case PAL_PERF_MON_INFO:
-               result = pal_perf_mon_info(vcpu);
-               break;
-       case PAL_HALT_INFO:
-               result = pal_halt_info(vcpu);
-               break;
-       case PAL_HALT_LIGHT:
-       {
-               INIT_PAL_STATUS_SUCCESS(result);
-               prepare_for_halt(vcpu);
-               if (kvm_highest_pending_irq(vcpu) == -1)
-                       ret = kvm_emulate_halt(vcpu);
-       }
-               break;
-
-       case PAL_PREFETCH_VISIBILITY:
-               result = pal_prefetch_visibility(vcpu);
-               break;
-       case PAL_MC_DRAIN:
-               result.status = ia64_pal_mc_drain();
-               /* FIXME: All vcpus likely call PAL_MC_DRAIN.
-                  That causes the congestion. */
-               smp_call_function(remote_pal_mc_drain, NULL, 1);
-               break;
-
-       case PAL_FREQ_RATIOS:
-               result = pal_freq_ratios(vcpu);
-               break;
-
-       case PAL_FREQ_BASE:
-               result = pal_freq_base(vcpu);
-               break;
-
-       case PAL_LOGICAL_TO_PHYSICAL :
-               result = pal_logical_to_physica(vcpu);
-               break;
-
-       case PAL_VM_SUMMARY :
-               result = pal_vm_summary(vcpu);
-               break;
-
-       case PAL_VM_INFO :
-               result = pal_vm_info(vcpu);
-               break;
-       case PAL_PLATFORM_ADDR :
-               result = pal_platform_addr(vcpu);
-               break;
-       case PAL_CACHE_INFO:
-               result = pal_cache_info(vcpu);
-               break;
-       case PAL_PTCE_INFO:
-               INIT_PAL_STATUS_SUCCESS(result);
-               result.v1 = (1L << 32) | 1L;
-               break;
-       case PAL_REGISTER_INFO:
-               result = pal_register_info(vcpu);
-               break;
-       case PAL_VM_PAGE_SIZE:
-               result.status = ia64_pal_vm_page_size(&result.v0,
-                                                       &result.v1);
-               break;
-       case PAL_RSE_INFO:
-               result.status = ia64_pal_rse_info(&result.v0,
-                                       (pal_hints_u_t *)&result.v1);
-               break;
-       case PAL_PROC_GET_FEATURES:
-               result = pal_proc_get_features(vcpu);
-               break;
-       case PAL_DEBUG_INFO:
-               result.status = ia64_pal_debug_info(&result.v0,
-                                                       &result.v1);
-               break;
-       case PAL_VERSION:
-               result.status = ia64_pal_version(
-                               (pal_version_u_t *)&result.v0,
-                               (pal_version_u_t *)&result.v1);
-               break;
-       case PAL_FIXED_ADDR:
-               result.status = PAL_STATUS_SUCCESS;
-               result.v0 = vcpu->vcpu_id;
-               break;
-       case PAL_BRAND_INFO:
-               result = pal_get_brand_info(vcpu);
-               break;
-       case PAL_GET_PSTATE:
-       case PAL_CACHE_SHARED_INFO:
-               INIT_PAL_STATUS_UNIMPLEMENTED(result);
-               break;
-       default:
-               INIT_PAL_STATUS_UNIMPLEMENTED(result);
-               printk(KERN_WARNING"kvm: Unsupported pal call,"
-                                       " index:0x%lx\n", gr28);
-       }
-       set_pal_result(vcpu, result);
-       return ret;
-}
-
-static struct sal_ret_values sal_emulator(struct kvm *kvm,
-                               long index, unsigned long in1,
-                               unsigned long in2, unsigned long in3,
-                               unsigned long in4, unsigned long in5,
-                               unsigned long in6, unsigned long in7)
-{
-       unsigned long r9  = 0;
-       unsigned long r10 = 0;
-       long r11 = 0;
-       long status;
-
-       status = 0;
-       switch (index) {
-       case SAL_FREQ_BASE:
-               status = ia64_sal_freq_base(in1, &r9, &r10);
-               break;
-       case SAL_PCI_CONFIG_READ:
-               printk(KERN_WARNING"kvm: Not allowed to call here!"
-                       " SAL_PCI_CONFIG_READ\n");
-               break;
-       case SAL_PCI_CONFIG_WRITE:
-               printk(KERN_WARNING"kvm: Not allowed to call here!"
-                       " SAL_PCI_CONFIG_WRITE\n");
-               break;
-       case SAL_SET_VECTORS:
-               if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) {
-                       if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) {
-                               status = -2;
-                       } else {
-                               kvm->arch.rdv_sal_data.boot_ip = in2;
-                               kvm->arch.rdv_sal_data.boot_gp = in3;
-                       }
-                       printk("Rendvous called! iip:%lx\n\n", in2);
-               } else
-                       printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu."
-                                                       "ignored...\n", in1);
-               break;
-       case SAL_GET_STATE_INFO:
-               /* No more info.  */
-               status = -5;
-               r9 = 0;
-               break;
-       case SAL_GET_STATE_INFO_SIZE:
-               /* Return a dummy size.  */
-               status = 0;
-               r9 = 128;
-               break;
-       case SAL_CLEAR_STATE_INFO:
-               /* Noop.  */
-               break;
-       case SAL_MC_RENDEZ:
-               printk(KERN_WARNING
-                       "kvm: called SAL_MC_RENDEZ. ignored...\n");
-               break;
-       case SAL_MC_SET_PARAMS:
-               printk(KERN_WARNING
-                       "kvm: called  SAL_MC_SET_PARAMS.ignored!\n");
-               break;
-       case SAL_CACHE_FLUSH:
-               if (1) {
-                       /*Flush using SAL.
-                       This method is faster but has a side
-                       effect on other vcpu running on
-                       this cpu.  */
-                       status = ia64_sal_cache_flush(in1);
-               } else {
-                       /*Maybe need to implement the method
-                       without side effect!*/
-                       status = 0;
-               }
-               break;
-       case SAL_CACHE_INIT:
-               printk(KERN_WARNING
-                       "kvm: called SAL_CACHE_INIT.  ignored...\n");
-               break;
-       case SAL_UPDATE_PAL:
-               printk(KERN_WARNING
-                       "kvm: CALLED SAL_UPDATE_PAL.  ignored...\n");
-               break;
-       default:
-               printk(KERN_WARNING"kvm: called SAL_CALL with unknown index."
-                                               " index:%ld\n", index);
-               status = -1;
-               break;
-       }
-       return ((struct sal_ret_values) {status, r9, r10, r11});
-}
-
-static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1,
-               u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){
-
-       struct exit_ctl_data *p;
-
-       p = kvm_get_exit_data(vcpu);
-
-       if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-               *in0 = p->u.sal_data.in0;
-               *in1 = p->u.sal_data.in1;
-               *in2 = p->u.sal_data.in2;
-               *in3 = p->u.sal_data.in3;
-               *in4 = p->u.sal_data.in4;
-               *in5 = p->u.sal_data.in5;
-               *in6 = p->u.sal_data.in6;
-               *in7 = p->u.sal_data.in7;
-               return ;
-       }
-       *in0 = 0;
-}
-
-void kvm_sal_emul(struct kvm_vcpu *vcpu)
-{
-
-       struct sal_ret_values result;
-       u64 index, in1, in2, in3, in4, in5, in6, in7;
-
-       kvm_get_sal_call_data(vcpu, &index, &in1, &in2,
-                       &in3, &in4, &in5, &in6, &in7);
-       result = sal_emulator(vcpu->kvm, index, in1, in2, in3,
-                                       in4, in5, in6, in7);
-       set_sal_result(vcpu, result);
-}
diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c

deleted file mode 100644 (file)

index f1268b8..0000000
--- a/arch/ia64/kvm/kvm_lib.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * kvm_lib.c: Compile some libraries for kvm-intel module.
- *
- *     Just include kernel's library, and disable symbols export.
- *     Copyright (C) 2008, Intel Corporation.
- *     Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-#undef CONFIG_MODULES
-#include <linux/module.h>
-#undef CONFIG_KALLSYMS
-#undef EXPORT_SYMBOL
-#undef EXPORT_SYMBOL_GPL
-#define EXPORT_SYMBOL(sym)
-#define EXPORT_SYMBOL_GPL(sym)
-#include "../../../lib/vsprintf.c"
-#include "../../../lib/ctype.c"
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h

deleted file mode 100644 (file)

index b2bcaa2..0000000
--- a/arch/ia64/kvm/kvm_minstate.h
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- *  kvm_minstate.h: min save macros
- *  Copyright (c) 2007, Intel Corporation.
- *
- *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *  Xiantao Zhang (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-
-#include <asm/asmmacro.h>
-#include <asm/types.h>
-#include <asm/kregs.h>
-#include <asm/kvm_host.h>
-
-#include "asm-offsets.h"
-
-#define KVM_MINSTATE_START_SAVE_MIN                                            \
-       mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\
-       ;;                                                                      \
-       mov.m r28 = ar.rnat;                                                    \
-       addl r22 = VMM_RBS_OFFSET,r1;            /* compute base of RBS */      \
-       ;;                                                                      \
-       lfetch.fault.excl.nt1 [r22];                                            \
-       addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1;  \
-       mov r23 = ar.bspstore;                  /* save ar.bspstore */          \
-       ;;                                                                      \
-       mov ar.bspstore = r22;                          /* switch to kernel RBS */\
-       ;;                                                                      \
-       mov r18 = ar.bsp;                                                       \
-       mov ar.rsc = 0x3;     /* set eager mode, pl 0, little-endian, loadrs=0 */
-
-
-
-#define KVM_MINSTATE_END_SAVE_MIN                                              \
-       bsw.1;          /* switch back to bank 1 (must be last in insn group) */\
-       ;;
-
-
-#define PAL_VSA_SYNC_READ                                              \
-       /* begin to call pal vps sync_read */                           \
-{.mii;                                                                 \
-       add r25 = VMM_VPD_BASE_OFFSET, r21;                             \
-       nop 0x0;                                                        \
-       mov r24=ip;                                                     \
-       ;;                                                              \
-}                                                                      \
-{.mmb                                                                  \
-       add r24=0x20, r24;                                              \
-       ld8 r25 = [r25];      /* read vpd base */                       \
-       br.cond.sptk kvm_vps_sync_read;         /*call the service*/    \
-       ;;                                                              \
-};                                                                     \
-
-
-#define KVM_MINSTATE_GET_CURRENT(reg)   mov reg=r21
-
-/*
- * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
- * the minimum state necessary that allows us to turn psr.ic back
- * on.
- *
- * Assumed state upon entry:
- *  psr.ic: off
- *  r31:       contains saved predicates (pr)
- *
- * Upon exit, the state is as follows:
- *  psr.ic: off
- *   r2 = points to &pt_regs.r16
- *   r8 = contents of ar.ccv
- *   r9 = contents of ar.csd
- *  r10 = contents of ar.ssd
- *  r11 = FPSR_DEFAULT
- *  r12 = kernel sp (kernel virtual address)
- *  r13 = points to current task_struct (kernel virtual address)
- *  p15 = TRUE if psr.i is set in cr.ipsr
- *  predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
- *       preserved
- *
- * Note that psr.ic is NOT turned on by this macro.  This is so that
- * we can pass interruption state as arguments to a handler.
- */
-
-
-#define PT(f) (VMM_PT_REGS_##f##_OFFSET)
-
-#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)                  \
-       KVM_MINSTATE_GET_CURRENT(r16);  /* M (or M;;I) */       \
-       mov r27 = ar.rsc;         /* M */                       \
-       mov r20 = r1;         /* A */                           \
-       mov r25 = ar.unat;        /* M */                       \
-       mov r29 = cr.ipsr;        /* M */                       \
-       mov r26 = ar.pfs;         /* I */                       \
-       mov r18 = cr.isr;                                       \
-       COVER;              /* B;; (or nothing) */              \
-       ;;                                                      \
-       tbit.z p0,p15 = r29,IA64_PSR_I_BIT;                     \
-       mov r1 = r16;                                           \
-/*     mov r21=r16;    */                                      \
-       /* switch from user to kernel RBS: */                   \
-       ;;                                                      \
-       invala;             /* M */                             \
-       SAVE_IFS;                                               \
-       ;;                                                      \
-       KVM_MINSTATE_START_SAVE_MIN                             \
-       adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */    \
-       adds r16 = PT(CR_IPSR),r1;                              \
-       ;;                                                      \
-       lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;             \
-       st8 [r16] = r29;      /* save cr.ipsr */                \
-       ;;                                                      \
-       lfetch.fault.excl.nt1 [r17];                            \
-       tbit.nz p15,p0 = r29,IA64_PSR_I_BIT;                    \
-       mov r29 = b0                                            \
-       ;;                                                      \
-       adds r16 = PT(R8),r1; /* initialize first base pointer */\
-       adds r17 = PT(R9),r1; /* initialize second base pointer */\
-       ;;                                                      \
-.mem.offset 0,0; st8.spill [r16] = r8,16;                      \
-.mem.offset 8,0; st8.spill [r17] = r9,16;                      \
-       ;;                                                      \
-.mem.offset 0,0; st8.spill [r16] = r10,24;                     \
-.mem.offset 8,0; st8.spill [r17] = r11,24;                     \
-       ;;                                                      \
-       mov r9 = cr.iip;         /* M */                        \
-       mov r10 = ar.fpsr;        /* M */                       \
-       ;;                                                      \
-       st8 [r16] = r9,16;    /* save cr.iip */                 \
-       st8 [r17] = r30,16;   /* save cr.ifs */                 \
-       sub r18 = r18,r22;    /* r18=RSE.ndirty*8 */            \
-       ;;                                                      \
-       st8 [r16] = r25,16;   /* save ar.unat */                \
-       st8 [r17] = r26,16;    /* save ar.pfs */                \
-       shl r18 = r18,16;     /* calu ar.rsc used for "loadrs" */\
-       ;;                                                      \
-       st8 [r16] = r27,16;   /* save ar.rsc */                 \
-       st8 [r17] = r28,16;   /* save ar.rnat */                \
-       ;;          /* avoid RAW on r16 & r17 */                \
-       st8 [r16] = r23,16;   /* save ar.bspstore */            \
-       st8 [r17] = r31,16;   /* save predicates */             \
-       ;;                                                      \
-       st8 [r16] = r29,16;   /* save b0 */                     \
-       st8 [r17] = r18,16;   /* save ar.rsc value for "loadrs" */\
-       ;;                                                      \
-.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */  \
-.mem.offset 8,0; st8.spill [r17] = r12,16;                     \
-       adds r12 = -16,r1;    /* switch to kernel memory stack */  \
-       ;;                                                      \
-.mem.offset 0,0; st8.spill [r16] = r13,16;                     \
-.mem.offset 8,0; st8.spill [r17] = r10,16;     /* save ar.fpsr */\
-       mov r13 = r21;   /* establish `current' */              \
-       ;;                                                      \
-.mem.offset 0,0; st8.spill [r16] = r15,16;                     \
-.mem.offset 8,0; st8.spill [r17] = r14,16;                     \
-       ;;                                                      \
-.mem.offset 0,0; st8.spill [r16] = r2,16;                      \
-.mem.offset 8,0; st8.spill [r17] = r3,16;                      \
-       adds r2 = VMM_PT_REGS_R16_OFFSET,r1;                    \
-        ;;                                                     \
-       adds r16 = VMM_VCPU_IIPA_OFFSET,r13;                    \
-       adds r17 = VMM_VCPU_ISR_OFFSET,r13;                     \
-       mov r26 = cr.iipa;                                      \
-       mov r27 = cr.isr;                                       \
-       ;;                                                      \
-       st8 [r16] = r26;                                        \
-       st8 [r17] = r27;                                        \
-       ;;                                                      \
-       EXTRA;                                                  \
-       mov r8 = ar.ccv;                                        \
-       mov r9 = ar.csd;                                        \
-       mov r10 = ar.ssd;                                       \
-       movl r11 = FPSR_DEFAULT;   /* L-unit */                 \
-       adds r17 = VMM_VCPU_GP_OFFSET,r13;                      \
-       ;;                                                      \
-       ld8 r1 = [r17];/* establish kernel global pointer */    \
-       ;;                                                      \
-       PAL_VSA_SYNC_READ                                       \
-       KVM_MINSTATE_END_SAVE_MIN
-
-/*
- * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
- *
- * Assumed state upon entry:
- *  psr.ic: on
- *  r2: points to &pt_regs.f6
- *  r3: points to &pt_regs.f7
- *  r8: contents of ar.ccv
- *  r9: contents of ar.csd
- *  r10:       contents of ar.ssd
- *  r11:       FPSR_DEFAULT
- *
- * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
- */
-#define KVM_SAVE_REST                          \
-.mem.offset 0,0; st8.spill [r2] = r16,16;      \
-.mem.offset 8,0; st8.spill [r3] = r17,16;      \
-       ;;                              \
-.mem.offset 0,0; st8.spill [r2] = r18,16;      \
-.mem.offset 8,0; st8.spill [r3] = r19,16;      \
-       ;;                              \
-.mem.offset 0,0; st8.spill [r2] = r20,16;      \
-.mem.offset 8,0; st8.spill [r3] = r21,16;      \
-       mov r18=b6;                     \
-       ;;                              \
-.mem.offset 0,0; st8.spill [r2] = r22,16;      \
-.mem.offset 8,0; st8.spill [r3] = r23,16;      \
-       mov r19 = b7;                           \
-       ;;                                      \
-.mem.offset 0,0; st8.spill [r2] = r24,16;      \
-.mem.offset 8,0; st8.spill [r3] = r25,16;      \
-       ;;                                      \
-.mem.offset 0,0; st8.spill [r2] = r26,16;      \
-.mem.offset 8,0; st8.spill [r3] = r27,16;      \
-       ;;                                      \
-.mem.offset 0,0; st8.spill [r2] = r28,16;      \
-.mem.offset 8,0; st8.spill [r3] = r29,16;      \
-       ;;                                      \
-.mem.offset 0,0; st8.spill [r2] = r30,16;      \
-.mem.offset 8,0; st8.spill [r3] = r31,32;      \
-       ;;                                      \
-       mov ar.fpsr = r11;                      \
-       st8 [r2] = r8,8;                        \
-       adds r24 = PT(B6)-PT(F7),r3;            \
-       adds r25 = PT(B7)-PT(F7),r3;            \
-       ;;                                      \
-       st8 [r24] = r18,16;       /* b6 */      \
-       st8 [r25] = r19,16;       /* b7 */      \
-       adds r2 = PT(R4)-PT(F6),r2;             \
-       adds r3 = PT(R5)-PT(F7),r3;             \
-       ;;                                      \
-       st8 [r24] = r9; /* ar.csd */            \
-       st8 [r25] = r10;        /* ar.ssd */    \
-       ;;                                      \
-       mov r18 = ar.unat;                      \
-       adds r19 = PT(EML_UNAT)-PT(R4),r2;      \
-       ;;                                      \
-       st8 [r19] = r18; /* eml_unat */         \
-
-
-#define KVM_SAVE_EXTRA                         \
-.mem.offset 0,0; st8.spill [r2] = r4,16;       \
-.mem.offset 8,0; st8.spill [r3] = r5,16;       \
-       ;;                                      \
-.mem.offset 0,0; st8.spill [r2] = r6,16;       \
-.mem.offset 8,0; st8.spill [r3] = r7;          \
-       ;;                                      \
-       mov r26 = ar.unat;                      \
-       ;;                                      \
-       st8 [r2] = r26;/* eml_unat */           \
-
-#define KVM_SAVE_MIN_WITH_COVER                KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,)
-#define KVM_SAVE_MIN_WITH_COVER_R19    KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19)
-#define KVM_SAVE_MIN                   KVM_DO_SAVE_MIN(     , mov r30 = r0, )
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h

deleted file mode 100644 (file)

index c5f92a9..0000000
--- a/arch/ia64/kvm/lapic.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef __KVM_IA64_LAPIC_H
-#define __KVM_IA64_LAPIC_H
-
-#include <linux/kvm_host.h>
-
-/*
- * vlsapic
- */
-struct kvm_lapic{
-       struct kvm_vcpu *vcpu;
-       uint64_t insvc[4];
-       uint64_t vhpi;
-       uint8_t xtp;
-       uint8_t pal_init_pending;
-       uint8_t pad[2];
-};
-
-int kvm_create_lapic(struct kvm_vcpu *vcpu);
-void kvm_free_lapic(struct kvm_vcpu *vcpu);
-
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-               int short_hand, int dest, int dest_mode);
-int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
-#define kvm_apic_present(x) (true)
-#define kvm_lapic_enabled(x) (true)
-
-#endif
diff --git a/arch/ia64/kvm/memcpy.S b/arch/ia64/kvm/memcpy.S

deleted file mode 100644 (file)

index c04cdbe..0000000
--- a/arch/ia64/kvm/memcpy.S
+++ /dev/null
@@ -1 +0,0 @@
-#include "../lib/memcpy.S"
diff --git a/arch/ia64/kvm/memset.S b/arch/ia64/kvm/memset.S

deleted file mode 100644 (file)

index 83c3066..0000000
--- a/arch/ia64/kvm/memset.S
+++ /dev/null
@@ -1 +0,0 @@
-#include "../lib/memset.S"
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h

deleted file mode 100644 (file)

index dd979e0..0000000
--- a/arch/ia64/kvm/misc.h
+++ /dev/null
@@ -1,94 +0,0 @@
-#ifndef __KVM_IA64_MISC_H
-#define __KVM_IA64_MISC_H
-
-#include <linux/kvm_host.h>
-/*
- * misc.h
- *     Copyright (C) 2007, Intel Corporation.
- *     Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-/*
- *Return p2m base address at host side!
- */
-static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
-{
-       return (uint64_t *)(kvm->arch.vm_base +
-                               offsetof(struct kvm_vm_data, kvm_p2m));
-}
-
-static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
-               u64 paddr, u64 mem_flags)
-{
-       uint64_t *pmt_base = kvm_host_get_pmt(kvm);
-       unsigned long pte;
-
-       pte = PAGE_ALIGN(paddr) | mem_flags;
-       pmt_base[gfn] = pte;
-}
-
-/*Function for translating host address to guest address*/
-
-static inline void *to_guest(struct kvm *kvm, void *addr)
-{
-       return (void *)((unsigned long)(addr) - kvm->arch.vm_base +
-                       KVM_VM_DATA_BASE);
-}
-
-/*Function for translating guest address to host address*/
-
-static inline void *to_host(struct kvm *kvm, void *addr)
-{
-       return (void *)((unsigned long)addr - KVM_VM_DATA_BASE
-                       + kvm->arch.vm_base);
-}
-
-/* Get host context of the vcpu */
-static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu)
-{
-       union context *ctx = &vcpu->arch.host;
-       return to_guest(vcpu->kvm, ctx);
-}
-
-/* Get guest context of the vcpu */
-static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu)
-{
-       union context *ctx = &vcpu->arch.guest;
-       return  to_guest(vcpu->kvm, ctx);
-}
-
-/* kvm get exit data from gvmm! */
-static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu)
-{
-       return &vcpu->arch.exit_data;
-}
-
-/*kvm get vcpu ioreq for kvm module!*/
-static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu)
-{
-       struct exit_ctl_data *p_ctl_data;
-
-       if (vcpu) {
-               p_ctl_data = kvm_get_exit_data(vcpu);
-               if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION)
-                       return &p_ctl_data->u.ioreq;
-       }
-
-       return NULL;
-}
-
-#endif
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c

deleted file mode 100644 (file)

index f1e17d3..0000000
--- a/arch/ia64/kvm/mmio.c
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- * mmio.c: MMIO emulation components.
- * Copyright (c) 2004, Intel Corporation.
- *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
- *  Kun Tian (Kevin Tian) (Kevin.tian@intel.com)
- *
- * Copyright (c) 2007 Intel Corporation  KVM support.
- * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
- * Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/kvm_host.h>
-
-#include "vcpu.h"
-
-static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val)
-{
-       VLSAPIC_XTP(v) = val;
-}
-
-/*
- * LSAPIC OFFSET
- */
-#define PIB_LOW_HALF(ofst)     !(ofst & (1 << 20))
-#define PIB_OFST_INTA          0x1E0000
-#define PIB_OFST_XTP           0x1E0008
-
-/*
- * execute write IPI op.
- */
-static void vlsapic_write_ipi(struct kvm_vcpu *vcpu,
-                                       uint64_t addr, uint64_t data)
-{
-       struct exit_ctl_data *p = &current_vcpu->arch.exit_data;
-       unsigned long psr;
-
-       local_irq_save(psr);
-
-       p->exit_reason = EXIT_REASON_IPI;
-       p->u.ipi_data.addr.val = addr;
-       p->u.ipi_data.data.val = data;
-       vmm_transition(current_vcpu);
-
-       local_irq_restore(psr);
-
-}
-
-void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
-                       unsigned long length, unsigned long val)
-{
-       addr &= (PIB_SIZE - 1);
-
-       switch (addr) {
-       case PIB_OFST_INTA:
-               panic_vm(v, "Undefined write on PIB INTA\n");
-               break;
-       case PIB_OFST_XTP:
-               if (length == 1) {
-                       vlsapic_write_xtp(v, val);
-               } else {
-                       panic_vm(v, "Undefined write on PIB XTP\n");
-               }
-               break;
-       default:
-               if (PIB_LOW_HALF(addr)) {
-                       /*Lower half */
-                       if (length != 8)
-                               panic_vm(v, "Can't LHF write with size %ld!\n",
-                                               length);
-                       else
-                               vlsapic_write_ipi(v, addr, val);
-               } else {   /*Upper half */
-                       panic_vm(v, "IPI-UHF write %lx\n", addr);
-               }
-               break;
-       }
-}
-
-unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
-               unsigned long length)
-{
-       uint64_t result = 0;
-
-       addr &= (PIB_SIZE - 1);
-
-       switch (addr) {
-       case PIB_OFST_INTA:
-               if (length == 1) /* 1 byte load */
-                       ; /* There is no i8259, there is no INTA access*/
-               else
-                       panic_vm(v, "Undefined read on PIB INTA\n");
-
-               break;
-       case PIB_OFST_XTP:
-               if (length == 1) {
-                       result = VLSAPIC_XTP(v);
-               } else {
-                       panic_vm(v, "Undefined read on PIB XTP\n");
-               }
-               break;
-       default:
-               panic_vm(v, "Undefined addr access for lsapic!\n");
-               break;
-       }
-       return result;
-}
-
-static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
-                                       u16 s, int ma, int dir)
-{
-       unsigned long iot;
-       struct exit_ctl_data *p = &vcpu->arch.exit_data;
-       unsigned long psr;
-
-       iot = __gpfn_is_io(src_pa >> PAGE_SHIFT);
-
-       local_irq_save(psr);
-
-       /*Intercept the access for PIB range*/
-       if (iot == GPFN_PIB) {
-               if (!dir)
-                       lsapic_write(vcpu, src_pa, s, *dest);
-               else
-                       *dest = lsapic_read(vcpu, src_pa, s);
-               goto out;
-       }
-       p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION;
-       p->u.ioreq.addr = src_pa;
-       p->u.ioreq.size = s;
-       p->u.ioreq.dir = dir;
-       if (dir == IOREQ_WRITE)
-               p->u.ioreq.data = *dest;
-       p->u.ioreq.state = STATE_IOREQ_READY;
-       vmm_transition(vcpu);
-
-       if (p->u.ioreq.state == STATE_IORESP_READY) {
-               if (dir == IOREQ_READ)
-                       /* it's necessary to ensure zero extending */
-                       *dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
-       } else
-               panic_vm(vcpu, "Unhandled mmio access returned!\n");
-out:
-       local_irq_restore(psr);
-       return ;
-}
-
-/*
-   dir 1: read 0:write
-   inst_type 0:integer 1:floating point
- */
-#define SL_INTEGER     0       /* store/load interger*/
-#define SL_FLOATING    1       /* store/load floating*/
-
-void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
-{
-       struct kvm_pt_regs *regs;
-       IA64_BUNDLE bundle;
-       int slot, dir = 0;
-       int inst_type = -1;
-       u16 size = 0;
-       u64 data, slot1a, slot1b, temp, update_reg;
-       s32 imm;
-       INST64 inst;
-
-       regs = vcpu_regs(vcpu);
-
-       if (fetch_code(vcpu, regs->cr_iip, &bundle)) {
-               /* if fetch code fail, return and try again */
-               return;
-       }
-       slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
-       if (!slot)
-               inst.inst = bundle.slot0;
-       else if (slot == 1) {
-               slot1a = bundle.slot1a;
-               slot1b = bundle.slot1b;
-               inst.inst = slot1a + (slot1b << 18);
-       } else if (slot == 2)
-               inst.inst = bundle.slot2;
-
-       /* Integer Load/Store */
-       if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) {
-               inst_type = SL_INTEGER;
-               size = (inst.M1.x6 & 0x3);
-               if ((inst.M1.x6 >> 2) > 0xb) {
-                       /*write*/
-                       dir = IOREQ_WRITE;
-                       data = vcpu_get_gr(vcpu, inst.M4.r2);
-               } else if ((inst.M1.x6 >> 2) < 0xb) {
-                       /*read*/
-                       dir = IOREQ_READ;
-               }
-       } else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) {
-               /* Integer Load + Reg update */
-               inst_type = SL_INTEGER;
-               dir = IOREQ_READ;
-               size = (inst.M2.x6 & 0x3);
-               temp = vcpu_get_gr(vcpu, inst.M2.r3);
-               update_reg = vcpu_get_gr(vcpu, inst.M2.r2);
-               temp += update_reg;
-               vcpu_set_gr(vcpu, inst.M2.r3, temp, 0);
-       } else if (inst.M3.major == 5) {
-               /*Integer Load/Store + Imm update*/
-               inst_type = SL_INTEGER;
-               size = (inst.M3.x6&0x3);
-               if ((inst.M5.x6 >> 2) > 0xb) {
-                       /*write*/
-                       dir = IOREQ_WRITE;
-                       data = vcpu_get_gr(vcpu, inst.M5.r2);
-                       temp = vcpu_get_gr(vcpu, inst.M5.r3);
-                       imm = (inst.M5.s << 31) | (inst.M5.i << 30) |
-                               (inst.M5.imm7 << 23);
-                       temp += imm >> 23;
-                       vcpu_set_gr(vcpu, inst.M5.r3, temp, 0);
-
-               } else if ((inst.M3.x6 >> 2) < 0xb) {
-                       /*read*/
-                       dir = IOREQ_READ;
-                       temp = vcpu_get_gr(vcpu, inst.M3.r3);
-                       imm = (inst.M3.s << 31) | (inst.M3.i << 30) |
-                               (inst.M3.imm7 << 23);
-                       temp += imm >> 23;
-                       vcpu_set_gr(vcpu, inst.M3.r3, temp, 0);
-
-               }
-       } else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B
-                               && inst.M9.m == 0 && inst.M9.x == 0) {
-               /* Floating-point spill*/
-               struct ia64_fpreg v;
-
-               inst_type = SL_FLOATING;
-               dir = IOREQ_WRITE;
-               vcpu_get_fpreg(vcpu, inst.M9.f2, &v);
-               /* Write high word. FIXME: this is a kludge!  */
-               v.u.bits[1] &= 0x3ffff;
-               mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], 8,
-                           ma, IOREQ_WRITE);
-               data = v.u.bits[0];
-               size = 3;
-       } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) {
-               /* Floating-point spill + Imm update */
-               struct ia64_fpreg v;
-
-               inst_type = SL_FLOATING;
-               dir = IOREQ_WRITE;
-               vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
-               temp = vcpu_get_gr(vcpu, inst.M10.r3);
-               imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
-                       (inst.M10.imm7 << 23);
-               temp += imm >> 23;
-               vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
-
-               /* Write high word.FIXME: this is a kludge!  */
-               v.u.bits[1] &= 0x3ffff;
-               mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1],
-                           8, ma, IOREQ_WRITE);
-               data = v.u.bits[0];
-               size = 3;
-       } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) {
-               /* Floating-point stf8 + Imm update */
-               struct ia64_fpreg v;
-               inst_type = SL_FLOATING;
-               dir = IOREQ_WRITE;
-               size = 3;
-               vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
-               data = v.u.bits[0]; /* Significand.  */
-               temp = vcpu_get_gr(vcpu, inst.M10.r3);
-               imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
-                       (inst.M10.imm7 << 23);
-               temp += imm >> 23;
-               vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
-       } else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c
-                       && inst.M15.x6 <= 0x2f) {
-               temp = vcpu_get_gr(vcpu, inst.M15.r3);
-               imm = (inst.M15.s << 31) | (inst.M15.i << 30) |
-                       (inst.M15.imm7 << 23);
-               temp += imm >> 23;
-               vcpu_set_gr(vcpu, inst.M15.r3, temp, 0);
-
-               vcpu_increment_iip(vcpu);
-               return;
-       } else if (inst.M12.major == 6 && inst.M12.m == 1
-                       && inst.M12.x == 1 && inst.M12.x6 == 1) {
-               /* Floating-point Load Pair + Imm ldfp8 M12*/
-               struct ia64_fpreg v;
-
-               inst_type = SL_FLOATING;
-               dir = IOREQ_READ;
-               size = 8;     /*ldfd*/
-               mmio_access(vcpu, padr, &data, size, ma, dir);
-               v.u.bits[0] = data;
-               v.u.bits[1] = 0x1003E;
-               vcpu_set_fpreg(vcpu, inst.M12.f1, &v);
-               padr += 8;
-               mmio_access(vcpu, padr, &data, size, ma, dir);
-               v.u.bits[0] = data;
-               v.u.bits[1] = 0x1003E;
-               vcpu_set_fpreg(vcpu, inst.M12.f2, &v);
-               padr += 8;
-               vcpu_set_gr(vcpu, inst.M12.r3, padr, 0);
-               vcpu_increment_iip(vcpu);
-               return;
-       } else {
-               inst_type = -1;
-               panic_vm(vcpu, "Unsupported MMIO access instruction! "
-                               "Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
-                               bundle.i64[0], bundle.i64[1]);
-       }
-
-       size = 1 << size;
-       if (dir == IOREQ_WRITE) {
-               mmio_access(vcpu, padr, &data, size, ma, dir);
-       } else {
-               mmio_access(vcpu, padr, &data, size, ma, dir);
-               if (inst_type == SL_INTEGER)
-                       vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
-               else
-                       panic_vm(vcpu, "Unsupported instruction type!\n");
-
-       }
-       vcpu_increment_iip(vcpu);
-}
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S

deleted file mode 100644 (file)

index f793be3..0000000
--- a/arch/ia64/kvm/optvfault.S
+++ /dev/null
@@ -1,1090 +0,0 @@
-/*
- * arch/ia64/kvm/optvfault.S
- * optimize virtualization fault handler
- *
- * Copyright (C) 2006 Intel Co
- *     Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
- * Copyright (C) 2008 Intel Co
- *      Add the support for Tukwila processors.
- *     Xiantao Zhang <xiantao.zhang@intel.com>
- */
-
-#include <asm/asmmacro.h>
-#include <asm/processor.h>
-#include <asm/kvm_host.h>
-
-#include "vti.h"
-#include "asm-offsets.h"
-
-#define ACCE_MOV_FROM_AR
-#define ACCE_MOV_FROM_RR
-#define ACCE_MOV_TO_RR
-#define ACCE_RSM
-#define ACCE_SSM
-#define ACCE_MOV_TO_PSR
-#define ACCE_THASH
-
-#define VMX_VPS_SYNC_READ                      \
-       add r16=VMM_VPD_BASE_OFFSET,r21;        \
-       mov r17 = b0;                           \
-       mov r18 = r24;                          \
-       mov r19 = r25;                          \
-       mov r20 = r31;                          \
-       ;;                                      \
-{.mii;                                         \
-       ld8 r16 = [r16];                        \
-       nop 0x0;                                \
-       mov r24 = ip;                           \
-       ;;                                      \
-};                                             \
-{.mmb;                                         \
-       add r24=0x20, r24;                      \
-       mov r25 =r16;                           \
-       br.sptk.many kvm_vps_sync_read;         \
-};                                             \
-       mov b0 = r17;                           \
-       mov r24 = r18;                          \
-       mov r25 = r19;                          \
-       mov r31 = r20
-
-ENTRY(kvm_vps_entry)
-       adds r29 = VMM_VCPU_VSA_BASE_OFFSET,r21
-       ;;
-       ld8 r29 = [r29]
-       ;;
-       add r29 = r29, r30
-       ;;
-       mov b0 = r29
-       br.sptk.many b0
-END(kvm_vps_entry)
-
-/*
- *     Inputs:
- *     r24 : return address
- *     r25 : vpd
- *     r29 : scratch
- *
- */
-GLOBAL_ENTRY(kvm_vps_sync_read)
-       movl r30 = PAL_VPS_SYNC_READ
-       ;;
-       br.sptk.many kvm_vps_entry
-END(kvm_vps_sync_read)
-
-/*
- *     Inputs:
- *     r24 : return address
- *     r25 : vpd
- *     r29 : scratch
- *
- */
-GLOBAL_ENTRY(kvm_vps_sync_write)
-       movl r30 = PAL_VPS_SYNC_WRITE
-       ;;
-       br.sptk.many kvm_vps_entry
-END(kvm_vps_sync_write)
-
-/*
- *     Inputs:
- *     r23 : pr
- *     r24 : guest b0
- *     r25 : vpd
- *
- */
-GLOBAL_ENTRY(kvm_vps_resume_normal)
-       movl r30 = PAL_VPS_RESUME_NORMAL
-       ;;
-       mov pr=r23,-2
-       br.sptk.many kvm_vps_entry
-END(kvm_vps_resume_normal)
-
-/*
- *     Inputs:
- *     r23 : pr
- *     r24 : guest b0
- *     r25 : vpd
- *     r17 : isr
- */
-GLOBAL_ENTRY(kvm_vps_resume_handler)
-       movl r30 = PAL_VPS_RESUME_HANDLER
-       ;;
-       ld8 r26=[r25]
-       shr r17=r17,IA64_ISR_IR_BIT
-       ;;
-       dep r26=r17,r26,63,1   // bit 63 of r26 indicate whether enable CFLE
-       mov pr=r23,-2
-       br.sptk.many kvm_vps_entry
-END(kvm_vps_resume_handler)
-
-//mov r1=ar3
-GLOBAL_ENTRY(kvm_asm_mov_from_ar)
-#ifndef ACCE_MOV_FROM_AR
-       br.many kvm_virtualization_fault_back
-#endif
-       add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
-       add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
-       extr.u r17=r25,6,7
-       ;;
-       ld8 r18=[r18]
-       mov r19=ar.itc
-       mov r24=b0
-       ;;
-       add r19=r19,r18
-       addl r20=@gprel(asm_mov_to_reg),gp
-       ;;
-       st8 [r16] = r19
-       adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
-       shladd r17=r17,4,r20
-       ;;
-       mov b0=r17
-       br.sptk.few b0
-       ;;
-END(kvm_asm_mov_from_ar)
-
-/*
- * Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC
- * clock as it's source for emulating the ITC. This version will be
- * copied on top of the original version if the host is determined to
- * be an SN2.
- */
-GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2)
-       add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
-       movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT))
-
-       add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
-       extr.u r17=r25,6,7
-       mov r24=b0
-       ;;
-       ld8 r18=[r18]
-       ld8 r19=[r19]
-       addl r20=@gprel(asm_mov_to_reg),gp
-       ;;
-       add r19=r19,r18
-       shladd r17=r17,4,r20
-       ;;
-       adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
-       st8 [r16] = r19
-       mov b0=r17
-       br.sptk.few b0
-       ;;
-END(kvm_asm_mov_from_ar_sn2)
-
-
-
-// mov r1=rr[r3]
-GLOBAL_ENTRY(kvm_asm_mov_from_rr)
-#ifndef ACCE_MOV_FROM_RR
-       br.many kvm_virtualization_fault_back
-#endif
-       extr.u r16=r25,20,7
-       extr.u r17=r25,6,7
-       addl r20=@gprel(asm_mov_from_reg),gp
-       ;;
-       adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20
-       shladd r16=r16,4,r20
-       mov r24=b0
-       ;;
-       add r27=VMM_VCPU_VRR0_OFFSET,r21
-       mov b0=r16
-       br.many b0
-       ;;
-kvm_asm_mov_from_rr_back_1:
-       adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
-       adds r22=asm_mov_to_reg-asm_mov_from_reg,r20
-       shr.u r26=r19,61
-       ;;
-       shladd r17=r17,4,r22
-       shladd r27=r26,3,r27
-       ;;
-       ld8 r19=[r27]
-       mov b0=r17
-       br.many b0
-END(kvm_asm_mov_from_rr)
-
-
-// mov rr[r3]=r2
-GLOBAL_ENTRY(kvm_asm_mov_to_rr)
-#ifndef ACCE_MOV_TO_RR
-       br.many kvm_virtualization_fault_back
-#endif
-       extr.u r16=r25,20,7
-       extr.u r17=r25,13,7
-       addl r20=@gprel(asm_mov_from_reg),gp
-       ;;
-       adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20
-       shladd r16=r16,4,r20
-       mov r22=b0
-       ;;
-       add r27=VMM_VCPU_VRR0_OFFSET,r21
-       mov b0=r16
-       br.many b0
-       ;;
-kvm_asm_mov_to_rr_back_1:
-       adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20
-       shr.u r23=r19,61
-       shladd r17=r17,4,r20
-       ;;
-       //if rr6, go back
-       cmp.eq p6,p0=6,r23
-       mov b0=r22
-       (p6) br.cond.dpnt.many kvm_virtualization_fault_back
-       ;;
-       mov r28=r19
-       mov b0=r17
-       br.many b0
-kvm_asm_mov_to_rr_back_2:
-       adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
-       shladd r27=r23,3,r27
-       ;; // vrr.rid<<4 |0xe
-       st8 [r27]=r19
-       mov b0=r30
-       ;;
-       extr.u r16=r19,8,26
-       extr.u r18 =r19,2,6
-       mov r17 =0xe
-       ;;
-       shladd r16 = r16, 4, r17
-       extr.u r19 =r19,0,8
-       ;;
-       shl r16 = r16,8
-       ;;
-       add r19 = r19, r16
-       ;; //set ve 1
-       dep r19=-1,r19,0,1
-       cmp.lt p6,p0=14,r18
-       ;;
-       (p6) mov r18=14
-       ;;
-       (p6) dep r19=r18,r19,2,6
-       ;;
-       cmp.eq p6,p0=0,r23
-       ;;
-       cmp.eq.or p6,p0=4,r23
-       ;;
-       adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-       (p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
-       ;;
-       ld4 r16=[r16]
-       cmp.eq p7,p0=r0,r0
-       (p6) shladd r17=r23,1,r17
-       ;;
-       (p6) st8 [r17]=r19
-       (p6) tbit.nz p6,p7=r16,0
-       ;;
-       (p7) mov rr[r28]=r19
-       mov r24=r22
-       br.many b0
-END(kvm_asm_mov_to_rr)
-
-
-//rsm
-GLOBAL_ENTRY(kvm_asm_rsm)
-#ifndef ACCE_RSM
-       br.many kvm_virtualization_fault_back
-#endif
-       VMX_VPS_SYNC_READ
-       ;;
-       extr.u r26=r25,6,21
-       extr.u r27=r25,31,2
-       ;;
-       extr.u r28=r25,36,1
-       dep r26=r27,r26,21,2
-       ;;
-       add r17=VPD_VPSR_START_OFFSET,r16
-       add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-       //r26 is imm24
-       dep r26=r28,r26,23,1
-       ;;
-       ld8 r18=[r17]
-       movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI
-       ld4 r23=[r22]
-       sub r27=-1,r26
-       mov r24=b0
-       ;;
-       mov r20=cr.ipsr
-       or r28=r27,r28
-       and r19=r18,r27
-       ;;
-       st8 [r17]=r19
-       and r20=r20,r28
-       /* Comment it out due to short of fp lazy alorgithm support
-       adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
-       ;;
-       ld8 r27=[r27]
-       ;;
-       tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT
-       ;;
-       (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
-       */
-       ;;
-       mov cr.ipsr=r20
-       tbit.nz p6,p0=r23,0
-       ;;
-       tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT
-       (p6) br.dptk kvm_resume_to_guest_with_sync
-       ;;
-       add r26=VMM_VCPU_META_RR0_OFFSET,r21
-       add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
-       dep r23=-1,r23,0,1
-       ;;
-       ld8 r26=[r26]
-       ld8 r27=[r27]
-       st4 [r22]=r23
-       dep.z r28=4,61,3
-       ;;
-       mov rr[r0]=r26
-       ;;
-       mov rr[r28]=r27
-       ;;
-       srlz.d
-       br.many kvm_resume_to_guest_with_sync
-END(kvm_asm_rsm)
-
-
-//ssm
-GLOBAL_ENTRY(kvm_asm_ssm)
-#ifndef ACCE_SSM
-       br.many kvm_virtualization_fault_back
-#endif
-       VMX_VPS_SYNC_READ
-       ;;
-       extr.u r26=r25,6,21
-       extr.u r27=r25,31,2
-       ;;
-       extr.u r28=r25,36,1
-       dep r26=r27,r26,21,2
-       ;;  //r26 is imm24
-       add r27=VPD_VPSR_START_OFFSET,r16
-       dep r26=r28,r26,23,1
-       ;;  //r19 vpsr
-       ld8 r29=[r27]
-       mov r24=b0
-       ;;
-       add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-       mov r20=cr.ipsr
-       or r19=r29,r26
-       ;;
-       ld4 r23=[r22]
-       st8 [r27]=r19
-       or r20=r20,r26
-       ;;
-       mov cr.ipsr=r20
-       movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
-       ;;
-       and r19=r28,r19
-       tbit.z p6,p0=r23,0
-       ;;
-       cmp.ne.or p6,p0=r28,r19
-       (p6) br.dptk kvm_asm_ssm_1
-       ;;
-       add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
-       add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
-       dep r23=0,r23,0,1
-       ;;
-       ld8 r26=[r26]
-       ld8 r27=[r27]
-       st4 [r22]=r23
-       dep.z r28=4,61,3
-       ;;
-       mov rr[r0]=r26
-       ;;
-       mov rr[r28]=r27
-       ;;
-       srlz.d
-       ;;
-kvm_asm_ssm_1:
-       tbit.nz p6,p0=r29,IA64_PSR_I_BIT
-       ;;
-       tbit.z.or p6,p0=r19,IA64_PSR_I_BIT
-       (p6) br.dptk kvm_resume_to_guest_with_sync
-       ;;
-       add r29=VPD_VTPR_START_OFFSET,r16
-       add r30=VPD_VHPI_START_OFFSET,r16
-       ;;
-       ld8 r29=[r29]
-       ld8 r30=[r30]
-       ;;
-       extr.u r17=r29,4,4
-       extr.u r18=r29,16,1
-       ;;
-       dep r17=r18,r17,4,1
-       ;;
-       cmp.gt p6,p0=r30,r17
-       (p6) br.dpnt.few kvm_asm_dispatch_vexirq
-       br.many kvm_resume_to_guest_with_sync
-END(kvm_asm_ssm)
-
-
-//mov psr.l=r2
-GLOBAL_ENTRY(kvm_asm_mov_to_psr)
-#ifndef ACCE_MOV_TO_PSR
-       br.many kvm_virtualization_fault_back
-#endif
-       VMX_VPS_SYNC_READ
-       ;;
-       extr.u r26=r25,13,7 //r2
-       addl r20=@gprel(asm_mov_from_reg),gp
-       ;;
-       adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20
-       shladd r26=r26,4,r20
-       mov r24=b0
-       ;;
-       add r27=VPD_VPSR_START_OFFSET,r16
-       mov b0=r26
-       br.many b0
-       ;;
-kvm_asm_mov_to_psr_back:
-       ld8 r17=[r27]
-       add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-       dep r19=0,r19,32,32
-       ;;
-       ld4 r23=[r22]
-       dep r18=0,r17,0,32
-       ;;
-       add r30=r18,r19
-       movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
-       ;;
-       st8 [r27]=r30
-       and r27=r28,r30
-       and r29=r28,r17
-       ;;
-       cmp.eq p5,p0=r29,r27
-       cmp.eq p6,p7=r28,r27
-       (p5) br.many kvm_asm_mov_to_psr_1
-       ;;
-       //virtual to physical
-       (p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21
-       (p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
-       (p7) dep r23=-1,r23,0,1
-       ;;
-       //physical to virtual
-       (p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
-       (p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
-       (p6) dep r23=0,r23,0,1
-       ;;
-       ld8 r26=[r26]
-       ld8 r27=[r27]
-       st4 [r22]=r23
-       dep.z r28=4,61,3
-       ;;
-       mov rr[r0]=r26
-       ;;
-       mov rr[r28]=r27
-       ;;
-       srlz.d
-       ;;
-kvm_asm_mov_to_psr_1:
-       mov r20=cr.ipsr
-       movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT
-       ;;
-       or r19=r19,r28
-       dep r20=0,r20,0,32
-       ;;
-       add r20=r19,r20
-       mov b0=r24
-       ;;
-       /* Comment it out due to short of fp lazy algorithm support
-       adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
-       ;;
-       ld8 r27=[r27]
-       ;;
-       tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT
-       ;;
-       (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
-       ;;
-       */
-       mov cr.ipsr=r20
-       cmp.ne p6,p0=r0,r0
-       ;;
-       tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT
-       tbit.z.or p6,p0=r30,IA64_PSR_I_BIT
-       (p6) br.dpnt.few kvm_resume_to_guest_with_sync
-       ;;
-       add r29=VPD_VTPR_START_OFFSET,r16
-       add r30=VPD_VHPI_START_OFFSET,r16
-       ;;
-       ld8 r29=[r29]
-       ld8 r30=[r30]
-       ;;
-       extr.u r17=r29,4,4
-       extr.u r18=r29,16,1
-       ;;
-       dep r17=r18,r17,4,1
-       ;;
-       cmp.gt p6,p0=r30,r17
-       (p6) br.dpnt.few kvm_asm_dispatch_vexirq
-       br.many kvm_resume_to_guest_with_sync
-END(kvm_asm_mov_to_psr)
-
-
-ENTRY(kvm_asm_dispatch_vexirq)
-//increment iip
-       mov r17 = b0
-       mov r18 = r31
-{.mii
-       add r25=VMM_VPD_BASE_OFFSET,r21
-       nop 0x0
-       mov r24 = ip
-       ;;
-}
-{.mmb
-       add r24 = 0x20, r24
-       ld8 r25 = [r25]
-       br.sptk.many kvm_vps_sync_write
-}
-       mov b0 =r17
-       mov r16=cr.ipsr
-       mov r31 = r18
-       mov r19 = 37
-       ;;
-       extr.u r17=r16,IA64_PSR_RI_BIT,2
-       tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
-       ;;
-       (p6) mov r18=cr.iip
-       (p6) mov r17=r0
-       (p7) add r17=1,r17
-       ;;
-       (p6) add r18=0x10,r18
-       dep r16=r17,r16,IA64_PSR_RI_BIT,2
-       ;;
-       (p6) mov cr.iip=r18
-       mov cr.ipsr=r16
-       mov r30 =1
-       br.many kvm_dispatch_vexirq
-END(kvm_asm_dispatch_vexirq)
-
-// thash
-// TODO: add support when pta.vf = 1
-GLOBAL_ENTRY(kvm_asm_thash)
-#ifndef ACCE_THASH
-       br.many kvm_virtualization_fault_back
-#endif
-       extr.u r17=r25,20,7             // get r3 from opcode in r25
-       extr.u r18=r25,6,7              // get r1 from opcode in r25
-       addl r20=@gprel(asm_mov_from_reg),gp
-       ;;
-       adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20
-       shladd r17=r17,4,r20    // get addr of MOVE_FROM_REG(r17)
-       adds r16=VMM_VPD_BASE_OFFSET,r21        // get vcpu.arch.priveregs
-       ;;
-       mov r24=b0
-       ;;
-       ld8 r16=[r16]           // get VPD addr
-       mov b0=r17
-       br.many b0                      // r19 return value
-       ;;
-kvm_asm_thash_back1:
-       shr.u r23=r19,61                // get RR number
-       adds r28=VMM_VCPU_VRR0_OFFSET,r21       // get vcpu->arch.vrr[0]'s addr
-       adds r16=VMM_VPD_VPTA_OFFSET,r16        // get vpta
-       ;;
-       shladd r27=r23,3,r28    // get vcpu->arch.vrr[r23]'s addr
-       ld8 r17=[r16]           // get PTA
-       mov r26=1
-       ;;
-       extr.u r29=r17,2,6      // get pta.size
-       ld8 r28=[r27]           // get vcpu->arch.vrr[r23]'s value
-       ;;
-       mov b0=r24
-       //Fallback to C if pta.vf is set
-       tbit.nz p6,p0=r17, 8
-       ;;
-       (p6) mov r24=EVENT_THASH
-       (p6) br.cond.dpnt.many kvm_virtualization_fault_back
-       extr.u r28=r28,2,6      // get rr.ps
-       shl r22=r26,r29         // 1UL << pta.size
-       ;;
-       shr.u r23=r19,r28       // vaddr >> rr.ps
-       adds r26=3,r29          // pta.size + 3
-       shl r27=r17,3           // pta << 3
-       ;;
-       shl r23=r23,3           // (vaddr >> rr.ps) << 3
-       shr.u r27=r27,r26       // (pta << 3) >> (pta.size+3)
-       movl r16=7<<61
-       ;;
-       adds r22=-1,r22         // (1UL << pta.size) - 1
-       shl r27=r27,r29         // ((pta<<3)>>(pta.size+3))<<pta.size
-       and r19=r19,r16         // vaddr & VRN_MASK
-       ;;
-       and r22=r22,r23         // vhpt_offset
-       or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size)
-       adds r26=asm_mov_to_reg-asm_mov_from_reg,r20
-       ;;
-       or r19=r19,r22          // calc pval
-       shladd r17=r18,4,r26
-       adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
-       ;;
-       mov b0=r17
-       br.many b0
-END(kvm_asm_thash)
-
-#define MOV_TO_REG0    \
-{;                     \
-       nop.b 0x0;              \
-       nop.b 0x0;              \
-       nop.b 0x0;              \
-       ;;                      \
-};
-
-
-#define MOV_TO_REG(n)  \
-{;                     \
-       mov r##n##=r19; \
-       mov b0=r30;     \
-       br.sptk.many b0;        \
-       ;;                      \
-};
-
-
-#define MOV_FROM_REG(n)        \
-{;                             \
-       mov r19=r##n##;         \
-       mov b0=r30;             \
-       br.sptk.many b0;                \
-       ;;                              \
-};
-
-
-#define MOV_TO_BANK0_REG(n)                    \
-ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##);    \
-{;                                             \
-       mov r26=r2;                             \
-       mov r2=r19;                             \
-       bsw.1;                                  \
-       ;;                                              \
-};                                             \
-{;                                             \
-       mov r##n##=r2;                          \
-       nop.b 0x0;                                      \
-       bsw.0;                                  \
-       ;;                                              \
-};                                             \
-{;                                             \
-       mov r2=r26;                             \
-       mov b0=r30;                             \
-       br.sptk.many b0;                                \
-       ;;                                              \
-};                                             \
-END(asm_mov_to_bank0_reg##n##)
-
-
-#define MOV_FROM_BANK0_REG(n)                  \
-ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##);  \
-{;                                             \
-       mov r26=r2;                             \
-       nop.b 0x0;                                      \
-       bsw.1;                                  \
-       ;;                                              \
-};                                             \
-{;                                             \
-       mov r2=r##n##;                          \
-       nop.b 0x0;                                      \
-       bsw.0;                                  \
-       ;;                                              \
-};                                             \
-{;                                             \
-       mov r19=r2;                             \
-       mov r2=r26;                             \
-       mov b0=r30;                             \
-};                                             \
-{;                                             \
-       nop.b 0x0;                                      \
-       nop.b 0x0;                                      \
-       br.sptk.many b0;                                \
-       ;;                                              \
-};                                             \
-END(asm_mov_from_bank0_reg##n##)
-
-
-#define JMP_TO_MOV_TO_BANK0_REG(n)             \
-{;                                             \
-       nop.b 0x0;                                      \
-       nop.b 0x0;                                      \
-       br.sptk.many asm_mov_to_bank0_reg##n##; \
-       ;;                                              \
-}
-
-
-#define JMP_TO_MOV_FROM_BANK0_REG(n)           \
-{;                                             \
-       nop.b 0x0;                                      \
-       nop.b 0x0;                                      \
-       br.sptk.many asm_mov_from_bank0_reg##n##;       \
-       ;;                                              \
-}
-
-
-MOV_FROM_BANK0_REG(16)
-MOV_FROM_BANK0_REG(17)
-MOV_FROM_BANK0_REG(18)
-MOV_FROM_BANK0_REG(19)
-MOV_FROM_BANK0_REG(20)
-MOV_FROM_BANK0_REG(21)
-MOV_FROM_BANK0_REG(22)
-MOV_FROM_BANK0_REG(23)
-MOV_FROM_BANK0_REG(24)
-MOV_FROM_BANK0_REG(25)
-MOV_FROM_BANK0_REG(26)
-MOV_FROM_BANK0_REG(27)
-MOV_FROM_BANK0_REG(28)
-MOV_FROM_BANK0_REG(29)
-MOV_FROM_BANK0_REG(30)
-MOV_FROM_BANK0_REG(31)
-
-
-// mov from reg table
-ENTRY(asm_mov_from_reg)
-       MOV_FROM_REG(0)
-       MOV_FROM_REG(1)
-       MOV_FROM_REG(2)
-       MOV_FROM_REG(3)
-       MOV_FROM_REG(4)
-       MOV_FROM_REG(5)
-       MOV_FROM_REG(6)
-       MOV_FROM_REG(7)
-       MOV_FROM_REG(8)
-       MOV_FROM_REG(9)
-       MOV_FROM_REG(10)
-       MOV_FROM_REG(11)
-       MOV_FROM_REG(12)
-       MOV_FROM_REG(13)
-       MOV_FROM_REG(14)
-       MOV_FROM_REG(15)
-       JMP_TO_MOV_FROM_BANK0_REG(16)
-       JMP_TO_MOV_FROM_BANK0_REG(17)
-       JMP_TO_MOV_FROM_BANK0_REG(18)
-       JMP_TO_MOV_FROM_BANK0_REG(19)
-       JMP_TO_MOV_FROM_BANK0_REG(20)
-       JMP_TO_MOV_FROM_BANK0_REG(21)
-       JMP_TO_MOV_FROM_BANK0_REG(22)
-       JMP_TO_MOV_FROM_BANK0_REG(23)
-       JMP_TO_MOV_FROM_BANK0_REG(24)
-       JMP_TO_MOV_FROM_BANK0_REG(25)
-       JMP_TO_MOV_FROM_BANK0_REG(26)
-       JMP_TO_MOV_FROM_BANK0_REG(27)
-       JMP_TO_MOV_FROM_BANK0_REG(28)
-       JMP_TO_MOV_FROM_BANK0_REG(29)
-       JMP_TO_MOV_FROM_BANK0_REG(30)
-       JMP_TO_MOV_FROM_BANK0_REG(31)
-       MOV_FROM_REG(32)
-       MOV_FROM_REG(33)
-       MOV_FROM_REG(34)
-       MOV_FROM_REG(35)
-       MOV_FROM_REG(36)
-       MOV_FROM_REG(37)
-       MOV_FROM_REG(38)
-       MOV_FROM_REG(39)
-       MOV_FROM_REG(40)
-       MOV_FROM_REG(41)
-       MOV_FROM_REG(42)
-       MOV_FROM_REG(43)
-       MOV_FROM_REG(44)
-       MOV_FROM_REG(45)
-       MOV_FROM_REG(46)
-       MOV_FROM_REG(47)
-       MOV_FROM_REG(48)
-       MOV_FROM_REG(49)
-       MOV_FROM_REG(50)
-       MOV_FROM_REG(51)
-       MOV_FROM_REG(52)
-       MOV_FROM_REG(53)
-       MOV_FROM_REG(54)
-       MOV_FROM_REG(55)
-       MOV_FROM_REG(56)
-       MOV_FROM_REG(57)
-       MOV_FROM_REG(58)
-       MOV_FROM_REG(59)
-       MOV_FROM_REG(60)
-       MOV_FROM_REG(61)
-       MOV_FROM_REG(62)
-       MOV_FROM_REG(63)
-       MOV_FROM_REG(64)
-       MOV_FROM_REG(65)
-       MOV_FROM_REG(66)
-       MOV_FROM_REG(67)
-       MOV_FROM_REG(68)
-       MOV_FROM_REG(69)
-       MOV_FROM_REG(70)
-       MOV_FROM_REG(71)
-       MOV_FROM_REG(72)
-       MOV_FROM_REG(73)
-       MOV_FROM_REG(74)
-       MOV_FROM_REG(75)
-       MOV_FROM_REG(76)
-       MOV_FROM_REG(77)
-       MOV_FROM_REG(78)
-       MOV_FROM_REG(79)
-       MOV_FROM_REG(80)
-       MOV_FROM_REG(81)
-       MOV_FROM_REG(82)
-       MOV_FROM_REG(83)
-       MOV_FROM_REG(84)
-       MOV_FROM_REG(85)
-       MOV_FROM_REG(86)
-       MOV_FROM_REG(87)
-       MOV_FROM_REG(88)
-       MOV_FROM_REG(89)
-       MOV_FROM_REG(90)
-       MOV_FROM_REG(91)
-       MOV_FROM_REG(92)
-       MOV_FROM_REG(93)
-       MOV_FROM_REG(94)
-       MOV_FROM_REG(95)
-       MOV_FROM_REG(96)
-       MOV_FROM_REG(97)
-       MOV_FROM_REG(98)
-       MOV_FROM_REG(99)
-       MOV_FROM_REG(100)
-       MOV_FROM_REG(101)
-       MOV_FROM_REG(102)
-       MOV_FROM_REG(103)
-       MOV_FROM_REG(104)
-       MOV_FROM_REG(105)
-       MOV_FROM_REG(106)
-       MOV_FROM_REG(107)
-       MOV_FROM_REG(108)
-       MOV_FROM_REG(109)
-       MOV_FROM_REG(110)
-       MOV_FROM_REG(111)
-       MOV_FROM_REG(112)
-       MOV_FROM_REG(113)
-       MOV_FROM_REG(114)
-       MOV_FROM_REG(115)
-       MOV_FROM_REG(116)
-       MOV_FROM_REG(117)
-       MOV_FROM_REG(118)
-       MOV_FROM_REG(119)
-       MOV_FROM_REG(120)
-       MOV_FROM_REG(121)
-       MOV_FROM_REG(122)
-       MOV_FROM_REG(123)
-       MOV_FROM_REG(124)
-       MOV_FROM_REG(125)
-       MOV_FROM_REG(126)
-       MOV_FROM_REG(127)
-END(asm_mov_from_reg)
-
-
-/* must be in bank 0
- * parameter:
- * r31: pr
- * r24: b0
- */
-ENTRY(kvm_resume_to_guest_with_sync)
-       adds r19=VMM_VPD_BASE_OFFSET,r21
-       mov r16 = r31
-       mov r17 = r24
-       ;;
-{.mii
-       ld8 r25 =[r19]
-       nop 0x0
-       mov r24 = ip
-       ;;
-}
-{.mmb
-       add r24 =0x20, r24
-       nop 0x0
-       br.sptk.many kvm_vps_sync_write
-}
-
-       mov r31 = r16
-       mov r24 =r17
-       ;;
-       br.sptk.many kvm_resume_to_guest
-END(kvm_resume_to_guest_with_sync)
-
-ENTRY(kvm_resume_to_guest)
-       adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-       ;;
-       ld8 r1 =[r16]
-       adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21
-       ;;
-       mov r16=cr.ipsr
-       ;;
-       ld8 r20 = [r20]
-       adds r19=VMM_VPD_BASE_OFFSET,r21
-       ;;
-       ld8 r25=[r19]
-       extr.u r17=r16,IA64_PSR_RI_BIT,2
-       tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
-       ;;
-       (p6) mov r18=cr.iip
-       (p6) mov r17=r0
-       ;;
-       (p6) add r18=0x10,r18
-       (p7) add r17=1,r17
-       ;;
-       (p6) mov cr.iip=r18
-       dep r16=r17,r16,IA64_PSR_RI_BIT,2
-       ;;
-       mov cr.ipsr=r16
-       adds r19= VPD_VPSR_START_OFFSET,r25
-       add r28=PAL_VPS_RESUME_NORMAL,r20
-       add r29=PAL_VPS_RESUME_HANDLER,r20
-       ;;
-       ld8 r19=[r19]
-       mov b0=r29
-       mov r27=cr.isr
-       ;;
-       tbit.z p6,p7 = r19,IA64_PSR_IC_BIT              // p7=vpsr.ic
-       shr r27=r27,IA64_ISR_IR_BIT
-       ;;
-       (p6) ld8 r26=[r25]
-       (p7) mov b0=r28
-       ;;
-       (p6) dep r26=r27,r26,63,1
-       mov pr=r31,-2
-       br.sptk.many b0             // call pal service
-       ;;
-END(kvm_resume_to_guest)
-
-
-MOV_TO_BANK0_REG(16)
-MOV_TO_BANK0_REG(17)
-MOV_TO_BANK0_REG(18)
-MOV_TO_BANK0_REG(19)
-MOV_TO_BANK0_REG(20)
-MOV_TO_BANK0_REG(21)
-MOV_TO_BANK0_REG(22)
-MOV_TO_BANK0_REG(23)
-MOV_TO_BANK0_REG(24)
-MOV_TO_BANK0_REG(25)
-MOV_TO_BANK0_REG(26)
-MOV_TO_BANK0_REG(27)
-MOV_TO_BANK0_REG(28)
-MOV_TO_BANK0_REG(29)
-MOV_TO_BANK0_REG(30)
-MOV_TO_BANK0_REG(31)
-
-
-// mov to reg table
-ENTRY(asm_mov_to_reg)
-       MOV_TO_REG0
-       MOV_TO_REG(1)
-       MOV_TO_REG(2)
-       MOV_TO_REG(3)
-       MOV_TO_REG(4)
-       MOV_TO_REG(5)
-       MOV_TO_REG(6)
-       MOV_TO_REG(7)
-       MOV_TO_REG(8)
-       MOV_TO_REG(9)
-       MOV_TO_REG(10)
-       MOV_TO_REG(11)
-       MOV_TO_REG(12)
-       MOV_TO_REG(13)
-       MOV_TO_REG(14)
-       MOV_TO_REG(15)
-       JMP_TO_MOV_TO_BANK0_REG(16)
-       JMP_TO_MOV_TO_BANK0_REG(17)
-       JMP_TO_MOV_TO_BANK0_REG(18)
-       JMP_TO_MOV_TO_BANK0_REG(19)
-       JMP_TO_MOV_TO_BANK0_REG(20)
-       JMP_TO_MOV_TO_BANK0_REG(21)
-       JMP_TO_MOV_TO_BANK0_REG(22)
-       JMP_TO_MOV_TO_BANK0_REG(23)
-       JMP_TO_MOV_TO_BANK0_REG(24)
-       JMP_TO_MOV_TO_BANK0_REG(25)
-       JMP_TO_MOV_TO_BANK0_REG(26)
-       JMP_TO_MOV_TO_BANK0_REG(27)
-       JMP_TO_MOV_TO_BANK0_REG(28)
-       JMP_TO_MOV_TO_BANK0_REG(29)
-       JMP_TO_MOV_TO_BANK0_REG(30)
-       JMP_TO_MOV_TO_BANK0_REG(31)
-       MOV_TO_REG(32)
-       MOV_TO_REG(33)
-       MOV_TO_REG(34)
-       MOV_TO_REG(35)
-       MOV_TO_REG(36)
-       MOV_TO_REG(37)
-       MOV_TO_REG(38)
-       MOV_TO_REG(39)
-       MOV_TO_REG(40)
-       MOV_TO_REG(41)
-       MOV_TO_REG(42)
-       MOV_TO_REG(43)
-       MOV_TO_REG(44)
-       MOV_TO_REG(45)
-       MOV_TO_REG(46)
-       MOV_TO_REG(47)
-       MOV_TO_REG(48)
-       MOV_TO_REG(49)
-       MOV_TO_REG(50)
-       MOV_TO_REG(51)
-       MOV_TO_REG(52)
-       MOV_TO_REG(53)
-       MOV_TO_REG(54)
-       MOV_TO_REG(55)
-       MOV_TO_REG(56)
-       MOV_TO_REG(57)
-       MOV_TO_REG(58)
-       MOV_TO_REG(59)
-       MOV_TO_REG(60)
-       MOV_TO_REG(61)
-       MOV_TO_REG(62)
-       MOV_TO_REG(63)
-       MOV_TO_REG(64)
-       MOV_TO_REG(65)
-       MOV_TO_REG(66)
-       MOV_TO_REG(67)
-       MOV_TO_REG(68)
-       MOV_TO_REG(69)
-       MOV_TO_REG(70)
-       MOV_TO_REG(71)
-       MOV_TO_REG(72)
-       MOV_TO_REG(73)
-       MOV_TO_REG(74)
-       MOV_TO_REG(75)
-       MOV_TO_REG(76)
-       MOV_TO_REG(77)
-       MOV_TO_REG(78)
-       MOV_TO_REG(79)
-       MOV_TO_REG(80)
-       MOV_TO_REG(81)
-       MOV_TO_REG(82)
-       MOV_TO_REG(83)
-       MOV_TO_REG(84)
-       MOV_TO_REG(85)
-       MOV_TO_REG(86)
-       MOV_TO_REG(87)
-       MOV_TO_REG(88)
-       MOV_TO_REG(89)
-       MOV_TO_REG(90)
-       MOV_TO_REG(91)
-       MOV_TO_REG(92)
-       MOV_TO_REG(93)
-       MOV_TO_REG(94)
-       MOV_TO_REG(95)
-       MOV_TO_REG(96)
-       MOV_TO_REG(97)
-       MOV_TO_REG(98)
-       MOV_TO_REG(99)
-       MOV_TO_REG(100)
-       MOV_TO_REG(101)
-       MOV_TO_REG(102)
-       MOV_TO_REG(103)
-       MOV_TO_REG(104)
-       MOV_TO_REG(105)
-       MOV_TO_REG(106)
-       MOV_TO_REG(107)
-       MOV_TO_REG(108)
-       MOV_TO_REG(109)
-       MOV_TO_REG(110)
-       MOV_TO_REG(111)
-       MOV_TO_REG(112)
-       MOV_TO_REG(113)
-       MOV_TO_REG(114)
-       MOV_TO_REG(115)
-       MOV_TO_REG(116)
-       MOV_TO_REG(117)
-       MOV_TO_REG(118)
-       MOV_TO_REG(119)
-       MOV_TO_REG(120)
-       MOV_TO_REG(121)
-       MOV_TO_REG(122)
-       MOV_TO_REG(123)
-       MOV_TO_REG(124)
-       MOV_TO_REG(125)
-       MOV_TO_REG(126)
-       MOV_TO_REG(127)
-END(asm_mov_to_reg)
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c

deleted file mode 100644 (file)

index b039874..0000000
--- a/arch/ia64/kvm/process.c
+++ /dev/null
@@ -1,1024 +0,0 @@
-/*
- * process.c: handle interruption inject for guests.
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- *     Shaofan Li (Susue Li) <susie.li@intel.com>
- *     Xiaoyan Feng (Fleming Feng)  <fleming.feng@intel.com>
- *     Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *     Xiantao Zhang (xiantao.zhang@intel.com)
- */
-#include "vcpu.h"
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-#include <asm/fpswa.h>
-#include <asm/kregs.h>
-#include <asm/tlb.h>
-
-fpswa_interface_t *vmm_fpswa_interface;
-
-#define IA64_VHPT_TRANS_VECTOR                 0x0000
-#define IA64_INST_TLB_VECTOR                   0x0400
-#define IA64_DATA_TLB_VECTOR                   0x0800
-#define IA64_ALT_INST_TLB_VECTOR               0x0c00
-#define IA64_ALT_DATA_TLB_VECTOR               0x1000
-#define IA64_DATA_NESTED_TLB_VECTOR            0x1400
-#define IA64_INST_KEY_MISS_VECTOR              0x1800
-#define IA64_DATA_KEY_MISS_VECTOR              0x1c00
-#define IA64_DIRTY_BIT_VECTOR                  0x2000
-#define IA64_INST_ACCESS_BIT_VECTOR            0x2400
-#define IA64_DATA_ACCESS_BIT_VECTOR            0x2800
-#define IA64_BREAK_VECTOR                      0x2c00
-#define IA64_EXTINT_VECTOR                     0x3000
-#define IA64_PAGE_NOT_PRESENT_VECTOR           0x5000
-#define IA64_KEY_PERMISSION_VECTOR             0x5100
-#define IA64_INST_ACCESS_RIGHTS_VECTOR         0x5200
-#define IA64_DATA_ACCESS_RIGHTS_VECTOR         0x5300
-#define IA64_GENEX_VECTOR                      0x5400
-#define IA64_DISABLED_FPREG_VECTOR             0x5500
-#define IA64_NAT_CONSUMPTION_VECTOR            0x5600
-#define IA64_SPECULATION_VECTOR                0x5700 /* UNUSED */
-#define IA64_DEBUG_VECTOR                      0x5900
-#define IA64_UNALIGNED_REF_VECTOR              0x5a00
-#define IA64_UNSUPPORTED_DATA_REF_VECTOR       0x5b00
-#define IA64_FP_FAULT_VECTOR                   0x5c00
-#define IA64_FP_TRAP_VECTOR                    0x5d00
-#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR    0x5e00
-#define IA64_TAKEN_BRANCH_TRAP_VECTOR          0x5f00
-#define IA64_SINGLE_STEP_TRAP_VECTOR           0x6000
-
-/* SDM vol2 5.5 - IVA based interruption handling */
-#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\
-                       IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT |      \
-                       IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT)
-
-#define DOMN_PAL_REQUEST    0x110000
-#define DOMN_SAL_REQUEST    0x110001
-
-static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800,
-       0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00,
-       0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400,
-       0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00,
-       0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600,
-       0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00,
-       0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800,
-       0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00
-};
-
-static void collect_interruption(struct kvm_vcpu *vcpu)
-{
-       u64 ipsr;
-       u64 vdcr;
-       u64 vifs;
-       unsigned long vpsr;
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-       vpsr = vcpu_get_psr(vcpu);
-       vcpu_bsw0(vcpu);
-       if (vpsr & IA64_PSR_IC) {
-
-               /* Sync mpsr id/da/dd/ss/ed bits to vipsr
-                * since after guest do rfi, we still want these bits on in
-                * mpsr
-                */
-
-               ipsr = regs->cr_ipsr;
-               vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA
-                                       | IA64_PSR_DD | IA64_PSR_SS
-                                       | IA64_PSR_ED));
-               vcpu_set_ipsr(vcpu, vpsr);
-
-               /* Currently, for trap, we do not advance IIP to next
-                * instruction. That's because we assume caller already
-                * set up IIP correctly
-                */
-
-               vcpu_set_iip(vcpu , regs->cr_iip);
-
-               /* set vifs.v to zero */
-               vifs = VCPU(vcpu, ifs);
-               vifs &= ~IA64_IFS_V;
-               vcpu_set_ifs(vcpu, vifs);
-
-               vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa));
-       }
-
-       vdcr = VCPU(vcpu, dcr);
-
-       /* Set guest psr
-        * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged
-        * be: set to the value of dcr.be
-        * pp: set to the value of dcr.pp
-        */
-       vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION;
-       vpsr |= (vdcr & IA64_DCR_BE);
-
-       /* VDCR pp bit position is different from VPSR pp bit */
-       if (vdcr & IA64_DCR_PP) {
-               vpsr |= IA64_PSR_PP;
-       } else {
-               vpsr &= ~IA64_PSR_PP;
-       }
-
-       vcpu_set_psr(vcpu, vpsr);
-
-}
-
-void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec)
-{
-       u64 viva;
-       struct kvm_pt_regs *regs;
-       union ia64_isr pt_isr;
-
-       regs = vcpu_regs(vcpu);
-
-       /* clear cr.isr.ir (incomplete register frame)*/
-       pt_isr.val = VMX(vcpu, cr_isr);
-       pt_isr.ir = 0;
-       VMX(vcpu, cr_isr) = pt_isr.val;
-
-       collect_interruption(vcpu);
-
-       viva = vcpu_get_iva(vcpu);
-       regs->cr_iip = viva + vec;
-}
-
-static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa)
-{
-       union ia64_rr rr, rr1;
-
-       rr.val = vcpu_get_rr(vcpu, ifa);
-       rr1.val = 0;
-       rr1.ps = rr.ps;
-       rr1.rid = rr.rid;
-       return (rr1.val);
-}
-
-/*
- * Set vIFA & vITIR & vIHA, when vPSR.ic =1
- * Parameter:
- *  set_ifa: if true, set vIFA
- *  set_itir: if true, set vITIR
- *  set_iha: if true, set vIHA
- */
-void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr,
-               int set_ifa, int set_itir, int set_iha)
-{
-       long vpsr;
-       u64 value;
-
-       vpsr = VCPU(vcpu, vpsr);
-       /* Vol2, Table 8-1 */
-       if (vpsr & IA64_PSR_IC) {
-               if (set_ifa)
-                       vcpu_set_ifa(vcpu, vadr);
-               if (set_itir) {
-                       value = vcpu_get_itir_on_fault(vcpu, vadr);
-                       vcpu_set_itir(vcpu, value);
-               }
-
-               if (set_iha) {
-                       value = vcpu_thash(vcpu, vadr);
-                       vcpu_set_iha(vcpu, value);
-               }
-       }
-}
-
-/*
- * Data TLB Fault
- *  @ Data TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       /* If vPSR.ic, IFA, ITIR, IHA */
-       set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
-       inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR);
-}
-
-/*
- * Instruction TLB Fault
- *  @ Instruction TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       /* If vPSR.ic, IFA, ITIR, IHA */
-       set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
-       inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR);
-}
-
-/*
- * Data Nested TLB Fault
- *  @ Data Nested TLB Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void nested_dtlb(struct kvm_vcpu *vcpu)
-{
-       inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR);
-}
-
-/*
- * Alternate Data TLB Fault
- *  @ Alternate Data TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-       inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR);
-}
-
-/*
- * Data TLB Fault
- *  @ Data TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-       inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR);
-}
-
-/* Deal with:
- *  VHPT Translation Vector
- */
-static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       /* If vPSR.ic, IFA, ITIR, IHA*/
-       set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
-       inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR);
-}
-
-/*
- * VHPT Instruction Fault
- *  @ VHPT Translation vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       _vhpt_fault(vcpu, vadr);
-}
-
-/*
- * VHPT Data Fault
- *  @ VHPT Translation vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       _vhpt_fault(vcpu, vadr);
-}
-
-/*
- * Deal with:
- *  General Exception vector
- */
-void _general_exception(struct kvm_vcpu *vcpu)
-{
-       inject_guest_interruption(vcpu, IA64_GENEX_VECTOR);
-}
-
-/*
- * Illegal Operation Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void illegal_op(struct kvm_vcpu *vcpu)
-{
-       _general_exception(vcpu);
-}
-
-/*
- * Illegal Dependency Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void illegal_dep(struct kvm_vcpu *vcpu)
-{
-       _general_exception(vcpu);
-}
-
-/*
- * Reserved Register/Field Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void rsv_reg_field(struct kvm_vcpu *vcpu)
-{
-       _general_exception(vcpu);
-}
-/*
- * Privileged Operation Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-
-void privilege_op(struct kvm_vcpu *vcpu)
-{
-       _general_exception(vcpu);
-}
-
-/*
- * Unimplement Data Address Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void unimpl_daddr(struct kvm_vcpu *vcpu)
-{
-       _general_exception(vcpu);
-}
-
-/*
- * Privileged Register Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void privilege_reg(struct kvm_vcpu *vcpu)
-{
-       _general_exception(vcpu);
-}
-
-/* Deal with
- *  Nat consumption vector
- * Parameter:
- *  vaddr: Optional, if t == REGISTER
- */
-static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr,
-                                               enum tlb_miss_type t)
-{
-       /* If vPSR.ic && t == DATA/INST, IFA */
-       if (t == DATA || t == INSTRUCTION) {
-               /* IFA */
-               set_ifa_itir_iha(vcpu, vadr, 1, 0, 0);
-       }
-
-       inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR);
-}
-
-/*
- * Instruction Nat Page Consumption Fault
- *  @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       _nat_consumption_fault(vcpu, vadr, INSTRUCTION);
-}
-
-/*
- * Register Nat Consumption Fault
- *  @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void rnat_consumption(struct kvm_vcpu *vcpu)
-{
-       _nat_consumption_fault(vcpu, 0, REGISTER);
-}
-
-/*
- * Data Nat Page Consumption Fault
- *  @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       _nat_consumption_fault(vcpu, vadr, DATA);
-}
-
-/* Deal with
- *  Page not present vector
- */
-static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       /* If vPSR.ic, IFA, ITIR */
-       set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-       inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
-}
-
-void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       __page_not_present(vcpu, vadr);
-}
-
-void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       __page_not_present(vcpu, vadr);
-}
-
-/* Deal with
- *  Data access rights vector
- */
-void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       /* If vPSR.ic, IFA, ITIR */
-       set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-       inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR);
-}
-
-fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
-               unsigned long *fpsr, unsigned long *isr, unsigned long *pr,
-               unsigned long *ifs, struct kvm_pt_regs *regs)
-{
-       fp_state_t fp_state;
-       fpswa_ret_t ret;
-       struct kvm_vcpu *vcpu = current_vcpu;
-
-       uint64_t old_rr7 = ia64_get_rr(7UL<<61);
-
-       if (!vmm_fpswa_interface)
-               return (fpswa_ret_t) {-1, 0, 0, 0};
-
-       memset(&fp_state, 0, sizeof(fp_state_t));
-
-       /*
-        * compute fp_state.  only FP registers f6 - f11 are used by the
-        * vmm, so set those bits in the mask and set the low volatile
-        * pointer to point to these registers.
-        */
-       fp_state.bitmask_low64 = 0xfc0;  /* bit6..bit11 */
-
-       fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
-
-   /*
-        * unsigned long (*EFI_FPSWA) (
-        *      unsigned long    trap_type,
-        *      void             *Bundle,
-        *      unsigned long    *pipsr,
-        *      unsigned long    *pfsr,
-        *      unsigned long    *pisr,
-        *      unsigned long    *ppreds,
-        *      unsigned long    *pifs,
-        *      void             *fp_state);
-        */
-       /*Call host fpswa interface directly to virtualize
-        *guest fpswa request!
-        */
-       ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]);
-       ia64_srlz_d();
-
-       ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle,
-                       ipsr, fpsr, isr, pr, ifs, &fp_state);
-       ia64_set_rr(7UL << 61, old_rr7);
-       ia64_srlz_d();
-       return ret;
-}
-
-/*
- * Handle floating-point assist faults and traps for domain.
- */
-unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs,
-                                       unsigned long isr)
-{
-       struct kvm_vcpu *v = current_vcpu;
-       IA64_BUNDLE bundle;
-       unsigned long fault_ip;
-       fpswa_ret_t ret;
-
-       fault_ip = regs->cr_iip;
-       /*
-        * When the FP trap occurs, the trapping instruction is completed.
-        * If ipsr.ri == 0, there is the trapping instruction in previous
-        * bundle.
-        */
-       if (!fp_fault && (ia64_psr(regs)->ri == 0))
-               fault_ip -= 16;
-
-       if (fetch_code(v, fault_ip, &bundle))
-               return -EAGAIN;
-
-       if (!bundle.i64[0] && !bundle.i64[1])
-               return -EACCES;
-
-       ret = vmm_fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr,
-                       &isr, &regs->pr, &regs->cr_ifs, regs);
-       return ret.status;
-}
-
-void reflect_interruption(u64 ifa, u64 isr, u64 iim,
-               u64 vec, struct kvm_pt_regs *regs)
-{
-       u64 vector;
-       int status ;
-       struct kvm_vcpu *vcpu = current_vcpu;
-       u64 vpsr = VCPU(vcpu, vpsr);
-
-       vector = vec2off[vec];
-
-       if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
-               panic_vm(vcpu, "Interruption with vector :0x%lx occurs "
-                                               "with psr.ic = 0\n", vector);
-               return;
-       }
-
-       switch (vec) {
-       case 32:        /*IA64_FP_FAULT_VECTOR*/
-               status = vmm_handle_fpu_swa(1, regs, isr);
-               if (!status) {
-                       vcpu_increment_iip(vcpu);
-                       return;
-               } else if (-EAGAIN == status)
-                       return;
-               break;
-       case 33:        /*IA64_FP_TRAP_VECTOR*/
-               status = vmm_handle_fpu_swa(0, regs, isr);
-               if (!status)
-                       return ;
-               break;
-       }
-
-       VCPU(vcpu, isr) = isr;
-       VCPU(vcpu, iipa) = regs->cr_iip;
-       if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
-               VCPU(vcpu, iim) = iim;
-       else
-               set_ifa_itir_iha(vcpu, ifa, 1, 1, 1);
-
-       inject_guest_interruption(vcpu, vector);
-}
-
-static unsigned long kvm_trans_pal_call_args(struct kvm_vcpu *vcpu,
-                                               unsigned long arg)
-{
-       struct thash_data *data;
-       unsigned long gpa, poff;
-
-       if (!is_physical_mode(vcpu)) {
-               /* Depends on caller to provide the DTR or DTC mapping.*/
-               data = vtlb_lookup(vcpu, arg, D_TLB);
-               if (data)
-                       gpa = data->page_flags & _PAGE_PPN_MASK;
-               else {
-                       data = vhpt_lookup(arg);
-                       if (!data)
-                               return 0;
-                       gpa = data->gpaddr & _PAGE_PPN_MASK;
-               }
-
-               poff = arg & (PSIZE(data->ps) - 1);
-               arg = PAGEALIGN(gpa, data->ps) | poff;
-       }
-       arg = kvm_gpa_to_mpa(arg << 1 >> 1);
-
-       return (unsigned long)__va(arg);
-}
-
-static void set_pal_call_data(struct kvm_vcpu *vcpu)
-{
-       struct exit_ctl_data *p = &vcpu->arch.exit_data;
-       unsigned long gr28 = vcpu_get_gr(vcpu, 28);
-       unsigned long gr29 = vcpu_get_gr(vcpu, 29);
-       unsigned long gr30 = vcpu_get_gr(vcpu, 30);
-
-       /*FIXME:For static and stacked convention, firmware
-        * has put the parameters in gr28-gr31 before
-        * break to vmm  !!*/
-
-       switch (gr28) {
-       case PAL_PERF_MON_INFO:
-       case PAL_HALT_INFO:
-               p->u.pal_data.gr29 =  kvm_trans_pal_call_args(vcpu, gr29);
-               p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
-               break;
-       case PAL_BRAND_INFO:
-               p->u.pal_data.gr29 = gr29;
-               p->u.pal_data.gr30 = kvm_trans_pal_call_args(vcpu, gr30);
-               break;
-       default:
-               p->u.pal_data.gr29 = gr29;
-               p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
-       }
-       p->u.pal_data.gr28 = gr28;
-       p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31);
-
-       p->exit_reason = EXIT_REASON_PAL_CALL;
-}
-
-static void get_pal_call_result(struct kvm_vcpu *vcpu)
-{
-       struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-       if (p->exit_reason == EXIT_REASON_PAL_CALL) {
-               vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0);
-               vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0);
-               vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
-               vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
-       } else
-               panic_vm(vcpu, "Mis-set for exit reason!\n");
-}
-
-static void set_sal_call_data(struct kvm_vcpu *vcpu)
-{
-       struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-       p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32);
-       p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33);
-       p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34);
-       p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35);
-       p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36);
-       p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37);
-       p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38);
-       p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39);
-       p->exit_reason = EXIT_REASON_SAL_CALL;
-}
-
-static void get_sal_call_result(struct kvm_vcpu *vcpu)
-{
-       struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-       if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-               vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0);
-               vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0);
-               vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
-               vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
-       } else
-               panic_vm(vcpu, "Mis-set for exit reason!\n");
-}
-
-void  kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
-               unsigned long isr, unsigned long iim)
-{
-       struct kvm_vcpu *v = current_vcpu;
-       long psr;
-
-       if (ia64_psr(regs)->cpl == 0) {
-               /* Allow hypercalls only when cpl = 0.  */
-               if (iim == DOMN_PAL_REQUEST) {
-                       local_irq_save(psr);
-                       set_pal_call_data(v);
-                       vmm_transition(v);
-                       get_pal_call_result(v);
-                       vcpu_increment_iip(v);
-                       local_irq_restore(psr);
-                       return;
-               } else if (iim == DOMN_SAL_REQUEST) {
-                       local_irq_save(psr);
-                       set_sal_call_data(v);
-                       vmm_transition(v);
-                       get_sal_call_result(v);
-                       vcpu_increment_iip(v);
-                       local_irq_restore(psr);
-                       return;
-               }
-       }
-       reflect_interruption(ifa, isr, iim, 11, regs);
-}
-
-void check_pending_irq(struct kvm_vcpu *vcpu)
-{
-       int  mask, h_pending, h_inservice;
-       u64 isr;
-       unsigned long  vpsr;
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-       h_pending = highest_pending_irq(vcpu);
-       if (h_pending == NULL_VECTOR) {
-               update_vhpi(vcpu, NULL_VECTOR);
-               return;
-       }
-       h_inservice = highest_inservice_irq(vcpu);
-
-       vpsr = VCPU(vcpu, vpsr);
-       mask = irq_masked(vcpu, h_pending, h_inservice);
-       if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) {
-               isr = vpsr & IA64_PSR_RI;
-               update_vhpi(vcpu, h_pending);
-               reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
-       } else if (mask == IRQ_MASKED_BY_INSVC) {
-               if (VCPU(vcpu, vhpi))
-                       update_vhpi(vcpu, NULL_VECTOR);
-       } else {
-               /* masked by vpsr.i or vtpr.*/
-               update_vhpi(vcpu, h_pending);
-       }
-}
-
-static void generate_exirq(struct kvm_vcpu *vcpu)
-{
-       unsigned  vpsr;
-       uint64_t isr;
-
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-       vpsr = VCPU(vcpu, vpsr);
-       isr = vpsr & IA64_PSR_RI;
-       if (!(vpsr & IA64_PSR_IC))
-               panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
-       reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
-}
-
-void vhpi_detection(struct kvm_vcpu *vcpu)
-{
-       uint64_t    threshold, vhpi;
-       union ia64_tpr       vtpr;
-       struct ia64_psr vpsr;
-
-       vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-       vtpr.val = VCPU(vcpu, tpr);
-
-       threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic;
-       vhpi = VCPU(vcpu, vhpi);
-       if (vhpi > threshold) {
-               /* interrupt actived*/
-               generate_exirq(vcpu);
-       }
-}
-
-void leave_hypervisor_tail(void)
-{
-       struct kvm_vcpu *v = current_vcpu;
-
-       if (VMX(v, timer_check)) {
-               VMX(v, timer_check) = 0;
-               if (VMX(v, itc_check)) {
-                       if (vcpu_get_itc(v) > VCPU(v, itm)) {
-                               if (!(VCPU(v, itv) & (1 << 16))) {
-                                       vcpu_pend_interrupt(v, VCPU(v, itv)
-                                                       & 0xff);
-                                       VMX(v, itc_check) = 0;
-                               } else {
-                                       v->arch.timer_pending = 1;
-                               }
-                               VMX(v, last_itc) = VCPU(v, itm) + 1;
-                       }
-               }
-       }
-
-       rmb();
-       if (v->arch.irq_new_pending) {
-               v->arch.irq_new_pending = 0;
-               VMX(v, irq_check) = 0;
-               check_pending_irq(v);
-               return;
-       }
-       if (VMX(v, irq_check)) {
-               VMX(v, irq_check) = 0;
-               vhpi_detection(v);
-       }
-}
-
-static inline void handle_lds(struct kvm_pt_regs *regs)
-{
-       regs->cr_ipsr |= IA64_PSR_ED;
-}
-
-void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type)
-{
-       unsigned long pte;
-       union ia64_rr rr;
-
-       rr.val = ia64_get_rr(vadr);
-       pte =  vadr & _PAGE_PPN_MASK;
-       pte = pte | PHY_PAGE_WB;
-       thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type);
-       return;
-}
-
-void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs)
-{
-       unsigned long vpsr;
-       int type;
-
-       u64 vhpt_adr, gppa, pteval, rr, itir;
-       union ia64_isr misr;
-       union ia64_pta vpta;
-       struct thash_data *data;
-       struct kvm_vcpu *v = current_vcpu;
-
-       vpsr = VCPU(v, vpsr);
-       misr.val = VMX(v, cr_isr);
-
-       type = vec;
-
-       if (is_physical_mode(v) && (!(vadr << 1 >> 62))) {
-               if (vec == 2) {
-                       if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) {
-                               emulate_io_inst(v, ((vadr << 1) >> 1), 4);
-                               return;
-                       }
-               }
-               physical_tlb_miss(v, vadr, type);
-               return;
-       }
-       data = vtlb_lookup(v, vadr, type);
-       if (data != 0) {
-               if (type == D_TLB) {
-                       gppa = (vadr & ((1UL << data->ps) - 1))
-                               + (data->ppn >> (data->ps - 12) << data->ps);
-                       if (__gpfn_is_io(gppa >> PAGE_SHIFT)) {
-                               if (data->pl >= ((regs->cr_ipsr >>
-                                               IA64_PSR_CPL0_BIT) & 3))
-                                       emulate_io_inst(v, gppa, data->ma);
-                               else {
-                                       vcpu_set_isr(v, misr.val);
-                                       data_access_rights(v, vadr);
-                               }
-                               return ;
-                       }
-               }
-               thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type);
-
-       } else if (type == D_TLB) {
-               if (misr.sp) {
-                       handle_lds(regs);
-                       return;
-               }
-
-               rr = vcpu_get_rr(v, vadr);
-               itir = rr & (RR_RID_MASK | RR_PS_MASK);
-
-               if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) {
-                       if (vpsr & IA64_PSR_IC) {
-                               vcpu_set_isr(v, misr.val);
-                               alt_dtlb(v, vadr);
-                       } else {
-                               nested_dtlb(v);
-                       }
-                       return ;
-               }
-
-               vpta.val = vcpu_get_pta(v);
-               /* avoid recursively walking (short format) VHPT */
-
-               vhpt_adr = vcpu_thash(v, vadr);
-               if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
-                       /* VHPT successfully read.  */
-                       if (!(pteval & _PAGE_P)) {
-                               if (vpsr & IA64_PSR_IC) {
-                                       vcpu_set_isr(v, misr.val);
-                                       dtlb_fault(v, vadr);
-                               } else {
-                                       nested_dtlb(v);
-                               }
-                       } else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) {
-                               thash_purge_and_insert(v, pteval, itir,
-                                                               vadr, D_TLB);
-                       } else if (vpsr & IA64_PSR_IC) {
-                               vcpu_set_isr(v, misr.val);
-                               dtlb_fault(v, vadr);
-                       } else {
-                               nested_dtlb(v);
-                       }
-               } else {
-                       /* Can't read VHPT.  */
-                       if (vpsr & IA64_PSR_IC) {
-                               vcpu_set_isr(v, misr.val);
-                               dvhpt_fault(v, vadr);
-                       } else {
-                               nested_dtlb(v);
-                       }
-               }
-       } else if (type == I_TLB) {
-               if (!(vpsr & IA64_PSR_IC))
-                       misr.ni = 1;
-               if (!vhpt_enabled(v, vadr, INST_REF)) {
-                       vcpu_set_isr(v, misr.val);
-                       alt_itlb(v, vadr);
-                       return;
-               }
-
-               vpta.val = vcpu_get_pta(v);
-
-               vhpt_adr = vcpu_thash(v, vadr);
-               if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
-                       /* VHPT successfully read.  */
-                       if (pteval & _PAGE_P) {
-                               if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) {
-                                       vcpu_set_isr(v, misr.val);
-                                       itlb_fault(v, vadr);
-                                       return ;
-                               }
-                               rr = vcpu_get_rr(v, vadr);
-                               itir = rr & (RR_RID_MASK | RR_PS_MASK);
-                               thash_purge_and_insert(v, pteval, itir,
-                                                       vadr, I_TLB);
-                       } else {
-                               vcpu_set_isr(v, misr.val);
-                               inst_page_not_present(v, vadr);
-                       }
-               } else {
-                       vcpu_set_isr(v, misr.val);
-                       ivhpt_fault(v, vadr);
-               }
-       }
-}
-
-void kvm_vexirq(struct kvm_vcpu *vcpu)
-{
-       u64 vpsr, isr;
-       struct kvm_pt_regs *regs;
-
-       regs = vcpu_regs(vcpu);
-       vpsr = VCPU(vcpu, vpsr);
-       isr = vpsr & IA64_PSR_RI;
-       reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/
-}
-
-void kvm_ia64_handle_irq(struct kvm_vcpu *v)
-{
-       struct exit_ctl_data *p = &v->arch.exit_data;
-       long psr;
-
-       local_irq_save(psr);
-       p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
-       vmm_transition(v);
-       local_irq_restore(psr);
-
-       VMX(v, timer_check) = 1;
-
-}
-
-static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos)
-{
-       u64 oldrid, moldrid, oldpsbits, vaddr;
-       struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos];
-       vaddr = p->vaddr;
-
-       oldrid = VMX(v, vrr[0]);
-       VMX(v, vrr[0]) = p->rr;
-       oldpsbits = VMX(v, psbits[0]);
-       VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]);
-       moldrid = ia64_get_rr(0x0);
-       ia64_set_rr(0x0, vrrtomrr(p->rr));
-       ia64_srlz_d();
-
-       vaddr = PAGEALIGN(vaddr, p->ps);
-       thash_purge_entries_remote(v, vaddr, p->ps);
-
-       VMX(v, vrr[0]) = oldrid;
-       VMX(v, psbits[0]) = oldpsbits;
-       ia64_set_rr(0x0, moldrid);
-       ia64_dv_serialize_data();
-}
-
-static void vcpu_do_resume(struct kvm_vcpu *vcpu)
-{
-       /*Re-init VHPT and VTLB once from resume*/
-       vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES;
-       thash_init(&vcpu->arch.vhpt, VHPT_SHIFT);
-       vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES;
-       thash_init(&vcpu->arch.vtlb, VTLB_SHIFT);
-
-       ia64_set_pta(vcpu->arch.vhpt.pta.val);
-}
-
-static void vmm_sanity_check(struct kvm_vcpu *vcpu)
-{
-       struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-       if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
-               panic_vm(vcpu, "Failed to do vmm sanity check,"
-                       "it maybe caused by crashed vmm!!\n\n");
-       }
-}
-
-static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
-{
-       vmm_sanity_check(vcpu); /*Guarantee vcpu running on healthy vmm!*/
-
-       if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
-               vcpu_do_resume(vcpu);
-               return;
-       }
-
-       if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) {
-               thash_purge_all(vcpu);
-               return;
-       }
-
-       if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) {
-               while (vcpu->arch.ptc_g_count > 0)
-                       ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count);
-       }
-}
-
-void vmm_transition(struct kvm_vcpu *vcpu)
-{
-       ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd,
-                       1, 0, 0, 0, 0, 0);
-       vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host);
-       ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd,
-                                               1, 0, 0, 0, 0, 0);
-       kvm_do_resume_op(vcpu);
-}
-
-void vmm_panic_handler(u64 vec)
-{
-       struct kvm_vcpu *vcpu = current_vcpu;
-       vmm_sanity = 0;
-       panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
-                       vec2off[vec]);
-}
diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S

deleted file mode 100644 (file)

index 30897d4..0000000
--- a/arch/ia64/kvm/trampoline.S
+++ /dev/null
@@ -1,1038 +0,0 @@
-/* Save all processor states
- *
- * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com>
- * Copyright (c) 2007 Anthony Xu   <anthony.xu@intel.com>
- */
-
-#include <asm/asmmacro.h>
-#include "asm-offsets.h"
-
-
-#define CTX(name)    VMM_CTX_##name##_OFFSET
-
-       /*
-        *      r32:            context_t base address
-        */
-#define        SAVE_BRANCH_REGS                        \
-       add     r2 = CTX(B0),r32;               \
-       add     r3 = CTX(B1),r32;               \
-       mov     r16 = b0;                       \
-       mov     r17 = b1;                       \
-       ;;                                      \
-       st8     [r2]=r16,16;                    \
-       st8     [r3]=r17,16;                    \
-       ;;                                      \
-       mov     r16 = b2;                       \
-       mov     r17 = b3;                       \
-       ;;                                      \
-       st8     [r2]=r16,16;                    \
-       st8     [r3]=r17,16;                    \
-       ;;                                      \
-       mov     r16 = b4;                       \
-       mov     r17 = b5;                       \
-       ;;                                      \
-       st8     [r2]=r16;                       \
-       st8     [r3]=r17;                       \
-       ;;
-
-       /*
-        *      r33:            context_t base address
-        */
-#define        RESTORE_BRANCH_REGS                     \
-       add     r2 = CTX(B0),r33;               \
-       add     r3 = CTX(B1),r33;               \
-       ;;                                      \
-       ld8     r16=[r2],16;                    \
-       ld8     r17=[r3],16;                    \
-       ;;                                      \
-       mov     b0 = r16;                       \
-       mov     b1 = r17;                       \
-       ;;                                      \
-       ld8     r16=[r2],16;                    \
-       ld8     r17=[r3],16;                    \
-       ;;                                      \
-       mov     b2 = r16;                       \
-       mov     b3 = r17;                       \
-       ;;                                      \
-       ld8     r16=[r2];                       \
-       ld8     r17=[r3];                       \
-       ;;                                      \
-       mov     b4=r16;                         \
-       mov     b5=r17;                         \
-       ;;
-
-
-       /*
-        *      r32: context_t base address
-        *      bsw == 1
-        *      Save all bank1 general registers, r4 ~ r7
-        */
-#define        SAVE_GENERAL_REGS                       \
-       add     r2=CTX(R4),r32;                 \
-       add     r3=CTX(R5),r32;                 \
-       ;;                                      \
-.mem.offset 0,0;                               \
-       st8.spill       [r2]=r4,16;             \
-.mem.offset 8,0;                               \
-       st8.spill       [r3]=r5,16;             \
-       ;;                                      \
-.mem.offset 0,0;                               \
-       st8.spill       [r2]=r6,48;             \
-.mem.offset 8,0;                               \
-       st8.spill       [r3]=r7,48;             \
-       ;;                                      \
-.mem.offset 0,0;                               \
-    st8.spill    [r2]=r12;                     \
-.mem.offset 8,0;                               \
-    st8.spill    [r3]=r13;                     \
-    ;;
-
-       /*
-        *      r33: context_t base address
-        *      bsw == 1
-        */
-#define        RESTORE_GENERAL_REGS                    \
-       add     r2=CTX(R4),r33;                 \
-       add     r3=CTX(R5),r33;                 \
-       ;;                                      \
-       ld8.fill        r4=[r2],16;             \
-       ld8.fill        r5=[r3],16;             \
-       ;;                                      \
-       ld8.fill        r6=[r2],48;             \
-       ld8.fill        r7=[r3],48;             \
-       ;;                                      \
-       ld8.fill    r12=[r2];                   \
-       ld8.fill    r13 =[r3];                  \
-       ;;
-
-
-
-
-       /*
-        *      r32:            context_t base address
-        */
-#define        SAVE_KERNEL_REGS                        \
-       add     r2 = CTX(KR0),r32;              \
-       add     r3 = CTX(KR1),r32;              \
-       mov     r16 = ar.k0;                    \
-       mov     r17 = ar.k1;                    \
-       ;;                                      \
-       st8     [r2] = r16,16;                  \
-       st8     [r3] = r17,16;                  \
-       ;;                                      \
-       mov     r16 = ar.k2;                    \
-       mov     r17 = ar.k3;                    \
-       ;;                                      \
-       st8     [r2] = r16,16;                  \
-       st8     [r3] = r17,16;                  \
-       ;;                                      \
-       mov     r16 = ar.k4;                    \
-       mov     r17 = ar.k5;                    \
-       ;;                                      \
-       st8     [r2] = r16,16;                  \
-       st8     [r3] = r17,16;                  \
-       ;;                                      \
-       mov     r16 = ar.k6;                    \
-       mov     r17 = ar.k7;                    \
-       ;;                                      \
-       st8     [r2] = r16;                     \
-       st8     [r3] = r17;                     \
-       ;;
-
-
-
-       /*
-        *      r33:            context_t base address
-        */
-#define        RESTORE_KERNEL_REGS                     \
-       add     r2 = CTX(KR0),r33;              \
-       add     r3 = CTX(KR1),r33;              \
-       ;;                                      \
-       ld8     r16=[r2],16;                    \
-       ld8     r17=[r3],16;                    \
-       ;;                                      \
-       mov     ar.k0=r16;                      \
-       mov     ar.k1=r17;                      \
-       ;;                                      \
-       ld8     r16=[r2],16;                    \
-       ld8     r17=[r3],16;                    \
-       ;;                                      \
-       mov     ar.k2=r16;                      \
-       mov     ar.k3=r17;                      \
-       ;;                                      \
-       ld8     r16=[r2],16;                    \
-       ld8     r17=[r3],16;                    \
-       ;;                                      \
-       mov     ar.k4=r16;                      \
-       mov     ar.k5=r17;                      \
-       ;;                                      \
-       ld8     r16=[r2],16;                    \
-       ld8     r17=[r3],16;                    \
-       ;;                                      \
-       mov     ar.k6=r16;                      \
-       mov     ar.k7=r17;                      \
-       ;;
-
-
-
-       /*
-        *      r32:            context_t base address
-        */
-#define        SAVE_APP_REGS                           \
-       add  r2 = CTX(BSPSTORE),r32;            \
-       mov  r16 = ar.bspstore;                 \
-       ;;                                      \
-       st8  [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\
-       mov  r16 = ar.rnat;                     \
-       ;;                                      \
-       st8  [r2] = r16,CTX(FCR)-CTX(RNAT);     \
-       mov  r16 = ar.fcr;                      \
-       ;;                                      \
-       st8  [r2] = r16,CTX(EFLAG)-CTX(FCR);    \
-       mov  r16 = ar.eflag;                    \
-       ;;                                      \
-       st8  [r2] = r16,CTX(CFLG)-CTX(EFLAG);   \
-       mov  r16 = ar.cflg;                     \
-       ;;                                      \
-       st8  [r2] = r16,CTX(FSR)-CTX(CFLG);     \
-       mov  r16 = ar.fsr;                      \
-       ;;                                      \
-       st8  [r2] = r16,CTX(FIR)-CTX(FSR);      \
-       mov  r16 = ar.fir;                      \
-       ;;                                      \
-       st8  [r2] = r16,CTX(FDR)-CTX(FIR);      \
-       mov  r16 = ar.fdr;                      \
-       ;;                                      \
-       st8  [r2] = r16,CTX(UNAT)-CTX(FDR);     \
-       mov  r16 = ar.unat;                     \
-       ;;                                      \
-       st8  [r2] = r16,CTX(FPSR)-CTX(UNAT);    \
-       mov  r16 = ar.fpsr;                     \
-       ;;                                      \
-       st8  [r2] = r16,CTX(PFS)-CTX(FPSR);     \
-       mov  r16 = ar.pfs;                      \
-       ;;                                      \
-       st8  [r2] = r16,CTX(LC)-CTX(PFS);       \
-       mov  r16 = ar.lc;                       \
-       ;;                                      \
-       st8  [r2] = r16;                        \
-       ;;
-
-       /*
-        *      r33:            context_t base address
-        */
-#define        RESTORE_APP_REGS                        \
-       add  r2=CTX(BSPSTORE),r33;              \
-       ;;                                      \
-       ld8  r16=[r2],CTX(RNAT)-CTX(BSPSTORE);  \
-       ;;                                      \
-       mov  ar.bspstore=r16;                   \
-       ld8  r16=[r2],CTX(FCR)-CTX(RNAT);       \
-       ;;                                      \
-       mov  ar.rnat=r16;                       \
-       ld8  r16=[r2],CTX(EFLAG)-CTX(FCR);      \
-       ;;                                      \
-       mov  ar.fcr=r16;                        \
-       ld8  r16=[r2],CTX(CFLG)-CTX(EFLAG);     \
-       ;;                                      \
-       mov  ar.eflag=r16;                      \
-       ld8  r16=[r2],CTX(FSR)-CTX(CFLG);       \
-       ;;                                      \
-       mov  ar.cflg=r16;                       \
-       ld8  r16=[r2],CTX(FIR)-CTX(FSR);        \
-       ;;                                      \
-       mov  ar.fsr=r16;                        \
-       ld8  r16=[r2],CTX(FDR)-CTX(FIR);        \
-       ;;                                      \
-       mov  ar.fir=r16;                        \
-       ld8  r16=[r2],CTX(UNAT)-CTX(FDR);       \
-       ;;                                      \
-       mov  ar.fdr=r16;                        \
-       ld8  r16=[r2],CTX(FPSR)-CTX(UNAT);      \
-       ;;                                      \
-       mov  ar.unat=r16;                       \
-       ld8  r16=[r2],CTX(PFS)-CTX(FPSR);       \
-       ;;                                      \
-       mov  ar.fpsr=r16;                       \
-       ld8  r16=[r2],CTX(LC)-CTX(PFS);         \
-       ;;                                      \
-       mov  ar.pfs=r16;                        \
-       ld8  r16=[r2];                          \
-       ;;                                      \
-       mov  ar.lc=r16;                         \
-       ;;
-
-       /*
-        *      r32:            context_t base address
-        */
-#define        SAVE_CTL_REGS                           \
-       add     r2 = CTX(DCR),r32;              \
-       mov     r16 = cr.dcr;                   \
-       ;;                                      \
-       st8     [r2] = r16,CTX(IVA)-CTX(DCR);   \
-       ;;                                      \
-       mov     r16 = cr.iva;                   \
-       ;;                                      \
-       st8     [r2] = r16,CTX(PTA)-CTX(IVA);   \
-       ;;                                      \
-       mov r16 = cr.pta;                       \
-       ;;                                      \
-       st8 [r2] = r16 ;                        \
-       ;;
-
-       /*
-        *      r33:            context_t base address
-        */
-#define        RESTORE_CTL_REGS                                \
-       add     r2 = CTX(DCR),r33;                      \
-       ;;                                              \
-       ld8     r16 = [r2],CTX(IVA)-CTX(DCR);           \
-       ;;                                              \
-       mov     cr.dcr = r16;                           \
-       dv_serialize_data;                              \
-       ;;                                              \
-       ld8     r16 = [r2],CTX(PTA)-CTX(IVA);           \
-       ;;                                              \
-       mov     cr.iva = r16;                           \
-       dv_serialize_data;                              \
-       ;;                                              \
-       ld8 r16 = [r2];                                 \
-       ;;                                              \
-       mov cr.pta = r16;                               \
-       dv_serialize_data;                              \
-       ;;
-
-
-       /*
-        *      r32:            context_t base address
-        */
-#define        SAVE_REGION_REGS                        \
-       add     r2=CTX(RR0),r32;                \
-       mov     r16=rr[r0];                     \
-       dep.z   r18=1,61,3;                     \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       mov     r17=rr[r18];                    \
-       dep.z   r18=2,61,3;                     \
-       ;;                                      \
-       st8     [r2]=r17,8;                     \
-       mov     r16=rr[r18];                    \
-       dep.z   r18=3,61,3;                     \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       mov     r17=rr[r18];                    \
-       dep.z   r18=4,61,3;                     \
-       ;;                                      \
-       st8     [r2]=r17,8;                     \
-       mov     r16=rr[r18];                    \
-       dep.z   r18=5,61,3;                     \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       mov     r17=rr[r18];                    \
-       dep.z   r18=7,61,3;                     \
-       ;;                                      \
-       st8     [r2]=r17,16;                    \
-       mov     r16=rr[r18];                    \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       ;;
-
-       /*
-        *      r33:context_t base address
-        */
-#define        RESTORE_REGION_REGS     \
-       add     r2=CTX(RR0),r33;\
-       mov r18=r0;             \
-       ;;                      \
-       ld8     r20=[r2],8;     \
-       ;;      /* rr0 */       \
-       ld8     r21=[r2],8;     \
-       ;;      /* rr1 */       \
-       ld8     r22=[r2],8;     \
-       ;;      /* rr2 */       \
-       ld8     r23=[r2],8;     \
-       ;;      /* rr3 */       \
-       ld8     r24=[r2],8;     \
-       ;;      /* rr4 */       \
-       ld8     r25=[r2],16;    \
-       ;;      /* rr5 */       \
-       ld8     r27=[r2];       \
-       ;;      /* rr7 */       \
-       mov rr[r18]=r20;        \
-       dep.z   r18=1,61,3;     \
-       ;;  /* rr1 */           \
-       mov rr[r18]=r21;        \
-       dep.z   r18=2,61,3;     \
-       ;;  /* rr2 */           \
-       mov rr[r18]=r22;        \
-       dep.z   r18=3,61,3;     \
-       ;;  /* rr3 */           \
-       mov rr[r18]=r23;        \
-       dep.z   r18=4,61,3;     \
-       ;;  /* rr4 */           \
-       mov rr[r18]=r24;        \
-       dep.z   r18=5,61,3;     \
-       ;;  /* rr5 */           \
-       mov rr[r18]=r25;        \
-       dep.z   r18=7,61,3;     \
-       ;;  /* rr7 */           \
-       mov rr[r18]=r27;        \
-       ;;                      \
-       srlz.i;                 \
-       ;;
-
-
-
-       /*
-        *      r32:    context_t base address
-        *      r36~r39:scratch registers
-        */
-#define        SAVE_DEBUG_REGS                         \
-       add     r2=CTX(IBR0),r32;               \
-       add     r3=CTX(DBR0),r32;               \
-       mov     r16=ibr[r0];                    \
-       mov     r17=dbr[r0];                    \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       st8     [r3]=r17,8;                     \
-       add     r18=1,r0;                       \
-       ;;                                      \
-       mov     r16=ibr[r18];                   \
-       mov     r17=dbr[r18];                   \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       st8     [r3]=r17,8;                     \
-       add     r18=2,r0;                       \
-       ;;                                      \
-       mov     r16=ibr[r18];                   \
-       mov     r17=dbr[r18];                   \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       st8     [r3]=r17,8;                     \
-       add     r18=2,r0;                       \
-       ;;                                      \
-       mov     r16=ibr[r18];                   \
-       mov     r17=dbr[r18];                   \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       st8     [r3]=r17,8;                     \
-       add     r18=3,r0;                       \
-       ;;                                      \
-       mov     r16=ibr[r18];                   \
-       mov     r17=dbr[r18];                   \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       st8     [r3]=r17,8;                     \
-       add     r18=4,r0;                       \
-       ;;                                      \
-       mov     r16=ibr[r18];                   \
-       mov     r17=dbr[r18];                   \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       st8     [r3]=r17,8;                     \
-       add     r18=5,r0;                       \
-       ;;                                      \
-       mov     r16=ibr[r18];                   \
-       mov     r17=dbr[r18];                   \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       st8     [r3]=r17,8;                     \
-       add     r18=6,r0;                       \
-       ;;                                      \
-       mov     r16=ibr[r18];                   \
-       mov     r17=dbr[r18];                   \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       st8     [r3]=r17,8;                     \
-       add     r18=7,r0;                       \
-       ;;                                      \
-       mov     r16=ibr[r18];                   \
-       mov     r17=dbr[r18];                   \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       st8     [r3]=r17,8;                     \
-       ;;
-
-
-/*
- *      r33:    point to context_t structure
- *      ar.lc are corrupted.
- */
-#define RESTORE_DEBUG_REGS                     \
-       add     r2=CTX(IBR0),r33;               \
-       add     r3=CTX(DBR0),r33;               \
-       mov r16=7;                              \
-       mov r17=r0;                             \
-       ;;                                      \
-       mov ar.lc = r16;                        \
-       ;;                                      \
-1:                                             \
-       ld8 r18=[r2],8;                         \
-       ld8 r19=[r3],8;                         \
-       ;;                                      \
-       mov ibr[r17]=r18;                       \
-       mov dbr[r17]=r19;                       \
-       ;;                                      \
-       srlz.i;                                 \
-       ;;                                      \
-       add r17=1,r17;                          \
-       br.cloop.sptk 1b;                       \
-       ;;
-
-
-       /*
-        *      r32:            context_t base address
-        */
-#define        SAVE_FPU_LOW                            \
-       add     r2=CTX(F2),r32;                 \
-       add     r3=CTX(F3),r32;                 \
-       ;;                                      \
-       stf.spill.nta   [r2]=f2,32;             \
-       stf.spill.nta   [r3]=f3,32;             \
-       ;;                                      \
-       stf.spill.nta   [r2]=f4,32;             \
-       stf.spill.nta   [r3]=f5,32;             \
-       ;;                                      \
-       stf.spill.nta   [r2]=f6,32;             \
-       stf.spill.nta   [r3]=f7,32;             \
-       ;;                                      \
-       stf.spill.nta   [r2]=f8,32;             \
-       stf.spill.nta   [r3]=f9,32;             \
-       ;;                                      \
-       stf.spill.nta   [r2]=f10,32;            \
-       stf.spill.nta   [r3]=f11,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f12,32;            \
-       stf.spill.nta   [r3]=f13,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f14,32;            \
-       stf.spill.nta   [r3]=f15,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f16,32;            \
-       stf.spill.nta   [r3]=f17,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f18,32;            \
-       stf.spill.nta   [r3]=f19,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f20,32;            \
-       stf.spill.nta   [r3]=f21,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f22,32;            \
-       stf.spill.nta   [r3]=f23,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f24,32;            \
-       stf.spill.nta   [r3]=f25,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f26,32;            \
-       stf.spill.nta   [r3]=f27,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f28,32;            \
-       stf.spill.nta   [r3]=f29,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f30;               \
-       stf.spill.nta   [r3]=f31;               \
-       ;;
-
-       /*
-        *      r32:            context_t base address
-        */
-#define        SAVE_FPU_HIGH                           \
-       add     r2=CTX(F32),r32;                \
-       add     r3=CTX(F33),r32;                \
-       ;;                                      \
-       stf.spill.nta   [r2]=f32,32;            \
-       stf.spill.nta   [r3]=f33,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f34,32;            \
-       stf.spill.nta   [r3]=f35,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f36,32;            \
-       stf.spill.nta   [r3]=f37,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f38,32;            \
-       stf.spill.nta   [r3]=f39,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f40,32;            \
-       stf.spill.nta   [r3]=f41,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f42,32;            \
-       stf.spill.nta   [r3]=f43,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f44,32;            \
-       stf.spill.nta   [r3]=f45,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f46,32;            \
-       stf.spill.nta   [r3]=f47,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f48,32;            \
-       stf.spill.nta   [r3]=f49,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f50,32;            \
-       stf.spill.nta   [r3]=f51,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f52,32;            \
-       stf.spill.nta   [r3]=f53,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f54,32;            \
-       stf.spill.nta   [r3]=f55,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f56,32;            \
-       stf.spill.nta   [r3]=f57,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f58,32;            \
-       stf.spill.nta   [r3]=f59,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f60,32;            \
-       stf.spill.nta   [r3]=f61,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f62,32;            \
-       stf.spill.nta   [r3]=f63,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f64,32;            \
-       stf.spill.nta   [r3]=f65,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f66,32;            \
-       stf.spill.nta   [r3]=f67,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f68,32;            \
-       stf.spill.nta   [r3]=f69,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f70,32;            \
-       stf.spill.nta   [r3]=f71,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f72,32;            \
-       stf.spill.nta   [r3]=f73,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f74,32;            \
-       stf.spill.nta   [r3]=f75,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f76,32;            \
-       stf.spill.nta   [r3]=f77,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f78,32;            \
-       stf.spill.nta   [r3]=f79,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f80,32;            \
-       stf.spill.nta   [r3]=f81,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f82,32;            \
-       stf.spill.nta   [r3]=f83,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f84,32;            \
-       stf.spill.nta   [r3]=f85,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f86,32;            \
-       stf.spill.nta   [r3]=f87,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f88,32;            \
-       stf.spill.nta   [r3]=f89,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f90,32;            \
-       stf.spill.nta   [r3]=f91,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f92,32;            \
-       stf.spill.nta   [r3]=f93,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f94,32;            \
-       stf.spill.nta   [r3]=f95,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f96,32;            \
-       stf.spill.nta   [r3]=f97,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f98,32;            \
-       stf.spill.nta   [r3]=f99,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f100,32;           \
-       stf.spill.nta   [r3]=f101,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f102,32;           \
-       stf.spill.nta   [r3]=f103,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f104,32;           \
-       stf.spill.nta   [r3]=f105,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f106,32;           \
-       stf.spill.nta   [r3]=f107,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f108,32;           \
-       stf.spill.nta   [r3]=f109,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f110,32;           \
-       stf.spill.nta   [r3]=f111,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f112,32;           \
-       stf.spill.nta   [r3]=f113,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f114,32;           \
-       stf.spill.nta   [r3]=f115,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f116,32;           \
-       stf.spill.nta   [r3]=f117,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f118,32;           \
-       stf.spill.nta   [r3]=f119,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f120,32;           \
-       stf.spill.nta   [r3]=f121,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f122,32;           \
-       stf.spill.nta   [r3]=f123,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f124,32;           \
-       stf.spill.nta   [r3]=f125,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f126;              \
-       stf.spill.nta   [r3]=f127;              \
-       ;;
-
-     /*
-      *      r33:    point to context_t structure
-      */
-#define        RESTORE_FPU_LOW                         \
-    add     r2 = CTX(F2), r33;                 \
-    add     r3 = CTX(F3), r33;                 \
-    ;;                                         \
-    ldf.fill.nta f2 = [r2], 32;                        \
-    ldf.fill.nta f3 = [r3], 32;                        \
-    ;;                                         \
-    ldf.fill.nta f4 = [r2], 32;                        \
-    ldf.fill.nta f5 = [r3], 32;                        \
-    ;;                                         \
-    ldf.fill.nta f6 = [r2], 32;                        \
-    ldf.fill.nta f7 = [r3], 32;                        \
-    ;;                                         \
-    ldf.fill.nta f8 = [r2], 32;                        \
-    ldf.fill.nta f9 = [r3], 32;                        \
-    ;;                                         \
-    ldf.fill.nta f10 = [r2], 32;               \
-    ldf.fill.nta f11 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f12 = [r2], 32;               \
-    ldf.fill.nta f13 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f14 = [r2], 32;               \
-    ldf.fill.nta f15 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f16 = [r2], 32;               \
-    ldf.fill.nta f17 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f18 = [r2], 32;               \
-    ldf.fill.nta f19 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f20 = [r2], 32;               \
-    ldf.fill.nta f21 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f22 = [r2], 32;               \
-    ldf.fill.nta f23 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f24 = [r2], 32;               \
-    ldf.fill.nta f25 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f26 = [r2], 32;               \
-    ldf.fill.nta f27 = [r3], 32;               \
-       ;;                                      \
-    ldf.fill.nta f28 = [r2], 32;               \
-    ldf.fill.nta f29 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f30 = [r2], 32;               \
-    ldf.fill.nta f31 = [r3], 32;               \
-    ;;
-
-
-
-    /*
-     *      r33:    point to context_t structure
-     */
-#define        RESTORE_FPU_HIGH                        \
-    add     r2 = CTX(F32), r33;                        \
-    add     r3 = CTX(F33), r33;                        \
-    ;;                                         \
-    ldf.fill.nta f32 = [r2], 32;               \
-    ldf.fill.nta f33 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f34 = [r2], 32;               \
-    ldf.fill.nta f35 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f36 = [r2], 32;               \
-    ldf.fill.nta f37 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f38 = [r2], 32;               \
-    ldf.fill.nta f39 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f40 = [r2], 32;               \
-    ldf.fill.nta f41 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f42 = [r2], 32;               \
-    ldf.fill.nta f43 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f44 = [r2], 32;               \
-    ldf.fill.nta f45 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f46 = [r2], 32;               \
-    ldf.fill.nta f47 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f48 = [r2], 32;               \
-    ldf.fill.nta f49 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f50 = [r2], 32;               \
-    ldf.fill.nta f51 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f52 = [r2], 32;               \
-    ldf.fill.nta f53 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f54 = [r2], 32;               \
-    ldf.fill.nta f55 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f56 = [r2], 32;               \
-    ldf.fill.nta f57 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f58 = [r2], 32;               \
-    ldf.fill.nta f59 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f60 = [r2], 32;               \
-    ldf.fill.nta f61 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f62 = [r2], 32;               \
-    ldf.fill.nta f63 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f64 = [r2], 32;               \
-    ldf.fill.nta f65 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f66 = [r2], 32;               \
-    ldf.fill.nta f67 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f68 = [r2], 32;               \
-    ldf.fill.nta f69 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f70 = [r2], 32;               \
-    ldf.fill.nta f71 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f72 = [r2], 32;               \
-    ldf.fill.nta f73 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f74 = [r2], 32;               \
-    ldf.fill.nta f75 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f76 = [r2], 32;               \
-    ldf.fill.nta f77 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f78 = [r2], 32;               \
-    ldf.fill.nta f79 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f80 = [r2], 32;               \
-    ldf.fill.nta f81 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f82 = [r2], 32;               \
-    ldf.fill.nta f83 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f84 = [r2], 32;               \
-    ldf.fill.nta f85 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f86 = [r2], 32;               \
-    ldf.fill.nta f87 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f88 = [r2], 32;               \
-    ldf.fill.nta f89 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f90 = [r2], 32;               \
-    ldf.fill.nta f91 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f92 = [r2], 32;               \
-    ldf.fill.nta f93 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f94 = [r2], 32;               \
-    ldf.fill.nta f95 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f96 = [r2], 32;               \
-    ldf.fill.nta f97 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f98 = [r2], 32;               \
-    ldf.fill.nta f99 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f100 = [r2], 32;              \
-    ldf.fill.nta f101 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f102 = [r2], 32;              \
-    ldf.fill.nta f103 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f104 = [r2], 32;              \
-    ldf.fill.nta f105 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f106 = [r2], 32;              \
-    ldf.fill.nta f107 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f108 = [r2], 32;              \
-    ldf.fill.nta f109 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f110 = [r2], 32;              \
-    ldf.fill.nta f111 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f112 = [r2], 32;              \
-    ldf.fill.nta f113 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f114 = [r2], 32;              \
-    ldf.fill.nta f115 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f116 = [r2], 32;              \
-    ldf.fill.nta f117 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f118 = [r2], 32;              \
-    ldf.fill.nta f119 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f120 = [r2], 32;              \
-    ldf.fill.nta f121 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f122 = [r2], 32;              \
-    ldf.fill.nta f123 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f124 = [r2], 32;              \
-    ldf.fill.nta f125 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f126 = [r2], 32;              \
-    ldf.fill.nta f127 = [r3], 32;              \
-    ;;
-
-       /*
-        *      r32:            context_t base address
-        */
-#define        SAVE_PTK_REGS                           \
-    add r2=CTX(PKR0), r32;                     \
-    mov r16=7;                                 \
-    ;;                                                 \
-    mov ar.lc=r16;                             \
-    mov r17=r0;                                        \
-    ;;                                         \
-1:                                             \
-    mov r18=pkr[r17];                          \
-    ;;                                         \
-    srlz.i;                                    \
-    ;;                                                 \
-    st8 [r2]=r18, 8;                           \
-    ;;                                         \
-    add r17 =1,r17;                            \
-    ;;                                         \
-    br.cloop.sptk 1b;                          \
-    ;;
-
-/*
- *      r33:    point to context_t structure
- *      ar.lc are corrupted.
- */
-#define RESTORE_PTK_REGS                       \
-    add r2=CTX(PKR0), r33;                     \
-    mov r16=7;                                 \
-    ;;                                                 \
-    mov ar.lc=r16;                             \
-    mov r17=r0;                                        \
-    ;;                                         \
-1:                                             \
-    ld8 r18=[r2], 8;                           \
-    ;;                                         \
-    mov pkr[r17]=r18;                          \
-    ;;                                         \
-    srlz.i;                                    \
-    ;;                                                 \
-    add r17 =1,r17;                            \
-    ;;                                         \
-    br.cloop.sptk 1b;                          \
-    ;;
-
-
-/*
- * void vmm_trampoline( context_t * from,
- *                     context_t * to)
- *
- *     from:   r32
- *     to:     r33
- *  note: interrupt disabled before call this function.
- */
-GLOBAL_ENTRY(vmm_trampoline)
-    mov r16 = psr
-    adds r2 = CTX(PSR), r32
-    ;;
-    st8 [r2] = r16, 8       // psr
-    mov r17 = pr
-    ;;
-    st8 [r2] = r17, 8       // pr
-    mov r18 = ar.unat
-    ;;
-    st8 [r2] = r18
-    mov r17 = ar.rsc
-    ;;
-    adds r2 = CTX(RSC),r32
-    ;;
-    st8 [r2]= r17
-    mov ar.rsc =0
-    flushrs
-    ;;
-    SAVE_GENERAL_REGS
-    ;;
-    SAVE_KERNEL_REGS
-    ;;
-    SAVE_APP_REGS
-    ;;
-    SAVE_BRANCH_REGS
-    ;;
-    SAVE_CTL_REGS
-    ;;
-    SAVE_REGION_REGS
-    ;;
-    //SAVE_DEBUG_REGS
-    ;;
-    rsm  psr.dfl
-    ;;
-    srlz.d
-    ;;
-    SAVE_FPU_LOW
-    ;;
-    rsm  psr.dfh
-    ;;
-    srlz.d
-    ;;
-    SAVE_FPU_HIGH
-    ;;
-    SAVE_PTK_REGS
-    ;;
-    RESTORE_PTK_REGS
-    ;;
-    RESTORE_FPU_HIGH
-    ;;
-    RESTORE_FPU_LOW
-    ;;
-    //RESTORE_DEBUG_REGS
-    ;;
-    RESTORE_REGION_REGS
-    ;;
-    RESTORE_CTL_REGS
-    ;;
-    RESTORE_BRANCH_REGS
-    ;;
-    RESTORE_APP_REGS
-    ;;
-    RESTORE_KERNEL_REGS
-    ;;
-    RESTORE_GENERAL_REGS
-    ;;
-    adds r2=CTX(PSR), r33
-    ;;
-    ld8 r16=[r2], 8       // psr
-    ;;
-    mov psr.l=r16
-    ;;
-    srlz.d
-    ;;
-    ld8 r16=[r2], 8       // pr
-    ;;
-    mov pr =r16,-1
-    ld8 r16=[r2]       // unat
-    ;;
-    mov ar.unat=r16
-    ;;
-    adds r2=CTX(RSC),r33
-    ;;
-    ld8 r16 =[r2]
-    ;;
-    mov ar.rsc = r16
-    ;;
-    br.ret.sptk.few b0
-END(vmm_trampoline)
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c

deleted file mode 100644 (file)

index 958815c..0000000
--- a/arch/ia64/kvm/vcpu.c
+++ /dev/null
@@ -1,2209 +0,0 @@
-/*
- * kvm_vcpu.c: handling all virtual cpu related thing.
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- *  Shaofan Li (Susue Li) <susie.li@intel.com>
- *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
- *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *  Xiantao Zhang <xiantao.zhang@intel.com>
- */
-
-#include <linux/kvm_host.h>
-#include <linux/types.h>
-
-#include <asm/processor.h>
-#include <asm/ia64regs.h>
-#include <asm/gcc_intrin.h>
-#include <asm/kregs.h>
-#include <asm/pgtable.h>
-#include <asm/tlb.h>
-
-#include "asm-offsets.h"
-#include "vcpu.h"
-
-/*
- * Special notes:
- * - Index by it/dt/rt sequence
- * - Only existing mode transitions are allowed in this table
- * - RSE is placed at lazy mode when emulating guest partial mode
- * - If gva happens to be rr0 and rr4, only allowed case is identity
- *   mapping (gva=gpa), or panic! (How?)
- */
-int mm_switch_table[8][8] = {
-       /*  2004/09/12(Kevin): Allow switch to self */
-       /*
-        *  (it,dt,rt): (0,0,0) -> (1,1,1)
-        *  This kind of transition usually occurs in the very early
-        *  stage of Linux boot up procedure. Another case is in efi
-        *  and pal calls. (see "arch/ia64/kernel/head.S")
-        *
-        *  (it,dt,rt): (0,0,0) -> (0,1,1)
-        *  This kind of transition is found when OSYa exits efi boot
-        *  service. Due to gva = gpa in this case (Same region),
-        *  data access can be satisfied though itlb entry for physical
-        *  emulation is hit.
-        */
-       {SW_SELF, 0,  0,  SW_NOP, 0,  0,  0,  SW_P2V},
-       {0,  0,  0,  0,  0,  0,  0,  0},
-       {0,  0,  0,  0,  0,  0,  0,  0},
-       /*
-        *  (it,dt,rt): (0,1,1) -> (1,1,1)
-        *  This kind of transition is found in OSYa.
-        *
-        *  (it,dt,rt): (0,1,1) -> (0,0,0)
-        *  This kind of transition is found in OSYa
-        */
-       {SW_NOP, 0,  0,  SW_SELF, 0,  0,  0,  SW_P2V},
-       /* (1,0,0)->(1,1,1) */
-       {0,  0,  0,  0,  0,  0,  0,  SW_P2V},
-       /*
-        *  (it,dt,rt): (1,0,1) -> (1,1,1)
-        *  This kind of transition usually occurs when Linux returns
-        *  from the low level TLB miss handlers.
-        *  (see "arch/ia64/kernel/ivt.S")
-        */
-       {0,  0,  0,  0,  0,  SW_SELF, 0,  SW_P2V},
-       {0,  0,  0,  0,  0,  0,  0,  0},
-       /*
-        *  (it,dt,rt): (1,1,1) -> (1,0,1)
-        *  This kind of transition usually occurs in Linux low level
-        *  TLB miss handler. (see "arch/ia64/kernel/ivt.S")
-        *
-        *  (it,dt,rt): (1,1,1) -> (0,0,0)
-        *  This kind of transition usually occurs in pal and efi calls,
-        *  which requires running in physical mode.
-        *  (see "arch/ia64/kernel/head.S")
-        *  (1,1,1)->(1,0,0)
-        */
-
-       {SW_V2P, 0,  0,  0,  SW_V2P, SW_V2P, 0,  SW_SELF},
-};
-
-void physical_mode_init(struct kvm_vcpu  *vcpu)
-{
-       vcpu->arch.mode_flags = GUEST_IN_PHY;
-}
-
-void switch_to_physical_rid(struct kvm_vcpu *vcpu)
-{
-       unsigned long psr;
-
-       /* Save original virtual mode rr[0] and rr[4] */
-       psr = ia64_clear_ic();
-       ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0);
-       ia64_srlz_d();
-       ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4);
-       ia64_srlz_d();
-
-       ia64_set_psr(psr);
-       return;
-}
-
-void switch_to_virtual_rid(struct kvm_vcpu *vcpu)
-{
-       unsigned long psr;
-
-       psr = ia64_clear_ic();
-       ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0);
-       ia64_srlz_d();
-       ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4);
-       ia64_srlz_d();
-       ia64_set_psr(psr);
-       return;
-}
-
-static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr)
-{
-       return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)];
-}
-
-void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
-                                       struct ia64_psr new_psr)
-{
-       int act;
-       act = mm_switch_action(old_psr, new_psr);
-       switch (act) {
-       case SW_V2P:
-               /*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n",
-               old_psr.val, new_psr.val);*/
-               switch_to_physical_rid(vcpu);
-               /*
-                * Set rse to enforced lazy, to prevent active rse
-                *save/restor when guest physical mode.
-                */
-               vcpu->arch.mode_flags |= GUEST_IN_PHY;
-               break;
-       case SW_P2V:
-               switch_to_virtual_rid(vcpu);
-               /*
-                * recover old mode which is saved when entering
-                * guest physical mode
-                */
-               vcpu->arch.mode_flags &= ~GUEST_IN_PHY;
-               break;
-       case SW_SELF:
-               break;
-       case SW_NOP:
-               break;
-       default:
-               /* Sanity check */
-               break;
-       }
-       return;
-}
-
-/*
- * In physical mode, insert tc/tr for region 0 and 4 uses
- * RID[0] and RID[4] which is for physical mode emulation.
- * However what those inserted tc/tr wants is rid for
- * virtual mode. So original virtual rid needs to be restored
- * before insert.
- *
- * Operations which required such switch include:
- *  - insertions (itc.*, itr.*)
- *  - purges (ptc.* and ptr.*)
- *  - tpa
- *  - tak
- *  - thash?, ttag?
- * All above needs actual virtual rid for destination entry.
- */
-
-void check_mm_mode_switch(struct kvm_vcpu *vcpu,  struct ia64_psr old_psr,
-                                       struct ia64_psr new_psr)
-{
-
-       if ((old_psr.dt != new_psr.dt)
-                       || (old_psr.it != new_psr.it)
-                       || (old_psr.rt != new_psr.rt))
-               switch_mm_mode(vcpu, old_psr, new_psr);
-
-       return;
-}
-
-
-/*
- * In physical mode, insert tc/tr for region 0 and 4 uses
- * RID[0] and RID[4] which is for physical mode emulation.
- * However what those inserted tc/tr wants is rid for
- * virtual mode. So original virtual rid needs to be restored
- * before insert.
- *
- * Operations which required such switch include:
- *  - insertions (itc.*, itr.*)
- *  - purges (ptc.* and ptr.*)
- *  - tpa
- *  - tak
- *  - thash?, ttag?
- * All above needs actual virtual rid for destination entry.
- */
-
-void prepare_if_physical_mode(struct kvm_vcpu *vcpu)
-{
-       if (is_physical_mode(vcpu)) {
-               vcpu->arch.mode_flags |= GUEST_PHY_EMUL;
-               switch_to_virtual_rid(vcpu);
-       }
-       return;
-}
-
-/* Recover always follows prepare */
-void recover_if_physical_mode(struct kvm_vcpu *vcpu)
-{
-       if (is_physical_mode(vcpu))
-               switch_to_physical_rid(vcpu);
-       vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL;
-       return;
-}
-
-#define RPT(x) ((u16) &((struct kvm_pt_regs *)0)->x)
-
-static u16 gr_info[32] = {
-       0,      /* r0 is read-only : WE SHOULD NEVER GET THIS */
-       RPT(r1), RPT(r2), RPT(r3),
-       RPT(r4), RPT(r5), RPT(r6), RPT(r7),
-       RPT(r8), RPT(r9), RPT(r10), RPT(r11),
-       RPT(r12), RPT(r13), RPT(r14), RPT(r15),
-       RPT(r16), RPT(r17), RPT(r18), RPT(r19),
-       RPT(r20), RPT(r21), RPT(r22), RPT(r23),
-       RPT(r24), RPT(r25), RPT(r26), RPT(r27),
-       RPT(r28), RPT(r29), RPT(r30), RPT(r31)
-};
-
-#define IA64_FIRST_STACKED_GR   32
-#define IA64_FIRST_ROTATING_FR  32
-
-static inline unsigned long
-rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg)
-{
-       reg += rrb;
-       if (reg >= sor)
-               reg -= sor;
-       return reg;
-}
-
-/*
- * Return the (rotated) index for floating point register
- * be in the REGNUM (REGNUM must range from 32-127,
- * result is in the range from 0-95.
- */
-static inline unsigned long fph_index(struct kvm_pt_regs *regs,
-                                               long regnum)
-{
-       unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
-       return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
-}
-
-/*
- * The inverse of the above: given bspstore and the number of
- * registers, calculate ar.bsp.
- */
-static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr,
-                                                       long num_regs)
-{
-       long delta = ia64_rse_slot_num(addr) + num_regs;
-       int i = 0;
-
-       if (num_regs < 0)
-               delta -= 0x3e;
-       if (delta < 0) {
-               while (delta <= -0x3f) {
-                       i--;
-                       delta += 0x3f;
-               }
-       } else {
-               while (delta >= 0x3f) {
-                       i++;
-                       delta -= 0x3f;
-               }
-       }
-
-       return addr + num_regs + i;
-}
-
-static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
-                                       unsigned long *val, int *nat)
-{
-       unsigned long *bsp, *addr, *rnat_addr, *bspstore;
-       unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
-       unsigned long nat_mask;
-       unsigned long old_rsc, new_rsc;
-       long sof = (regs->cr_ifs) & 0x7f;
-       long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
-       long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
-       long ridx = r1 - 32;
-
-       if (ridx < sor)
-               ridx = rotate_reg(sor, rrb_gr, ridx);
-
-       old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
-       new_rsc = old_rsc&(~(0x3));
-       ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
-
-       bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-       bsp = kbs + (regs->loadrs >> 19);
-
-       addr = kvm_rse_skip_regs(bsp, -sof + ridx);
-       nat_mask = 1UL << ia64_rse_slot_num(addr);
-       rnat_addr = ia64_rse_rnat_addr(addr);
-
-       if (addr >= bspstore) {
-               ia64_flushrs();
-               ia64_mf();
-               bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-       }
-       *val = *addr;
-       if (nat) {
-               if (bspstore < rnat_addr)
-                       *nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT)
-                                                       & nat_mask);
-               else
-                       *nat = (int)!!((*rnat_addr) & nat_mask);
-               ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
-       }
-}
-
-void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
-                               unsigned long val, unsigned long nat)
-{
-       unsigned long *bsp, *bspstore, *addr, *rnat_addr;
-       unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
-       unsigned long nat_mask;
-       unsigned long old_rsc, new_rsc, psr;
-       unsigned long rnat;
-       long sof = (regs->cr_ifs) & 0x7f;
-       long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
-       long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
-       long ridx = r1 - 32;
-
-       if (ridx < sor)
-               ridx = rotate_reg(sor, rrb_gr, ridx);
-
-       old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
-       /* put RSC to lazy mode, and set loadrs 0 */
-       new_rsc = old_rsc & (~0x3fff0003);
-       ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
-       bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */
-
-       addr = kvm_rse_skip_regs(bsp, -sof + ridx);
-       nat_mask = 1UL << ia64_rse_slot_num(addr);
-       rnat_addr = ia64_rse_rnat_addr(addr);
-
-       local_irq_save(psr);
-       bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-       if (addr >= bspstore) {
-
-               ia64_flushrs();
-               ia64_mf();
-               *addr = val;
-               bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-               rnat = ia64_getreg(_IA64_REG_AR_RNAT);
-               if (bspstore < rnat_addr)
-                       rnat = rnat & (~nat_mask);
-               else
-                       *rnat_addr = (*rnat_addr)&(~nat_mask);
-
-               ia64_mf();
-               ia64_loadrs();
-               ia64_setreg(_IA64_REG_AR_RNAT, rnat);
-       } else {
-               rnat = ia64_getreg(_IA64_REG_AR_RNAT);
-               *addr = val;
-               if (bspstore < rnat_addr)
-                       rnat = rnat&(~nat_mask);
-               else
-                       *rnat_addr = (*rnat_addr) & (~nat_mask);
-
-               ia64_setreg(_IA64_REG_AR_BSPSTORE, (unsigned long)bspstore);
-               ia64_setreg(_IA64_REG_AR_RNAT, rnat);
-       }
-       local_irq_restore(psr);
-       ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
-}
-
-void getreg(unsigned long regnum, unsigned long *val,
-                               int *nat, struct kvm_pt_regs *regs)
-{
-       unsigned long addr, *unat;
-       if (regnum >= IA64_FIRST_STACKED_GR) {
-               get_rse_reg(regs, regnum, val, nat);
-               return;
-       }
-
-       /*
-        * Now look at registers in [0-31] range and init correct UNAT
-        */
-       addr = (unsigned long)regs;
-       unat = &regs->eml_unat;
-
-       addr += gr_info[regnum];
-
-       *val  = *(unsigned long *)addr;
-       /*
-        * do it only when requested
-        */
-       if (nat)
-               *nat  = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL;
-}
-
-void setreg(unsigned long regnum, unsigned long val,
-                       int nat, struct kvm_pt_regs *regs)
-{
-       unsigned long addr;
-       unsigned long bitmask;
-       unsigned long *unat;
-
-       /*
-        * First takes care of stacked registers
-        */
-       if (regnum >= IA64_FIRST_STACKED_GR) {
-               set_rse_reg(regs, regnum, val, nat);
-               return;
-       }
-
-       /*
-        * Now look at registers in [0-31] range and init correct UNAT
-        */
-       addr = (unsigned long)regs;
-       unat = &regs->eml_unat;
-       /*
-        * add offset from base of struct
-        * and do it !
-        */
-       addr += gr_info[regnum];
-
-       *(unsigned long *)addr = val;
-
-       /*
-        * We need to clear the corresponding UNAT bit to fully emulate the load
-        * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
-        */
-       bitmask   = 1UL << ((addr >> 3) & 0x3f);
-       if (nat)
-               *unat |= bitmask;
-        else
-               *unat &= ~bitmask;
-
-}
-
-u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
-{
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-       unsigned long val;
-
-       if (!reg)
-               return 0;
-       getreg(reg, &val, 0, regs);
-       return val;
-}
-
-void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, u64 value, int nat)
-{
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-       long sof = (regs->cr_ifs) & 0x7f;
-
-       if (!reg)
-               return;
-       if (reg >= sof + 32)
-               return;
-       setreg(reg, value, nat, regs);  /* FIXME: handle NATs later*/
-}
-
-void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
-                               struct kvm_pt_regs *regs)
-{
-       /* Take floating register rotation into consideration*/
-       if (regnum >= IA64_FIRST_ROTATING_FR)
-               regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
-#define CASE_FIXED_FP(reg)                     \
-       case  (reg) :                           \
-               ia64_stf_spill(fpval, reg);     \
-       break
-
-       switch (regnum) {
-               CASE_FIXED_FP(0);
-               CASE_FIXED_FP(1);
-               CASE_FIXED_FP(2);
-               CASE_FIXED_FP(3);
-               CASE_FIXED_FP(4);
-               CASE_FIXED_FP(5);
-
-               CASE_FIXED_FP(6);
-               CASE_FIXED_FP(7);
-               CASE_FIXED_FP(8);
-               CASE_FIXED_FP(9);
-               CASE_FIXED_FP(10);
-               CASE_FIXED_FP(11);
-
-               CASE_FIXED_FP(12);
-               CASE_FIXED_FP(13);
-               CASE_FIXED_FP(14);
-               CASE_FIXED_FP(15);
-               CASE_FIXED_FP(16);
-               CASE_FIXED_FP(17);
-               CASE_FIXED_FP(18);
-               CASE_FIXED_FP(19);
-               CASE_FIXED_FP(20);
-               CASE_FIXED_FP(21);
-               CASE_FIXED_FP(22);
-               CASE_FIXED_FP(23);
-               CASE_FIXED_FP(24);
-               CASE_FIXED_FP(25);
-               CASE_FIXED_FP(26);
-               CASE_FIXED_FP(27);
-               CASE_FIXED_FP(28);
-               CASE_FIXED_FP(29);
-               CASE_FIXED_FP(30);
-               CASE_FIXED_FP(31);
-               CASE_FIXED_FP(32);
-               CASE_FIXED_FP(33);
-               CASE_FIXED_FP(34);
-               CASE_FIXED_FP(35);
-               CASE_FIXED_FP(36);
-               CASE_FIXED_FP(37);
-               CASE_FIXED_FP(38);
-               CASE_FIXED_FP(39);
-               CASE_FIXED_FP(40);
-               CASE_FIXED_FP(41);
-               CASE_FIXED_FP(42);
-               CASE_FIXED_FP(43);
-               CASE_FIXED_FP(44);
-               CASE_FIXED_FP(45);
-               CASE_FIXED_FP(46);
-               CASE_FIXED_FP(47);
-               CASE_FIXED_FP(48);
-               CASE_FIXED_FP(49);
-               CASE_FIXED_FP(50);
-               CASE_FIXED_FP(51);
-               CASE_FIXED_FP(52);
-               CASE_FIXED_FP(53);
-               CASE_FIXED_FP(54);
-               CASE_FIXED_FP(55);
-               CASE_FIXED_FP(56);
-               CASE_FIXED_FP(57);
-               CASE_FIXED_FP(58);
-               CASE_FIXED_FP(59);
-               CASE_FIXED_FP(60);
-               CASE_FIXED_FP(61);
-               CASE_FIXED_FP(62);
-               CASE_FIXED_FP(63);
-               CASE_FIXED_FP(64);
-               CASE_FIXED_FP(65);
-               CASE_FIXED_FP(66);
-               CASE_FIXED_FP(67);
-               CASE_FIXED_FP(68);
-               CASE_FIXED_FP(69);
-               CASE_FIXED_FP(70);
-               CASE_FIXED_FP(71);
-               CASE_FIXED_FP(72);
-               CASE_FIXED_FP(73);
-               CASE_FIXED_FP(74);
-               CASE_FIXED_FP(75);
-               CASE_FIXED_FP(76);
-               CASE_FIXED_FP(77);
-               CASE_FIXED_FP(78);
-               CASE_FIXED_FP(79);
-               CASE_FIXED_FP(80);
-               CASE_FIXED_FP(81);
-               CASE_FIXED_FP(82);
-               CASE_FIXED_FP(83);
-               CASE_FIXED_FP(84);
-               CASE_FIXED_FP(85);
-               CASE_FIXED_FP(86);
-               CASE_FIXED_FP(87);
-               CASE_FIXED_FP(88);
-               CASE_FIXED_FP(89);
-               CASE_FIXED_FP(90);
-               CASE_FIXED_FP(91);
-               CASE_FIXED_FP(92);
-               CASE_FIXED_FP(93);
-               CASE_FIXED_FP(94);
-               CASE_FIXED_FP(95);
-               CASE_FIXED_FP(96);
-               CASE_FIXED_FP(97);
-               CASE_FIXED_FP(98);
-               CASE_FIXED_FP(99);
-               CASE_FIXED_FP(100);
-               CASE_FIXED_FP(101);
-               CASE_FIXED_FP(102);
-               CASE_FIXED_FP(103);
-               CASE_FIXED_FP(104);
-               CASE_FIXED_FP(105);
-               CASE_FIXED_FP(106);
-               CASE_FIXED_FP(107);
-               CASE_FIXED_FP(108);
-               CASE_FIXED_FP(109);
-               CASE_FIXED_FP(110);
-               CASE_FIXED_FP(111);
-               CASE_FIXED_FP(112);
-               CASE_FIXED_FP(113);
-               CASE_FIXED_FP(114);
-               CASE_FIXED_FP(115);
-               CASE_FIXED_FP(116);
-               CASE_FIXED_FP(117);
-               CASE_FIXED_FP(118);
-               CASE_FIXED_FP(119);
-               CASE_FIXED_FP(120);
-               CASE_FIXED_FP(121);
-               CASE_FIXED_FP(122);
-               CASE_FIXED_FP(123);
-               CASE_FIXED_FP(124);
-               CASE_FIXED_FP(125);
-               CASE_FIXED_FP(126);
-               CASE_FIXED_FP(127);
-       }
-#undef CASE_FIXED_FP
-}
-
-void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
-                                       struct kvm_pt_regs *regs)
-{
-       /* Take floating register rotation into consideration*/
-       if (regnum >= IA64_FIRST_ROTATING_FR)
-               regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
-
-#define CASE_FIXED_FP(reg)                     \
-       case (reg) :                            \
-               ia64_ldf_fill(reg, fpval);      \
-       break
-
-       switch (regnum) {
-               CASE_FIXED_FP(2);
-               CASE_FIXED_FP(3);
-               CASE_FIXED_FP(4);
-               CASE_FIXED_FP(5);
-
-               CASE_FIXED_FP(6);
-               CASE_FIXED_FP(7);
-               CASE_FIXED_FP(8);
-               CASE_FIXED_FP(9);
-               CASE_FIXED_FP(10);
-               CASE_FIXED_FP(11);
-
-               CASE_FIXED_FP(12);
-               CASE_FIXED_FP(13);
-               CASE_FIXED_FP(14);
-               CASE_FIXED_FP(15);
-               CASE_FIXED_FP(16);
-               CASE_FIXED_FP(17);
-               CASE_FIXED_FP(18);
-               CASE_FIXED_FP(19);
-               CASE_FIXED_FP(20);
-               CASE_FIXED_FP(21);
-               CASE_FIXED_FP(22);
-               CASE_FIXED_FP(23);
-               CASE_FIXED_FP(24);
-               CASE_FIXED_FP(25);
-               CASE_FIXED_FP(26);
-               CASE_FIXED_FP(27);
-               CASE_FIXED_FP(28);
-               CASE_FIXED_FP(29);
-               CASE_FIXED_FP(30);
-               CASE_FIXED_FP(31);
-               CASE_FIXED_FP(32);
-               CASE_FIXED_FP(33);
-               CASE_FIXED_FP(34);
-               CASE_FIXED_FP(35);
-               CASE_FIXED_FP(36);
-               CASE_FIXED_FP(37);
-               CASE_FIXED_FP(38);
-               CASE_FIXED_FP(39);
-               CASE_FIXED_FP(40);
-               CASE_FIXED_FP(41);
-               CASE_FIXED_FP(42);
-               CASE_FIXED_FP(43);
-               CASE_FIXED_FP(44);
-               CASE_FIXED_FP(45);
-               CASE_FIXED_FP(46);
-               CASE_FIXED_FP(47);
-               CASE_FIXED_FP(48);
-               CASE_FIXED_FP(49);
-               CASE_FIXED_FP(50);
-               CASE_FIXED_FP(51);
-               CASE_FIXED_FP(52);
-               CASE_FIXED_FP(53);
-               CASE_FIXED_FP(54);
-               CASE_FIXED_FP(55);
-               CASE_FIXED_FP(56);
-               CASE_FIXED_FP(57);
-               CASE_FIXED_FP(58);
-               CASE_FIXED_FP(59);
-               CASE_FIXED_FP(60);
-               CASE_FIXED_FP(61);
-               CASE_FIXED_FP(62);
-               CASE_FIXED_FP(63);
-               CASE_FIXED_FP(64);
-               CASE_FIXED_FP(65);
-               CASE_FIXED_FP(66);
-               CASE_FIXED_FP(67);
-               CASE_FIXED_FP(68);
-               CASE_FIXED_FP(69);
-               CASE_FIXED_FP(70);
-               CASE_FIXED_FP(71);
-               CASE_FIXED_FP(72);
-               CASE_FIXED_FP(73);
-               CASE_FIXED_FP(74);
-               CASE_FIXED_FP(75);
-               CASE_FIXED_FP(76);
-               CASE_FIXED_FP(77);
-               CASE_FIXED_FP(78);
-               CASE_FIXED_FP(79);
-               CASE_FIXED_FP(80);
-               CASE_FIXED_FP(81);
-               CASE_FIXED_FP(82);
-               CASE_FIXED_FP(83);
-               CASE_FIXED_FP(84);
-               CASE_FIXED_FP(85);
-               CASE_FIXED_FP(86);
-               CASE_FIXED_FP(87);
-               CASE_FIXED_FP(88);
-               CASE_FIXED_FP(89);
-               CASE_FIXED_FP(90);
-               CASE_FIXED_FP(91);
-               CASE_FIXED_FP(92);
-               CASE_FIXED_FP(93);
-               CASE_FIXED_FP(94);
-               CASE_FIXED_FP(95);
-               CASE_FIXED_FP(96);
-               CASE_FIXED_FP(97);
-               CASE_FIXED_FP(98);
-               CASE_FIXED_FP(99);
-               CASE_FIXED_FP(100);
-               CASE_FIXED_FP(101);
-               CASE_FIXED_FP(102);
-               CASE_FIXED_FP(103);
-               CASE_FIXED_FP(104);
-               CASE_FIXED_FP(105);
-               CASE_FIXED_FP(106);
-               CASE_FIXED_FP(107);
-               CASE_FIXED_FP(108);
-               CASE_FIXED_FP(109);
-               CASE_FIXED_FP(110);
-               CASE_FIXED_FP(111);
-               CASE_FIXED_FP(112);
-               CASE_FIXED_FP(113);
-               CASE_FIXED_FP(114);
-               CASE_FIXED_FP(115);
-               CASE_FIXED_FP(116);
-               CASE_FIXED_FP(117);
-               CASE_FIXED_FP(118);
-               CASE_FIXED_FP(119);
-               CASE_FIXED_FP(120);
-               CASE_FIXED_FP(121);
-               CASE_FIXED_FP(122);
-               CASE_FIXED_FP(123);
-               CASE_FIXED_FP(124);
-               CASE_FIXED_FP(125);
-               CASE_FIXED_FP(126);
-               CASE_FIXED_FP(127);
-       }
-}
-
-void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-                                               struct ia64_fpreg *val)
-{
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-       getfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
-}
-
-void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-                                               struct ia64_fpreg *val)
-{
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-       if (reg > 1)
-               setfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
-}
-
-/*
- * The Altix RTC is mapped specially here for the vmm module
- */
-#define SN_RTC_BASE    (u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT))
-static long kvm_get_itc(struct kvm_vcpu *vcpu)
-{
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-       struct kvm *kvm = (struct kvm *)KVM_VM_BASE;
-
-       if (kvm->arch.is_sn2)
-               return (*SN_RTC_BASE);
-       else
-#endif
-               return ia64_getreg(_IA64_REG_AR_ITC);
-}
-
-/************************************************************************
- * lsapic timer
- ***********************************************************************/
-u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
-{
-       unsigned long guest_itc;
-       guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu);
-
-       if (guest_itc >= VMX(vcpu, last_itc)) {
-               VMX(vcpu, last_itc) = guest_itc;
-               return  guest_itc;
-       } else
-               return VMX(vcpu, last_itc);
-}
-
-static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val);
-static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
-{
-       struct kvm_vcpu *v;
-       struct kvm *kvm;
-       int i;
-       long itc_offset = val - kvm_get_itc(vcpu);
-       unsigned long vitv = VCPU(vcpu, itv);
-
-       kvm = (struct kvm *)KVM_VM_BASE;
-
-       if (kvm_vcpu_is_bsp(vcpu)) {
-               for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) {
-                       v = (struct kvm_vcpu *)((char *)vcpu +
-                                       sizeof(struct kvm_vcpu_data) * i);
-                       VMX(v, itc_offset) = itc_offset;
-                       VMX(v, last_itc) = 0;
-               }
-       }
-       VMX(vcpu, last_itc) = 0;
-       if (VCPU(vcpu, itm) <= val) {
-               VMX(vcpu, itc_check) = 0;
-               vcpu_unpend_interrupt(vcpu, vitv);
-       } else {
-               VMX(vcpu, itc_check) = 1;
-               vcpu_set_itm(vcpu, VCPU(vcpu, itm));
-       }
-
-}
-
-static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, itm));
-}
-
-static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val)
-{
-       unsigned long vitv = VCPU(vcpu, itv);
-       VCPU(vcpu, itm) = val;
-
-       if (val > vcpu_get_itc(vcpu)) {
-               VMX(vcpu, itc_check) = 1;
-               vcpu_unpend_interrupt(vcpu, vitv);
-               VMX(vcpu, timer_pending) = 0;
-       } else
-               VMX(vcpu, itc_check) = 0;
-}
-
-#define  ITV_VECTOR(itv)    (itv&0xff)
-#define  ITV_IRQ_MASK(itv)  (itv&(1<<16))
-
-static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, itv) = val;
-       if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) {
-               vcpu_pend_interrupt(vcpu, ITV_VECTOR(val));
-               vcpu->arch.timer_pending = 0;
-       }
-}
-
-static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val)
-{
-       int vec;
-
-       vec = highest_inservice_irq(vcpu);
-       if (vec == NULL_VECTOR)
-               return;
-       VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63));
-       VCPU(vcpu, eoi) = 0;
-       vcpu->arch.irq_new_pending = 1;
-
-}
-
-/* See Table 5-8 in SDM vol2 for the definition */
-int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice)
-{
-       union ia64_tpr vtpr;
-
-       vtpr.val = VCPU(vcpu, tpr);
-
-       if (h_inservice == NMI_VECTOR)
-               return IRQ_MASKED_BY_INSVC;
-
-       if (h_pending == NMI_VECTOR) {
-               /* Non Maskable Interrupt */
-               return IRQ_NO_MASKED;
-       }
-
-       if (h_inservice == ExtINT_VECTOR)
-               return IRQ_MASKED_BY_INSVC;
-
-       if (h_pending == ExtINT_VECTOR) {
-               if (vtpr.mmi) {
-                       /* mask all external IRQ */
-                       return IRQ_MASKED_BY_VTPR;
-               } else
-                       return IRQ_NO_MASKED;
-       }
-
-       if (is_higher_irq(h_pending, h_inservice)) {
-               if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4)))
-                       return IRQ_NO_MASKED;
-               else
-                       return IRQ_MASKED_BY_VTPR;
-       } else {
-               return IRQ_MASKED_BY_INSVC;
-       }
-}
-
-void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
-{
-       long spsr;
-       int ret;
-
-       local_irq_save(spsr);
-       ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0]));
-       local_irq_restore(spsr);
-
-       vcpu->arch.irq_new_pending = 1;
-}
-
-void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
-{
-       long spsr;
-       int ret;
-
-       local_irq_save(spsr);
-       ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0]));
-       local_irq_restore(spsr);
-       if (ret) {
-               vcpu->arch.irq_new_pending = 1;
-               wmb();
-       }
-}
-
-void update_vhpi(struct kvm_vcpu *vcpu, int vec)
-{
-       u64 vhpi;
-
-       if (vec == NULL_VECTOR)
-               vhpi = 0;
-       else if (vec == NMI_VECTOR)
-               vhpi = 32;
-       else if (vec == ExtINT_VECTOR)
-               vhpi = 16;
-       else
-               vhpi = vec >> 4;
-
-       VCPU(vcpu, vhpi) = vhpi;
-       if (VCPU(vcpu, vac).a_int)
-               ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT,
-                               (u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0);
-}
-
-u64 vcpu_get_ivr(struct kvm_vcpu *vcpu)
-{
-       int vec, h_inservice, mask;
-
-       vec = highest_pending_irq(vcpu);
-       h_inservice = highest_inservice_irq(vcpu);
-       mask = irq_masked(vcpu, vec, h_inservice);
-       if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) {
-               if (VCPU(vcpu, vhpi))
-                       update_vhpi(vcpu, NULL_VECTOR);
-               return IA64_SPURIOUS_INT_VECTOR;
-       }
-       if (mask == IRQ_MASKED_BY_VTPR) {
-               update_vhpi(vcpu, vec);
-               return IA64_SPURIOUS_INT_VECTOR;
-       }
-       VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63));
-       vcpu_unpend_interrupt(vcpu, vec);
-       return  (u64)vec;
-}
-
-/**************************************************************************
-  Privileged operation emulation routines
- **************************************************************************/
-u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       union ia64_pta vpta;
-       union ia64_rr vrr;
-       u64 pval;
-       u64 vhpt_offset;
-
-       vpta.val = vcpu_get_pta(vcpu);
-       vrr.val = vcpu_get_rr(vcpu, vadr);
-       vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1);
-       if (vpta.vf) {
-               pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val,
-                               vpta.val, 0, 0, 0, 0);
-       } else {
-               pval = (vadr & VRN_MASK) | vhpt_offset |
-                       (vpta.val << 3 >> (vpta.size + 3) << (vpta.size));
-       }
-       return  pval;
-}
-
-u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       union ia64_rr vrr;
-       union ia64_pta vpta;
-       u64 pval;
-
-       vpta.val = vcpu_get_pta(vcpu);
-       vrr.val = vcpu_get_rr(vcpu, vadr);
-       if (vpta.vf) {
-               pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val,
-                                               0, 0, 0, 0, 0);
-       } else
-               pval = 1;
-
-       return  pval;
-}
-
-u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       struct thash_data *data;
-       union ia64_pta vpta;
-       u64 key;
-
-       vpta.val = vcpu_get_pta(vcpu);
-       if (vpta.vf == 0) {
-               key = 1;
-               return key;
-       }
-       data = vtlb_lookup(vcpu, vadr, D_TLB);
-       if (!data || !data->p)
-               key = 1;
-       else
-               key = data->key;
-
-       return key;
-}
-
-void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long thash, vadr;
-
-       vadr = vcpu_get_gr(vcpu, inst.M46.r3);
-       thash = vcpu_thash(vcpu, vadr);
-       vcpu_set_gr(vcpu, inst.M46.r1, thash, 0);
-}
-
-void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long tag, vadr;
-
-       vadr = vcpu_get_gr(vcpu, inst.M46.r3);
-       tag = vcpu_ttag(vcpu, vadr);
-       vcpu_set_gr(vcpu, inst.M46.r1, tag, 0);
-}
-
-int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, unsigned long *padr)
-{
-       struct thash_data *data;
-       union ia64_isr visr, pt_isr;
-       struct kvm_pt_regs *regs;
-       struct ia64_psr vpsr;
-
-       regs = vcpu_regs(vcpu);
-       pt_isr.val = VMX(vcpu, cr_isr);
-       visr.val = 0;
-       visr.ei = pt_isr.ei;
-       visr.ir = pt_isr.ir;
-       vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-       visr.na = 1;
-
-       data = vhpt_lookup(vadr);
-       if (data) {
-               if (data->p == 0) {
-                       vcpu_set_isr(vcpu, visr.val);
-                       data_page_not_present(vcpu, vadr);
-                       return IA64_FAULT;
-               } else if (data->ma == VA_MATTR_NATPAGE) {
-                       vcpu_set_isr(vcpu, visr.val);
-                       dnat_page_consumption(vcpu, vadr);
-                       return IA64_FAULT;
-               } else {
-                       *padr = (data->gpaddr >> data->ps << data->ps) |
-                               (vadr & (PSIZE(data->ps) - 1));
-                       return IA64_NO_FAULT;
-               }
-       }
-
-       data = vtlb_lookup(vcpu, vadr, D_TLB);
-       if (data) {
-               if (data->p == 0) {
-                       vcpu_set_isr(vcpu, visr.val);
-                       data_page_not_present(vcpu, vadr);
-                       return IA64_FAULT;
-               } else if (data->ma == VA_MATTR_NATPAGE) {
-                       vcpu_set_isr(vcpu, visr.val);
-                       dnat_page_consumption(vcpu, vadr);
-                       return IA64_FAULT;
-               } else{
-                       *padr = ((data->ppn >> (data->ps - 12)) << data->ps)
-                               | (vadr & (PSIZE(data->ps) - 1));
-                       return IA64_NO_FAULT;
-               }
-       }
-       if (!vhpt_enabled(vcpu, vadr, NA_REF)) {
-               if (vpsr.ic) {
-                       vcpu_set_isr(vcpu, visr.val);
-                       alt_dtlb(vcpu, vadr);
-                       return IA64_FAULT;
-               } else {
-                       nested_dtlb(vcpu);
-                       return IA64_FAULT;
-               }
-       } else {
-               if (vpsr.ic) {
-                       vcpu_set_isr(vcpu, visr.val);
-                       dvhpt_fault(vcpu, vadr);
-                       return IA64_FAULT;
-               } else{
-                       nested_dtlb(vcpu);
-                       return IA64_FAULT;
-               }
-       }
-
-       return IA64_NO_FAULT;
-}
-
-int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r1, r3;
-
-       r3 = vcpu_get_gr(vcpu, inst.M46.r3);
-
-       if (vcpu_tpa(vcpu, r3, &r1))
-               return IA64_FAULT;
-
-       vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
-       return(IA64_NO_FAULT);
-}
-
-void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r1, r3;
-
-       r3 = vcpu_get_gr(vcpu, inst.M46.r3);
-       r1 = vcpu_tak(vcpu, r3);
-       vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
-}
-
-/************************************
- * Insert/Purge translation register/cache
- ************************************/
-void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
-{
-       thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB);
-}
-
-void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
-{
-       thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB);
-}
-
-void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
-{
-       u64 ps, va, rid;
-       struct thash_data *p_itr;
-
-       ps = itir_ps(itir);
-       va = PAGEALIGN(ifa, ps);
-       pte &= ~PAGE_FLAGS_RV_MASK;
-       rid = vcpu_get_rr(vcpu, ifa);
-       rid = rid & RR_RID_MASK;
-       p_itr = (struct thash_data *)&vcpu->arch.itrs[slot];
-       vcpu_set_tr(p_itr, pte, itir, va, rid);
-       vcpu_quick_region_set(VMX(vcpu, itr_regions), va);
-}
-
-
-void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
-{
-       u64 gpfn;
-       u64 ps, va, rid;
-       struct thash_data *p_dtr;
-
-       ps = itir_ps(itir);
-       va = PAGEALIGN(ifa, ps);
-       pte &= ~PAGE_FLAGS_RV_MASK;
-
-       if (ps != _PAGE_SIZE_16M)
-               thash_purge_entries(vcpu, va, ps);
-       gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
-       if (__gpfn_is_io(gpfn))
-               pte |= VTLB_PTE_IO;
-       rid = vcpu_get_rr(vcpu, va);
-       rid = rid & RR_RID_MASK;
-       p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot];
-       vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot],
-                                                       pte, itir, va, rid);
-       vcpu_quick_region_set(VMX(vcpu, dtr_regions), va);
-}
-
-void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
-{
-       int index;
-       u64 va;
-
-       va = PAGEALIGN(ifa, ps);
-       while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0)
-               vcpu->arch.dtrs[index].page_flags = 0;
-
-       thash_purge_entries(vcpu, va, ps);
-}
-
-void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
-{
-       int index;
-       u64 va;
-
-       va = PAGEALIGN(ifa, ps);
-       while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0)
-               vcpu->arch.itrs[index].page_flags = 0;
-
-       thash_purge_entries(vcpu, va, ps);
-}
-
-void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps)
-{
-       va = PAGEALIGN(va, ps);
-       thash_purge_entries(vcpu, va, ps);
-}
-
-void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va)
-{
-       thash_purge_all(vcpu);
-}
-
-void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps)
-{
-       struct exit_ctl_data *p = &vcpu->arch.exit_data;
-       long psr;
-       local_irq_save(psr);
-       p->exit_reason = EXIT_REASON_PTC_G;
-
-       p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va);
-       p->u.ptc_g_data.vaddr = va;
-       p->u.ptc_g_data.ps = ps;
-       vmm_transition(vcpu);
-       /* Do Local Purge Here*/
-       vcpu_ptc_l(vcpu, va, ps);
-       local_irq_restore(psr);
-}
-
-
-void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps)
-{
-       vcpu_ptc_ga(vcpu, va, ps);
-}
-
-void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long ifa;
-
-       ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-       vcpu_ptc_e(vcpu, ifa);
-}
-
-void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long ifa, itir;
-
-       ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-       itir = vcpu_get_gr(vcpu, inst.M45.r2);
-       vcpu_ptc_g(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long ifa, itir;
-
-       ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-       itir = vcpu_get_gr(vcpu, inst.M45.r2);
-       vcpu_ptc_ga(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long ifa, itir;
-
-       ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-       itir = vcpu_get_gr(vcpu, inst.M45.r2);
-       vcpu_ptc_l(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long ifa, itir;
-
-       ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-       itir = vcpu_get_gr(vcpu, inst.M45.r2);
-       vcpu_ptr_d(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long ifa, itir;
-
-       ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-       itir = vcpu_get_gr(vcpu, inst.M45.r2);
-       vcpu_ptr_i(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long itir, ifa, pte, slot;
-
-       slot = vcpu_get_gr(vcpu, inst.M45.r3);
-       pte = vcpu_get_gr(vcpu, inst.M45.r2);
-       itir = vcpu_get_itir(vcpu);
-       ifa = vcpu_get_ifa(vcpu);
-       vcpu_itr_d(vcpu, slot, pte, itir, ifa);
-}
-
-
-
-void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long itir, ifa, pte, slot;
-
-       slot = vcpu_get_gr(vcpu, inst.M45.r3);
-       pte = vcpu_get_gr(vcpu, inst.M45.r2);
-       itir = vcpu_get_itir(vcpu);
-       ifa = vcpu_get_ifa(vcpu);
-       vcpu_itr_i(vcpu, slot, pte, itir, ifa);
-}
-
-void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long itir, ifa, pte;
-
-       itir = vcpu_get_itir(vcpu);
-       ifa = vcpu_get_ifa(vcpu);
-       pte = vcpu_get_gr(vcpu, inst.M45.r2);
-       vcpu_itc_d(vcpu, pte, itir, ifa);
-}
-
-void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long itir, ifa, pte;
-
-       itir = vcpu_get_itir(vcpu);
-       ifa = vcpu_get_ifa(vcpu);
-       pte = vcpu_get_gr(vcpu, inst.M45.r2);
-       vcpu_itc_i(vcpu, pte, itir, ifa);
-}
-
-/*************************************
- * Moves to semi-privileged registers
- *************************************/
-
-void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long imm;
-
-       if (inst.M30.s)
-               imm = -inst.M30.imm;
-       else
-               imm = inst.M30.imm;
-
-       vcpu_set_itc(vcpu, imm);
-}
-
-void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r2;
-
-       r2 = vcpu_get_gr(vcpu, inst.M29.r2);
-       vcpu_set_itc(vcpu, r2);
-}
-
-void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r1;
-
-       r1 = vcpu_get_itc(vcpu);
-       vcpu_set_gr(vcpu, inst.M31.r1, r1, 0);
-}
-
-/**************************************************************************
-  struct kvm_vcpu protection key register access routines
- **************************************************************************/
-
-unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg)
-{
-       return ((unsigned long)ia64_get_pkr(reg));
-}
-
-void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val)
-{
-       ia64_set_pkr(reg, val);
-}
-
-/********************************
- * Moves to privileged registers
- ********************************/
-unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg,
-                                       unsigned long val)
-{
-       union ia64_rr oldrr, newrr;
-       unsigned long rrval;
-       struct exit_ctl_data *p = &vcpu->arch.exit_data;
-       unsigned long psr;
-
-       oldrr.val = vcpu_get_rr(vcpu, reg);
-       newrr.val = val;
-       vcpu->arch.vrr[reg >> VRN_SHIFT] = val;
-
-       switch ((unsigned long)(reg >> VRN_SHIFT)) {
-       case VRN6:
-               vcpu->arch.vmm_rr = vrrtomrr(val);
-               local_irq_save(psr);
-               p->exit_reason = EXIT_REASON_SWITCH_RR6;
-               vmm_transition(vcpu);
-               local_irq_restore(psr);
-               break;
-       case VRN4:
-               rrval = vrrtomrr(val);
-               vcpu->arch.metaphysical_saved_rr4 = rrval;
-               if (!is_physical_mode(vcpu))
-                       ia64_set_rr(reg, rrval);
-               break;
-       case VRN0:
-               rrval = vrrtomrr(val);
-               vcpu->arch.metaphysical_saved_rr0 = rrval;
-               if (!is_physical_mode(vcpu))
-                       ia64_set_rr(reg, rrval);
-               break;
-       default:
-               ia64_set_rr(reg, vrrtomrr(val));
-               break;
-       }
-
-       return (IA64_NO_FAULT);
-}
-
-void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r3, r2;
-
-       r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-       r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-       vcpu_set_rr(vcpu, r3, r2);
-}
-
-void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-}
-
-void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-}
-
-void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r3, r2;
-
-       r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-       r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-       vcpu_set_pmc(vcpu, r3, r2);
-}
-
-void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r3, r2;
-
-       r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-       r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-       vcpu_set_pmd(vcpu, r3, r2);
-}
-
-void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       u64 r3, r2;
-
-       r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-       r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-       vcpu_set_pkr(vcpu, r3, r2);
-}
-
-void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r3, r1;
-
-       r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-       r1 = vcpu_get_rr(vcpu, r3);
-       vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r3, r1;
-
-       r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-       r1 = vcpu_get_pkr(vcpu, r3);
-       vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r3, r1;
-
-       r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-       r1 = vcpu_get_dbr(vcpu, r3);
-       vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r3, r1;
-
-       r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-       r1 = vcpu_get_ibr(vcpu, r3);
-       vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r3, r1;
-
-       r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-       r1 = vcpu_get_pmc(vcpu, r3);
-       vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg)
-{
-       /* FIXME: This could get called as a result of a rsvd-reg fault */
-       if (reg > (ia64_get_cpuid(3) & 0xff))
-               return 0;
-       else
-               return ia64_get_cpuid(reg);
-}
-
-void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r3, r1;
-
-       r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-       r1 = vcpu_get_cpuid(vcpu, r3);
-       vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val)
-{
-       VCPU(vcpu, tpr) = val;
-       vcpu->arch.irq_check = 1;
-}
-
-unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r2;
-
-       r2 = vcpu_get_gr(vcpu, inst.M32.r2);
-       VCPU(vcpu, vcr[inst.M32.cr3]) = r2;
-
-       switch (inst.M32.cr3) {
-       case 0:
-               vcpu_set_dcr(vcpu, r2);
-               break;
-       case 1:
-               vcpu_set_itm(vcpu, r2);
-               break;
-       case 66:
-               vcpu_set_tpr(vcpu, r2);
-               break;
-       case 67:
-               vcpu_set_eoi(vcpu, r2);
-               break;
-       default:
-               break;
-       }
-
-       return 0;
-}
-
-unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long tgt = inst.M33.r1;
-       unsigned long val;
-
-       switch (inst.M33.cr3) {
-       case 65:
-               val = vcpu_get_ivr(vcpu);
-               vcpu_set_gr(vcpu, tgt, val, 0);
-               break;
-
-       case 67:
-               vcpu_set_gr(vcpu, tgt, 0L, 0);
-               break;
-       default:
-               val = VCPU(vcpu, vcr[inst.M33.cr3]);
-               vcpu_set_gr(vcpu, tgt, val, 0);
-               break;
-       }
-
-       return 0;
-}
-
-void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
-{
-
-       unsigned long mask;
-       struct kvm_pt_regs *regs;
-       struct ia64_psr old_psr, new_psr;
-
-       old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-
-       regs = vcpu_regs(vcpu);
-       /* We only support guest as:
-        *  vpsr.pk = 0
-        *  vpsr.is = 0
-        * Otherwise panic
-        */
-       if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
-               panic_vm(vcpu, "Only support guests with vpsr.pk =0 "
-                               "& vpsr.is=0\n");
-
-       /*
-        * For those IA64_PSR bits: id/da/dd/ss/ed/ia
-        * Since these bits will become 0, after success execution of each
-        * instruction, we will change set them to mIA64_PSR
-        */
-       VCPU(vcpu, vpsr) = val
-               & (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD |
-                       IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA));
-
-       if (!old_psr.i && (val & IA64_PSR_I)) {
-               /* vpsr.i 0->1 */
-               vcpu->arch.irq_check = 1;
-       }
-       new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-
-       /*
-        * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr)
-        * , except for the following bits:
-        *  ic/i/dt/si/rt/mc/it/bn/vm
-        */
-       mask =  IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI +
-               IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN +
-               IA64_PSR_VM;
-
-       regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask));
-
-       check_mm_mode_switch(vcpu, old_psr, new_psr);
-
-       return ;
-}
-
-unsigned long vcpu_cover(struct kvm_vcpu *vcpu)
-{
-       struct ia64_psr vpsr;
-
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-       vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-
-       if (!vpsr.ic)
-               VCPU(vcpu, ifs) = regs->cr_ifs;
-       regs->cr_ifs = IA64_IFS_V;
-       return (IA64_NO_FAULT);
-}
-
-
-
-/**************************************************************************
-  VCPU banked general register access routines
- **************************************************************************/
-#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \
-       do {                                                            \
-               __asm__ __volatile__ (                                  \
-                               ";;extr.u %0 = %3,%6,16;;\n"            \
-                               "dep %1 = %0, %1, 0, 16;;\n"            \
-                               "st8 [%4] = %1\n"                       \
-                               "extr.u %0 = %2, 16, 16;;\n"            \
-                               "dep %3 = %0, %3, %6, 16;;\n"           \
-                               "st8 [%5] = %3\n"                       \
-                               ::"r"(i), "r"(*b1unat), "r"(*b0unat),   \
-                               "r"(*runat), "r"(b1unat), "r"(runat),   \
-                               "i"(VMM_PT_REGS_R16_SLOT) : "memory");  \
-       } while (0)
-
-void vcpu_bsw0(struct kvm_vcpu *vcpu)
-{
-       unsigned long i;
-
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-       unsigned long *r = &regs->r16;
-       unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
-       unsigned long *b1 = &VCPU(vcpu, vgr[0]);
-       unsigned long *runat = &regs->eml_unat;
-       unsigned long *b0unat = &VCPU(vcpu, vbnat);
-       unsigned long *b1unat = &VCPU(vcpu, vnat);
-
-
-       if (VCPU(vcpu, vpsr) & IA64_PSR_BN) {
-               for (i = 0; i < 16; i++) {
-                       *b1++ = *r;
-                       *r++ = *b0++;
-               }
-               vcpu_bsw0_unat(i, b0unat, b1unat, runat,
-                               VMM_PT_REGS_R16_SLOT);
-               VCPU(vcpu, vpsr) &= ~IA64_PSR_BN;
-       }
-}
-
-#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \
-       do {                                                            \
-               __asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n"    \
-                               "dep %1 = %0, %1, 16, 16;;\n"           \
-                               "st8 [%4] = %1\n"                       \
-                               "extr.u %0 = %2, 0, 16;;\n"             \
-                               "dep %3 = %0, %3, %6, 16;;\n"           \
-                               "st8 [%5] = %3\n"                       \
-                               ::"r"(i), "r"(*b0unat), "r"(*b1unat),   \
-                               "r"(*runat), "r"(b0unat), "r"(runat),   \
-                               "i"(VMM_PT_REGS_R16_SLOT) : "memory");  \
-       } while (0)
-
-void vcpu_bsw1(struct kvm_vcpu *vcpu)
-{
-       unsigned long i;
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-       unsigned long *r = &regs->r16;
-       unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
-       unsigned long *b1 = &VCPU(vcpu, vgr[0]);
-       unsigned long *runat = &regs->eml_unat;
-       unsigned long *b0unat = &VCPU(vcpu, vbnat);
-       unsigned long *b1unat = &VCPU(vcpu, vnat);
-
-       if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) {
-               for (i = 0; i < 16; i++) {
-                       *b0++ = *r;
-                       *r++ = *b1++;
-               }
-               vcpu_bsw1_unat(i, b0unat, b1unat, runat,
-                               VMM_PT_REGS_R16_SLOT);
-               VCPU(vcpu, vpsr) |= IA64_PSR_BN;
-       }
-}
-
-void vcpu_rfi(struct kvm_vcpu *vcpu)
-{
-       unsigned long ifs, psr;
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-       psr = VCPU(vcpu, ipsr);
-       if (psr & IA64_PSR_BN)
-               vcpu_bsw1(vcpu);
-       else
-               vcpu_bsw0(vcpu);
-       vcpu_set_psr(vcpu, psr);
-       ifs = VCPU(vcpu, ifs);
-       if (ifs >> 63)
-               regs->cr_ifs = ifs;
-       regs->cr_iip = VCPU(vcpu, iip);
-}
-
-/*
-   VPSR can't keep track of below bits of guest PSR
-   This function gets guest PSR
- */
-
-unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu)
-{
-       unsigned long mask;
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-       mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL |
-               IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI;
-       return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask);
-}
-
-void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long vpsr;
-       unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21)
-                                       | inst.M44.imm;
-
-       vpsr = vcpu_get_psr(vcpu);
-       vpsr &= (~imm24);
-       vcpu_set_psr(vcpu, vpsr);
-}
-
-void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long vpsr;
-       unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21)
-                               | inst.M44.imm;
-
-       vpsr = vcpu_get_psr(vcpu);
-       vpsr |= imm24;
-       vcpu_set_psr(vcpu, vpsr);
-}
-
-/* Generate Mask
- * Parameter:
- *  bit -- starting bit
- *  len -- how many bits
- */
-#define MASK(bit,len)                                  \
-({                                                     \
-               __u64   ret;                            \
-                                                       \
-               __asm __volatile("dep %0=-1, r0, %1, %2"\
-                               : "=r" (ret):           \
-                 "M" (bit),                            \
-                 "M" (len));                           \
-               ret;                                    \
-})
-
-void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val)
-{
-       val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32));
-       vcpu_set_psr(vcpu, val);
-}
-
-void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long val;
-
-       val = vcpu_get_gr(vcpu, inst.M35.r2);
-       vcpu_set_psr_l(vcpu, val);
-}
-
-void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long val;
-
-       val = vcpu_get_psr(vcpu);
-       val = (val & MASK(0, 32)) | (val & MASK(35, 2));
-       vcpu_set_gr(vcpu, inst.M33.r1, val, 0);
-}
-
-void vcpu_increment_iip(struct kvm_vcpu *vcpu)
-{
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-       struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
-       if (ipsr->ri == 2) {
-               ipsr->ri = 0;
-               regs->cr_iip += 16;
-       } else
-               ipsr->ri++;
-}
-
-void vcpu_decrement_iip(struct kvm_vcpu *vcpu)
-{
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-       struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
-
-       if (ipsr->ri == 0) {
-               ipsr->ri = 2;
-               regs->cr_iip -= 16;
-       } else
-               ipsr->ri--;
-}
-
-/** Emulate a privileged operation.
- *
- *
- * @param vcpu virtual cpu
- * @cause the reason cause virtualization fault
- * @opcode the instruction code which cause virtualization fault
- */
-
-void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs)
-{
-       unsigned long status, cause, opcode ;
-       INST64 inst;
-
-       status = IA64_NO_FAULT;
-       cause = VMX(vcpu, cause);
-       opcode = VMX(vcpu, opcode);
-       inst.inst = opcode;
-       /*
-        * Switch to actual virtual rid in rr0 and rr4,
-        * which is required by some tlb related instructions.
-        */
-       prepare_if_physical_mode(vcpu);
-
-       switch (cause) {
-       case EVENT_RSM:
-               kvm_rsm(vcpu, inst);
-               break;
-       case EVENT_SSM:
-               kvm_ssm(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_PSR:
-               kvm_mov_to_psr(vcpu, inst);
-               break;
-       case EVENT_MOV_FROM_PSR:
-               kvm_mov_from_psr(vcpu, inst);
-               break;
-       case EVENT_MOV_FROM_CR:
-               kvm_mov_from_cr(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_CR:
-               kvm_mov_to_cr(vcpu, inst);
-               break;
-       case EVENT_BSW_0:
-               vcpu_bsw0(vcpu);
-               break;
-       case EVENT_BSW_1:
-               vcpu_bsw1(vcpu);
-               break;
-       case EVENT_COVER:
-               vcpu_cover(vcpu);
-               break;
-       case EVENT_RFI:
-               vcpu_rfi(vcpu);
-               break;
-       case EVENT_ITR_D:
-               kvm_itr_d(vcpu, inst);
-               break;
-       case EVENT_ITR_I:
-               kvm_itr_i(vcpu, inst);
-               break;
-       case EVENT_PTR_D:
-               kvm_ptr_d(vcpu, inst);
-               break;
-       case EVENT_PTR_I:
-               kvm_ptr_i(vcpu, inst);
-               break;
-       case EVENT_ITC_D:
-               kvm_itc_d(vcpu, inst);
-               break;
-       case EVENT_ITC_I:
-               kvm_itc_i(vcpu, inst);
-               break;
-       case EVENT_PTC_L:
-               kvm_ptc_l(vcpu, inst);
-               break;
-       case EVENT_PTC_G:
-               kvm_ptc_g(vcpu, inst);
-               break;
-       case EVENT_PTC_GA:
-               kvm_ptc_ga(vcpu, inst);
-               break;
-       case EVENT_PTC_E:
-               kvm_ptc_e(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_RR:
-               kvm_mov_to_rr(vcpu, inst);
-               break;
-       case EVENT_MOV_FROM_RR:
-               kvm_mov_from_rr(vcpu, inst);
-               break;
-       case EVENT_THASH:
-               kvm_thash(vcpu, inst);
-               break;
-       case EVENT_TTAG:
-               kvm_ttag(vcpu, inst);
-               break;
-       case EVENT_TPA:
-               status = kvm_tpa(vcpu, inst);
-               break;
-       case EVENT_TAK:
-               kvm_tak(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_AR_IMM:
-               kvm_mov_to_ar_imm(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_AR:
-               kvm_mov_to_ar_reg(vcpu, inst);
-               break;
-       case EVENT_MOV_FROM_AR:
-               kvm_mov_from_ar_reg(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_DBR:
-               kvm_mov_to_dbr(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_IBR:
-               kvm_mov_to_ibr(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_PMC:
-               kvm_mov_to_pmc(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_PMD:
-               kvm_mov_to_pmd(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_PKR:
-               kvm_mov_to_pkr(vcpu, inst);
-               break;
-       case EVENT_MOV_FROM_DBR:
-               kvm_mov_from_dbr(vcpu, inst);
-               break;
-       case EVENT_MOV_FROM_IBR:
-               kvm_mov_from_ibr(vcpu, inst);
-               break;
-       case EVENT_MOV_FROM_PMC:
-               kvm_mov_from_pmc(vcpu, inst);
-               break;
-       case EVENT_MOV_FROM_PKR:
-               kvm_mov_from_pkr(vcpu, inst);
-               break;
-       case EVENT_MOV_FROM_CPUID:
-               kvm_mov_from_cpuid(vcpu, inst);
-               break;
-       case EVENT_VMSW:
-               status = IA64_FAULT;
-               break;
-       default:
-               break;
-       };
-       /*Assume all status is NO_FAULT ?*/
-       if (status == IA64_NO_FAULT && cause != EVENT_RFI)
-               vcpu_increment_iip(vcpu);
-
-       recover_if_physical_mode(vcpu);
-}
-
-void init_vcpu(struct kvm_vcpu *vcpu)
-{
-       int i;
-
-       vcpu->arch.mode_flags = GUEST_IN_PHY;
-       VMX(vcpu, vrr[0]) = 0x38;
-       VMX(vcpu, vrr[1]) = 0x38;
-       VMX(vcpu, vrr[2]) = 0x38;
-       VMX(vcpu, vrr[3]) = 0x38;
-       VMX(vcpu, vrr[4]) = 0x38;
-       VMX(vcpu, vrr[5]) = 0x38;
-       VMX(vcpu, vrr[6]) = 0x38;
-       VMX(vcpu, vrr[7]) = 0x38;
-       VCPU(vcpu, vpsr) = IA64_PSR_BN;
-       VCPU(vcpu, dcr) = 0;
-       /* pta.size must not be 0.  The minimum is 15 (32k) */
-       VCPU(vcpu, pta) = 15 << 2;
-       VCPU(vcpu, itv) = 0x10000;
-       VCPU(vcpu, itm) = 0;
-       VMX(vcpu, last_itc) = 0;
-
-       VCPU(vcpu, lid) = VCPU_LID(vcpu);
-       VCPU(vcpu, ivr) = 0;
-       VCPU(vcpu, tpr) = 0x10000;
-       VCPU(vcpu, eoi) = 0;
-       VCPU(vcpu, irr[0]) = 0;
-       VCPU(vcpu, irr[1]) = 0;
-       VCPU(vcpu, irr[2]) = 0;
-       VCPU(vcpu, irr[3]) = 0;
-       VCPU(vcpu, pmv) = 0x10000;
-       VCPU(vcpu, cmcv) = 0x10000;
-       VCPU(vcpu, lrr0) = 0x10000;   /* default reset value? */
-       VCPU(vcpu, lrr1) = 0x10000;   /* default reset value? */
-       update_vhpi(vcpu, NULL_VECTOR);
-       VLSAPIC_XTP(vcpu) = 0x80;       /* disabled */
-
-       for (i = 0; i < 4; i++)
-               VLSAPIC_INSVC(vcpu, i) = 0;
-}
-
-void kvm_init_all_rr(struct kvm_vcpu *vcpu)
-{
-       unsigned long psr;
-
-       local_irq_save(psr);
-
-       /* WARNING: not allow co-exist of both virtual mode and physical
-        * mode in same region
-        */
-
-       vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0]));
-       vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4]));
-
-       if (is_physical_mode(vcpu)) {
-               if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
-                       panic_vm(vcpu, "Machine Status conflicts!\n");
-
-               ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
-               ia64_dv_serialize_data();
-               ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4);
-               ia64_dv_serialize_data();
-       } else {
-               ia64_set_rr((VRN0 << VRN_SHIFT),
-                               vcpu->arch.metaphysical_saved_rr0);
-               ia64_dv_serialize_data();
-               ia64_set_rr((VRN4 << VRN_SHIFT),
-                               vcpu->arch.metaphysical_saved_rr4);
-               ia64_dv_serialize_data();
-       }
-       ia64_set_rr((VRN1 << VRN_SHIFT),
-                       vrrtomrr(VMX(vcpu, vrr[VRN1])));
-       ia64_dv_serialize_data();
-       ia64_set_rr((VRN2 << VRN_SHIFT),
-                       vrrtomrr(VMX(vcpu, vrr[VRN2])));
-       ia64_dv_serialize_data();
-       ia64_set_rr((VRN3 << VRN_SHIFT),
-                       vrrtomrr(VMX(vcpu, vrr[VRN3])));
-       ia64_dv_serialize_data();
-       ia64_set_rr((VRN5 << VRN_SHIFT),
-                       vrrtomrr(VMX(vcpu, vrr[VRN5])));
-       ia64_dv_serialize_data();
-       ia64_set_rr((VRN7 << VRN_SHIFT),
-                       vrrtomrr(VMX(vcpu, vrr[VRN7])));
-       ia64_dv_serialize_data();
-       ia64_srlz_d();
-       ia64_set_psr(psr);
-}
-
-int vmm_entry(void)
-{
-       struct kvm_vcpu *v;
-       v = current_vcpu;
-
-       ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd,
-                                               0, 0, 0, 0, 0, 0);
-       kvm_init_vtlb(v);
-       kvm_init_vhpt(v);
-       init_vcpu(v);
-       kvm_init_all_rr(v);
-       vmm_reset_entry();
-
-       return 0;
-}
-
-static void kvm_show_registers(struct kvm_pt_regs *regs)
-{
-       unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
-
-       struct kvm_vcpu *vcpu = current_vcpu;
-       if (vcpu != NULL)
-               printk("vcpu 0x%p vcpu %d\n",
-                      vcpu, vcpu->vcpu_id);
-
-       printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]\n",
-              regs->cr_ipsr, regs->cr_ifs, ip);
-
-       printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
-              regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
-       printk("rnat: %016lx bspstore: %016lx pr  : %016lx\n",
-              regs->ar_rnat, regs->ar_bspstore, regs->pr);
-       printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
-              regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
-       printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
-       printk("b0  : %016lx b6  : %016lx b7  : %016lx\n", regs->b0,
-                                                       regs->b6, regs->b7);
-       printk("f6  : %05lx%016lx f7  : %05lx%016lx\n",
-              regs->f6.u.bits[1], regs->f6.u.bits[0],
-              regs->f7.u.bits[1], regs->f7.u.bits[0]);
-       printk("f8  : %05lx%016lx f9  : %05lx%016lx\n",
-              regs->f8.u.bits[1], regs->f8.u.bits[0],
-              regs->f9.u.bits[1], regs->f9.u.bits[0]);
-       printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
-              regs->f10.u.bits[1], regs->f10.u.bits[0],
-              regs->f11.u.bits[1], regs->f11.u.bits[0]);
-
-       printk("r1  : %016lx r2  : %016lx r3  : %016lx\n", regs->r1,
-                                                       regs->r2, regs->r3);
-       printk("r8  : %016lx r9  : %016lx r10 : %016lx\n", regs->r8,
-                                                       regs->r9, regs->r10);
-       printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
-                                                       regs->r12, regs->r13);
-       printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
-                                                       regs->r15, regs->r16);
-       printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
-                                                       regs->r18, regs->r19);
-       printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
-                                                       regs->r21, regs->r22);
-       printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
-                                                       regs->r24, regs->r25);
-       printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
-                                                       regs->r27, regs->r28);
-       printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
-                                                       regs->r30, regs->r31);
-
-}
-
-void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
-{
-       va_list args;
-       char buf[256];
-
-       struct kvm_pt_regs *regs = vcpu_regs(v);
-       struct exit_ctl_data *p = &v->arch.exit_data;
-       va_start(args, fmt);
-       vsnprintf(buf, sizeof(buf), fmt, args);
-       va_end(args);
-       printk(buf);
-       kvm_show_registers(regs);
-       p->exit_reason = EXIT_REASON_VM_PANIC;
-       vmm_transition(v);
-       /*Never to return*/
-       while (1);
-}
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h

deleted file mode 100644 (file)

index 988911b..0000000
--- a/arch/ia64/kvm/vcpu.h
+++ /dev/null
@@ -1,752 +0,0 @@
-/*
- *  vcpu.h: vcpu routines
- *     Copyright (c) 2005, Intel Corporation.
- *     Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *     Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
- *
- *     Copyright (c) 2007, Intel Corporation.
- *     Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *     Xiantao Zhang (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-
-#ifndef __KVM_VCPU_H__
-#define __KVM_VCPU_H__
-
-#include <asm/types.h>
-#include <asm/fpu.h>
-#include <asm/processor.h>
-
-#ifndef __ASSEMBLY__
-#include "vti.h"
-
-#include <linux/kvm_host.h>
-#include <linux/spinlock.h>
-
-typedef unsigned long IA64_INST;
-
-typedef union U_IA64_BUNDLE {
-       unsigned long i64[2];
-       struct { unsigned long template:5, slot0:41, slot1a:18,
-               slot1b:23, slot2:41; };
-       /* NOTE: following doesn't work because bitfields can't cross natural
-          size boundaries
-          struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */
-} IA64_BUNDLE;
-
-typedef union U_INST64_A5 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5,
-               imm9d:9, s:1, major:4; };
-} INST64_A5;
-
-typedef union U_INST64_B4 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6,
-               wh:2, d:1, un1:1, major:4; };
-} INST64_B4;
-
-typedef union U_INST64_B8 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; };
-} INST64_B8;
-
-typedef union U_INST64_B9 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; };
-} INST64_B9;
-
-typedef union U_INST64_I19 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; };
-} INST64_I19;
-
-typedef union U_INST64_I26 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_I26;
-
-typedef union U_INST64_I27 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; };
-} INST64_I27;
-
-typedef union U_INST64_I28 { /* not privileged (mov from AR) */
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_I28;
-
-typedef union U_INST64_M28 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M28;
-
-typedef union U_INST64_M29 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M29;
-
-typedef union U_INST64_M30 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2,
-               x3:3, s:1, major:4; };
-} INST64_M30;
-
-typedef union U_INST64_M31 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M31;
-
-typedef union U_INST64_M32 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M32;
-
-typedef union U_INST64_M33 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M33;
-
-typedef union U_INST64_M35 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
-
-} INST64_M35;
-
-typedef union U_INST64_M36 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; };
-} INST64_M36;
-
-typedef union U_INST64_M37 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3,
-               i:1, major:4; };
-} INST64_M37;
-
-typedef union U_INST64_M41 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
-} INST64_M41;
-
-typedef union U_INST64_M42 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M42;
-
-typedef union U_INST64_M43 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M43;
-
-typedef union U_INST64_M44 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; };
-} INST64_M44;
-
-typedef union U_INST64_M45 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M45;
-
-typedef union U_INST64_M46 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6,
-               x3:3, un1:1, major:4; };
-} INST64_M46;
-
-typedef union U_INST64_M47 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; };
-} INST64_M47;
-
-typedef union U_INST64_M1{
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2,
-               x6:6, m:1, major:4; };
-} INST64_M1;
-
-typedef union U_INST64_M2{
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2,
-               x6:6, m:1, major:4; };
-} INST64_M2;
-
-typedef union U_INST64_M3{
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2,
-               x6:6, s:1, major:4; };
-} INST64_M3;
-
-typedef union U_INST64_M4 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2,
-               x6:6, m:1, major:4; };
-} INST64_M4;
-
-typedef union U_INST64_M5 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2,
-               x6:6, s:1, major:4; };
-} INST64_M5;
-
-typedef union U_INST64_M6 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2,
-               x6:6, m:1, major:4; };
-} INST64_M6;
-
-typedef union U_INST64_M9 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2,
-               x6:6, m:1, major:4; };
-} INST64_M9;
-
-typedef union U_INST64_M10 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2,
-               x6:6, s:1, major:4; };
-} INST64_M10;
-
-typedef union U_INST64_M12 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2,
-               x6:6, m:1, major:4; };
-} INST64_M12;
-
-typedef union U_INST64_M15 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2,
-               x6:6, s:1, major:4; };
-} INST64_M15;
-
-typedef union U_INST64 {
-       IA64_INST inst;
-       struct { unsigned long :37, major:4; } generic;
-       INST64_A5 A5;   /* used in build_hypercall_bundle only */
-       INST64_B4 B4;   /* used in build_hypercall_bundle only */
-       INST64_B8 B8;   /* rfi, bsw.[01] */
-       INST64_B9 B9;   /* break.b */
-       INST64_I19 I19; /* used in build_hypercall_bundle only */
-       INST64_I26 I26; /* mov register to ar (I unit) */
-       INST64_I27 I27; /* mov immediate to ar (I unit) */
-       INST64_I28 I28; /* mov from ar (I unit) */
-       INST64_M1  M1;  /* ld integer */
-       INST64_M2  M2;
-       INST64_M3  M3;
-       INST64_M4  M4;  /* st integer */
-       INST64_M5  M5;
-       INST64_M6  M6;  /* ldfd floating pointer                */
-       INST64_M9  M9;  /* stfd floating pointer                */
-       INST64_M10 M10; /* stfd floating pointer                */
-       INST64_M12 M12;     /* ldfd pair floating pointer               */
-       INST64_M15 M15; /* lfetch + imm update                  */
-       INST64_M28 M28; /* purge translation cache entry        */
-       INST64_M29 M29; /* mov register to ar (M unit)          */
-       INST64_M30 M30; /* mov immediate to ar (M unit)         */
-       INST64_M31 M31; /* mov from ar (M unit)                 */
-       INST64_M32 M32; /* mov reg to cr                        */
-       INST64_M33 M33; /* mov from cr                          */
-       INST64_M35 M35; /* mov to psr                           */
-       INST64_M36 M36; /* mov from psr                         */
-       INST64_M37 M37; /* break.m                              */
-       INST64_M41 M41; /* translation cache insert             */
-       INST64_M42 M42; /* mov to indirect reg/translation reg insert*/
-       INST64_M43 M43; /* mov from indirect reg                */
-       INST64_M44 M44; /* set/reset system mask                */
-       INST64_M45 M45; /* translation purge                    */
-       INST64_M46 M46; /* translation access (tpa,tak)         */
-       INST64_M47 M47; /* purge translation entry              */
-} INST64;
-
-#define MASK_41 ((unsigned long)0x1ffffffffff)
-
-/* Virtual address memory attributes encoding */
-#define VA_MATTR_WB         0x0
-#define VA_MATTR_UC         0x4
-#define VA_MATTR_UCE        0x5
-#define VA_MATTR_WC         0x6
-#define VA_MATTR_NATPAGE    0x7
-
-#define PMASK(size)         (~((size) - 1))
-#define PSIZE(size)         (1UL<<(size))
-#define CLEARLSB(ppn, nbits)    (((ppn) >> (nbits)) << (nbits))
-#define PAGEALIGN(va, ps)      CLEARLSB(va, ps)
-#define PAGE_FLAGS_RV_MASK   (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53))
-#define _PAGE_MA_ST     (0x1 <<  2) /* is reserved for software use */
-
-#define ARCH_PAGE_SHIFT   12
-
-#define INVALID_TI_TAG (1UL << 63)
-
-#define VTLB_PTE_P_BIT      0
-#define VTLB_PTE_IO_BIT     60
-#define VTLB_PTE_IO         (1UL<<VTLB_PTE_IO_BIT)
-#define VTLB_PTE_P          (1UL<<VTLB_PTE_P_BIT)
-
-#define vcpu_quick_region_check(_tr_regions,_ifa)              \
-       (_tr_regions & (1 << ((unsigned long)_ifa >> 61)))
-
-#define vcpu_quick_region_set(_tr_regions,_ifa)             \
-       do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0)
-
-static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir,
-               u64 va, u64 rid)
-{
-       trp->page_flags = pte;
-       trp->itir = itir;
-       trp->vadr = va;
-       trp->rid = rid;
-}
-
-extern u64 kvm_get_mpt_entry(u64 gpfn);
-
-/* Return I/ */
-static inline u64 __gpfn_is_io(u64 gpfn)
-{
-       u64  pte;
-       pte = kvm_get_mpt_entry(gpfn);
-       if (!(pte & GPFN_INV_MASK)) {
-               pte = pte & GPFN_IO_MASK;
-               if (pte != GPFN_PHYS_MMIO)
-                       return pte;
-       }
-       return 0;
-}
-#endif
-#define IA64_NO_FAULT  0
-#define IA64_FAULT     1
-
-#define VMM_RBS_OFFSET  ((VMM_TASK_SIZE + 15) & ~15)
-
-#define SW_BAD  0   /* Bad mode transitition */
-#define SW_V2P  1   /* Physical emulatino is activated */
-#define SW_P2V  2   /* Exit physical mode emulation */
-#define SW_SELF 3   /* No mode transition */
-#define SW_NOP  4   /* Mode transition, but without action required */
-
-#define GUEST_IN_PHY    0x1
-#define GUEST_PHY_EMUL  0x2
-
-#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP))
-
-#define VRN_SHIFT      61
-#define VRN_MASK       0xe000000000000000
-#define VRN0           0x0UL
-#define VRN1           0x1UL
-#define VRN2           0x2UL
-#define VRN3           0x3UL
-#define VRN4           0x4UL
-#define VRN5           0x5UL
-#define VRN6           0x6UL
-#define VRN7           0x7UL
-
-#define IRQ_NO_MASKED         0
-#define IRQ_MASKED_BY_VTPR    1
-#define IRQ_MASKED_BY_INSVC   2   /* masked by inservice IRQ */
-
-#define PTA_BASE_SHIFT      15
-
-#define IA64_PSR_VM_BIT     46
-#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT)
-
-/* Interruption Function State */
-#define IA64_IFS_V_BIT      63
-#define IA64_IFS_V  (__IA64_UL(1) << IA64_IFS_V_BIT)
-
-#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX)
-#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX)
-
-#ifndef __ASSEMBLY__
-
-#include <asm/gcc_intrin.h>
-
-#define is_physical_mode(v)            \
-       ((v->arch.mode_flags) & GUEST_IN_PHY)
-
-#define is_virtual_mode(v)     \
-       (!is_physical_mode(v))
-
-#define MODE_IND(psr)  \
-       (((psr).it << 2) + ((psr).dt << 1) + (psr).rt)
-
-#ifndef CONFIG_SMP
-#define _vmm_raw_spin_lock(x)   do {}while(0)
-#define _vmm_raw_spin_unlock(x) do {}while(0)
-#else
-typedef struct {
-       volatile unsigned int lock;
-} vmm_spinlock_t;
-#define _vmm_raw_spin_lock(x)                                          \
-       do {                                                            \
-               __u32 *ia64_spinlock_ptr = (__u32 *) (x);               \
-               __u64 ia64_spinlock_val;                                \
-               ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
-               if (unlikely(ia64_spinlock_val)) {                      \
-                       do {                                            \
-                               while (*ia64_spinlock_ptr)              \
-                               ia64_barrier();                         \
-                               ia64_spinlock_val =                     \
-                               ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
-                       } while (ia64_spinlock_val);                    \
-               }                                                       \
-       } while (0)
-
-#define _vmm_raw_spin_unlock(x)                                \
-       do { barrier();                         \
-               ((vmm_spinlock_t *)x)->lock = 0; } \
-while (0)
-#endif
-
-void vmm_spin_lock(vmm_spinlock_t *lock);
-void vmm_spin_unlock(vmm_spinlock_t *lock);
-enum {
-       I_TLB = 1,
-       D_TLB = 2
-};
-
-union kvm_va {
-       struct {
-               unsigned long off : 60;         /* intra-region offset */
-               unsigned long reg :  4;         /* region number */
-       } f;
-       unsigned long l;
-       void *p;
-};
-
-#define __kvm_pa(x)     ({union kvm_va _v; _v.l = (long) (x);          \
-                                               _v.f.reg = 0; _v.l; })
-#define __kvm_va(x)     ({union kvm_va _v; _v.l = (long) (x);          \
-                               _v.f.reg = -1; _v.p; })
-
-#define _REGION_ID(x)           ({union ia64_rr _v; _v.val = (long)(x); \
-                                               _v.rid; })
-#define _REGION_PAGE_SIZE(x)    ({union ia64_rr _v; _v.val = (long)(x); \
-                                               _v.ps; })
-#define _REGION_HW_WALKER(x)    ({union ia64_rr _v; _v.val = (long)(x);        \
-                                               _v.ve; })
-
-enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF };
-enum tlb_miss_type { INSTRUCTION, DATA, REGISTER };
-
-#define VCPU(_v, _x) ((_v)->arch.vpd->_x)
-#define VMX(_v, _x)  ((_v)->arch._x)
-
-#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i])
-#define VLSAPIC_XTP(_v)        VMX(_v, xtp)
-
-static inline unsigned long itir_ps(unsigned long itir)
-{
-       return ((itir >> 2) & 0x3f);
-}
-
-
-/**************************************************************************
-  VCPU control register access routines
- **************************************************************************/
-
-static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, itir));
-}
-
-static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, itir) = val;
-}
-
-static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, ifa));
-}
-
-static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, ifa) = val;
-}
-
-static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, iva));
-}
-
-static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, pta));
-}
-
-static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, lid));
-}
-
-static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, tpr));
-}
-
-static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu)
-{
-       return (0UL);           /*reads of eoi always return 0 */
-}
-
-static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, irr[0]));
-}
-
-static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, irr[1]));
-}
-
-static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, irr[2]));
-}
-
-static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, irr[3]));
-}
-
-static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val)
-{
-       ia64_setreg(_IA64_REG_CR_DCR, val);
-}
-
-static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, isr) = val;
-}
-
-static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, lid) = val;
-}
-
-static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, ipsr) = val;
-}
-
-static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, iip) = val;
-}
-
-static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, ifs) = val;
-}
-
-static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, iipa) = val;
-}
-
-static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, iha) = val;
-}
-
-
-static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg)
-{
-       return vcpu->arch.vrr[reg>>61];
-}
-
-/**************************************************************************
-  VCPU debug breakpoint register access routines
- **************************************************************************/
-
-static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-       __ia64_set_dbr(reg, val);
-}
-
-static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-       ia64_set_ibr(reg, val);
-}
-
-static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg)
-{
-       return ((u64)__ia64_get_dbr(reg));
-}
-
-static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg)
-{
-       return ((u64)ia64_get_ibr(reg));
-}
-
-/**************************************************************************
-  VCPU performance monitor register access routines
- **************************************************************************/
-static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-       /* NOTE: Writes to unimplemented PMC registers are discarded */
-       ia64_set_pmc(reg, val);
-}
-
-static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-       /* NOTE: Writes to unimplemented PMD registers are discarded */
-       ia64_set_pmd(reg, val);
-}
-
-static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg)
-{
-       /* NOTE: Reads from unimplemented PMC registers return zero */
-       return ((u64)ia64_get_pmc(reg));
-}
-
-static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg)
-{
-       /* NOTE: Reads from unimplemented PMD registers return zero */
-       return ((u64)ia64_get_pmd(reg));
-}
-
-static inline unsigned long vrrtomrr(unsigned long val)
-{
-       union ia64_rr rr;
-       rr.val = val;
-       rr.rid = (rr.rid << 4) | 0xe;
-       if (rr.ps > PAGE_SHIFT)
-               rr.ps = PAGE_SHIFT;
-       rr.ve = 1;
-       return rr.val;
-}
-
-
-static inline int highest_bits(int *dat)
-{
-       u32  bits, bitnum;
-       int i;
-
-       /* loop for all 256 bits */
-       for (i = 7; i >= 0 ; i--) {
-               bits = dat[i];
-               if (bits) {
-                       bitnum = fls(bits);
-                       return i * 32 + bitnum - 1;
-               }
-       }
-       return NULL_VECTOR;
-}
-
-/*
- * The pending irq is higher than the inservice one.
- *
- */
-static inline int is_higher_irq(int pending, int inservice)
-{
-       return ((pending > inservice)
-                       || ((pending != NULL_VECTOR)
-                               && (inservice == NULL_VECTOR)));
-}
-
-static inline int is_higher_class(int pending, int mic)
-{
-       return ((pending >> 4) > mic);
-}
-
-/*
- * Return 0-255 for pending irq.
- *        NULL_VECTOR: when no pending.
- */
-static inline int highest_pending_irq(struct kvm_vcpu *vcpu)
-{
-       if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR))
-               return NMI_VECTOR;
-       if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR))
-               return ExtINT_VECTOR;
-
-       return highest_bits((int *)&VCPU(vcpu, irr[0]));
-}
-
-static inline int highest_inservice_irq(struct kvm_vcpu *vcpu)
-{
-       if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR))
-               return NMI_VECTOR;
-       if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR))
-               return ExtINT_VECTOR;
-
-       return highest_bits((int *)&(VMX(vcpu, insvc[0])));
-}
-
-extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-                                       struct ia64_fpreg *val);
-extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-                                       struct ia64_fpreg *val);
-extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg);
-extern void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg,
-                       u64 val, int nat);
-extern unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu);
-extern void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val);
-extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr);
-extern void vcpu_bsw0(struct kvm_vcpu *vcpu);
-extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte,
-                                       u64 itir, u64 va, int type);
-extern struct thash_data *vhpt_lookup(u64 va);
-extern u64 guest_vhpt_lookup(u64 iha, u64 *pte);
-extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps);
-extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps);
-extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va);
-extern void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
-               u64 itir, u64 ifa, int type);
-extern void thash_purge_all(struct kvm_vcpu *v);
-extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v,
-                                               u64 va, int is_data);
-extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va,
-                                               u64 ps, int is_data);
-
-extern void vcpu_increment_iip(struct kvm_vcpu *v);
-extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu);
-extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
-extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
-extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr);
-extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr);
-extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr);
-extern void nested_dtlb(struct kvm_vcpu *vcpu);
-extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr);
-extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref);
-
-extern void update_vhpi(struct kvm_vcpu *vcpu, int vec);
-extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice);
-
-extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle);
-extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma);
-extern void vmm_transition(struct kvm_vcpu *vcpu);
-extern void vmm_trampoline(union context *from, union context *to);
-extern int vmm_entry(void);
-extern  u64 vcpu_get_itc(struct kvm_vcpu *vcpu);
-
-extern void vmm_reset_entry(void);
-void kvm_init_vtlb(struct kvm_vcpu *v);
-void kvm_init_vhpt(struct kvm_vcpu *v);
-void thash_init(struct thash_cb *hcb, u64 sz);
-
-void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
-u64 kvm_gpa_to_mpa(u64 gpa);
-extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
-               u64 arg4, u64 arg5, u64 arg6, u64 arg7);
-
-extern long vmm_sanity;
-
-#endif
-#endif /* __VCPU_H__ */
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c

deleted file mode 100644 (file)

index 176a12c..0000000
--- a/arch/ia64/kvm/vmm.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * vmm.c: vmm module interface with kvm module
- *
- * Copyright (c) 2007, Intel Corporation.
- *
- *  Xiantao Zhang (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/fpswa.h>
-
-#include "vcpu.h"
-
-MODULE_AUTHOR("Intel");
-MODULE_LICENSE("GPL");
-
-extern char kvm_ia64_ivt;
-extern char kvm_asm_mov_from_ar;
-extern char kvm_asm_mov_from_ar_sn2;
-extern fpswa_interface_t *vmm_fpswa_interface;
-
-long vmm_sanity = 1;
-
-struct kvm_vmm_info vmm_info = {
-       .module                 = THIS_MODULE,
-       .vmm_entry              = vmm_entry,
-       .tramp_entry            = vmm_trampoline,
-       .vmm_ivt                = (unsigned long)&kvm_ia64_ivt,
-       .patch_mov_ar           = (unsigned long)&kvm_asm_mov_from_ar,
-       .patch_mov_ar_sn2       = (unsigned long)&kvm_asm_mov_from_ar_sn2,
-};
-
-static int __init  kvm_vmm_init(void)
-{
-
-       vmm_fpswa_interface = fpswa_interface;
-
-       /*Register vmm data to kvm side*/
-       return kvm_init(&vmm_info, 1024, 0, THIS_MODULE);
-}
-
-static void __exit kvm_vmm_exit(void)
-{
-       kvm_exit();
-       return ;
-}
-
-void vmm_spin_lock(vmm_spinlock_t *lock)
-{
-       _vmm_raw_spin_lock(lock);
-}
-
-void vmm_spin_unlock(vmm_spinlock_t *lock)
-{
-       _vmm_raw_spin_unlock(lock);
-}
-
-static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
-{
-       struct exit_ctl_data *p = &vcpu->arch.exit_data;
-       long psr;
-
-       local_irq_save(psr);
-       p->exit_reason = EXIT_REASON_DEBUG;
-       vmm_transition(vcpu);
-       local_irq_restore(psr);
-}
-
-asmlinkage int printk(const char *fmt, ...)
-{
-       struct kvm_vcpu *vcpu = current_vcpu;
-       va_list args;
-       int r;
-
-       memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
-       va_start(args, fmt);
-       r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
-       va_end(args);
-       vcpu_debug_exit(vcpu);
-       return r;
-}
-
-module_init(kvm_vmm_init)
-module_exit(kvm_vmm_exit)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S

deleted file mode 100644 (file)

index 397e34a..0000000
--- a/arch/ia64/kvm/vmm_ivt.S
+++ /dev/null
@@ -1,1392 +0,0 @@
-/*
- * arch/ia64/kvm/vmm_ivt.S
- *
- * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
- *      Stephane Eranian <eranian@hpl.hp.com>
- *      David Mosberger <davidm@hpl.hp.com>
- * Copyright (C) 2000, 2002-2003 Intel Co
- *      Asit Mallick <asit.k.mallick@intel.com>
- *      Suresh Siddha <suresh.b.siddha@intel.com>
- *      Kenneth Chen <kenneth.w.chen@intel.com>
- *      Fenghua Yu <fenghua.yu@intel.com>
- *
- *
- * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling
- * for SMP
- * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB
- * handler now uses virtual PT.
- *
- * 07/6/20 Xuefei Xu  (Anthony Xu) (anthony.xu@intel.com)
- *              Supporting Intel virtualization architecture
- *
- */
-
-/*
- * This file defines the interruption vector table used by the CPU.
- * It does not include one entry per possible cause of interruption.
- *
- * The first 20 entries of the table contain 64 bundles each while the
- * remaining 48 entries contain only 16 bundles each.
- *
- * The 64 bundles are used to allow inlining the whole handler for
- * critical
- * interruptions like TLB misses.
- *
- *  For each entry, the comment is as follows:
- *
- *              // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss
- *              (12,51)
- *  entry offset ----/     /         /                  /
- *  /
- *  entry number ---------/         /                  /
- *  /
- *  size of the entry -------------/                  /
- *  /
- *  vector name -------------------------------------/
- *  /
- *  interruptions triggering this vector
- *  ----------------------/
- *
- * The table is 32KB in size and must be aligned on 32KB
- * boundary.
- * (The CPU ignores the 15 lower bits of the address)
- *
- * Table is based upon EAS2.6 (Oct 1999)
- */
-
-
-#include <asm/asmmacro.h>
-#include <asm/cache.h>
-#include <asm/pgtable.h>
-
-#include "asm-offsets.h"
-#include "vcpu.h"
-#include "kvm_minstate.h"
-#include "vti.h"
-
-#if 0
-# define PSR_DEFAULT_BITS   psr.ac
-#else
-# define PSR_DEFAULT_BITS   0
-#endif
-
-#define KVM_FAULT(n)    \
-       kvm_fault_##n:;          \
-       mov r19=n;;          \
-       br.sptk.many kvm_vmm_panic;         \
-       ;;                  \
-
-#define KVM_REFLECT(n)    \
-       mov r31=pr;           \
-       mov r19=n;       /* prepare to save predicates */ \
-       mov r29=cr.ipsr;      \
-       ;;      \
-       tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
-(p7)   br.sptk.many kvm_dispatch_reflection;        \
-       br.sptk.many kvm_vmm_panic;      \
-
-GLOBAL_ENTRY(kvm_vmm_panic)
-       KVM_SAVE_MIN_WITH_COVER_R19
-       alloc r14=ar.pfs,0,0,1,0
-       mov out0=r15
-       adds r3=8,r2                // set up second base pointer
-       ;;
-       ssm psr.ic
-       ;;
-       srlz.i    // guarantee that interruption collection is on
-       ;;
-       (p15) ssm psr.i               // restore psr.
-       addl r14=@gprel(ia64_leave_hypervisor),gp
-       ;;
-       KVM_SAVE_REST
-       mov rp=r14
-       ;;
-       br.call.sptk.many b6=vmm_panic_handler;
-END(kvm_vmm_panic)
-
-    .section .text..ivt,"ax"
-
-    .align 32768    // align on 32KB boundary
-    .global kvm_ia64_ivt
-kvm_ia64_ivt:
-///////////////////////////////////////////////////////////////
-// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
-ENTRY(kvm_vhpt_miss)
-       KVM_FAULT(0)
-END(kvm_vhpt_miss)
-
-    .org kvm_ia64_ivt+0x400
-////////////////////////////////////////////////////////////////
-// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
-ENTRY(kvm_itlb_miss)
-       mov r31 = pr
-       mov r29=cr.ipsr;
-       ;;
-       tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p6)   br.sptk kvm_alt_itlb_miss
-       mov r19 = 1
-       br.sptk kvm_itlb_miss_dispatch
-       KVM_FAULT(1);
-END(kvm_itlb_miss)
-
-    .org kvm_ia64_ivt+0x0800
-//////////////////////////////////////////////////////////////////
-// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
-ENTRY(kvm_dtlb_miss)
-       mov r31 = pr
-       mov r29=cr.ipsr;
-       ;;
-       tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p6)   br.sptk kvm_alt_dtlb_miss
-       br.sptk kvm_dtlb_miss_dispatch
-END(kvm_dtlb_miss)
-
-     .org kvm_ia64_ivt+0x0c00
-////////////////////////////////////////////////////////////////////
-// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
-ENTRY(kvm_alt_itlb_miss)
-       mov r16=cr.ifa    // get address that caused the TLB miss
-       ;;
-       movl r17=PAGE_KERNEL
-       mov r24=cr.ipsr
-       movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-       ;;
-       and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
-       ;;
-       or r19=r17,r19      // insert PTE control bits into r19
-       ;;
-       movl r20=IA64_GRANULE_SHIFT<<2
-       ;;
-       mov cr.itir=r20
-       ;;
-       itc.i r19               // insert the TLB entry
-       mov pr=r31,-1
-       rfi
-END(kvm_alt_itlb_miss)
-
-    .org kvm_ia64_ivt+0x1000
-/////////////////////////////////////////////////////////////////////
-// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
-ENTRY(kvm_alt_dtlb_miss)
-       mov r16=cr.ifa          // get address that caused the TLB miss
-       ;;
-       movl r17=PAGE_KERNEL
-       movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-       mov r24=cr.ipsr
-       ;;
-       and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
-       ;;
-       or r19=r19,r17  // insert PTE control bits into r19
-       ;;
-       movl r20=IA64_GRANULE_SHIFT<<2
-       ;;
-       mov cr.itir=r20
-       ;;
-       itc.d r19               // insert the TLB entry
-       mov pr=r31,-1
-       rfi
-END(kvm_alt_dtlb_miss)
-
-    .org kvm_ia64_ivt+0x1400
-//////////////////////////////////////////////////////////////////////
-// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
-ENTRY(kvm_nested_dtlb_miss)
-       KVM_FAULT(5)
-END(kvm_nested_dtlb_miss)
-
-    .org kvm_ia64_ivt+0x1800
-/////////////////////////////////////////////////////////////////////
-// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
-ENTRY(kvm_ikey_miss)
-       KVM_REFLECT(6)
-END(kvm_ikey_miss)
-
-    .org kvm_ia64_ivt+0x1c00
-/////////////////////////////////////////////////////////////////////
-// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
-ENTRY(kvm_dkey_miss)
-       KVM_REFLECT(7)
-END(kvm_dkey_miss)
-
-    .org kvm_ia64_ivt+0x2000
-////////////////////////////////////////////////////////////////////
-// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
-ENTRY(kvm_dirty_bit)
-       KVM_REFLECT(8)
-END(kvm_dirty_bit)
-
-    .org kvm_ia64_ivt+0x2400
-////////////////////////////////////////////////////////////////////
-// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
-ENTRY(kvm_iaccess_bit)
-       KVM_REFLECT(9)
-END(kvm_iaccess_bit)
-
-    .org kvm_ia64_ivt+0x2800
-///////////////////////////////////////////////////////////////////
-// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
-ENTRY(kvm_daccess_bit)
-       KVM_REFLECT(10)
-END(kvm_daccess_bit)
-
-    .org kvm_ia64_ivt+0x2c00
-/////////////////////////////////////////////////////////////////
-// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
-ENTRY(kvm_break_fault)
-       mov r31=pr
-       mov r19=11
-       mov r29=cr.ipsr
-       ;;
-       KVM_SAVE_MIN_WITH_COVER_R19
-       ;;
-       alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!)
-       mov out0=cr.ifa
-       mov out2=cr.isr     // FIXME: pity to make this slow access twice
-       mov out3=cr.iim     // FIXME: pity to make this slow access twice
-       adds r3=8,r2                // set up second base pointer
-       ;;
-       ssm psr.ic
-       ;;
-       srlz.i         // guarantee that interruption collection is on
-       ;;
-       (p15)ssm psr.i               // restore psr.i
-       addl r14=@gprel(ia64_leave_hypervisor),gp
-       ;;
-       KVM_SAVE_REST
-       mov rp=r14
-       ;;
-       adds out1=16,sp
-       br.call.sptk.many b6=kvm_ia64_handle_break
-       ;;
-END(kvm_break_fault)
-
-    .org kvm_ia64_ivt+0x3000
-/////////////////////////////////////////////////////////////////
-// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
-ENTRY(kvm_interrupt)
-       mov r31=pr              // prepare to save predicates
-       mov r19=12
-       mov r29=cr.ipsr
-       ;;
-       tbit.z p6,p7=r29,IA64_PSR_VM_BIT
-       tbit.z p0,p15=r29,IA64_PSR_I_BIT
-       ;;
-(p7)   br.sptk kvm_dispatch_interrupt
-       ;;
-       mov r27=ar.rsc          /* M */
-       mov r20=r1                      /* A */
-       mov r25=ar.unat         /* M */
-       mov r26=ar.pfs          /* I */
-       mov r28=cr.iip          /* M */
-       cover                   /* B (or nothing) */
-       ;;
-       mov r1=sp
-       ;;
-       invala                  /* M */
-       mov r30=cr.ifs
-       ;;
-       addl r1=-VMM_PT_REGS_SIZE,r1
-       ;;
-       adds r17=2*L1_CACHE_BYTES,r1    /* really: biggest cache-line size */
-       adds r16=PT(CR_IPSR),r1
-       ;;
-       lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
-       st8 [r16]=r29                   /* save cr.ipsr */
-       ;;
-       lfetch.fault.excl.nt1 [r17]
-       mov r29=b0
-       ;;
-       adds r16=PT(R8),r1      /* initialize first base pointer */
-       adds r17=PT(R9),r1      /* initialize second base pointer */
-       mov r18=r0                      /* make sure r18 isn't NaT */
-       ;;
-.mem.offset 0,0; st8.spill [r16]=r8,16
-.mem.offset 8,0; st8.spill [r17]=r9,16
-        ;;
-.mem.offset 0,0; st8.spill [r16]=r10,24
-.mem.offset 8,0; st8.spill [r17]=r11,24
-        ;;
-       st8 [r16]=r28,16                /* save cr.iip */
-       st8 [r17]=r30,16                /* save cr.ifs */
-       mov r8=ar.fpsr          /* M */
-       mov r9=ar.csd
-       mov r10=ar.ssd
-       movl r11=FPSR_DEFAULT   /* L-unit */
-       ;;
-       st8 [r16]=r25,16                /* save ar.unat */
-       st8 [r17]=r26,16                /* save ar.pfs */
-       shl r18=r18,16          /* compute ar.rsc to be used for "loadrs" */
-       ;;
-       st8 [r16]=r27,16                /* save ar.rsc */
-       adds r17=16,r17         /* skip over ar_rnat field */
-       ;;
-       st8 [r17]=r31,16                /* save predicates */
-       adds r16=16,r16         /* skip over ar_bspstore field */
-       ;;
-       st8 [r16]=r29,16                /* save b0 */
-       st8 [r17]=r18,16                /* save ar.rsc value for "loadrs" */
-       ;;
-.mem.offset 0,0; st8.spill [r16]=r20,16    /* save original r1 */
-.mem.offset 8,0; st8.spill [r17]=r12,16
-       adds r12=-16,r1
-       /* switch to kernel memory stack (with 16 bytes of scratch) */
-       ;;
-.mem.offset 0,0; st8.spill [r16]=r13,16
-.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
-       ;;
-.mem.offset 0,0; st8.spill [r16]=r15,16
-.mem.offset 8,0; st8.spill [r17]=r14,16
-       dep r14=-1,r0,60,4
-       ;;
-.mem.offset 0,0; st8.spill [r16]=r2,16
-.mem.offset 8,0; st8.spill [r17]=r3,16
-       adds r2=VMM_PT_REGS_R16_OFFSET,r1
-       adds r14 = VMM_VCPU_GP_OFFSET,r13
-       ;;
-       mov r8=ar.ccv
-       ld8 r14 = [r14]
-       ;;
-       mov r1=r14       /* establish kernel global pointer */
-       ;;                                          \
-       bsw.1
-       ;;
-       alloc r14=ar.pfs,0,0,1,0        // must be first in an insn group
-       mov out0=r13
-       ;;
-       ssm psr.ic
-       ;;
-       srlz.i
-       ;;
-       //(p15) ssm psr.i
-       adds r3=8,r2            // set up second base pointer for SAVE_REST
-       srlz.i                  // ensure everybody knows psr.ic is back on
-       ;;
-.mem.offset 0,0; st8.spill [r2]=r16,16
-.mem.offset 8,0; st8.spill [r3]=r17,16
-       ;;
-.mem.offset 0,0; st8.spill [r2]=r18,16
-.mem.offset 8,0; st8.spill [r3]=r19,16
-       ;;
-.mem.offset 0,0; st8.spill [r2]=r20,16
-.mem.offset 8,0; st8.spill [r3]=r21,16
-       mov r18=b6
-       ;;
-.mem.offset 0,0; st8.spill [r2]=r22,16
-.mem.offset 8,0; st8.spill [r3]=r23,16
-       mov r19=b7
-       ;;
-.mem.offset 0,0; st8.spill [r2]=r24,16
-.mem.offset 8,0; st8.spill [r3]=r25,16
-       ;;
-.mem.offset 0,0; st8.spill [r2]=r26,16
-.mem.offset 8,0; st8.spill [r3]=r27,16
-       ;;
-.mem.offset 0,0; st8.spill [r2]=r28,16
-.mem.offset 8,0; st8.spill [r3]=r29,16
-       ;;
-.mem.offset 0,0; st8.spill [r2]=r30,16
-.mem.offset 8,0; st8.spill [r3]=r31,32
-       ;;
-       mov ar.fpsr=r11       /* M-unit */
-       st8 [r2]=r8,8         /* ar.ccv */
-       adds r24=PT(B6)-PT(F7),r3
-       ;;
-       stf.spill [r2]=f6,32
-       stf.spill [r3]=f7,32
-       ;;
-       stf.spill [r2]=f8,32
-       stf.spill [r3]=f9,32
-       ;;
-       stf.spill [r2]=f10
-       stf.spill [r3]=f11
-       adds r25=PT(B7)-PT(F11),r3
-       ;;
-       st8 [r24]=r18,16       /* b6 */
-       st8 [r25]=r19,16       /* b7 */
-       ;;
-       st8 [r24]=r9           /* ar.csd */
-       st8 [r25]=r10          /* ar.ssd */
-       ;;
-       srlz.d          // make sure we see the effect of cr.ivr
-       addl r14=@gprel(ia64_leave_nested),gp
-       ;;
-       mov rp=r14
-       br.call.sptk.many b6=kvm_ia64_handle_irq
-       ;;
-END(kvm_interrupt)
-
-    .global kvm_dispatch_vexirq
-    .org kvm_ia64_ivt+0x3400
-//////////////////////////////////////////////////////////////////////
-// 0x3400 Entry 13 (size 64 bundles) Reserved
-ENTRY(kvm_virtual_exirq)
-       mov r31=pr
-       mov r19=13
-       mov r30 =r0
-       ;;
-kvm_dispatch_vexirq:
-       cmp.eq p6,p0 = 1,r30
-       ;;
-(p6)   add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
-       ;;
-(p6)   ld8 r1 = [r29]
-       ;;
-       KVM_SAVE_MIN_WITH_COVER_R19
-       alloc r14=ar.pfs,0,0,1,0
-       mov out0=r13
-
-       ssm psr.ic
-       ;;
-       srlz.i // guarantee that interruption collection is on
-       ;;
-       (p15) ssm psr.i               // restore psr.i
-       adds r3=8,r2                // set up second base pointer
-       ;;
-       KVM_SAVE_REST
-       addl r14=@gprel(ia64_leave_hypervisor),gp
-       ;;
-       mov rp=r14
-       br.call.sptk.many b6=kvm_vexirq
-END(kvm_virtual_exirq)
-
-    .org kvm_ia64_ivt+0x3800
-/////////////////////////////////////////////////////////////////////
-// 0x3800 Entry 14 (size 64 bundles) Reserved
-       KVM_FAULT(14)
-       // this code segment is from 2.6.16.13
-
-    .org kvm_ia64_ivt+0x3c00
-///////////////////////////////////////////////////////////////////////
-// 0x3c00 Entry 15 (size 64 bundles) Reserved
-       KVM_FAULT(15)
-
-    .org kvm_ia64_ivt+0x4000
-///////////////////////////////////////////////////////////////////////
-// 0x4000 Entry 16 (size 64 bundles) Reserved
-       KVM_FAULT(16)
-
-    .org kvm_ia64_ivt+0x4400
-//////////////////////////////////////////////////////////////////////
-// 0x4400 Entry 17 (size 64 bundles) Reserved
-       KVM_FAULT(17)
-
-    .org kvm_ia64_ivt+0x4800
-//////////////////////////////////////////////////////////////////////
-// 0x4800 Entry 18 (size 64 bundles) Reserved
-       KVM_FAULT(18)
-
-    .org kvm_ia64_ivt+0x4c00
-//////////////////////////////////////////////////////////////////////
-// 0x4c00 Entry 19 (size 64 bundles) Reserved
-       KVM_FAULT(19)
-
-    .org kvm_ia64_ivt+0x5000
-//////////////////////////////////////////////////////////////////////
-// 0x5000 Entry 20 (size 16 bundles) Page Not Present
-ENTRY(kvm_page_not_present)
-       KVM_REFLECT(20)
-END(kvm_page_not_present)
-
-    .org kvm_ia64_ivt+0x5100
-///////////////////////////////////////////////////////////////////////
-// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
-ENTRY(kvm_key_permission)
-       KVM_REFLECT(21)
-END(kvm_key_permission)
-
-    .org kvm_ia64_ivt+0x5200
-//////////////////////////////////////////////////////////////////////
-// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
-ENTRY(kvm_iaccess_rights)
-       KVM_REFLECT(22)
-END(kvm_iaccess_rights)
-
-    .org kvm_ia64_ivt+0x5300
-//////////////////////////////////////////////////////////////////////
-// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
-ENTRY(kvm_daccess_rights)
-       KVM_REFLECT(23)
-END(kvm_daccess_rights)
-
-    .org kvm_ia64_ivt+0x5400
-/////////////////////////////////////////////////////////////////////
-// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
-ENTRY(kvm_general_exception)
-       KVM_REFLECT(24)
-       KVM_FAULT(24)
-END(kvm_general_exception)
-
-    .org kvm_ia64_ivt+0x5500
-//////////////////////////////////////////////////////////////////////
-// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
-ENTRY(kvm_disabled_fp_reg)
-       KVM_REFLECT(25)
-END(kvm_disabled_fp_reg)
-
-    .org kvm_ia64_ivt+0x5600
-////////////////////////////////////////////////////////////////////
-// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
-ENTRY(kvm_nat_consumption)
-       KVM_REFLECT(26)
-END(kvm_nat_consumption)
-
-    .org kvm_ia64_ivt+0x5700
-/////////////////////////////////////////////////////////////////////
-// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
-ENTRY(kvm_speculation_vector)
-       KVM_REFLECT(27)
-END(kvm_speculation_vector)
-
-    .org kvm_ia64_ivt+0x5800
-/////////////////////////////////////////////////////////////////////
-// 0x5800 Entry 28 (size 16 bundles) Reserved
-       KVM_FAULT(28)
-
-    .org kvm_ia64_ivt+0x5900
-///////////////////////////////////////////////////////////////////
-// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
-ENTRY(kvm_debug_vector)
-       KVM_FAULT(29)
-END(kvm_debug_vector)
-
-    .org kvm_ia64_ivt+0x5a00
-///////////////////////////////////////////////////////////////
-// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
-ENTRY(kvm_unaligned_access)
-       KVM_REFLECT(30)
-END(kvm_unaligned_access)
-
-    .org kvm_ia64_ivt+0x5b00
-//////////////////////////////////////////////////////////////////////
-// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
-ENTRY(kvm_unsupported_data_reference)
-       KVM_REFLECT(31)
-END(kvm_unsupported_data_reference)
-
-    .org kvm_ia64_ivt+0x5c00
-////////////////////////////////////////////////////////////////////
-// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
-ENTRY(kvm_floating_point_fault)
-       KVM_REFLECT(32)
-END(kvm_floating_point_fault)
-
-    .org kvm_ia64_ivt+0x5d00
-/////////////////////////////////////////////////////////////////////
-// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
-ENTRY(kvm_floating_point_trap)
-       KVM_REFLECT(33)
-END(kvm_floating_point_trap)
-
-    .org kvm_ia64_ivt+0x5e00
-//////////////////////////////////////////////////////////////////////
-// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
-ENTRY(kvm_lower_privilege_trap)
-       KVM_REFLECT(34)
-END(kvm_lower_privilege_trap)
-
-    .org kvm_ia64_ivt+0x5f00
-//////////////////////////////////////////////////////////////////////
-// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
-ENTRY(kvm_taken_branch_trap)
-       KVM_REFLECT(35)
-END(kvm_taken_branch_trap)
-
-    .org kvm_ia64_ivt+0x6000
-////////////////////////////////////////////////////////////////////
-// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
-ENTRY(kvm_single_step_trap)
-       KVM_REFLECT(36)
-END(kvm_single_step_trap)
-    .global kvm_virtualization_fault_back
-    .org kvm_ia64_ivt+0x6100
-/////////////////////////////////////////////////////////////////////
-// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
-ENTRY(kvm_virtualization_fault)
-       mov r31=pr
-       adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-       ;;
-       st8 [r16] = r1
-       adds r17 = VMM_VCPU_GP_OFFSET, r21
-       ;;
-       ld8 r1 = [r17]
-       cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
-       cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
-       cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
-       cmp.eq p9,p0=EVENT_RSM,r24
-       cmp.eq p10,p0=EVENT_SSM,r24
-       cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
-       cmp.eq p12,p0=EVENT_THASH,r24
-(p6)   br.dptk.many kvm_asm_mov_from_ar
-(p7)   br.dptk.many kvm_asm_mov_from_rr
-(p8)   br.dptk.many kvm_asm_mov_to_rr
-(p9)   br.dptk.many kvm_asm_rsm
-(p10)  br.dptk.many kvm_asm_ssm
-(p11)  br.dptk.many kvm_asm_mov_to_psr
-(p12)  br.dptk.many kvm_asm_thash
-       ;;
-kvm_virtualization_fault_back:
-       adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-       ;;
-       ld8 r1 = [r16]
-       ;;
-       mov r19=37
-       adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
-       adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
-       ;;
-       st8 [r16] = r24
-       st8 [r17] = r25
-       ;;
-       cmp.ne p6,p0=EVENT_RFI, r24
-(p6)   br.sptk kvm_dispatch_virtualization_fault
-       ;;
-       adds r18=VMM_VPD_BASE_OFFSET,r21
-       ;;
-       ld8 r18=[r18]
-       ;;
-       adds r18=VMM_VPD_VIFS_OFFSET,r18
-       ;;
-       ld8 r18=[r18]
-       ;;
-       tbit.z p6,p0=r18,63
-(p6)   br.sptk kvm_dispatch_virtualization_fault
-       ;;
-//if vifs.v=1 desert current register frame
-       alloc r18=ar.pfs,0,0,0,0
-       br.sptk kvm_dispatch_virtualization_fault
-END(kvm_virtualization_fault)
-
-    .org kvm_ia64_ivt+0x6200
-//////////////////////////////////////////////////////////////
-// 0x6200 Entry 38 (size 16 bundles) Reserved
-       KVM_FAULT(38)
-
-    .org kvm_ia64_ivt+0x6300
-/////////////////////////////////////////////////////////////////
-// 0x6300 Entry 39 (size 16 bundles) Reserved
-       KVM_FAULT(39)
-
-    .org kvm_ia64_ivt+0x6400
-/////////////////////////////////////////////////////////////////
-// 0x6400 Entry 40 (size 16 bundles) Reserved
-       KVM_FAULT(40)
-
-    .org kvm_ia64_ivt+0x6500
-//////////////////////////////////////////////////////////////////
-// 0x6500 Entry 41 (size 16 bundles) Reserved
-       KVM_FAULT(41)
-
-    .org kvm_ia64_ivt+0x6600
-//////////////////////////////////////////////////////////////////
-// 0x6600 Entry 42 (size 16 bundles) Reserved
-       KVM_FAULT(42)
-
-    .org kvm_ia64_ivt+0x6700
-//////////////////////////////////////////////////////////////////
-// 0x6700 Entry 43 (size 16 bundles) Reserved
-       KVM_FAULT(43)
-
-    .org kvm_ia64_ivt+0x6800
-//////////////////////////////////////////////////////////////////
-// 0x6800 Entry 44 (size 16 bundles) Reserved
-       KVM_FAULT(44)
-
-    .org kvm_ia64_ivt+0x6900
-///////////////////////////////////////////////////////////////////
-// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception
-//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
-ENTRY(kvm_ia32_exception)
-       KVM_FAULT(45)
-END(kvm_ia32_exception)
-
-    .org kvm_ia64_ivt+0x6a00
-////////////////////////////////////////////////////////////////////
-// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept  (30,31,59,70,71)
-ENTRY(kvm_ia32_intercept)
-       KVM_FAULT(47)
-END(kvm_ia32_intercept)
-
-    .org kvm_ia64_ivt+0x6c00
-/////////////////////////////////////////////////////////////////////
-// 0x6c00 Entry 48 (size 16 bundles) Reserved
-       KVM_FAULT(48)
-
-    .org kvm_ia64_ivt+0x6d00
-//////////////////////////////////////////////////////////////////////
-// 0x6d00 Entry 49 (size 16 bundles) Reserved
-       KVM_FAULT(49)
-
-    .org kvm_ia64_ivt+0x6e00
-//////////////////////////////////////////////////////////////////////
-// 0x6e00 Entry 50 (size 16 bundles) Reserved
-       KVM_FAULT(50)
-
-    .org kvm_ia64_ivt+0x6f00
-/////////////////////////////////////////////////////////////////////
-// 0x6f00 Entry 51 (size 16 bundles) Reserved
-       KVM_FAULT(52)
-
-    .org kvm_ia64_ivt+0x7100
-////////////////////////////////////////////////////////////////////
-// 0x7100 Entry 53 (size 16 bundles) Reserved
-       KVM_FAULT(53)
-
-    .org kvm_ia64_ivt+0x7200
-/////////////////////////////////////////////////////////////////////
-// 0x7200 Entry 54 (size 16 bundles) Reserved
-       KVM_FAULT(54)
-
-    .org kvm_ia64_ivt+0x7300
-////////////////////////////////////////////////////////////////////
-// 0x7300 Entry 55 (size 16 bundles) Reserved
-       KVM_FAULT(55)
-
-    .org kvm_ia64_ivt+0x7400
-////////////////////////////////////////////////////////////////////
-// 0x7400 Entry 56 (size 16 bundles) Reserved
-       KVM_FAULT(56)
-
-    .org kvm_ia64_ivt+0x7500
-/////////////////////////////////////////////////////////////////////
-// 0x7500 Entry 57 (size 16 bundles) Reserved
-       KVM_FAULT(57)
-
-    .org kvm_ia64_ivt+0x7600
-/////////////////////////////////////////////////////////////////////
-// 0x7600 Entry 58 (size 16 bundles) Reserved
-       KVM_FAULT(58)
-
-    .org kvm_ia64_ivt+0x7700
-////////////////////////////////////////////////////////////////////
-// 0x7700 Entry 59 (size 16 bundles) Reserved
-       KVM_FAULT(59)
-
-    .org kvm_ia64_ivt+0x7800
-////////////////////////////////////////////////////////////////////
-// 0x7800 Entry 60 (size 16 bundles) Reserved
-       KVM_FAULT(60)
-
-    .org kvm_ia64_ivt+0x7900
-/////////////////////////////////////////////////////////////////////
-// 0x7900 Entry 61 (size 16 bundles) Reserved
-       KVM_FAULT(61)
-
-    .org kvm_ia64_ivt+0x7a00
-/////////////////////////////////////////////////////////////////////
-// 0x7a00 Entry 62 (size 16 bundles) Reserved
-       KVM_FAULT(62)
-
-    .org kvm_ia64_ivt+0x7b00
-/////////////////////////////////////////////////////////////////////
-// 0x7b00 Entry 63 (size 16 bundles) Reserved
-       KVM_FAULT(63)
-
-    .org kvm_ia64_ivt+0x7c00
-////////////////////////////////////////////////////////////////////
-// 0x7c00 Entry 64 (size 16 bundles) Reserved
-       KVM_FAULT(64)
-
-    .org kvm_ia64_ivt+0x7d00
-/////////////////////////////////////////////////////////////////////
-// 0x7d00 Entry 65 (size 16 bundles) Reserved
-       KVM_FAULT(65)
-
-    .org kvm_ia64_ivt+0x7e00
-/////////////////////////////////////////////////////////////////////
-// 0x7e00 Entry 66 (size 16 bundles) Reserved
-       KVM_FAULT(66)
-
-    .org kvm_ia64_ivt+0x7f00
-////////////////////////////////////////////////////////////////////
-// 0x7f00 Entry 67 (size 16 bundles) Reserved
-       KVM_FAULT(67)
-
-    .org kvm_ia64_ivt+0x8000
-// There is no particular reason for this code to be here, other than that
-// there happens to be space here that would go unused otherwise.  If this
-// fault ever gets "unreserved", simply moved the following code to a more
-// suitable spot...
-
-
-ENTRY(kvm_dtlb_miss_dispatch)
-       mov r19 = 2
-       KVM_SAVE_MIN_WITH_COVER_R19
-       alloc r14=ar.pfs,0,0,3,0
-       mov out0=cr.ifa
-       mov out1=r15
-       adds r3=8,r2                // set up second base pointer
-       ;;
-       ssm psr.ic
-       ;;
-       srlz.i     // guarantee that interruption collection is on
-       ;;
-       (p15) ssm psr.i               // restore psr.i
-       addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
-       ;;
-       KVM_SAVE_REST
-       KVM_SAVE_EXTRA
-       mov rp=r14
-       ;;
-       adds out2=16,r12
-       br.call.sptk.many b6=kvm_page_fault
-END(kvm_dtlb_miss_dispatch)
-
-ENTRY(kvm_itlb_miss_dispatch)
-
-       KVM_SAVE_MIN_WITH_COVER_R19
-       alloc r14=ar.pfs,0,0,3,0
-       mov out0=cr.ifa
-       mov out1=r15
-       adds r3=8,r2                // set up second base pointer
-       ;;
-       ssm psr.ic
-       ;;
-       srlz.i   // guarantee that interruption collection is on
-       ;;
-       (p15) ssm psr.i               // restore psr.i
-       addl r14=@gprel(ia64_leave_hypervisor),gp
-       ;;
-       KVM_SAVE_REST
-       mov rp=r14
-       ;;
-       adds out2=16,r12
-       br.call.sptk.many b6=kvm_page_fault
-END(kvm_itlb_miss_dispatch)
-
-ENTRY(kvm_dispatch_reflection)
-/*
- * Input:
- *  psr.ic: off
- *  r19:    intr type (offset into ivt, see ia64_int.h)
- *  r31:    contains saved predicates (pr)
- */
-       KVM_SAVE_MIN_WITH_COVER_R19
-       alloc r14=ar.pfs,0,0,5,0
-       mov out0=cr.ifa
-       mov out1=cr.isr
-       mov out2=cr.iim
-       mov out3=r15
-       adds r3=8,r2                // set up second base pointer
-       ;;
-       ssm psr.ic
-       ;;
-       srlz.i   // guarantee that interruption collection is on
-       ;;
-       (p15) ssm psr.i               // restore psr.i
-       addl r14=@gprel(ia64_leave_hypervisor),gp
-       ;;
-       KVM_SAVE_REST
-       mov rp=r14
-       ;;
-       adds out4=16,r12
-       br.call.sptk.many b6=reflect_interruption
-END(kvm_dispatch_reflection)
-
-ENTRY(kvm_dispatch_virtualization_fault)
-       adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
-       adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
-       ;;
-       st8 [r16] = r24
-       st8 [r17] = r25
-       ;;
-       KVM_SAVE_MIN_WITH_COVER_R19
-       ;;
-       alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!)
-       mov out0=r13        //vcpu
-       adds r3=8,r2                // set up second base pointer
-       ;;
-       ssm psr.ic
-       ;;
-       srlz.i    // guarantee that interruption collection is on
-       ;;
-       (p15) ssm psr.i               // restore psr.i
-       addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
-       ;;
-       KVM_SAVE_REST
-       KVM_SAVE_EXTRA
-       mov rp=r14
-       ;;
-       adds out1=16,sp         //regs
-       br.call.sptk.many b6=kvm_emulate
-END(kvm_dispatch_virtualization_fault)
-
-
-ENTRY(kvm_dispatch_interrupt)
-       KVM_SAVE_MIN_WITH_COVER_R19     // uses r31; defines r2 and r3
-       ;;
-       alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
-       adds r3=8,r2            // set up second base pointer for SAVE_REST
-       ;;
-       ssm psr.ic
-       ;;
-       srlz.i
-       ;;
-       (p15) ssm psr.i
-       addl r14=@gprel(ia64_leave_hypervisor),gp
-       ;;
-       KVM_SAVE_REST
-       mov rp=r14
-       ;;
-       mov out0=r13            // pass pointer to pt_regs as second arg
-       br.call.sptk.many b6=kvm_ia64_handle_irq
-END(kvm_dispatch_interrupt)
-
-GLOBAL_ENTRY(ia64_leave_nested)
-       rsm psr.i
-       ;;
-       adds r21=PT(PR)+16,r12
-       ;;
-       lfetch [r21],PT(CR_IPSR)-PT(PR)
-       adds r2=PT(B6)+16,r12
-       adds r3=PT(R16)+16,r12
-       ;;
-       lfetch [r21]
-       ld8 r28=[r2],8          // load b6
-       adds r29=PT(R24)+16,r12
-
-       ld8.fill r16=[r3]
-       adds r3=PT(AR_CSD)-PT(R16),r3
-       adds r30=PT(AR_CCV)+16,r12
-       ;;
-       ld8.fill r24=[r29]
-       ld8 r15=[r30]           // load ar.ccv
-       ;;
-       ld8 r29=[r2],16         // load b7
-       ld8 r30=[r3],16         // load ar.csd
-       ;;
-       ld8 r31=[r2],16         // load ar.ssd
-       ld8.fill r8=[r3],16
-       ;;
-       ld8.fill r9=[r2],16
-       ld8.fill r10=[r3],PT(R17)-PT(R10)
-       ;;
-       ld8.fill r11=[r2],PT(R18)-PT(R11)
-       ld8.fill r17=[r3],16
-       ;;
-       ld8.fill r18=[r2],16
-       ld8.fill r19=[r3],16
-       ;;
-       ld8.fill r20=[r2],16
-       ld8.fill r21=[r3],16
-       mov ar.csd=r30
-       mov ar.ssd=r31
-       ;;
-       rsm psr.i | psr.ic
-       // initiate turning off of interrupt and interruption collection
-       invala                  // invalidate ALAT
-       ;;
-       srlz.i
-       ;;
-       ld8.fill r22=[r2],24
-       ld8.fill r23=[r3],24
-       mov b6=r28
-       ;;
-       ld8.fill r25=[r2],16
-       ld8.fill r26=[r3],16
-       mov b7=r29
-       ;;
-       ld8.fill r27=[r2],16
-       ld8.fill r28=[r3],16
-       ;;
-       ld8.fill r29=[r2],16
-       ld8.fill r30=[r3],24
-       ;;
-       ld8.fill r31=[r2],PT(F9)-PT(R31)
-       adds r3=PT(F10)-PT(F6),r3
-       ;;
-       ldf.fill f9=[r2],PT(F6)-PT(F9)
-       ldf.fill f10=[r3],PT(F8)-PT(F10)
-       ;;
-       ldf.fill f6=[r2],PT(F7)-PT(F6)
-       ;;
-       ldf.fill f7=[r2],PT(F11)-PT(F7)
-       ldf.fill f8=[r3],32
-       ;;
-       srlz.i                  // ensure interruption collection is off
-       mov ar.ccv=r15
-       ;;
-       bsw.0   // switch back to bank 0 (no stop bit required beforehand...)
-       ;;
-       ldf.fill f11=[r2]
-//     mov r18=r13
-//     mov r21=r13
-       adds r16=PT(CR_IPSR)+16,r12
-       adds r17=PT(CR_IIP)+16,r12
-       ;;
-       ld8 r29=[r16],16        // load cr.ipsr
-       ld8 r28=[r17],16        // load cr.iip
-       ;;
-       ld8 r30=[r16],16        // load cr.ifs
-       ld8 r25=[r17],16        // load ar.unat
-       ;;
-       ld8 r26=[r16],16        // load ar.pfs
-       ld8 r27=[r17],16        // load ar.rsc
-       cmp.eq p9,p0=r0,r0
-       // set p9 to indicate that we should restore cr.ifs
-       ;;
-       ld8 r24=[r16],16        // load ar.rnat (may be garbage)
-       ld8 r23=[r17],16// load ar.bspstore (may be garbage)
-       ;;
-       ld8 r31=[r16],16        // load predicates
-       ld8 r22=[r17],16        // load b0
-       ;;
-       ld8 r19=[r16],16        // load ar.rsc value for "loadrs"
-       ld8.fill r1=[r17],16    // load r1
-       ;;
-       ld8.fill r12=[r16],16
-       ld8.fill r13=[r17],16
-       ;;
-       ld8 r20=[r16],16        // ar.fpsr
-       ld8.fill r15=[r17],16
-       ;;
-       ld8.fill r14=[r16],16
-       ld8.fill r2=[r17]
-       ;;
-       ld8.fill r3=[r16]
-       ;;
-       mov r16=ar.bsp          // get existing backing store pointer
-       ;;
-       mov b0=r22
-       mov ar.pfs=r26
-       mov cr.ifs=r30
-       mov cr.ipsr=r29
-       mov ar.fpsr=r20
-       mov cr.iip=r28
-       ;;
-       mov ar.rsc=r27
-       mov ar.unat=r25
-       mov pr=r31,-1
-       rfi
-END(ia64_leave_nested)
-
-GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
-/*
- * work.need_resched etc. mustn't get changed
- *by this CPU before it returns to
- * user- or fsys-mode, hence we disable interrupts early on:
- */
-       adds r2 = PT(R4)+16,r12
-       adds r3 = PT(R5)+16,r12
-       adds r8 = PT(EML_UNAT)+16,r12
-       ;;
-       ld8 r8 = [r8]
-       ;;
-       mov ar.unat=r8
-       ;;
-       ld8.fill r4=[r2],16    //load r4
-       ld8.fill r5=[r3],16    //load r5
-       ;;
-       ld8.fill r6=[r2]    //load r6
-       ld8.fill r7=[r3]    //load r7
-       ;;
-END(ia64_leave_hypervisor_prepare)
-//fall through
-GLOBAL_ENTRY(ia64_leave_hypervisor)
-       rsm psr.i
-       ;;
-       br.call.sptk.many b0=leave_hypervisor_tail
-       ;;
-       adds r20=PT(PR)+16,r12
-       adds r8=PT(EML_UNAT)+16,r12
-       ;;
-       ld8 r8=[r8]
-       ;;
-       mov ar.unat=r8
-       ;;
-       lfetch [r20],PT(CR_IPSR)-PT(PR)
-       adds r2 = PT(B6)+16,r12
-       adds r3 = PT(B7)+16,r12
-       ;;
-       lfetch [r20]
-       ;;
-       ld8 r24=[r2],16        /* B6 */
-       ld8 r25=[r3],16        /* B7 */
-       ;;
-       ld8 r26=[r2],16        /* ar_csd */
-       ld8 r27=[r3],16        /* ar_ssd */
-       mov b6 = r24
-       ;;
-       ld8.fill r8=[r2],16
-       ld8.fill r9=[r3],16
-       mov b7 = r25
-       ;;
-       mov ar.csd = r26
-       mov ar.ssd = r27
-       ;;
-       ld8.fill r10=[r2],PT(R15)-PT(R10)
-       ld8.fill r11=[r3],PT(R14)-PT(R11)
-       ;;
-       ld8.fill r15=[r2],PT(R16)-PT(R15)
-       ld8.fill r14=[r3],PT(R17)-PT(R14)
-       ;;
-       ld8.fill r16=[r2],16
-       ld8.fill r17=[r3],16
-       ;;
-       ld8.fill r18=[r2],16
-       ld8.fill r19=[r3],16
-       ;;
-       ld8.fill r20=[r2],16
-       ld8.fill r21=[r3],16
-       ;;
-       ld8.fill r22=[r2],16
-       ld8.fill r23=[r3],16
-       ;;
-       ld8.fill r24=[r2],16
-       ld8.fill r25=[r3],16
-       ;;
-       ld8.fill r26=[r2],16
-       ld8.fill r27=[r3],16
-       ;;
-       ld8.fill r28=[r2],16
-       ld8.fill r29=[r3],16
-       ;;
-       ld8.fill r30=[r2],PT(F6)-PT(R30)
-       ld8.fill r31=[r3],PT(F7)-PT(R31)
-       ;;
-       rsm psr.i | psr.ic
-       // initiate turning off of interrupt and interruption collection
-       invala          // invalidate ALAT
-       ;;
-       srlz.i          // ensure interruption collection is off
-       ;;
-       bsw.0
-       ;;
-       adds r16 = PT(CR_IPSR)+16,r12
-       adds r17 = PT(CR_IIP)+16,r12
-       mov r21=r13             // get current
-       ;;
-       ld8 r31=[r16],16    // load cr.ipsr
-       ld8 r30=[r17],16    // load cr.iip
-       ;;
-       ld8 r29=[r16],16    // load cr.ifs
-       ld8 r28=[r17],16    // load ar.unat
-       ;;
-       ld8 r27=[r16],16    // load ar.pfs
-       ld8 r26=[r17],16    // load ar.rsc
-       ;;
-       ld8 r25=[r16],16    // load ar.rnat
-       ld8 r24=[r17],16    // load ar.bspstore
-       ;;
-       ld8 r23=[r16],16    // load predicates
-       ld8 r22=[r17],16    // load b0
-       ;;
-       ld8 r20=[r16],16    // load ar.rsc value for "loadrs"
-       ld8.fill r1=[r17],16    //load r1
-       ;;
-       ld8.fill r12=[r16],16    //load r12
-       ld8.fill r13=[r17],PT(R2)-PT(R13)    //load r13
-       ;;
-       ld8 r19=[r16],PT(R3)-PT(AR_FPSR)    //load ar_fpsr
-       ld8.fill r2=[r17],PT(AR_CCV)-PT(R2)    //load r2
-       ;;
-       ld8.fill r3=[r16]       //load r3
-       ld8 r18=[r17]   //load ar_ccv
-       ;;
-       mov ar.fpsr=r19
-       mov ar.ccv=r18
-       shr.u r18=r20,16
-       ;;
-kvm_rbs_switch:
-       mov r19=96
-
-kvm_dont_preserve_current_frame:
-/*
-    * To prevent leaking bits between the hypervisor and guest domain,
-    * we must clear the stacked registers in the "invalid" partition here.
-    * 5 registers/cycle on McKinley).
-    */
-#   define pRecurse    p6
-#   define pReturn     p7
-#   define Nregs       14
-
-       alloc loc0=ar.pfs,2,Nregs-2,2,0
-       shr.u loc1=r18,9        // RNaTslots <= floor(dirtySize / (64*8))
-       sub r19=r19,r18         // r19 = (physStackedSize + 8) - dirtySize
-       ;;
-       mov ar.rsc=r20          // load ar.rsc to be used for "loadrs"
-       shladd in0=loc1,3,r19
-       mov in1=0
-       ;;
-       TEXT_ALIGN(32)
-kvm_rse_clear_invalid:
-       alloc loc0=ar.pfs,2,Nregs-2,2,0
-       cmp.lt pRecurse,p0=Nregs*8,in0
-       // if more than Nregs regs left to clear, (re)curse
-       add out0=-Nregs*8,in0
-       add out1=1,in1          // increment recursion count
-       mov loc1=0
-       mov loc2=0
-       ;;
-       mov loc3=0
-       mov loc4=0
-       mov loc5=0
-       mov loc6=0
-       mov loc7=0
-(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
-       ;;
-       mov loc8=0
-       mov loc9=0
-       cmp.ne pReturn,p0=r0,in1
-       // if recursion count != 0, we need to do a br.ret
-       mov loc10=0
-       mov loc11=0
-(pReturn) br.ret.dptk.many b0
-
-#      undef pRecurse
-#      undef pReturn
-
-// loadrs has already been shifted
-       alloc r16=ar.pfs,0,0,0,0    // drop current register frame
-       ;;
-       loadrs
-       ;;
-       mov ar.bspstore=r24
-       ;;
-       mov ar.unat=r28
-       mov ar.rnat=r25
-       mov ar.rsc=r26
-       ;;
-       mov cr.ipsr=r31
-       mov cr.iip=r30
-       mov cr.ifs=r29
-       mov ar.pfs=r27
-       adds r18=VMM_VPD_BASE_OFFSET,r21
-       ;;
-       ld8 r18=[r18]   //vpd
-       adds r17=VMM_VCPU_ISR_OFFSET,r21
-       ;;
-       ld8 r17=[r17]
-       adds r19=VMM_VPD_VPSR_OFFSET,r18
-       ;;
-       ld8 r19=[r19]        //vpsr
-       mov r25=r18
-       adds r16= VMM_VCPU_GP_OFFSET,r21
-       ;;
-       ld8 r16= [r16] // Put gp in r24
-       movl r24=@gprel(ia64_vmm_entry)  // calculate return address
-       ;;
-       add  r24=r24,r16
-       ;;
-       br.sptk.many  kvm_vps_sync_write       // call the service
-       ;;
-END(ia64_leave_hypervisor)
-// fall through
-GLOBAL_ENTRY(ia64_vmm_entry)
-/*
- *  must be at bank 0
- *  parameter:
- *  r17:cr.isr
- *  r18:vpd
- *  r19:vpsr
- *  r22:b0
- *  r23:predicate
- */
-       mov r24=r22
-       mov r25=r18
-       tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT        // p1=vpsr.ic
-(p1)   br.cond.sptk.few kvm_vps_resume_normal
-(p2)   br.cond.sptk.many kvm_vps_resume_handler
-       ;;
-END(ia64_vmm_entry)
-
-/*
- * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
- *                  u64 arg3, u64 arg4, u64 arg5,
- *                  u64 arg6, u64 arg7);
- *
- * XXX: The currently defined services use only 4 args at the max. The
- *  rest are not consumed.
- */
-GLOBAL_ENTRY(ia64_call_vsa)
-    .regstk 4,4,0,0
-
-rpsave  =   loc0
-pfssave =   loc1
-psrsave =   loc2
-entry   =   loc3
-hostret =   r24
-
-       alloc   pfssave=ar.pfs,4,4,0,0
-       mov rpsave=rp
-       adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
-       ;;
-       ld8 entry=[entry]
-1:     mov hostret=ip
-       mov r25=in1         // copy arguments
-       mov r26=in2
-       mov r27=in3
-       mov psrsave=psr
-       ;;
-       tbit.nz p6,p0=psrsave,14    // IA64_PSR_I
-       tbit.nz p7,p0=psrsave,13    // IA64_PSR_IC
-       ;;
-       add hostret=2f-1b,hostret   // calculate return address
-       add entry=entry,in0
-       ;;
-       rsm psr.i | psr.ic
-       ;;
-       srlz.i
-       mov b6=entry
-       br.cond.sptk b6         // call the service
-2:
-// Architectural sequence for enabling interrupts if necessary
-(p7)    ssm psr.ic
-       ;;
-(p7)    srlz.i
-       ;;
-(p6)    ssm psr.i
-       ;;
-       mov rp=rpsave
-       mov ar.pfs=pfssave
-       mov r8=r31
-       ;;
-       srlz.d
-       br.ret.sptk rp
-
-END(ia64_call_vsa)
-
-#define  INIT_BSPSTORE  ((4<<30)-(12<<20)-0x100)
-
-GLOBAL_ENTRY(vmm_reset_entry)
-       //set up ipsr, iip, vpd.vpsr, dcr
-       // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
-       // For DCR: all bits 0
-       bsw.0
-       ;;
-       mov r21 =r13
-       adds r14=-VMM_PT_REGS_SIZE, r12
-       ;;
-       movl r6=0x501008826000      // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
-       movl r10=0x8000000000000000
-       adds r16=PT(CR_IIP), r14
-       adds r20=PT(R1), r14
-       ;;
-       rsm psr.ic | psr.i
-       ;;
-       srlz.i
-       ;;
-       mov ar.rsc = 0
-       ;;
-       flushrs
-       ;;
-       mov ar.bspstore = 0
-       // clear BSPSTORE
-       ;;
-       mov cr.ipsr=r6
-       mov cr.ifs=r10
-       ld8 r4 = [r16] // Set init iip for first run.
-       ld8 r1 = [r20]
-       ;;
-       mov cr.iip=r4
-       adds r16=VMM_VPD_BASE_OFFSET,r13
-       ;;
-       ld8 r18=[r16]
-       ;;
-       adds r19=VMM_VPD_VPSR_OFFSET,r18
-       ;;
-       ld8 r19=[r19]
-       mov r17=r0
-       mov r22=r0
-       mov r23=r0
-       br.cond.sptk ia64_vmm_entry
-       br.ret.sptk  b0
-END(vmm_reset_entry)
diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h

deleted file mode 100644 (file)

index b214b5b..0000000
--- a/arch/ia64/kvm/vti.h
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * vti.h: prototype for generial vt related interface
- *     Copyright (c) 2004, Intel Corporation.
- *
- *     Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
- *     Fred Yang (fred.yang@intel.com)
- *     Kun Tian (Kevin Tian) (kevin.tian@intel.com)
- *
- *     Copyright (c) 2007, Intel Corporation.
- *     Zhang xiantao <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-#ifndef _KVM_VT_I_H
-#define _KVM_VT_I_H
-
-#ifndef __ASSEMBLY__
-#include <asm/page.h>
-
-#include <linux/kvm_host.h>
-
-/* define itr.i and itr.d  in ia64_itr function */
-#define        ITR     0x01
-#define        DTR     0x02
-#define        IaDTR   0x03
-
-#define IA64_TR_VMM       6 /*itr6, dtr6 : maps vmm code, vmbuffer*/
-#define IA64_TR_VM_DATA   7 /*dtr7       : maps current vm data*/
-
-#define RR6 (6UL<<61)
-#define RR7 (7UL<<61)
-
-
-/* config_options in pal_vp_init_env */
-#define        VP_INITIALIZE   1UL
-#define        VP_FR_PMC       1UL<<1
-#define        VP_OPCODE       1UL<<8
-#define        VP_CAUSE        1UL<<9
-#define VP_FW_ACC      1UL<<63
-
-/* init vp env with initializing vm_buffer */
-#define        VP_INIT_ENV_INITALIZE  (VP_INITIALIZE | VP_FR_PMC |\
-       VP_OPCODE | VP_CAUSE | VP_FW_ACC)
-/* init vp env without initializing vm_buffer */
-#define        VP_INIT_ENV  VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC
-
-#define                PAL_VP_CREATE   265
-/* Stacked Virt. Initializes a new VPD for the operation of
- * a new virtual processor in the virtual environment.
- */
-#define                PAL_VP_ENV_INFO 266
-/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/
-#define                PAL_VP_EXIT_ENV 267
-/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/
-#define                PAL_VP_INIT_ENV 268
-/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/
-#define                PAL_VP_REGISTER 269
-/*Stacked Virt. Register a different host IVT for the virtual processor.*/
-#define                PAL_VP_RESUME   270
-/* Renamed from PAL_VP_RESUME */
-#define                PAL_VP_RESTORE  270
-/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/
-#define                PAL_VP_SUSPEND  271
-/* Renamed from PAL_VP_SUSPEND */
-#define                PAL_VP_SAVE     271
-/* Stacked Virt. Suspends operation for the specified virtual processor on
- * the logical processor.
- */
-#define                PAL_VP_TERMINATE 272
-/* Stacked Virt. Terminates operation for the specified virtual processor.*/
-
-union vac {
-       unsigned long value;
-       struct {
-               unsigned int a_int:1;
-               unsigned int a_from_int_cr:1;
-               unsigned int a_to_int_cr:1;
-               unsigned int a_from_psr:1;
-               unsigned int a_from_cpuid:1;
-               unsigned int a_cover:1;
-               unsigned int a_bsw:1;
-               long reserved:57;
-       };
-};
-
-union vdc {
-       unsigned long value;
-       struct {
-               unsigned int d_vmsw:1;
-               unsigned int d_extint:1;
-               unsigned int d_ibr_dbr:1;
-               unsigned int d_pmc:1;
-               unsigned int d_to_pmd:1;
-               unsigned int d_itm:1;
-               long reserved:58;
-       };
-};
-
-struct vpd {
-       union vac   vac;
-       union vdc   vdc;
-       unsigned long  virt_env_vaddr;
-       unsigned long  reserved1[29];
-       unsigned long  vhpi;
-       unsigned long  reserved2[95];
-       unsigned long  vgr[16];
-       unsigned long  vbgr[16];
-       unsigned long  vnat;
-       unsigned long  vbnat;
-       unsigned long  vcpuid[5];
-       unsigned long  reserved3[11];
-       unsigned long  vpsr;
-       unsigned long  vpr;
-       unsigned long  reserved4[76];
-       union {
-               unsigned long  vcr[128];
-               struct {
-                       unsigned long dcr;
-                       unsigned long itm;
-                       unsigned long iva;
-                       unsigned long rsv1[5];
-                       unsigned long pta;
-                       unsigned long rsv2[7];
-                       unsigned long ipsr;
-                       unsigned long isr;
-                       unsigned long rsv3;
-                       unsigned long iip;
-                       unsigned long ifa;
-                       unsigned long itir;
-                       unsigned long iipa;
-                       unsigned long ifs;
-                       unsigned long iim;
-                       unsigned long iha;
-                       unsigned long rsv4[38];
-                       unsigned long lid;
-                       unsigned long ivr;
-                       unsigned long tpr;
-                       unsigned long eoi;
-                       unsigned long irr[4];
-                       unsigned long itv;
-                       unsigned long pmv;
-                       unsigned long cmcv;
-                       unsigned long rsv5[5];
-                       unsigned long lrr0;
-                       unsigned long lrr1;
-                       unsigned long rsv6[46];
-               };
-       };
-       unsigned long  reserved5[128];
-       unsigned long  reserved6[3456];
-       unsigned long  vmm_avail[128];
-       unsigned long  reserved7[4096];
-};
-
-#define PAL_PROC_VM_BIT                (1UL << 40)
-#define PAL_PROC_VMSW_BIT      (1UL << 54)
-
-static inline s64 ia64_pal_vp_env_info(u64 *buffer_size,
-               u64 *vp_env_info)
-{
-       struct ia64_pal_retval iprv;
-       PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0);
-       *buffer_size = iprv.v0;
-       *vp_env_info = iprv.v1;
-       return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_exit_env(u64 iva)
-{
-       struct ia64_pal_retval iprv;
-
-       PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0);
-       return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr,
-                       u64 vbase_addr, u64 *vsa_base)
-{
-       struct ia64_pal_retval iprv;
-
-       PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr,
-                       vbase_addr);
-       *vsa_base = iprv.v0;
-
-       return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector)
-{
-       struct ia64_pal_retval iprv;
-
-       PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0);
-
-       return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector)
-{
-       struct ia64_pal_retval iprv;
-
-       PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0);
-
-       return iprv.status;
-}
-
-#endif
-
-/*VPD field offset*/
-#define VPD_VAC_START_OFFSET           0
-#define VPD_VDC_START_OFFSET           8
-#define VPD_VHPI_START_OFFSET          256
-#define VPD_VGR_START_OFFSET           1024
-#define VPD_VBGR_START_OFFSET          1152
-#define VPD_VNAT_START_OFFSET          1280
-#define VPD_VBNAT_START_OFFSET         1288
-#define VPD_VCPUID_START_OFFSET                1296
-#define VPD_VPSR_START_OFFSET          1424
-#define VPD_VPR_START_OFFSET           1432
-#define VPD_VRSE_CFLE_START_OFFSET     1440
-#define VPD_VCR_START_OFFSET           2048
-#define VPD_VTPR_START_OFFSET          2576
-#define VPD_VRR_START_OFFSET           3072
-#define VPD_VMM_VAIL_START_OFFSET      31744
-
-/*Virtualization faults*/
-
-#define EVENT_MOV_TO_AR                         1
-#define EVENT_MOV_TO_AR_IMM             2
-#define EVENT_MOV_FROM_AR               3
-#define EVENT_MOV_TO_CR                         4
-#define EVENT_MOV_FROM_CR               5
-#define EVENT_MOV_TO_PSR                6
-#define EVENT_MOV_FROM_PSR              7
-#define EVENT_ITC_D                     8
-#define EVENT_ITC_I                     9
-#define EVENT_MOV_TO_RR                         10
-#define EVENT_MOV_TO_DBR                11
-#define EVENT_MOV_TO_IBR                12
-#define EVENT_MOV_TO_PKR                13
-#define EVENT_MOV_TO_PMC                14
-#define EVENT_MOV_TO_PMD                15
-#define EVENT_ITR_D                     16
-#define EVENT_ITR_I                     17
-#define EVENT_MOV_FROM_RR               18
-#define EVENT_MOV_FROM_DBR              19
-#define EVENT_MOV_FROM_IBR              20
-#define EVENT_MOV_FROM_PKR              21
-#define EVENT_MOV_FROM_PMC              22
-#define EVENT_MOV_FROM_CPUID            23
-#define EVENT_SSM                       24
-#define EVENT_RSM                       25
-#define EVENT_PTC_L                     26
-#define EVENT_PTC_G                     27
-#define EVENT_PTC_GA                    28
-#define EVENT_PTR_D                     29
-#define EVENT_PTR_I                     30
-#define EVENT_THASH                     31
-#define EVENT_TTAG                      32
-#define EVENT_TPA                       33
-#define EVENT_TAK                       34
-#define EVENT_PTC_E                     35
-#define EVENT_COVER                     36
-#define EVENT_RFI                       37
-#define EVENT_BSW_0                     38
-#define EVENT_BSW_1                     39
-#define EVENT_VMSW                      40
-
-/**PAL virtual services offsets */
-#define PAL_VPS_RESUME_NORMAL           0x0000
-#define PAL_VPS_RESUME_HANDLER          0x0400
-#define PAL_VPS_SYNC_READ               0x0800
-#define PAL_VPS_SYNC_WRITE              0x0c00
-#define PAL_VPS_SET_PENDING_INTERRUPT   0x1000
-#define PAL_VPS_THASH                   0x1400
-#define PAL_VPS_TTAG                    0x1800
-#define PAL_VPS_RESTORE                 0x1c00
-#define PAL_VPS_SAVE                    0x2000
-
-#endif/* _VT_I_H*/
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c

deleted file mode 100644 (file)

index a7869f8..0000000
--- a/arch/ia64/kvm/vtlb.c
+++ /dev/null
@@ -1,640 +0,0 @@
-/*
- * vtlb.c: guest virtual tlb handling module.
- * Copyright (c) 2004, Intel Corporation.
- *  Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com>
- *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
- *
- * Copyright (c) 2007, Intel Corporation.
- *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
- *  Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include "vcpu.h"
-
-#include <linux/rwsem.h>
-
-#include <asm/tlb.h>
-
-/*
- * Check to see if the address rid:va is translated by the TLB
- */
-
-static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va)
-{
-       return ((trp->p) && (trp->rid == rid)
-                               && ((va-trp->vadr) < PSIZE(trp->ps)));
-}
-
-/*
- * Only for GUEST TR format.
- */
-static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva)
-{
-       u64 sa1, ea1;
-
-       if (!trp->p || trp->rid != rid)
-               return 0;
-
-       sa1 = trp->vadr;
-       ea1 = sa1 + PSIZE(trp->ps) - 1;
-       eva -= 1;
-       if ((sva > ea1) || (sa1 > eva))
-               return 0;
-       else
-               return 1;
-
-}
-
-void machine_tlb_purge(u64 va, u64 ps)
-{
-       ia64_ptcl(va, ps << 2);
-}
-
-void local_flush_tlb_all(void)
-{
-       int i, j;
-       unsigned long flags, count0, count1;
-       unsigned long stride0, stride1, addr;
-
-       addr    = current_vcpu->arch.ptce_base;
-       count0  = current_vcpu->arch.ptce_count[0];
-       count1  = current_vcpu->arch.ptce_count[1];
-       stride0 = current_vcpu->arch.ptce_stride[0];
-       stride1 = current_vcpu->arch.ptce_stride[1];
-
-       local_irq_save(flags);
-       for (i = 0; i < count0; ++i) {
-               for (j = 0; j < count1; ++j) {
-                       ia64_ptce(addr);
-                       addr += stride1;
-               }
-               addr += stride0;
-       }
-       local_irq_restore(flags);
-       ia64_srlz_i();          /* srlz.i implies srlz.d */
-}
-
-int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref)
-{
-       union ia64_rr    vrr;
-       union ia64_pta   vpta;
-       struct  ia64_psr   vpsr;
-
-       vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-       vrr.val = vcpu_get_rr(vcpu, vadr);
-       vpta.val = vcpu_get_pta(vcpu);
-
-       if (vrr.ve & vpta.ve) {
-               switch (ref) {
-               case DATA_REF:
-               case NA_REF:
-                       return vpsr.dt;
-               case INST_REF:
-                       return vpsr.dt && vpsr.it && vpsr.ic;
-               case RSE_REF:
-                       return vpsr.dt && vpsr.rt;
-
-               }
-       }
-       return 0;
-}
-
-struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag)
-{
-       u64 index, pfn, rid, pfn_bits;
-
-       pfn_bits = vpta.size - 5 - 8;
-       pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr);
-       rid = _REGION_ID(vrr);
-       index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1));
-       *tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16);
-
-       return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) +
-                               (index << 5));
-}
-
-struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type)
-{
-
-       struct thash_data *trp;
-       int  i;
-       u64 rid;
-
-       rid = vcpu_get_rr(vcpu, va);
-       rid = rid & RR_RID_MASK;
-       if (type == D_TLB) {
-               if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
-                       for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
-                                               i < NDTRS; i++, trp++) {
-                               if (__is_tr_translated(trp, rid, va))
-                                       return trp;
-                       }
-               }
-       } else {
-               if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
-                       for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
-                                       i < NITRS; i++, trp++) {
-                               if (__is_tr_translated(trp, rid, va))
-                                       return trp;
-                       }
-               }
-       }
-
-       return NULL;
-}
-
-static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte)
-{
-       union ia64_rr rr;
-       struct thash_data *head;
-       unsigned long ps, gpaddr;
-
-       ps = itir_ps(itir);
-       rr.val = ia64_get_rr(ifa);
-
-        gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
-                                       (ifa & ((1UL << ps) - 1));
-
-       head = (struct thash_data *)ia64_thash(ifa);
-       head->etag = INVALID_TI_TAG;
-       ia64_mf();
-       head->page_flags = pte & ~PAGE_FLAGS_RV_MASK;
-       head->itir = rr.ps << 2;
-       head->etag = ia64_ttag(ifa);
-       head->gpaddr = gpaddr;
-}
-
-void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
-{
-       u64 i, dirty_pages = 1;
-       u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
-       vmm_spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
-       void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE;
-
-       dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
-
-       vmm_spin_lock(lock);
-       for (i = 0; i < dirty_pages; i++) {
-               /* avoid RMW */
-               if (!test_bit(base_gfn + i, dirty_bitmap))
-                       set_bit(base_gfn + i , dirty_bitmap);
-       }
-       vmm_spin_unlock(lock);
-}
-
-void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type)
-{
-       u64 phy_pte, psr;
-       union ia64_rr mrr;
-
-       mrr.val = ia64_get_rr(va);
-       phy_pte = translate_phy_pte(&pte, itir, va);
-
-       if (itir_ps(itir) >= mrr.ps) {
-               vhpt_insert(phy_pte, itir, va, pte);
-       } else {
-               phy_pte  &= ~PAGE_FLAGS_RV_MASK;
-               psr = ia64_clear_ic();
-               ia64_itc(type, va, phy_pte, itir_ps(itir));
-               paravirt_dv_serialize_data();
-               ia64_set_psr(psr);
-       }
-
-       if (!(pte&VTLB_PTE_IO))
-               mark_pages_dirty(v, pte, itir_ps(itir));
-}
-
-/*
- *   vhpt lookup
- */
-struct thash_data *vhpt_lookup(u64 va)
-{
-       struct thash_data *head;
-       u64 tag;
-
-       head = (struct thash_data *)ia64_thash(va);
-       tag = ia64_ttag(va);
-       if (head->etag == tag)
-               return head;
-       return NULL;
-}
-
-u64 guest_vhpt_lookup(u64 iha, u64 *pte)
-{
-       u64 ret;
-       struct thash_data *data;
-
-       data = __vtr_lookup(current_vcpu, iha, D_TLB);
-       if (data != NULL)
-               thash_vhpt_insert(current_vcpu, data->page_flags,
-                       data->itir, iha, D_TLB);
-
-       asm volatile ("rsm psr.ic|psr.i;;"
-                       "srlz.d;;"
-                       "ld8.s r9=[%1];;"
-                       "tnat.nz p6,p7=r9;;"
-                       "(p6) mov %0=1;"
-                       "(p6) mov r9=r0;"
-                       "(p7) extr.u r9=r9,0,53;;"
-                       "(p7) mov %0=r0;"
-                       "(p7) st8 [%2]=r9;;"
-                       "ssm psr.ic;;"
-                       "srlz.d;;"
-                       "ssm psr.i;;"
-                       "srlz.d;;"
-                       : "=&r"(ret) : "r"(iha), "r"(pte) : "memory");
-
-       return ret;
-}
-
-/*
- *  purge software guest tlb
- */
-
-static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-       struct thash_data *cur;
-       u64 start, curadr, size, psbits, tag, rr_ps, num;
-       union ia64_rr vrr;
-       struct thash_cb *hcb = &v->arch.vtlb;
-
-       vrr.val = vcpu_get_rr(v, va);
-       psbits = VMX(v, psbits[(va >> 61)]);
-       start = va & ~((1UL << ps) - 1);
-       while (psbits) {
-               curadr = start;
-               rr_ps = __ffs(psbits);
-               psbits &= ~(1UL << rr_ps);
-               num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps));
-               size = PSIZE(rr_ps);
-               vrr.ps = rr_ps;
-               while (num) {
-                       cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag);
-                       if (cur->etag == tag && cur->ps == rr_ps)
-                               cur->etag = INVALID_TI_TAG;
-                       curadr += size;
-                       num--;
-               }
-       }
-}
-
-
-/*
- *  purge VHPT and machine TLB
- */
-static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-       struct thash_data *cur;
-       u64 start, size, tag, num;
-       union ia64_rr rr;
-
-       start = va & ~((1UL << ps) - 1);
-       rr.val = ia64_get_rr(va);
-       size = PSIZE(rr.ps);
-       num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps));
-       while (num) {
-               cur = (struct thash_data *)ia64_thash(start);
-               tag = ia64_ttag(start);
-               if (cur->etag == tag)
-                       cur->etag = INVALID_TI_TAG;
-               start += size;
-               num--;
-       }
-       machine_tlb_purge(va, ps);
-}
-
-/*
- * Insert an entry into hash TLB or VHPT.
- * NOTES:
- *  1: When inserting VHPT to thash, "va" is a must covered
- *  address by the inserted machine VHPT entry.
- *  2: The format of entry is always in TLB.
- *  3: The caller need to make sure the new entry will not overlap
- *     with any existed entry.
- */
-void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va)
-{
-       struct thash_data *head;
-       union ia64_rr vrr;
-       u64 tag;
-       struct thash_cb *hcb = &v->arch.vtlb;
-
-       vrr.val = vcpu_get_rr(v, va);
-       vrr.ps = itir_ps(itir);
-       VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps);
-       head = vsa_thash(hcb->pta, va, vrr.val, &tag);
-       head->page_flags = pte;
-       head->itir = itir;
-       head->etag = tag;
-}
-
-int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type)
-{
-       struct thash_data  *trp;
-       int  i;
-       u64 end, rid;
-
-       rid = vcpu_get_rr(vcpu, va);
-       rid = rid & RR_RID_MASK;
-       end = va + PSIZE(ps);
-       if (type == D_TLB) {
-               if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
-                       for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
-                                       i < NDTRS; i++, trp++) {
-                               if (__is_tr_overlap(trp, rid, va, end))
-                                       return i;
-                       }
-               }
-       } else {
-               if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
-                       for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
-                                       i < NITRS; i++, trp++) {
-                               if (__is_tr_overlap(trp, rid, va, end))
-                                       return i;
-                       }
-               }
-       }
-       return -1;
-}
-
-/*
- * Purge entries in VTLB and VHPT
- */
-void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-       if (vcpu_quick_region_check(v->arch.tc_regions, va))
-               vtlb_purge(v, va, ps);
-       vhpt_purge(v, va, ps);
-}
-
-void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-       u64 old_va = va;
-       va = REGION_OFFSET(va);
-       if (vcpu_quick_region_check(v->arch.tc_regions, old_va))
-               vtlb_purge(v, va, ps);
-       vhpt_purge(v, va, ps);
-}
-
-u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
-{
-       u64 ps, ps_mask, paddr, maddr, io_mask;
-       union pte_flags phy_pte;
-
-       ps = itir_ps(itir);
-       ps_mask = ~((1UL << ps) - 1);
-       phy_pte.val = *pte;
-       paddr = *pte;
-       paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask);
-       maddr = kvm_get_mpt_entry(paddr >> PAGE_SHIFT);
-       io_mask = maddr & GPFN_IO_MASK;
-       if (io_mask && (io_mask != GPFN_PHYS_MMIO)) {
-               *pte |= VTLB_PTE_IO;
-               return -1;
-       }
-       maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) |
-                                       (paddr & ~PAGE_MASK);
-       phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT;
-       return phy_pte.val;
-}
-
-/*
- * Purge overlap TCs and then insert the new entry to emulate itc ops.
- * Notes: Only TC entry can purge and insert.
- */
-void  thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
-                                               u64 ifa, int type)
-{
-       u64 ps;
-       u64 phy_pte, io_mask, index;
-       union ia64_rr vrr, mrr;
-
-       ps = itir_ps(itir);
-       vrr.val = vcpu_get_rr(v, ifa);
-       mrr.val = ia64_get_rr(ifa);
-
-       index = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
-       io_mask = kvm_get_mpt_entry(index) & GPFN_IO_MASK;
-       phy_pte = translate_phy_pte(&pte, itir, ifa);
-
-       /* Ensure WB attribute if pte is related to a normal mem page,
-        * which is required by vga acceleration since qemu maps shared
-        * vram buffer with WB.
-        */
-       if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT) &&
-                       io_mask != GPFN_PHYS_MMIO) {
-               pte &= ~_PAGE_MA_MASK;
-               phy_pte &= ~_PAGE_MA_MASK;
-       }
-
-       vtlb_purge(v, ifa, ps);
-       vhpt_purge(v, ifa, ps);
-
-       if ((ps != mrr.ps) || (pte & VTLB_PTE_IO)) {
-               vtlb_insert(v, pte, itir, ifa);
-               vcpu_quick_region_set(VMX(v, tc_regions), ifa);
-       }
-       if (pte & VTLB_PTE_IO)
-               return;
-
-       if (ps >= mrr.ps)
-               vhpt_insert(phy_pte, itir, ifa, pte);
-       else {
-               u64 psr;
-               phy_pte  &= ~PAGE_FLAGS_RV_MASK;
-               psr = ia64_clear_ic();
-               ia64_itc(type, ifa, phy_pte, ps);
-               paravirt_dv_serialize_data();
-               ia64_set_psr(psr);
-       }
-       if (!(pte&VTLB_PTE_IO))
-               mark_pages_dirty(v, pte, ps);
-
-}
-
-/*
- * Purge all TCs or VHPT entries including those in Hash table.
- *
- */
-
-void thash_purge_all(struct kvm_vcpu *v)
-{
-       int i;
-       struct thash_data *head;
-       struct thash_cb  *vtlb, *vhpt;
-       vtlb = &v->arch.vtlb;
-       vhpt = &v->arch.vhpt;
-
-       for (i = 0; i < 8; i++)
-               VMX(v, psbits[i]) = 0;
-
-       head = vtlb->hash;
-       for (i = 0; i < vtlb->num; i++) {
-               head->page_flags = 0;
-               head->etag = INVALID_TI_TAG;
-               head->itir = 0;
-               head->next = 0;
-               head++;
-       };
-
-       head = vhpt->hash;
-       for (i = 0; i < vhpt->num; i++) {
-               head->page_flags = 0;
-               head->etag = INVALID_TI_TAG;
-               head->itir = 0;
-               head->next = 0;
-               head++;
-       };
-
-       local_flush_tlb_all();
-}
-
-/*
- * Lookup the hash table and its collision chain to find an entry
- * covering this address rid:va or the entry.
- *
- * INPUT:
- *  in: TLB format for both VHPT & TLB.
- */
-struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
-{
-       struct thash_data  *cch;
-       u64    psbits, ps, tag;
-       union ia64_rr vrr;
-
-       struct thash_cb *hcb = &v->arch.vtlb;
-
-       cch = __vtr_lookup(v, va, is_data);
-       if (cch)
-               return cch;
-
-       if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0)
-               return NULL;
-
-       psbits = VMX(v, psbits[(va >> 61)]);
-       vrr.val = vcpu_get_rr(v, va);
-       while (psbits) {
-               ps = __ffs(psbits);
-               psbits &= ~(1UL << ps);
-               vrr.ps = ps;
-               cch = vsa_thash(hcb->pta, va, vrr.val, &tag);
-               if (cch->etag == tag && cch->ps == ps)
-                       return cch;
-       }
-
-       return NULL;
-}
-
-/*
- * Initialize internal control data before service.
- */
-void thash_init(struct thash_cb *hcb, u64 sz)
-{
-       int i;
-       struct thash_data *head;
-
-       hcb->pta.val = (unsigned long)hcb->hash;
-       hcb->pta.vf = 1;
-       hcb->pta.ve = 1;
-       hcb->pta.size = sz;
-       head = hcb->hash;
-       for (i = 0; i < hcb->num; i++) {
-               head->page_flags = 0;
-               head->itir = 0;
-               head->etag = INVALID_TI_TAG;
-               head->next = 0;
-               head++;
-       }
-}
-
-u64 kvm_get_mpt_entry(u64 gpfn)
-{
-       u64 *base = (u64 *) KVM_P2M_BASE;
-
-       if (gpfn >= (KVM_P2M_SIZE >> 3))
-               panic_vm(current_vcpu, "Invalid gpfn =%lx\n", gpfn);
-
-       return *(base + gpfn);
-}
-
-u64 kvm_lookup_mpa(u64 gpfn)
-{
-       u64 maddr;
-       maddr = kvm_get_mpt_entry(gpfn);
-       return maddr&_PAGE_PPN_MASK;
-}
-
-u64 kvm_gpa_to_mpa(u64 gpa)
-{
-       u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT);
-       return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
-}
-
-/*
- * Fetch guest bundle code.
- * INPUT:
- *  gip: guest ip
- *  pbundle: used to return fetched bundle.
- */
-int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle)
-{
-       u64     gpip = 0;   /* guest physical IP*/
-       u64     *vpa;
-       struct thash_data    *tlb;
-       u64     maddr;
-
-       if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) {
-               /* I-side physical mode */
-               gpip = gip;
-       } else {
-               tlb = vtlb_lookup(vcpu, gip, I_TLB);
-               if (tlb)
-                       gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) |
-                               (gip & (PSIZE(tlb->ps) - 1));
-       }
-       if (gpip) {
-               maddr = kvm_gpa_to_mpa(gpip);
-       } else {
-               tlb = vhpt_lookup(gip);
-               if (tlb == NULL) {
-                       ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2);
-                       return IA64_FAULT;
-               }
-               maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps)
-                                       | (gip & (PSIZE(tlb->ps) - 1));
-       }
-       vpa = (u64 *)__kvm_va(maddr);
-
-       pbundle->i64[0] = *vpa++;
-       pbundle->i64[1] = *vpa;
-
-       return IA64_NO_FAULT;
-}
-
-void kvm_init_vhpt(struct kvm_vcpu *v)
-{
-       v->arch.vhpt.num = VHPT_NUM_ENTRIES;
-       thash_init(&v->arch.vhpt, VHPT_SHIFT);
-       ia64_set_pta(v->arch.vhpt.pta.val);
-       /*Enable VHPT here?*/
-}
-
-void kvm_init_vtlb(struct kvm_vcpu *v)
-{
-       v->arch.vtlb.num = VTLB_NUM_ENTRIES;
-       thash_init(&v->arch.vtlb, VTLB_SHIFT);
-}
diff --git a/arch/nios2/Makefile b/arch/nios2/Makefile

index e142c9ee51fae7246fc72c65a061db4377e15b86..2328f82ba2a8a47527c45dc99ba69c97e8bf6f0a 100644 (file)
--- a/arch/nios2/Makefile
+++ b/arch/nios2/Makefile
@@ -14,6 +14,8 @@
  # Nios2 port by Wind River Systems Inc trough:
  #   fredrik.markstrom@gmail.com and ivarholmqvist@gmail.com
  
+KBUILD_DEFCONFIG := 3c120_defconfig
+
  UTS_SYSNAME = Linux
  
  export MMU
diff --git a/arch/nios2/include/asm/io.h b/arch/nios2/include/asm/io.h

index 9102bfd3fa1c9edb37efd6842fcaa18535742933..6e24d7cceb0c1407db80f7354e662f11b5f7d339 100644 (file)
--- a/arch/nios2/include/asm/io.h
+++ b/arch/nios2/include/asm/io.h
@@ -45,6 +45,8 @@ static inline void iounmap(void __iomem *addr)
         __iounmap(addr);
  }
  
+#define ioremap_wc ioremap_nocache
+
  /* Pages to physical address... */
  #define page_to_phys(page)     virt_to_phys(page_to_virt(page))
  #define page_to_bus(page)      page_to_virt(page)
diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h

index acedc0a2860e689434e03691c6bcb3f51b6ff0fb..caa51ff85a3c7d0ac7c57cf034758185aad64724 100644 (file)
--- a/arch/nios2/include/asm/uaccess.h
+++ b/arch/nios2/include/asm/uaccess.h
@@ -168,7 +168,7 @@ do {                                                                        \
         const __typeof__(*(ptr)) __user *__gu_ptr = (ptr);              \
         unsigned long __gu_val;                                         \
         __get_user_common(__gu_val, sizeof(*(ptr)), __gu_ptr, __gu_err);\
-       (x) = (__typeof__(x))__gu_val;                                  \
+       (x) = (__force __typeof__(x))__gu_val;                          \
         __gu_err;                                                       \
         })
  
@@ -180,7 +180,7 @@ do {                                                                        \
         if (access_ok(VERIFY_READ,  __gu_ptr, sizeof(*__gu_ptr)))       \
                 __get_user_common(__gu_val, sizeof(*__gu_ptr),          \
                         __gu_ptr, __gu_err);                            \
-       (x) = (__typeof__(x))__gu_val;                                  \
+       (x) = (__force __typeof__(x))__gu_val;                          \
         __gu_err;                                                       \
  })
  
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h

index 6acf0c2a0f99cf82474fbf0b2798700b6984bfff..942c7b1678e3f7ed2af0f9723e38c424c9287728 100644 (file)
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -170,8 +170,6 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
                         unsigned long *nb_ret);
  extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr,
                         unsigned long gpa, bool dirty);
-extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
-                       long pte_index, unsigned long pteh, unsigned long ptel);
  extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
                         long pte_index, unsigned long pteh, unsigned long ptel,
                         pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h

index 0aa817933e6a5c1fccdd790ab33d1668a2577ceb..2d81e202bdccb7e2d06b128e727da0fc84c4f125 100644 (file)
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -37,7 +37,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
  
  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
  #define KVM_DEFAULT_HPT_ORDER  24      /* 16MB HPT by default */
-extern unsigned long kvm_rma_pages;
  #endif
  
  #define VRMA_VSID      0x1ffffffUL     /* 1TB VSID reserved for VRMA */
@@ -148,7 +147,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
         /* This covers 14..54 bits of va*/
         rb = (v & ~0x7fUL) << 16;               /* AVA field */
  
-       rb |= v >> (62 - 8);                    /*  B field */
+       rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8;   /*  B field */
         /*
          * AVA in v had cleared lower 23 bits. We need to derive
          * that from pteg index
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h

index 047855619cc48138653f6a93cba3aabc4d9243da..7efd666a3fa7801f4a19e559541d7c7c7ebaf95c 100644 (file)
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -180,11 +180,6 @@ struct kvmppc_spapr_tce_table {
         struct page *pages[0];
  };
  
-struct kvm_rma_info {
-       atomic_t use_count;
-       unsigned long base_pfn;
-};
-
  /* XICS components, defined in book3s_xics.c */
  struct kvmppc_xics;
  struct kvmppc_icp;
@@ -214,16 +209,9 @@ struct revmap_entry {
  #define KVMPPC_RMAP_PRESENT    0x100000000ul
  #define KVMPPC_RMAP_INDEX      0xfffffffful
  
-/* Low-order bits in memslot->arch.slot_phys[] */
-#define KVMPPC_PAGE_ORDER_MASK 0x1f
-#define KVMPPC_PAGE_NO_CACHE   HPTE_R_I        /* 0x20 */
-#define KVMPPC_PAGE_WRITETHRU  HPTE_R_W        /* 0x40 */
-#define KVMPPC_GOT_PAGE                0x80
-
  struct kvm_arch_memory_slot {
  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
         unsigned long *rmap;
-       unsigned long *slot_phys;
  #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
  };
  
@@ -242,14 +230,12 @@ struct kvm_arch {
         struct kvm_rma_info *rma;
         unsigned long vrma_slb_v;
         int rma_setup_done;
-       int using_mmu_notifiers;
         u32 hpt_order;
         atomic_t vcpus_running;
         u32 online_vcores;
         unsigned long hpt_npte;
         unsigned long hpt_mask;
         atomic_t hpte_mod_interest;
-       spinlock_t slot_phys_lock;
         cpumask_t need_tlb_flush;
         int hpt_cma_alloc;
  #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
@@ -297,6 +283,7 @@ struct kvmppc_vcore {
         struct list_head runnable_threads;
         spinlock_t lock;
         wait_queue_head_t wq;
+       spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
         u64 stolen_tb;
         u64 preempt_tb;
         struct kvm_vcpu *runner;
@@ -308,6 +295,7 @@ struct kvmppc_vcore {
         ulong dpdes;            /* doorbell state (POWER8) */
         void *mpp_buffer; /* Micro Partition Prefetch buffer */
         bool mpp_buffer_is_valid;
+       ulong conferring_threads;
  };
  
  #define VCORE_ENTRY_COUNT(vc)  ((vc)->entry_exit_count & 0xff)
@@ -664,6 +652,8 @@ struct kvm_vcpu_arch {
         spinlock_t tbacct_lock;
         u64 busy_stolen;
         u64 busy_preempt;
+
+       u32 emul_inst;
  #endif
  };
  
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h

index a6dcdb6d13c1ffdf69e157a234a34daa6ecb7eb3..46bf652c91697c384e8a5323ad5ed428014f9980 100644 (file)
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -170,8 +170,6 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
                              unsigned long ioba, unsigned long tce);
  extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
                              unsigned long ioba);
-extern struct kvm_rma_info *kvm_alloc_rma(void);
-extern void kvm_release_rma(struct kvm_rma_info *ri);
  extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
  extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
  extern int kvmppc_core_init_vm(struct kvm *kvm);
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c

index c161ef3f28a1c3d417c6bee15e5b76024e43569f..24d78e1871c9d5e9855e9aaddbf867d187fd697b 100644 (file)
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -489,7 +489,6 @@ int main(void)
         DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
         DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
         DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
-       DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
         DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
         DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
         DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
@@ -499,6 +498,7 @@ int main(void)
         DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
         DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
         DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
+       DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst));
  #endif
  #ifdef CONFIG_PPC_BOOK3S
         DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig

index 602eb51d20bc67a0b608551825e6be93758d299c..f5769f19ae256906dd750dd6fcd4ad5448e55bbd 100644 (file)
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -172,6 +172,7 @@ config KVM_XICS
         depends on KVM_BOOK3S_64 && !KVM_MPIC
         select HAVE_KVM_IRQCHIP
         select HAVE_KVM_IRQFD
+       default y
         ---help---
           Include support for the XICS (eXternal Interrupt Controller
           Specification) interrupt controller architecture used on
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c

index b32db4b9536132fb4c6cbdbcaa8041b4b84b0cc7..888bf466d8c6bb8e33088472c5573081afe684a6 100644 (file)
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -64,14 +64,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
         { NULL }
  };
  
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
  void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
  {
         if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c

index cd0b0730e29e511f1eb4654e7087ccd0ace4bc3b..a2eb6d354a57d093c3e46c3f038771176261c38c 100644 (file)
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -78,11 +78,6 @@ static inline bool sr_kp(u32 sr_raw)
         return (sr_raw & 0x20000000) ? true: false;
  }
  
-static inline bool sr_nx(u32 sr_raw)
-{
-       return (sr_raw & 0x10000000) ? true: false;
-}
-
  static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
                                           struct kvmppc_pte *pte, bool data,
                                           bool iswrite);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c

index d40770248b6a10dbb9846b52cbf76de25bfef1d0..534acb3c6c3df87e2ff2a063fa1dfd6b795cf4a8 100644 (file)
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,8 +37,7 @@
  #include <asm/ppc-opcode.h>
  #include <asm/cputable.h>
  
-/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
-#define MAX_LPID_970   63
+#include "trace_hv.h"
  
  /* Power architecture requires HPT is at least 256kB */
  #define PPC_MIN_HPT_ORDER      18
@@ -229,14 +228,9 @@ int kvmppc_mmu_hv_init(void)
         if (!cpu_has_feature(CPU_FTR_HVMODE))
                 return -EINVAL;
  
-       /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */
-       if (cpu_has_feature(CPU_FTR_ARCH_206)) {
-               host_lpid = mfspr(SPRN_LPID);   /* POWER7 */
-               rsvd_lpid = LPID_RSVD;
-       } else {
-               host_lpid = 0;                  /* PPC970 */
-               rsvd_lpid = MAX_LPID_970;
-       }
+       /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
+       host_lpid = mfspr(SPRN_LPID);
+       rsvd_lpid = LPID_RSVD;
  
         kvmppc_init_lpid(rsvd_lpid + 1);
  
@@ -259,130 +253,12 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
         kvmppc_set_msr(vcpu, msr);
  }
  
-/*
- * This is called to get a reference to a guest page if there isn't
- * one already in the memslot->arch.slot_phys[] array.
- */
-static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
-                                 struct kvm_memory_slot *memslot,
-                                 unsigned long psize)
-{
-       unsigned long start;
-       long np, err;
-       struct page *page, *hpage, *pages[1];
-       unsigned long s, pgsize;
-       unsigned long *physp;
-       unsigned int is_io, got, pgorder;
-       struct vm_area_struct *vma;
-       unsigned long pfn, i, npages;
-
-       physp = memslot->arch.slot_phys;
-       if (!physp)
-               return -EINVAL;
-       if (physp[gfn - memslot->base_gfn])
-               return 0;
-
-       is_io = 0;
-       got = 0;
-       page = NULL;
-       pgsize = psize;
-       err = -EINVAL;
-       start = gfn_to_hva_memslot(memslot, gfn);
-
-       /* Instantiate and get the page we want access to */
-       np = get_user_pages_fast(start, 1, 1, pages);
-       if (np != 1) {
-               /* Look up the vma for the page */
-               down_read(&current->mm->mmap_sem);
-               vma = find_vma(current->mm, start);
-               if (!vma || vma->vm_start > start ||
-                   start + psize > vma->vm_end ||
-                   !(vma->vm_flags & VM_PFNMAP))
-                       goto up_err;
-               is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
-               pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
-               /* check alignment of pfn vs. requested page size */
-               if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1)))
-                       goto up_err;
-               up_read(&current->mm->mmap_sem);
-
-       } else {
-               page = pages[0];
-               got = KVMPPC_GOT_PAGE;
-
-               /* See if this is a large page */
-               s = PAGE_SIZE;
-               if (PageHuge(page)) {
-                       hpage = compound_head(page);
-                       s <<= compound_order(hpage);
-                       /* Get the whole large page if slot alignment is ok */
-                       if (s > psize && slot_is_aligned(memslot, s) &&
-                           !(memslot->userspace_addr & (s - 1))) {
-                               start &= ~(s - 1);
-                               pgsize = s;
-                               get_page(hpage);
-                               put_page(page);
-                               page = hpage;
-                       }
-               }
-               if (s < psize)
-                       goto out;
-               pfn = page_to_pfn(page);
-       }
-
-       npages = pgsize >> PAGE_SHIFT;
-       pgorder = __ilog2(npages);
-       physp += (gfn - memslot->base_gfn) & ~(npages - 1);
-       spin_lock(&kvm->arch.slot_phys_lock);
-       for (i = 0; i < npages; ++i) {
-               if (!physp[i]) {
-                       physp[i] = ((pfn + i) << PAGE_SHIFT) +
-                               got + is_io + pgorder;
-                       got = 0;
-               }
-       }
-       spin_unlock(&kvm->arch.slot_phys_lock);
-       err = 0;
-
- out:
-       if (got)
-               put_page(page);
-       return err;
-
- up_err:
-       up_read(&current->mm->mmap_sem);
-       return err;
-}
-
  long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
                                 long pte_index, unsigned long pteh,
                                 unsigned long ptel, unsigned long *pte_idx_ret)
  {
-       unsigned long psize, gpa, gfn;
-       struct kvm_memory_slot *memslot;
         long ret;
  
-       if (kvm->arch.using_mmu_notifiers)
-               goto do_insert;
-
-       psize = hpte_page_size(pteh, ptel);
-       if (!psize)
-               return H_PARAMETER;
-
-       pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
-
-       /* Find the memslot (if any) for this address */
-       gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
-       gfn = gpa >> PAGE_SHIFT;
-       memslot = gfn_to_memslot(kvm, gfn);
-       if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) {
-               if (!slot_is_aligned(memslot, psize))
-                       return H_PARAMETER;
-               if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
-                       return H_PARAMETER;
-       }
-
- do_insert:
         /* Protect linux PTE lookup from page table destruction */
         rcu_read_lock_sched();  /* this disables preemption too */
         ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
@@ -397,19 +273,6 @@ long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
  
  }
  
-/*
- * We come here on a H_ENTER call from the guest when we are not
- * using mmu notifiers and we don't have the requested page pinned
- * already.
- */
-long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
-                            long pte_index, unsigned long pteh,
-                            unsigned long ptel)
-{
-       return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
-                                         pteh, ptel, &vcpu->arch.gpr[4]);
-}
-
  static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
                                                          gva_t eaddr)
  {
@@ -494,7 +357,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
         gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G));
  
         /* Storage key permission check for POWER7 */
-       if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) {
+       if (data && virtmode) {
                 int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr);
                 if (amrfield & 1)
                         gpte->may_read = 0;
@@ -622,14 +485,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
         gfn = gpa >> PAGE_SHIFT;
         memslot = gfn_to_memslot(kvm, gfn);
  
+       trace_kvm_page_fault_enter(vcpu, hpte, memslot, ea, dsisr);
+
         /* No memslot means it's an emulated MMIO region */
         if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
                 return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
                                               dsisr & DSISR_ISSTORE);
  
-       if (!kvm->arch.using_mmu_notifiers)
-               return -EFAULT;         /* should never get here */
-
         /*
          * This should never happen, because of the slot_is_aligned()
          * check in kvmppc_do_h_enter().
@@ -641,6 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
         mmu_seq = kvm->mmu_notifier_seq;
         smp_rmb();
  
+       ret = -EFAULT;
         is_io = 0;
         pfn = 0;
         page = NULL;
@@ -664,7 +527,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                 }
                 up_read(&current->mm->mmap_sem);
                 if (!pfn)
-                       return -EFAULT;
+                       goto out_put;
         } else {
                 page = pages[0];
                 pfn = page_to_pfn(page);
@@ -694,14 +557,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                 }
         }
  
-       ret = -EFAULT;
         if (psize > pte_size)
                 goto out_put;
  
         /* Check WIMG vs. the actual page we're accessing */
         if (!hpte_cache_flags_ok(r, is_io)) {
                 if (is_io)
-                       return -EFAULT;
+                       goto out_put;
+
                 /*
                  * Allow guest to map emulated device memory as
                  * uncacheable, but actually make it cacheable.
@@ -765,6 +628,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                 SetPageDirty(page);
  
   out_put:
+       trace_kvm_page_fault_exit(vcpu, hpte, ret);
+
         if (page) {
                 /*
                  * We drop pages[0] here, not page because page might
@@ -895,8 +760,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
                 psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
                 if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
                     hpte_rpn(ptel, psize) == gfn) {
-                       if (kvm->arch.using_mmu_notifiers)
-                               hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
+                       hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
                         kvmppc_invalidate_hpte(kvm, hptep, i);
                         /* Harvest R and C */
                         rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
@@ -914,15 +778,13 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
  
  int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
  {
-       if (kvm->arch.using_mmu_notifiers)
-               kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+       kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
         return 0;
  }
  
  int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
  {
-       if (kvm->arch.using_mmu_notifiers)
-               kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
+       kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
         return 0;
  }
  
@@ -1004,8 +866,6 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
  
  int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
  {
-       if (!kvm->arch.using_mmu_notifiers)
-               return 0;
         return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp);
  }
  
@@ -1042,15 +902,11 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
  
  int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
  {
-       if (!kvm->arch.using_mmu_notifiers)
-               return 0;
         return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
  }
  
  void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
  {
-       if (!kvm->arch.using_mmu_notifiers)
-               return;
         kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
  }
  
@@ -1117,8 +973,11 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
                 }
  
                 /* Now check and modify the HPTE */
-               if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID)))
+               if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) {
+                       /* unlock and continue */
+                       hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
                         continue;
+               }
  
                 /* need to make it temporarily absent so C is stable */
                 hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
@@ -1206,35 +1065,17 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
         struct page *page, *pages[1];
         int npages;
         unsigned long hva, offset;
-       unsigned long pa;
-       unsigned long *physp;
         int srcu_idx;
  
         srcu_idx = srcu_read_lock(&kvm->srcu);
         memslot = gfn_to_memslot(kvm, gfn);
         if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
                 goto err;
-       if (!kvm->arch.using_mmu_notifiers) {
-               physp = memslot->arch.slot_phys;
-               if (!physp)
-                       goto err;
-               physp += gfn - memslot->base_gfn;
-               pa = *physp;
-               if (!pa) {
-                       if (kvmppc_get_guest_page(kvm, gfn, memslot,
-                                                 PAGE_SIZE) < 0)
-                               goto err;
-                       pa = *physp;
-               }
-               page = pfn_to_page(pa >> PAGE_SHIFT);
-               get_page(page);
-       } else {
-               hva = gfn_to_hva_memslot(memslot, gfn);
-               npages = get_user_pages_fast(hva, 1, 1, pages);
-               if (npages < 1)
-                       goto err;
-               page = pages[0];
-       }
+       hva = gfn_to_hva_memslot(memslot, gfn);
+       npages = get_user_pages_fast(hva, 1, 1, pages);
+       if (npages < 1)
+               goto err;
+       page = pages[0];
         srcu_read_unlock(&kvm->srcu, srcu_idx);
  
         offset = gpa & (PAGE_SIZE - 1);
@@ -1258,7 +1099,7 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
  
         put_page(page);
  
-       if (!dirty || !kvm->arch.using_mmu_notifiers)
+       if (!dirty)
                 return;
  
         /* We need to mark this page dirty in the rmap chain */
@@ -1539,9 +1380,15 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
                 hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
                 lbuf = (unsigned long __user *)buf;
                 for (j = 0; j < hdr.n_valid; ++j) {
+                       __be64 hpte_v;
+                       __be64 hpte_r;
+
                         err = -EFAULT;
-                       if (__get_user(v, lbuf) || __get_user(r, lbuf + 1))
+                       if (__get_user(hpte_v, lbuf) ||
+                           __get_user(hpte_r, lbuf + 1))
                                 goto out;
+                       v = be64_to_cpu(hpte_v);
+                       r = be64_to_cpu(hpte_r);
                         err = -EINVAL;
                         if (!(v & HPTE_V_VALID))
                                 goto out;
@@ -1652,10 +1499,7 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
  {
         struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
  
-       if (cpu_has_feature(CPU_FTR_ARCH_206))
-               vcpu->arch.slb_nr = 32;         /* POWER7 */
-       else
-               vcpu->arch.slb_nr = 64;
+       vcpu->arch.slb_nr = 32;         /* POWER7/POWER8 */
  
         mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
         mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c

index e63587d30b70819b15bb215ca858a7b722795a21..de4018a1bc4bd290ecd0e970bd51960b281497c3 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -58,6 +58,9 @@
  
  #include "book3s.h"
  
+#define CREATE_TRACE_POINTS
+#include "trace_hv.h"
+
  /* #define EXIT_DEBUG */
  /* #define EXIT_DEBUG_SIMPLE */
  /* #define EXIT_DEBUG_INT */
@@ -135,11 +138,10 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
   * stolen.
   *
   * Updates to busy_stolen are protected by arch.tbacct_lock;
- * updates to vc->stolen_tb are protected by the arch.tbacct_lock
- * of the vcpu that has taken responsibility for running the vcore
- * (i.e. vc->runner).  The stolen times are measured in units of
- * timebase ticks.  (Note that the != TB_NIL checks below are
- * purely defensive; they should never fail.)
+ * updates to vc->stolen_tb are protected by vc->stoltb_lock.
+ * The stolen times are measured in units of timebase ticks.
+ * (Note that the != TB_NIL checks below are purely defensive;
+ * they should never fail.)
   */
  
  static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
@@ -147,12 +149,21 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
         struct kvmppc_vcore *vc = vcpu->arch.vcore;
         unsigned long flags;
  
-       spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
-       if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE &&
-           vc->preempt_tb != TB_NIL) {
-               vc->stolen_tb += mftb() - vc->preempt_tb;
-               vc->preempt_tb = TB_NIL;
+       /*
+        * We can test vc->runner without taking the vcore lock,
+        * because only this task ever sets vc->runner to this
+        * vcpu, and once it is set to this vcpu, only this task
+        * ever sets it to NULL.
+        */
+       if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
+               spin_lock_irqsave(&vc->stoltb_lock, flags);
+               if (vc->preempt_tb != TB_NIL) {
+                       vc->stolen_tb += mftb() - vc->preempt_tb;
+                       vc->preempt_tb = TB_NIL;
+               }
+               spin_unlock_irqrestore(&vc->stoltb_lock, flags);
         }
+       spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
         if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
             vcpu->arch.busy_preempt != TB_NIL) {
                 vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
@@ -166,9 +177,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
         struct kvmppc_vcore *vc = vcpu->arch.vcore;
         unsigned long flags;
  
-       spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
-       if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+       if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
+               spin_lock_irqsave(&vc->stoltb_lock, flags);
                 vc->preempt_tb = mftb();
+               spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+       }
+       spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
         if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
                 vcpu->arch.busy_preempt = mftb();
         spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
@@ -191,9 +205,6 @@ int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
         struct kvmppc_vcore *vc = vcpu->arch.vcore;
  
         if (arch_compat) {
-               if (!cpu_has_feature(CPU_FTR_ARCH_206))
-                       return -EINVAL; /* 970 has no compat mode support */
-
                 switch (arch_compat) {
                 case PVR_ARCH_205:
                         /*
@@ -505,25 +516,14 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
  static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
  {
         u64 p;
+       unsigned long flags;
  
-       /*
-        * If we are the task running the vcore, then since we hold
-        * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb
-        * can't be updated, so we don't need the tbacct_lock.
-        * If the vcore is inactive, it can't become active (since we
-        * hold the vcore lock), so the vcpu load/put functions won't
-        * update stolen_tb/preempt_tb, and we don't need tbacct_lock.
-        */
+       spin_lock_irqsave(&vc->stoltb_lock, flags);
+       p = vc->stolen_tb;
         if (vc->vcore_state != VCORE_INACTIVE &&
-           vc->runner->arch.run_task != current) {
-               spin_lock_irq(&vc->runner->arch.tbacct_lock);
-               p = vc->stolen_tb;
-               if (vc->preempt_tb != TB_NIL)
-                       p += now - vc->preempt_tb;
-               spin_unlock_irq(&vc->runner->arch.tbacct_lock);
-       } else {
-               p = vc->stolen_tb;
-       }
+           vc->preempt_tb != TB_NIL)
+               p += now - vc->preempt_tb;
+       spin_unlock_irqrestore(&vc->stoltb_lock, flags);
         return p;
  }
  
@@ -607,10 +607,45 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
         }
  }
  
+static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
+{
+       struct kvmppc_vcore *vcore = target->arch.vcore;
+
+       /*
+        * We expect to have been called by the real mode handler
+        * (kvmppc_rm_h_confer()) which would have directly returned
+        * H_SUCCESS if the source vcore wasn't idle (e.g. if it may
+        * have useful work to do and should not confer) so we don't
+        * recheck that here.
+        */
+
+       spin_lock(&vcore->lock);
+       if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
+           vcore->vcore_state != VCORE_INACTIVE)
+               target = vcore->runner;
+       spin_unlock(&vcore->lock);
+
+       return kvm_vcpu_yield_to(target);
+}
+
+static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
+{
+       int yield_count = 0;
+       struct lppaca *lppaca;
+
+       spin_lock(&vcpu->arch.vpa_update_lock);
+       lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
+       if (lppaca)
+               yield_count = lppaca->yield_count;
+       spin_unlock(&vcpu->arch.vpa_update_lock);
+       return yield_count;
+}
+
  int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
  {
         unsigned long req = kvmppc_get_gpr(vcpu, 3);
         unsigned long target, ret = H_SUCCESS;
+       int yield_count;
         struct kvm_vcpu *tvcpu;
         int idx, rc;
  
@@ -619,14 +654,6 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
                 return RESUME_HOST;
  
         switch (req) {
-       case H_ENTER:
-               idx = srcu_read_lock(&vcpu->kvm->srcu);
-               ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
-                                             kvmppc_get_gpr(vcpu, 5),
-                                             kvmppc_get_gpr(vcpu, 6),
-                                             kvmppc_get_gpr(vcpu, 7));
-               srcu_read_unlock(&vcpu->kvm->srcu, idx);
-               break;
         case H_CEDE:
                 break;
         case H_PROD:
@@ -654,7 +681,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
                         ret = H_PARAMETER;
                         break;
                 }
-               kvm_vcpu_yield_to(tvcpu);
+               yield_count = kvmppc_get_gpr(vcpu, 5);
+               if (kvmppc_get_yield_count(tvcpu) != yield_count)
+                       break;
+               kvm_arch_vcpu_yield_to(tvcpu);
                 break;
         case H_REGISTER_VPA:
                 ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
@@ -769,6 +799,8 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
                 vcpu->stat.ext_intr_exits++;
                 r = RESUME_GUEST;
                 break;
+       /* HMI is a hypervisor interrupt the host has handled; resume guest. */
+       case BOOK3S_INTERRUPT_HMI:
         case BOOK3S_INTERRUPT_PERFMON:
                 r = RESUME_GUEST;
                 break;
@@ -837,6 +869,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
          * Accordingly return to Guest or Host.
          */
         case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
+               if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED)
+                       vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ?
+                               swab32(vcpu->arch.emul_inst) :
+                               vcpu->arch.emul_inst;
                 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
                         r = kvmppc_emulate_debug_inst(run, vcpu);
                 } else {
@@ -1357,6 +1393,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
  
         INIT_LIST_HEAD(&vcore->runnable_threads);
         spin_lock_init(&vcore->lock);
+       spin_lock_init(&vcore->stoltb_lock);
         init_waitqueue_head(&vcore->wq);
         vcore->preempt_tb = TB_NIL;
         vcore->lpcr = kvm->arch.lpcr;
@@ -1694,9 +1731,11 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
         vc->n_woken = 0;
         vc->nap_count = 0;
         vc->entry_exit_count = 0;
+       vc->preempt_tb = TB_NIL;
         vc->vcore_state = VCORE_STARTING;
         vc->in_guest = 0;
         vc->napping_threads = 0;
+       vc->conferring_threads = 0;
  
         /*
          * Updating any of the vpas requires calling kvmppc_pin_guest_page,
@@ -1726,6 +1765,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
         list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
                 kvmppc_start_thread(vcpu);
                 kvmppc_create_dtl_entry(vcpu, vc);
+               trace_kvm_guest_enter(vcpu);
         }
  
         /* Set this explicitly in case thread 0 doesn't have a vcpu */
@@ -1734,6 +1774,9 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
  
         vc->vcore_state = VCORE_RUNNING;
         preempt_disable();
+
+       trace_kvmppc_run_core(vc, 0);
+
         spin_unlock(&vc->lock);
  
         kvm_guest_enter();
@@ -1779,6 +1822,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
                     kvmppc_core_pending_dec(vcpu))
                         kvmppc_core_dequeue_dec(vcpu);
  
+               trace_kvm_guest_exit(vcpu);
+
                 ret = RESUME_GUEST;
                 if (vcpu->arch.trap)
                         ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
@@ -1804,6 +1849,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
                         wake_up(&vcpu->arch.cpu_run);
                 }
         }
+
+       trace_kvmppc_run_core(vc, 1);
  }
  
  /*
@@ -1826,15 +1873,37 @@ static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
   */
  static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
  {
+       struct kvm_vcpu *vcpu;
+       int do_sleep = 1;
+
         DEFINE_WAIT(wait);
  
         prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+
+       /*
+        * Check one last time for pending exceptions and ceded state after
+        * we put ourselves on the wait queue
+        */
+       list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+               if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
+                       do_sleep = 0;
+                       break;
+               }
+       }
+
+       if (!do_sleep) {
+               finish_wait(&vc->wq, &wait);
+               return;
+       }
+
         vc->vcore_state = VCORE_SLEEPING;
+       trace_kvmppc_vcore_blocked(vc, 0);
         spin_unlock(&vc->lock);
         schedule();
         finish_wait(&vc->wq, &wait);
         spin_lock(&vc->lock);
         vc->vcore_state = VCORE_INACTIVE;
+       trace_kvmppc_vcore_blocked(vc, 1);
  }
  
  static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
@@ -1843,6 +1912,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
         struct kvmppc_vcore *vc;
         struct kvm_vcpu *v, *vn;
  
+       trace_kvmppc_run_vcpu_enter(vcpu);
+
         kvm_run->exit_reason = 0;
         vcpu->arch.ret = RESUME_GUEST;
         vcpu->arch.trap = 0;
@@ -1872,6 +1943,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                     VCORE_EXIT_COUNT(vc) == 0) {
                         kvmppc_create_dtl_entry(vcpu, vc);
                         kvmppc_start_thread(vcpu);
+                       trace_kvm_guest_enter(vcpu);
                 } else if (vc->vcore_state == VCORE_SLEEPING) {
                         wake_up(&vc->wq);
                 }
@@ -1936,6 +2008,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                 wake_up(&v->arch.cpu_run);
         }
  
+       trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
         spin_unlock(&vc->lock);
         return vcpu->arch.ret;
  }
@@ -1962,7 +2035,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
         /* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
         smp_mb();
  
-       /* On the first time here, set up HTAB and VRMA or RMA */
+       /* On the first time here, set up HTAB and VRMA */
         if (!vcpu->kvm->arch.rma_setup_done) {
                 r = kvmppc_hv_setup_htab_rma(vcpu);
                 if (r)
@@ -1981,7 +2054,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
  
                 if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
                     !(vcpu->arch.shregs.msr & MSR_PR)) {
+                       trace_kvm_hcall_enter(vcpu);
                         r = kvmppc_pseries_do_hcall(vcpu);
+                       trace_kvm_hcall_exit(vcpu, r);
                         kvmppc_core_prepare_to_enter(vcpu);
                 } else if (r == RESUME_PAGE_FAULT) {
                         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -1997,98 +2072,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
         return r;
  }
  
-
-/* Work out RMLS (real mode limit selector) field value for a given RMA size.
-   Assumes POWER7 or PPC970. */
-static inline int lpcr_rmls(unsigned long rma_size)
-{
-       switch (rma_size) {
-       case 32ul << 20:        /* 32 MB */
-               if (cpu_has_feature(CPU_FTR_ARCH_206))
-                       return 8;       /* only supported on POWER7 */
-               return -1;
-       case 64ul << 20:        /* 64 MB */
-               return 3;
-       case 128ul << 20:       /* 128 MB */
-               return 7;
-       case 256ul << 20:       /* 256 MB */
-               return 4;
-       case 1ul << 30:         /* 1 GB */
-               return 2;
-       case 16ul << 30:        /* 16 GB */
-               return 1;
-       case 256ul << 30:       /* 256 GB */
-               return 0;
-       default:
-               return -1;
-       }
-}
-
-static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-       struct page *page;
-       struct kvm_rma_info *ri = vma->vm_file->private_data;
-
-       if (vmf->pgoff >= kvm_rma_pages)
-               return VM_FAULT_SIGBUS;
-
-       page = pfn_to_page(ri->base_pfn + vmf->pgoff);
-       get_page(page);
-       vmf->page = page;
-       return 0;
-}
-
-static const struct vm_operations_struct kvm_rma_vm_ops = {
-       .fault = kvm_rma_fault,
-};
-
-static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
-{
-       vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
-       vma->vm_ops = &kvm_rma_vm_ops;
-       return 0;
-}
-
-static int kvm_rma_release(struct inode *inode, struct file *filp)
-{
-       struct kvm_rma_info *ri = filp->private_data;
-
-       kvm_release_rma(ri);
-       return 0;
-}
-
-static const struct file_operations kvm_rma_fops = {
-       .mmap           = kvm_rma_mmap,
-       .release        = kvm_rma_release,
-};
-
-static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
-                                     struct kvm_allocate_rma *ret)
-{
-       long fd;
-       struct kvm_rma_info *ri;
-       /*
-        * Only do this on PPC970 in HV mode
-        */
-       if (!cpu_has_feature(CPU_FTR_HVMODE) ||
-           !cpu_has_feature(CPU_FTR_ARCH_201))
-               return -EINVAL;
-
-       if (!kvm_rma_pages)
-               return -EINVAL;
-
-       ri = kvm_alloc_rma();
-       if (!ri)
-               return -ENOMEM;
-
-       fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC);
-       if (fd < 0)
-               kvm_release_rma(ri);
-
-       ret->rma_size = kvm_rma_pages << PAGE_SHIFT;
-       return fd;
-}
-
  static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
                                      int linux_psize)
  {
@@ -2167,26 +2150,6 @@ out:
         return r;
  }
  
-static void unpin_slot(struct kvm_memory_slot *memslot)
-{
-       unsigned long *physp;
-       unsigned long j, npages, pfn;
-       struct page *page;
-
-       physp = memslot->arch.slot_phys;
-       npages = memslot->npages;
-       if (!physp)
-               return;
-       for (j = 0; j < npages; j++) {
-               if (!(physp[j] & KVMPPC_GOT_PAGE))
-                       continue;
-               pfn = physp[j] >> PAGE_SHIFT;
-               page = pfn_to_page(pfn);
-               SetPageDirty(page);
-               put_page(page);
-       }
-}
-
  static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
                                         struct kvm_memory_slot *dont)
  {
@@ -2194,11 +2157,6 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
                 vfree(free->arch.rmap);
                 free->arch.rmap = NULL;
         }
-       if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
-               unpin_slot(free);
-               vfree(free->arch.slot_phys);
-               free->arch.slot_phys = NULL;
-       }
  }
  
  static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
@@ -2207,7 +2165,6 @@ static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
         slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
         if (!slot->arch.rmap)
                 return -ENOMEM;
-       slot->arch.slot_phys = NULL;
  
         return 0;
  }
@@ -2216,17 +2173,6 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
                                         struct kvm_memory_slot *memslot,
                                         struct kvm_userspace_memory_region *mem)
  {
-       unsigned long *phys;
-
-       /* Allocate a slot_phys array if needed */
-       phys = memslot->arch.slot_phys;
-       if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
-               phys = vzalloc(memslot->npages * sizeof(unsigned long));
-               if (!phys)
-                       return -ENOMEM;
-               memslot->arch.slot_phys = phys;
-       }
-
         return 0;
  }
  
@@ -2284,17 +2230,11 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
  {
         int err = 0;
         struct kvm *kvm = vcpu->kvm;
-       struct kvm_rma_info *ri = NULL;
         unsigned long hva;
         struct kvm_memory_slot *memslot;
         struct vm_area_struct *vma;
         unsigned long lpcr = 0, senc;
-       unsigned long lpcr_mask = 0;
         unsigned long psize, porder;
-       unsigned long rma_size;
-       unsigned long rmls;
-       unsigned long *physp;
-       unsigned long i, npages;
         int srcu_idx;
  
         mutex_lock(&kvm->lock);
@@ -2329,88 +2269,25 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
         psize = vma_kernel_pagesize(vma);
         porder = __ilog2(psize);
  
-       /* Is this one of our preallocated RMAs? */
-       if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
-           hva == vma->vm_start)
-               ri = vma->vm_file->private_data;
-
         up_read(&current->mm->mmap_sem);
  
-       if (!ri) {
-               /* On POWER7, use VRMA; on PPC970, give up */
-               err = -EPERM;
-               if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-                       pr_err("KVM: CPU requires an RMO\n");
-                       goto out_srcu;
-               }
+       /* We can handle 4k, 64k or 16M pages in the VRMA */
+       err = -EINVAL;
+       if (!(psize == 0x1000 || psize == 0x10000 ||
+             psize == 0x1000000))
+               goto out_srcu;
  
-               /* We can handle 4k, 64k or 16M pages in the VRMA */
-               err = -EINVAL;
-               if (!(psize == 0x1000 || psize == 0x10000 ||
-                     psize == 0x1000000))
-                       goto out_srcu;
+       /* Update VRMASD field in the LPCR */
+       senc = slb_pgsize_encoding(psize);
+       kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
+               (VRMA_VSID << SLB_VSID_SHIFT_1T);
+       /* the -4 is to account for senc values starting at 0x10 */
+       lpcr = senc << (LPCR_VRMASD_SH - 4);
  
-               /* Update VRMASD field in the LPCR */
-               senc = slb_pgsize_encoding(psize);
-               kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
-                       (VRMA_VSID << SLB_VSID_SHIFT_1T);
-               lpcr_mask = LPCR_VRMASD;
-               /* the -4 is to account for senc values starting at 0x10 */
-               lpcr = senc << (LPCR_VRMASD_SH - 4);
+       /* Create HPTEs in the hash page table for the VRMA */
+       kvmppc_map_vrma(vcpu, memslot, porder);
  
-               /* Create HPTEs in the hash page table for the VRMA */
-               kvmppc_map_vrma(vcpu, memslot, porder);
-
-       } else {
-               /* Set up to use an RMO region */
-               rma_size = kvm_rma_pages;
-               if (rma_size > memslot->npages)
-                       rma_size = memslot->npages;
-               rma_size <<= PAGE_SHIFT;
-               rmls = lpcr_rmls(rma_size);
-               err = -EINVAL;
-               if ((long)rmls < 0) {
-                       pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
-                       goto out_srcu;
-               }
-               atomic_inc(&ri->use_count);
-               kvm->arch.rma = ri;
-
-               /* Update LPCR and RMOR */
-               if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-                       /* PPC970; insert RMLS value (split field) in HID4 */
-                       lpcr_mask = (1ul << HID4_RMLS0_SH) |
-                               (3ul << HID4_RMLS2_SH) | HID4_RMOR;
-                       lpcr = ((rmls >> 2) << HID4_RMLS0_SH) |
-                               ((rmls & 3) << HID4_RMLS2_SH);
-                       /* RMOR is also in HID4 */
-                       lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
-                               << HID4_RMOR_SH;
-               } else {
-                       /* POWER7 */
-                       lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS;
-                       lpcr = rmls << LPCR_RMLS_SH;
-                       kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
-               }
-               pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
-                       ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
-
-               /* Initialize phys addrs of pages in RMO */
-               npages = kvm_rma_pages;
-               porder = __ilog2(npages);
-               physp = memslot->arch.slot_phys;
-               if (physp) {
-                       if (npages > memslot->npages)
-                               npages = memslot->npages;
-                       spin_lock(&kvm->arch.slot_phys_lock);
-                       for (i = 0; i < npages; ++i)
-                               physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
-                                       porder;
-                       spin_unlock(&kvm->arch.slot_phys_lock);
-               }
-       }
-
-       kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+       kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
  
         /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
         smp_wmb();
@@ -2449,35 +2326,21 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
         memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
                sizeof(kvm->arch.enabled_hcalls));
  
-       kvm->arch.rma = NULL;
-
         kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
  
-       if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-               /* PPC970; HID4 is effectively the LPCR */
-               kvm->arch.host_lpid = 0;
-               kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
-               lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
-               lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
-                       ((lpid & 0xf) << HID4_LPID5_SH);
-       } else {
-               /* POWER7; init LPCR for virtual RMA mode */
-               kvm->arch.host_lpid = mfspr(SPRN_LPID);
-               kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
-               lpcr &= LPCR_PECE | LPCR_LPES;
-               lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
-                       LPCR_VPM0 | LPCR_VPM1;
-               kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
-                       (VRMA_VSID << SLB_VSID_SHIFT_1T);
-               /* On POWER8 turn on online bit to enable PURR/SPURR */
-               if (cpu_has_feature(CPU_FTR_ARCH_207S))
-                       lpcr |= LPCR_ONL;
-       }
+       /* Init LPCR for virtual RMA mode */
+       kvm->arch.host_lpid = mfspr(SPRN_LPID);
+       kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
+       lpcr &= LPCR_PECE | LPCR_LPES;
+       lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
+               LPCR_VPM0 | LPCR_VPM1;
+       kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
+               (VRMA_VSID << SLB_VSID_SHIFT_1T);
+       /* On POWER8 turn on online bit to enable PURR/SPURR */
+       if (cpu_has_feature(CPU_FTR_ARCH_207S))
+               lpcr |= LPCR_ONL;
         kvm->arch.lpcr = lpcr;
  
-       kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
-       spin_lock_init(&kvm->arch.slot_phys_lock);
-
         /*
          * Track that we now have a HV mode VM active. This blocks secondary
          * CPU threads from coming online.
@@ -2507,10 +2370,6 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
         kvm_hv_vm_deactivated();
  
         kvmppc_free_vcores(kvm);
-       if (kvm->arch.rma) {
-               kvm_release_rma(kvm->arch.rma);
-               kvm->arch.rma = NULL;
-       }
  
         kvmppc_free_hpt(kvm);
  }
@@ -2536,7 +2395,8 @@ static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
  
  static int kvmppc_core_check_processor_compat_hv(void)
  {
-       if (!cpu_has_feature(CPU_FTR_HVMODE))
+       if (!cpu_has_feature(CPU_FTR_HVMODE) ||
+           !cpu_has_feature(CPU_FTR_ARCH_206))
                 return -EIO;
         return 0;
  }
@@ -2550,16 +2410,6 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
  
         switch (ioctl) {
  
-       case KVM_ALLOCATE_RMA: {
-               struct kvm_allocate_rma rma;
-               struct kvm *kvm = filp->private_data;
-
-               r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
-               if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
-                       r = -EFAULT;
-               break;
-       }
-
         case KVM_PPC_ALLOCATE_HTAB: {
                 u32 htab_order;
  
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c

index 3f1bb5a36c2769d2482ab53f9859f0987818337d..1f083ff8a61a065cd7aa2dc2901000224e515e2e 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -16,6 +16,7 @@
  #include <linux/memblock.h>
  #include <linux/sizes.h>
  #include <linux/cma.h>
+#include <linux/bitops.h>
  
  #include <asm/cputable.h>
  #include <asm/kvm_ppc.h>
@@ -32,95 +33,9 @@
   * By default we reserve 5% of memory for hash pagetable allocation.
   */
  static unsigned long kvm_cma_resv_ratio = 5;
-/*
- * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
- * Each RMA has to be physically contiguous and of a size that the
- * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
- * and other larger sizes.  Since we are unlikely to be allocate that
- * much physically contiguous memory after the system is up and running,
- * we preallocate a set of RMAs in early boot using CMA.
- * should be power of 2.
- */
-unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */
-EXPORT_SYMBOL_GPL(kvm_rma_pages);
  
  static struct cma *kvm_cma;
  
-/* Work out RMLS (real mode limit selector) field value for a given RMA size.
-   Assumes POWER7 or PPC970. */
-static inline int lpcr_rmls(unsigned long rma_size)
-{
-       switch (rma_size) {
-       case 32ul << 20:        /* 32 MB */
-               if (cpu_has_feature(CPU_FTR_ARCH_206))
-                       return 8;       /* only supported on POWER7 */
-               return -1;
-       case 64ul << 20:        /* 64 MB */
-               return 3;
-       case 128ul << 20:       /* 128 MB */
-               return 7;
-       case 256ul << 20:       /* 256 MB */
-               return 4;
-       case 1ul << 30:         /* 1 GB */
-               return 2;
-       case 16ul << 30:        /* 16 GB */
-               return 1;
-       case 256ul << 30:       /* 256 GB */
-               return 0;
-       default:
-               return -1;
-       }
-}
-
-static int __init early_parse_rma_size(char *p)
-{
-       unsigned long kvm_rma_size;
-
-       pr_debug("%s(%s)\n", __func__, p);
-       if (!p)
-               return -EINVAL;
-       kvm_rma_size = memparse(p, &p);
-       /*
-        * Check that the requested size is one supported in hardware
-        */
-       if (lpcr_rmls(kvm_rma_size) < 0) {
-               pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
-               return -EINVAL;
-       }
-       kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
-       return 0;
-}
-early_param("kvm_rma_size", early_parse_rma_size);
-
-struct kvm_rma_info *kvm_alloc_rma()
-{
-       struct page *page;
-       struct kvm_rma_info *ri;
-
-       ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
-       if (!ri)
-               return NULL;
-       page = cma_alloc(kvm_cma, kvm_rma_pages, order_base_2(kvm_rma_pages));
-       if (!page)
-               goto err_out;
-       atomic_set(&ri->use_count, 1);
-       ri->base_pfn = page_to_pfn(page);
-       return ri;
-err_out:
-       kfree(ri);
-       return NULL;
-}
-EXPORT_SYMBOL_GPL(kvm_alloc_rma);
-
-void kvm_release_rma(struct kvm_rma_info *ri)
-{
-       if (atomic_dec_and_test(&ri->use_count)) {
-               cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages);
-               kfree(ri);
-       }
-}
-EXPORT_SYMBOL_GPL(kvm_release_rma);
-
  static int __init early_parse_kvm_cma_resv(char *p)
  {
         pr_debug("%s(%s)\n", __func__, p);
@@ -132,14 +47,9 @@ early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
  
  struct page *kvm_alloc_hpt(unsigned long nr_pages)
  {
-       unsigned long align_pages = HPT_ALIGN_PAGES;
-
         VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
  
-       /* Old CPUs require HPT aligned on a multiple of its size */
-       if (!cpu_has_feature(CPU_FTR_ARCH_206))
-               align_pages = nr_pages;
-       return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages));
+       return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES));
  }
  EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
  
@@ -180,21 +90,43 @@ void __init kvm_cma_reserve(void)
         if (selected_size) {
                 pr_debug("%s: reserving %ld MiB for global area\n", __func__,
                          (unsigned long)selected_size / SZ_1M);
-               /*
-                * Old CPUs require HPT aligned on a multiple of its size. So for them
-                * make the alignment as max size we could request.
-                */
-               if (!cpu_has_feature(CPU_FTR_ARCH_206))
-                       align_size = __rounddown_pow_of_two(selected_size);
-               else
-                       align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
-
-               align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
+               align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
                 cma_declare_contiguous(0, selected_size, 0, align_size,
                         KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma);
         }
  }
  
+/*
+ * Real-mode H_CONFER implementation.
+ * We check if we are the only vcpu out of this virtual core
+ * still running in the guest and not ceded.  If so, we pop up
+ * to the virtual-mode implementation; if not, just return to
+ * the guest.
+ */
+long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
+                           unsigned int yield_count)
+{
+       struct kvmppc_vcore *vc = vcpu->arch.vcore;
+       int threads_running;
+       int threads_ceded;
+       int threads_conferring;
+       u64 stop = get_tb() + 10 * tb_ticks_per_usec;
+       int rv = H_SUCCESS; /* => don't yield */
+
+       set_bit(vcpu->arch.ptid, &vc->conferring_threads);
+       while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) {
+               threads_running = VCORE_ENTRY_COUNT(vc);
+               threads_ceded = hweight32(vc->napping_threads);
+               threads_conferring = hweight32(vc->conferring_threads);
+               if (threads_ceded + threads_conferring >= threads_running) {
+                       rv = H_TOO_HARD; /* => do yield */
+                       break;
+               }
+       }
+       clear_bit(vcpu->arch.ptid, &vc->conferring_threads);
+       return rv;
+}
+
  /*
   * When running HV mode KVM we need to block certain operations while KVM VMs
   * exist in the system. We use a counter of VMs to track this.
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S

index 731be7478b27ddbec69e52e61bc94edd5630c91c..36540a99d178497b41e022242cf21a77a189992d 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -52,10 +52,8 @@ _GLOBAL(__kvmppc_vcore_entry)
         std     r3, _CCR(r1)
  
         /* Save host DSCR */
-BEGIN_FTR_SECTION
         mfspr   r3, SPRN_DSCR
         std     r3, HSTATE_DSCR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
  
  BEGIN_FTR_SECTION
         /* Save host DABR */
@@ -84,11 +82,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
         mfspr   r7, SPRN_MMCR0          /* save MMCR0 */
         mtspr   SPRN_MMCR0, r3          /* freeze all counters, disable interrupts */
         mfspr   r6, SPRN_MMCRA
-BEGIN_FTR_SECTION
-       /* On P7, clear MMCRA in order to disable SDAR updates */
+       /* Clear MMCRA in order to disable SDAR updates */
         li      r5, 0
         mtspr   SPRN_MMCRA, r5
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
         isync
         ld      r3, PACALPPACAPTR(r13)  /* is the host using the PMU? */
         lbz     r5, LPPACA_PMCINUSE(r3)
@@ -113,20 +109,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
         mfspr   r7, SPRN_PMC4
         mfspr   r8, SPRN_PMC5
         mfspr   r9, SPRN_PMC6
-BEGIN_FTR_SECTION
-       mfspr   r10, SPRN_PMC7
-       mfspr   r11, SPRN_PMC8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
         stw     r3, HSTATE_PMC(r13)
         stw     r5, HSTATE_PMC + 4(r13)
         stw     r6, HSTATE_PMC + 8(r13)
         stw     r7, HSTATE_PMC + 12(r13)
         stw     r8, HSTATE_PMC + 16(r13)
         stw     r9, HSTATE_PMC + 20(r13)
-BEGIN_FTR_SECTION
-       stw     r10, HSTATE_PMC + 24(r13)
-       stw     r11, HSTATE_PMC + 28(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
  31:
  
         /*
@@ -140,31 +128,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
         add     r8,r8,r7
         std     r8,HSTATE_DECEXP(r13)
  
-#ifdef CONFIG_SMP
-       /*
-        * On PPC970, if the guest vcpu has an external interrupt pending,
-        * send ourselves an IPI so as to interrupt the guest once it
-        * enables interrupts.  (It must have interrupts disabled,
-        * otherwise we would already have delivered the interrupt.)
-        *
-        * XXX If this is a UP build, smp_send_reschedule is not available,
-        * so the interrupt will be delayed until the next time the vcpu
-        * enters the guest with interrupts enabled.
-        */
-BEGIN_FTR_SECTION
-       ld      r4, HSTATE_KVM_VCPU(r13)
-       ld      r0, VCPU_PENDING_EXC(r4)
-       li      r7, (1 << BOOK3S_IRQPRIO_EXTERNAL)
-       oris    r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
-       and.    r0, r0, r7
-       beq     32f
-       lhz     r3, PACAPACAINDEX(r13)
-       bl      smp_send_reschedule
-       nop
-32:
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-#endif /* CONFIG_SMP */
-
         /* Jump to partition switch code */
         bl      kvmppc_hv_entry_trampoline
         nop
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c

index d562c8e2bc30cba62e463588a2cabf9757bebf64..60081bd75847276c8291ffffe156071eb6872194 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -138,8 +138,5 @@ out:
  
  long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
  {
-       if (cpu_has_feature(CPU_FTR_ARCH_206))
-               return kvmppc_realmode_mc_power7(vcpu);
-
-       return 0;
+       return kvmppc_realmode_mc_power7(vcpu);
  }
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c

index 084ad54c73cd6e9918a73f8a6da2be9abda65311..510bdfbc40734ff0a3e8cc03d8b38f357d4e0d98 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -45,16 +45,12 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags)
          * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
          * we can use tlbiel as long as we mark all other physical
          * cores as potentially having stale TLB entries for this lpid.
-        * If we're not using MMU notifiers, we never take pages away
-        * from the guest, so we can use tlbiel if requested.
          * Otherwise, don't use tlbiel.
          */
         if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
                 global = 0;
-       else if (kvm->arch.using_mmu_notifiers)
-               global = 1;
         else
-               global = !(flags & H_LOCAL);
+               global = 1;
  
         if (!global) {
                 /* any other core might now have stale TLB entries... */
@@ -170,7 +166,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
         struct revmap_entry *rev;
         unsigned long g_ptel;
         struct kvm_memory_slot *memslot;
-       unsigned long *physp, pte_size;
+       unsigned long pte_size;
         unsigned long is_io;
         unsigned long *rmap;
         pte_t pte;
@@ -198,9 +194,6 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
         is_io = ~0ul;
         rmap = NULL;
         if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
-               /* PPC970 can't do emulated MMIO */
-               if (!cpu_has_feature(CPU_FTR_ARCH_206))
-                       return H_PARAMETER;
                 /* Emulated MMIO - mark this with key=31 */
                 pteh |= HPTE_V_ABSENT;
                 ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
@@ -213,37 +206,20 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
         slot_fn = gfn - memslot->base_gfn;
         rmap = &memslot->arch.rmap[slot_fn];
  
-       if (!kvm->arch.using_mmu_notifiers) {
-               physp = memslot->arch.slot_phys;
-               if (!physp)
-                       return H_PARAMETER;
-               physp += slot_fn;
-               if (realmode)
-                       physp = real_vmalloc_addr(physp);
-               pa = *physp;
-               if (!pa)
-                       return H_TOO_HARD;
-               is_io = pa & (HPTE_R_I | HPTE_R_W);
-               pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
-               pa &= PAGE_MASK;
+       /* Translate to host virtual address */
+       hva = __gfn_to_hva_memslot(memslot, gfn);
+
+       /* Look up the Linux PTE for the backing page */
+       pte_size = psize;
+       pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size);
+       if (pte_present(pte) && !pte_numa(pte)) {
+               if (writing && !pte_write(pte))
+                       /* make the actual HPTE be read-only */
+                       ptel = hpte_make_readonly(ptel);
+               is_io = hpte_cache_bits(pte_val(pte));
+               pa = pte_pfn(pte) << PAGE_SHIFT;
+               pa |= hva & (pte_size - 1);
                 pa |= gpa & ~PAGE_MASK;
-       } else {
-               /* Translate to host virtual address */
-               hva = __gfn_to_hva_memslot(memslot, gfn);
-
-               /* Look up the Linux PTE for the backing page */
-               pte_size = psize;
-               pte = lookup_linux_pte_and_update(pgdir, hva, writing,
-                                                 &pte_size);
-               if (pte_present(pte) && !pte_numa(pte)) {
-                       if (writing && !pte_write(pte))
-                               /* make the actual HPTE be read-only */
-                               ptel = hpte_make_readonly(ptel);
-                       is_io = hpte_cache_bits(pte_val(pte));
-                       pa = pte_pfn(pte) << PAGE_SHIFT;
-                       pa |= hva & (pte_size - 1);
-                       pa |= gpa & ~PAGE_MASK;
-               }
         }
  
         if (pte_size < psize)
@@ -337,8 +313,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
                         rmap = real_vmalloc_addr(rmap);
                 lock_rmap(rmap);
                 /* Check for pending invalidations under the rmap chain lock */
-               if (kvm->arch.using_mmu_notifiers &&
-                   mmu_notifier_retry(kvm, mmu_seq)) {
+               if (mmu_notifier_retry(kvm, mmu_seq)) {
                         /* inval in progress, write a non-present HPTE */
                         pteh |= HPTE_V_ABSENT;
                         pteh &= ~HPTE_V_VALID;
@@ -395,61 +370,11 @@ static inline int try_lock_tlbie(unsigned int *lock)
         return old == 0;
  }
  
-/*
- * tlbie/tlbiel is a bit different on the PPC970 compared to later
- * processors such as POWER7; the large page bit is in the instruction
- * not RB, and the top 16 bits and the bottom 12 bits of the VA
- * in RB must be 0.
- */
-static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues,
-                         long npages, int global, bool need_sync)
-{
-       long i;
-
-       if (global) {
-               while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
-                       cpu_relax();
-               if (need_sync)
-                       asm volatile("ptesync" : : : "memory");
-               for (i = 0; i < npages; ++i) {
-                       unsigned long rb = rbvalues[i];
-
-                       if (rb & 1)             /* large page */
-                               asm volatile("tlbie %0,1" : :
-                                            "r" (rb & 0x0000fffffffff000ul));
-                       else
-                               asm volatile("tlbie %0,0" : :
-                                            "r" (rb & 0x0000fffffffff000ul));
-               }
-               asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-               kvm->arch.tlbie_lock = 0;
-       } else {
-               if (need_sync)
-                       asm volatile("ptesync" : : : "memory");
-               for (i = 0; i < npages; ++i) {
-                       unsigned long rb = rbvalues[i];
-
-                       if (rb & 1)             /* large page */
-                               asm volatile("tlbiel %0,1" : :
-                                            "r" (rb & 0x0000fffffffff000ul));
-                       else
-                               asm volatile("tlbiel %0,0" : :
-                                            "r" (rb & 0x0000fffffffff000ul));
-               }
-               asm volatile("ptesync" : : : "memory");
-       }
-}
-
  static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
                       long npages, int global, bool need_sync)
  {
         long i;
  
-       if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-               /* PPC970 tlbie instruction is a bit different */
-               do_tlbies_970(kvm, rbvalues, npages, global, need_sync);
-               return;
-       }
         if (global) {
                 while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
                         cpu_relax();
@@ -667,40 +592,29 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
                 rev->guest_rpte = r;
                 note_hpte_modification(kvm, rev);
         }
-       r = (be64_to_cpu(hpte[1]) & ~mask) | bits;
  
         /* Update HPTE */
         if (v & HPTE_V_VALID) {
-               rb = compute_tlbie_rb(v, r, pte_index);
-               hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID);
-               do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
                 /*
-                * If the host has this page as readonly but the guest
-                * wants to make it read/write, reduce the permissions.
-                * Checking the host permissions involves finding the
-                * memslot and then the Linux PTE for the page.
+                * If the page is valid, don't let it transition from
+                * readonly to writable.  If it should be writable, we'll
+                * take a trap and let the page fault code sort it out.
                  */
-               if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
-                       unsigned long psize, gfn, hva;
-                       struct kvm_memory_slot *memslot;
-                       pgd_t *pgdir = vcpu->arch.pgdir;
-                       pte_t pte;
-
-                       psize = hpte_page_size(v, r);
-                       gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
-                       memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
-                       if (memslot) {
-                               hva = __gfn_to_hva_memslot(memslot, gfn);
-                               pte = lookup_linux_pte_and_update(pgdir, hva,
-                                                                 1, &psize);
-                               if (pte_present(pte) && !pte_write(pte))
-                                       r = hpte_make_readonly(r);
-                       }
+               pte = be64_to_cpu(hpte[1]);
+               r = (pte & ~mask) | bits;
+               if (hpte_is_writable(r) && !hpte_is_writable(pte))
+                       r = hpte_make_readonly(r);
+               /* If the PTE is changing, invalidate it first */
+               if (r != pte) {
+                       rb = compute_tlbie_rb(v, r, pte_index);
+                       hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) |
+                                             HPTE_V_ABSENT);
+                       do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
+                                 true);
+                       hpte[1] = cpu_to_be64(r);
                 }
         }
-       hpte[1] = cpu_to_be64(r);
-       eieio();
-       hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK);
+       unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
         asm volatile("ptesync" : : : "memory");
         return H_SUCCESS;
  }
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c

index 3ee38e6e884f5e786df84500d0912ff4454f5ef2..7b066f6b02aded88e701b536b2ddcd5cb24d7a96 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -183,8 +183,10 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
          * state update in HW (ie bus transactions) so we can handle them
          * separately here as well.
          */
-       if (resend)
+       if (resend) {
                 icp->rm_action |= XICS_RM_CHECK_RESEND;
+               icp->rm_resend_icp = icp;
+       }
  }
  
  
@@ -254,10 +256,25 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
          * nothing needs to be done as there can be no XISR to
          * reject.
          *
+        * ICP state: Check_IPI
+        *
          * If the CPPR is less favored, then we might be replacing
-        * an interrupt, and thus need to possibly reject it as in
+        * an interrupt, and thus need to possibly reject it.
          *
-        * ICP state: Check_IPI
+        * ICP State: IPI
+        *
+        * Besides rejecting any pending interrupts, we also
+        * update XISR and pending_pri to mark IPI as pending.
+        *
+        * PAPR does not describe this state, but if the MFRR is being
+        * made less favored than its earlier value, there might be
+        * a previously-rejected interrupt needing to be resent.
+        * Ideally, we would want to resend only if
+        *      prio(pending_interrupt) < mfrr &&
+        *      prio(pending_interrupt) < cppr
+        * where pending interrupt is the one that was rejected. But
+        * we don't have that state, so we simply trigger a resend
+        * whenever the MFRR is made less favored.
          */
         do {
                 old_state = new_state = ACCESS_ONCE(icp->state);
@@ -270,13 +287,14 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
                 resend = false;
                 if (mfrr < new_state.cppr) {
                         /* Reject a pending interrupt if not an IPI */
-                       if (mfrr <= new_state.pending_pri)
+                       if (mfrr <= new_state.pending_pri) {
                                 reject = new_state.xisr;
-                       new_state.pending_pri = mfrr;
-                       new_state.xisr = XICS_IPI;
+                               new_state.pending_pri = mfrr;
+                               new_state.xisr = XICS_IPI;
+                       }
                 }
  
-               if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+               if (mfrr > old_state.mfrr) {
                         resend = new_state.need_resend;
                         new_state.need_resend = 0;
                 }
@@ -289,8 +307,10 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
         }
  
         /* Pass resends to virtual mode */
-       if (resend)
+       if (resend) {
                 this_icp->rm_action |= XICS_RM_CHECK_RESEND;
+               this_icp->rm_resend_icp = icp;
+       }
  
         return check_too_hard(xics, this_icp);
  }
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S

index 65c105b17a2558a2ee41a1479475431d899c805f..10554df13852fe05041230f1314216e6deb54e80 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -94,20 +94,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
         lwz     r6, HSTATE_PMC + 12(r13)
         lwz     r8, HSTATE_PMC + 16(r13)
         lwz     r9, HSTATE_PMC + 20(r13)
-BEGIN_FTR_SECTION
-       lwz     r10, HSTATE_PMC + 24(r13)
-       lwz     r11, HSTATE_PMC + 28(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
         mtspr   SPRN_PMC1, r3
         mtspr   SPRN_PMC2, r4
         mtspr   SPRN_PMC3, r5
         mtspr   SPRN_PMC4, r6
         mtspr   SPRN_PMC5, r8
         mtspr   SPRN_PMC6, r9
-BEGIN_FTR_SECTION
-       mtspr   SPRN_PMC7, r10
-       mtspr   SPRN_PMC8, r11
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
         ld      r3, HSTATE_MMCR(r13)
         ld      r4, HSTATE_MMCR + 8(r13)
         ld      r5, HSTATE_MMCR + 16(r13)
@@ -153,11 +145,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  
         cmpwi   cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
         cmpwi   r12, BOOK3S_INTERRUPT_EXTERNAL
-BEGIN_FTR_SECTION
         beq     11f
         cmpwi   cr2, r12, BOOK3S_INTERRUPT_HMI
         beq     cr2, 14f                        /* HMI check */
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
  
         /* RFI into the highmem handler, or branch to interrupt handler */
         mfmsr   r6
@@ -166,7 +156,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
         mtmsrd  r6, 1                   /* Clear RI in MSR */
         mtsrr0  r8
         mtsrr1  r7
-       beqa    0x500                   /* external interrupt (PPC970) */
         beq     cr1, 13f                /* machine check */
         RFI
  
@@ -393,11 +382,8 @@ kvmppc_hv_entry:
         slbia
         ptesync
  
-BEGIN_FTR_SECTION
-       b       30f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
         /*
-        * POWER7 host -> guest partition switch code.
+        * POWER7/POWER8 host -> guest partition switch code.
          * We don't have to lock against concurrent tlbies,
          * but we do have to coordinate across hardware threads.
          */
@@ -505,97 +491,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
         cmpwi   r3,512          /* 1 microsecond */
         li      r12,BOOK3S_INTERRUPT_HV_DECREMENTER
         blt     hdec_soon
-       b       31f
-
-       /*
-        * PPC970 host -> guest partition switch code.
-        * We have to lock against concurrent tlbies,
-        * using native_tlbie_lock to lock against host tlbies
-        * and kvm->arch.tlbie_lock to lock against guest tlbies.
-        * We also have to invalidate the TLB since its
-        * entries aren't tagged with the LPID.
-        */
-30:    ld      r5,HSTATE_KVM_VCORE(r13)
-       ld      r9,VCORE_KVM(r5)        /* pointer to struct kvm */
-
-       /* first take native_tlbie_lock */
-       .section ".toc","aw"
-toc_tlbie_lock:
-       .tc     native_tlbie_lock[TC],native_tlbie_lock
-       .previous
-       ld      r3,toc_tlbie_lock@toc(r2)
-#ifdef __BIG_ENDIAN__
-       lwz     r8,PACA_LOCK_TOKEN(r13)
-#else
-       lwz     r8,PACAPACAINDEX(r13)
-#endif
-24:    lwarx   r0,0,r3
-       cmpwi   r0,0
-       bne     24b
-       stwcx.  r8,0,r3
-       bne     24b
-       isync
-
-       ld      r5,HSTATE_KVM_VCORE(r13)
-       ld      r7,VCORE_LPCR(r5)       /* use vcore->lpcr to store HID4 */
-       li      r0,0x18f
-       rotldi  r0,r0,HID4_LPID5_SH     /* all lpid bits in HID4 = 1 */
-       or      r0,r7,r0
-       ptesync
-       sync
-       mtspr   SPRN_HID4,r0            /* switch to reserved LPID */
-       isync
-       li      r0,0
-       stw     r0,0(r3)                /* drop native_tlbie_lock */
-
-       /* invalidate the whole TLB */
-       li      r0,256
-       mtctr   r0
-       li      r6,0
-25:    tlbiel  r6
-       addi    r6,r6,0x1000
-       bdnz    25b
-       ptesync
  
-       /* Take the guest's tlbie_lock */
-       addi    r3,r9,KVM_TLBIE_LOCK
-24:    lwarx   r0,0,r3
-       cmpwi   r0,0
-       bne     24b
-       stwcx.  r8,0,r3
-       bne     24b
-       isync
-       ld      r6,KVM_SDR1(r9)
-       mtspr   SPRN_SDR1,r6            /* switch to partition page table */
-
-       /* Set up HID4 with the guest's LPID etc. */
-       sync
-       mtspr   SPRN_HID4,r7
-       isync
-
-       /* drop the guest's tlbie_lock */
-       li      r0,0
-       stw     r0,0(r3)
-
-       /* Check if HDEC expires soon */
-       mfspr   r3,SPRN_HDEC
-       cmpwi   r3,10
-       li      r12,BOOK3S_INTERRUPT_HV_DECREMENTER
-       blt     hdec_soon
-
-       /* Enable HDEC interrupts */
-       mfspr   r0,SPRN_HID0
-       li      r3,1
-       rldimi  r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
-       sync
-       mtspr   SPRN_HID0,r0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
-31:
         /* Do we have a guest vcpu to run? */
         cmpdi   r4, 0
         beq     kvmppc_primary_no_guest
@@ -625,7 +521,6 @@ kvmppc_got_guest:
         stb     r6, VCPU_VPA_DIRTY(r4)
  25:
  
-BEGIN_FTR_SECTION
         /* Save purr/spurr */
         mfspr   r5,SPRN_PURR
         mfspr   r6,SPRN_SPURR
@@ -635,7 +530,6 @@ BEGIN_FTR_SECTION
         ld      r8,VCPU_SPURR(r4)
         mtspr   SPRN_PURR,r7
         mtspr   SPRN_SPURR,r8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
  
  BEGIN_FTR_SECTION
         /* Set partition DABR */
@@ -644,9 +538,7 @@ BEGIN_FTR_SECTION
         ld      r6,VCPU_DABR(r4)
         mtspr   SPRN_DABRX,r5
         mtspr   SPRN_DABR,r6
- BEGIN_FTR_SECTION_NESTED(89)
         isync
- END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89)
  END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
  
  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -777,20 +669,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
         lwz     r7, VCPU_PMC + 12(r4)
         lwz     r8, VCPU_PMC + 16(r4)
         lwz     r9, VCPU_PMC + 20(r4)
-BEGIN_FTR_SECTION
-       lwz     r10, VCPU_PMC + 24(r4)
-       lwz     r11, VCPU_PMC + 28(r4)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
         mtspr   SPRN_PMC1, r3
         mtspr   SPRN_PMC2, r5
         mtspr   SPRN_PMC3, r6
         mtspr   SPRN_PMC4, r7
         mtspr   SPRN_PMC5, r8
         mtspr   SPRN_PMC6, r9
-BEGIN_FTR_SECTION
-       mtspr   SPRN_PMC7, r10
-       mtspr   SPRN_PMC8, r11
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
         ld      r3, VCPU_MMCR(r4)
         ld      r5, VCPU_MMCR + 8(r4)
         ld      r6, VCPU_MMCR + 16(r4)
@@ -837,14 +721,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
         ld      r30, VCPU_GPR(R30)(r4)
         ld      r31, VCPU_GPR(R31)(r4)
  
-BEGIN_FTR_SECTION
         /* Switch DSCR to guest value */
         ld      r5, VCPU_DSCR(r4)
         mtspr   SPRN_DSCR, r5
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
  
  BEGIN_FTR_SECTION
-       /* Skip next section on POWER7 or PPC970 */
+       /* Skip next section on POWER7 */
         b       8f
  END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
         /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
@@ -920,7 +802,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
         mtspr   SPRN_DAR, r5
         mtspr   SPRN_DSISR, r6
  
-BEGIN_FTR_SECTION
         /* Restore AMR and UAMOR, set AMOR to all 1s */
         ld      r5,VCPU_AMR(r4)
         ld      r6,VCPU_UAMOR(r4)
@@ -928,7 +809,6 @@ BEGIN_FTR_SECTION
         mtspr   SPRN_AMR,r5
         mtspr   SPRN_UAMOR,r6
         mtspr   SPRN_AMOR,r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
  
         /* Restore state of CTRL run bit; assume 1 on entry */
         lwz     r5,VCPU_CTRL(r4)
@@ -963,13 +843,11 @@ deliver_guest_interrupt:
         rldicl  r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
         cmpdi   cr1, r0, 0
         andi.   r8, r11, MSR_EE
-BEGIN_FTR_SECTION
         mfspr   r8, SPRN_LPCR
         /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
         rldimi  r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
         mtspr   SPRN_LPCR, r8
         isync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
         beq     5f
         li      r0, BOOK3S_INTERRUPT_EXTERNAL
         bne     cr1, 12f
@@ -1124,15 +1002,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
  
         stw     r12,VCPU_TRAP(r9)
  
-       /* Save HEIR (HV emulation assist reg) in last_inst
+       /* Save HEIR (HV emulation assist reg) in emul_inst
            if this is an HEI (HV emulation interrupt, e40) */
         li      r3,KVM_INST_FETCH_FAILED
-BEGIN_FTR_SECTION
         cmpwi   r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
         bne     11f
         mfspr   r3,SPRN_HEIR
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-11:    stw     r3,VCPU_LAST_INST(r9)
+11:    stw     r3,VCPU_HEIR(r9)
  
         /* these are volatile across C function calls */
         mfctr   r3
@@ -1140,13 +1016,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
         std     r3, VCPU_CTR(r9)
         stw     r4, VCPU_XER(r9)
  
-BEGIN_FTR_SECTION
         /* If this is a page table miss then see if it's theirs or ours */
         cmpwi   r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
         beq     kvmppc_hdsi
         cmpwi   r12, BOOK3S_INTERRUPT_H_INST_STORAGE
         beq     kvmppc_hisi
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
  
         /* See if this is a leftover HDEC interrupt */
         cmpwi   r12,BOOK3S_INTERRUPT_HV_DECREMENTER
@@ -1159,11 +1033,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
         cmpwi   r12,BOOK3S_INTERRUPT_SYSCALL
         beq     hcall_try_real_mode
  
-       /* Only handle external interrupts here on arch 206 and later */
-BEGIN_FTR_SECTION
-       b       ext_interrupt_to_host
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
-
         /* External interrupt ? */
         cmpwi   r12, BOOK3S_INTERRUPT_EXTERNAL
         bne+    ext_interrupt_to_host
@@ -1193,11 +1062,9 @@ guest_exit_cont:         /* r9 = vcpu, r12 = trap, r13 = paca */
         mfdsisr r7
         std     r6, VCPU_DAR(r9)
         stw     r7, VCPU_DSISR(r9)
-BEGIN_FTR_SECTION
         /* don't overwrite fault_dar/fault_dsisr if HDSI */
         cmpwi   r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
         beq     6f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
         std     r6, VCPU_FAULT_DAR(r9)
         stw     r7, VCPU_FAULT_DSISR(r9)
  
@@ -1236,7 +1103,6 @@ mc_cont:
         /*
          * Save the guest PURR/SPURR
          */
-BEGIN_FTR_SECTION
         mfspr   r5,SPRN_PURR
         mfspr   r6,SPRN_SPURR
         ld      r7,VCPU_PURR(r9)
@@ -1256,7 +1122,6 @@ BEGIN_FTR_SECTION
         add     r4,r4,r6
         mtspr   SPRN_PURR,r3
         mtspr   SPRN_SPURR,r4
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
  
         /* Save DEC */
         mfspr   r5,SPRN_DEC
@@ -1306,22 +1171,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
  8:
  
         /* Save and reset AMR and UAMOR before turning on the MMU */
-BEGIN_FTR_SECTION
         mfspr   r5,SPRN_AMR
         mfspr   r6,SPRN_UAMOR
         std     r5,VCPU_AMR(r9)
         std     r6,VCPU_UAMOR(r9)
         li      r6,0
         mtspr   SPRN_AMR,r6
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
  
         /* Switch DSCR back to host value */
-BEGIN_FTR_SECTION
         mfspr   r8, SPRN_DSCR
         ld      r7, HSTATE_DSCR(r13)
         std     r8, VCPU_DSCR(r9)
         mtspr   SPRN_DSCR, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
  
         /* Save non-volatile GPRs */
         std     r14, VCPU_GPR(R14)(r9)
@@ -1503,11 +1364,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
         mfspr   r4, SPRN_MMCR0          /* save MMCR0 */
         mtspr   SPRN_MMCR0, r3          /* freeze all counters, disable ints */
         mfspr   r6, SPRN_MMCRA
-BEGIN_FTR_SECTION
-       /* On P7, clear MMCRA in order to disable SDAR updates */
+       /* Clear MMCRA in order to disable SDAR updates */
         li      r7, 0
         mtspr   SPRN_MMCRA, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
         isync
         beq     21f                     /* if no VPA, save PMU stuff anyway */
         lbz     r7, LPPACA_PMCINUSE(r8)
@@ -1532,20 +1391,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
         mfspr   r6, SPRN_PMC4
         mfspr   r7, SPRN_PMC5
         mfspr   r8, SPRN_PMC6
-BEGIN_FTR_SECTION
-       mfspr   r10, SPRN_PMC7
-       mfspr   r11, SPRN_PMC8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
         stw     r3, VCPU_PMC(r9)
         stw     r4, VCPU_PMC + 4(r9)
         stw     r5, VCPU_PMC + 8(r9)
         stw     r6, VCPU_PMC + 12(r9)
         stw     r7, VCPU_PMC + 16(r9)
         stw     r8, VCPU_PMC + 20(r9)
-BEGIN_FTR_SECTION
-       stw     r10, VCPU_PMC + 24(r9)
-       stw     r11, VCPU_PMC + 28(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
  BEGIN_FTR_SECTION
         mfspr   r5, SPRN_SIER
         mfspr   r6, SPRN_SPMC1
@@ -1566,11 +1417,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
         ptesync
  
  hdec_soon:                     /* r12 = trap, r13 = paca */
-BEGIN_FTR_SECTION
-       b       32f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
         /*
-        * POWER7 guest -> host partition switch code.
+        * POWER7/POWER8 guest -> host partition switch code.
          * We don't have to lock against tlbies but we do
          * have to coordinate the hardware threads.
          */
@@ -1698,87 +1546,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  16:    ld      r8,KVM_HOST_LPCR(r4)
         mtspr   SPRN_LPCR,r8
         isync
-       b       33f
-
-       /*
-        * PPC970 guest -> host partition switch code.
-        * We have to lock against concurrent tlbies, and
-        * we have to flush the whole TLB.
-        */
-32:    ld      r5,HSTATE_KVM_VCORE(r13)
-       ld      r4,VCORE_KVM(r5)        /* pointer to struct kvm */
-
-       /* Take the guest's tlbie_lock */
-#ifdef __BIG_ENDIAN__
-       lwz     r8,PACA_LOCK_TOKEN(r13)
-#else
-       lwz     r8,PACAPACAINDEX(r13)
-#endif
-       addi    r3,r4,KVM_TLBIE_LOCK
-24:    lwarx   r0,0,r3
-       cmpwi   r0,0
-       bne     24b
-       stwcx.  r8,0,r3
-       bne     24b
-       isync
-
-       ld      r7,KVM_HOST_LPCR(r4)    /* use kvm->arch.host_lpcr for HID4 */
-       li      r0,0x18f
-       rotldi  r0,r0,HID4_LPID5_SH     /* all lpid bits in HID4 = 1 */
-       or      r0,r7,r0
-       ptesync
-       sync
-       mtspr   SPRN_HID4,r0            /* switch to reserved LPID */
-       isync
-       li      r0,0
-       stw     r0,0(r3)                /* drop guest tlbie_lock */
-
-       /* invalidate the whole TLB */
-       li      r0,256
-       mtctr   r0
-       li      r6,0
-25:    tlbiel  r6
-       addi    r6,r6,0x1000
-       bdnz    25b
-       ptesync
-
-       /* take native_tlbie_lock */
-       ld      r3,toc_tlbie_lock@toc(2)
-24:    lwarx   r0,0,r3
-       cmpwi   r0,0
-       bne     24b
-       stwcx.  r8,0,r3
-       bne     24b
-       isync
-
-       ld      r6,KVM_HOST_SDR1(r4)
-       mtspr   SPRN_SDR1,r6            /* switch to host page table */
-
-       /* Set up host HID4 value */
-       sync
-       mtspr   SPRN_HID4,r7
-       isync
-       li      r0,0
-       stw     r0,0(r3)                /* drop native_tlbie_lock */
-
-       lis     r8,0x7fff               /* MAX_INT@h */
-       mtspr   SPRN_HDEC,r8
-
-       /* Disable HDEC interrupts */
-       mfspr   r0,SPRN_HID0
-       li      r3,0
-       rldimi  r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
-       sync
-       mtspr   SPRN_HID0,r0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
  
         /* load host SLB entries */
-33:    ld      r8,PACA_SLBSHADOWPTR(r13)
+       ld      r8,PACA_SLBSHADOWPTR(r13)
  
         .rept   SLB_NUM_BOLTED
         li      r3, SLBSHADOW_SAVEAREA
@@ -2047,7 +1817,7 @@ hcall_real_table:
         .long   0               /* 0xd8 */
         .long   0               /* 0xdc */
         .long   DOTSYM(kvmppc_h_cede) - hcall_real_table
-       .long   0               /* 0xe4 */
+       .long   DOTSYM(kvmppc_rm_h_confer) - hcall_real_table
         .long   0               /* 0xe8 */
         .long   0               /* 0xec */
         .long   0               /* 0xf0 */
@@ -2126,9 +1896,6 @@ _GLOBAL(kvmppc_h_cede)
         stw     r0,VCPU_TRAP(r3)
         li      r0,H_SUCCESS
         std     r0,VCPU_GPR(R3)(r3)
-BEGIN_FTR_SECTION
-       b       kvm_cede_exit   /* just send it up to host on 970 */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
  
         /*
          * Set our bit in the bitmask of napping threads unless all the
@@ -2455,7 +2222,6 @@ BEGIN_FTR_SECTION
  END_FTR_SECTION_IFSET(CPU_FTR_VSX)
  #endif
         mtmsrd  r8
-       isync
         addi    r3,r3,VCPU_FPRS
         bl      store_fp_state
  #ifdef CONFIG_ALTIVEC
@@ -2491,7 +2257,6 @@ BEGIN_FTR_SECTION
  END_FTR_SECTION_IFSET(CPU_FTR_VSX)
  #endif
         mtmsrd  r8
-       isync
         addi    r3,r4,VCPU_FPRS
         bl      load_fp_state
  #ifdef CONFIG_ALTIVEC
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c

index bfb8035314e3c312f90daa56bc391ff90d0bd816..bd6ab1672ae64a9261282c800a6160b20356a950 100644 (file)
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -352,14 +352,6 @@ static inline u32 inst_get_field(u32 inst, int msb, int lsb)
         return kvmppc_get_field(inst, msb + 32, lsb + 32);
  }
  
-/*
- * Replaces inst bits with ordering according to spec.
- */
-static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value)
-{
-       return kvmppc_set_field(inst, msb + 32, lsb + 32, value);
-}
-
  bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
  {
         if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c

index cf2eb16846d1179949df7619c224f802ca63b6d6..f57383941d0368e64ab99cc200e70e1ee4e92f53 100644 (file)
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -644,11 +644,6 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
         return r;
  }
  
-static inline int get_fpr_index(int i)
-{
-       return i * TS_FPRWIDTH;
-}
-
  /* Give up external provider (FPU, Altivec, VSX) */
  void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
  {
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c

index eaeb78047fb87d4694661fc2083ef3d1f161eac5..807351f76f84e899d560d2766e5163583d2ca910 100644 (file)
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -613,10 +613,25 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
          * there might be a previously-rejected interrupt needing
          * to be resent.
          *
+        * ICP state: Check_IPI
+        *
          * If the CPPR is less favored, then we might be replacing
-        * an interrupt, and thus need to possibly reject it as in
+        * an interrupt, and thus need to possibly reject it.
          *
-        * ICP state: Check_IPI
+        * ICP state: IPI
+        *
+        * Besides rejecting any pending interrupts, we also
+        * update XISR and pending_pri to mark IPI as pending.
+        *
+        * PAPR does not describe this state, but if the MFRR is being
+        * made less favored than its earlier value, there might be
+        * a previously-rejected interrupt needing to be resent.
+        * Ideally, we would want to resend only if
+        *      prio(pending_interrupt) < mfrr &&
+        *      prio(pending_interrupt) < cppr
+        * where pending interrupt is the one that was rejected. But
+        * we don't have that state, so we simply trigger a resend
+        * whenever the MFRR is made less favored.
          */
         do {
                 old_state = new_state = ACCESS_ONCE(icp->state);
@@ -629,13 +644,14 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
                 resend = false;
                 if (mfrr < new_state.cppr) {
                         /* Reject a pending interrupt if not an IPI */
-                       if (mfrr <= new_state.pending_pri)
+                       if (mfrr <= new_state.pending_pri) {
                                 reject = new_state.xisr;
-                       new_state.pending_pri = mfrr;
-                       new_state.xisr = XICS_IPI;
+                               new_state.pending_pri = mfrr;
+                               new_state.xisr = XICS_IPI;
+                       }
                 }
  
-               if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+               if (mfrr > old_state.mfrr) {
                         resend = new_state.need_resend;
                         new_state.need_resend = 0;
                 }
@@ -789,7 +805,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
         if (icp->rm_action & XICS_RM_KICK_VCPU)
                 kvmppc_fast_vcpu_kick(icp->rm_kick_target);
         if (icp->rm_action & XICS_RM_CHECK_RESEND)
-               icp_check_resend(xics, icp);
+               icp_check_resend(xics, icp->rm_resend_icp);
         if (icp->rm_action & XICS_RM_REJECT)
                 icp_deliver_irq(xics, icp, icp->rm_reject);
         if (icp->rm_action & XICS_RM_NOTIFY_EOI)
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h

index e8aaa7a3f209cc2ac6fdb5aa65f9bf4dd1c6d98f..73f0f2723c074ec2aa292bc4b0f20958666a70c8 100644 (file)
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -74,6 +74,7 @@ struct kvmppc_icp {
  #define XICS_RM_NOTIFY_EOI     0x8
         u32 rm_action;
         struct kvm_vcpu *rm_kick_target;
+       struct kvmppc_icp *rm_resend_icp;
         u32  rm_reject;
         u32  rm_eoied_irq;
  
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c

index 16095841afe14e6b8b5c13338267f6541f0be2bd..b29ce752c7d69027b1b07d42b0d9dc6d0ce2cf82 100644 (file)
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -78,7 +78,7 @@ static inline int local_sid_setup_one(struct id *entry)
  
         sid = __this_cpu_inc_return(pcpu_last_used_sid);
         if (sid < NUM_TIDS) {
-               __this_cpu_write(pcpu_sids)entry[sid], entry);
+               __this_cpu_write(pcpu_sids.entry[sid], entry);
                 entry->val = sid;
                 entry->pentry = this_cpu_ptr(&pcpu_sids.entry[sid]);
                 ret = sid;
@@ -299,14 +299,6 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
         kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
  }
  
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
  static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu)
  {
         kvmppc_booke_vcpu_load(vcpu, cpu);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c

index c1f8f53cd31224743be63e5969df08e4d67d07b6..c45eaab752b0d074f3b60689af861ccb75110269 100644 (file)
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -527,18 +527,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                         r = 0;
                 break;
         case KVM_CAP_PPC_RMA:
-               r = hv_enabled;
-               /* PPC970 requires an RMA */
-               if (r && cpu_has_feature(CPU_FTR_ARCH_201))
-                       r = 2;
+               r = 0;
                 break;
  #endif
         case KVM_CAP_SYNC_MMU:
  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-               if (hv_enabled)
-                       r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
-               else
-                       r = 0;
+               r = hv_enabled;
  #elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
                 r = 1;
  #else
diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h

new file mode 100644 (file)

index 0000000..f647ce0
--- /dev/null
+++ b/arch/powerpc/kvm/trace_book3s.h
@@ -0,0 +1,32 @@
+#if !defined(_TRACE_KVM_BOOK3S_H)
+#define _TRACE_KVM_BOOK3S_H
+
+/*
+ * Common defines used by the trace macros in trace_pr.h and trace_hv.h
+ */
+
+#define kvm_trace_symbol_exit \
+       {0x100, "SYSTEM_RESET"}, \
+       {0x200, "MACHINE_CHECK"}, \
+       {0x300, "DATA_STORAGE"}, \
+       {0x380, "DATA_SEGMENT"}, \
+       {0x400, "INST_STORAGE"}, \
+       {0x480, "INST_SEGMENT"}, \
+       {0x500, "EXTERNAL"}, \
+       {0x501, "EXTERNAL_LEVEL"}, \
+       {0x502, "EXTERNAL_HV"}, \
+       {0x600, "ALIGNMENT"}, \
+       {0x700, "PROGRAM"}, \
+       {0x800, "FP_UNAVAIL"}, \
+       {0x900, "DECREMENTER"}, \
+       {0x980, "HV_DECREMENTER"}, \
+       {0xc00, "SYSCALL"}, \
+       {0xd00, "TRACE"}, \
+       {0xe00, "H_DATA_STORAGE"}, \
+       {0xe20, "H_INST_STORAGE"}, \
+       {0xe40, "H_EMUL_ASSIST"}, \
+       {0xf00, "PERFMON"}, \
+       {0xf20, "ALTIVEC"}, \
+       {0xf40, "VSX"}
+
+#endif
diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h

index f7537cf26ce70ac5fd17653d47bbbda5fb1e57b8..7ec534d1db9f4ecf1f71a0a3cc4639657ae5a7c0 100644 (file)
--- a/arch/powerpc/kvm/trace_booke.h
+++ b/arch/powerpc/kvm/trace_booke.h
@@ -151,6 +151,47 @@ TRACE_EVENT(kvm_booke206_ref_release,
                 __entry->pfn, __entry->flags)
  );
  
+#ifdef CONFIG_SPE_POSSIBLE
+#define kvm_trace_symbol_irqprio_spe \
+       {BOOKE_IRQPRIO_SPE_UNAVAIL, "SPE_UNAVAIL"}, \
+       {BOOKE_IRQPRIO_SPE_FP_DATA, "SPE_FP_DATA"}, \
+       {BOOKE_IRQPRIO_SPE_FP_ROUND, "SPE_FP_ROUND"},
+#else
+#define kvm_trace_symbol_irqprio_spe
+#endif
+
+#ifdef CONFIG_PPC_E500MC
+#define kvm_trace_symbol_irqprio_e500mc \
+       {BOOKE_IRQPRIO_ALTIVEC_UNAVAIL, "ALTIVEC_UNAVAIL"}, \
+       {BOOKE_IRQPRIO_ALTIVEC_ASSIST, "ALTIVEC_ASSIST"},
+#else
+#define kvm_trace_symbol_irqprio_e500mc
+#endif
+
+#define kvm_trace_symbol_irqprio /* __print_symbolic() table */ \
+       kvm_trace_symbol_irqprio_spe \
+       kvm_trace_symbol_irqprio_e500mc \
+       {BOOKE_IRQPRIO_DATA_STORAGE, "DATA_STORAGE"}, \
+       {BOOKE_IRQPRIO_INST_STORAGE, "INST_STORAGE"}, \
+       {BOOKE_IRQPRIO_ALIGNMENT, "ALIGNMENT"}, \
+       {BOOKE_IRQPRIO_PROGRAM, "PROGRAM"}, \
+       {BOOKE_IRQPRIO_FP_UNAVAIL, "FP_UNAVAIL"}, \
+       {BOOKE_IRQPRIO_SYSCALL, "SYSCALL"}, \
+       {BOOKE_IRQPRIO_AP_UNAVAIL, "AP_UNAVAIL"}, \
+       {BOOKE_IRQPRIO_DTLB_MISS, "DTLB_MISS"}, \
+       {BOOKE_IRQPRIO_ITLB_MISS, "ITLB_MISS"}, \
+       {BOOKE_IRQPRIO_MACHINE_CHECK, "MACHINE_CHECK"}, \
+       {BOOKE_IRQPRIO_DEBUG, "DEBUG"}, \
+       {BOOKE_IRQPRIO_CRITICAL, "CRITICAL"}, \
+       {BOOKE_IRQPRIO_WATCHDOG, "WATCHDOG"}, \
+       {BOOKE_IRQPRIO_EXTERNAL, "EXTERNAL"}, \
+       {BOOKE_IRQPRIO_FIT, "FIT"}, \
+       {BOOKE_IRQPRIO_DECREMENTER, "DECREMENTER"}, \
+       {BOOKE_IRQPRIO_PERFORMANCE_MONITOR, "PERFORMANCE_MONITOR"}, \
+       {BOOKE_IRQPRIO_EXTERNAL_LEVEL, "EXTERNAL_LEVEL"}, \
+       {BOOKE_IRQPRIO_DBELL, "DBELL"}, \
+       {BOOKE_IRQPRIO_DBELL_CRIT, "DBELL_CRIT"}
+
  TRACE_EVENT(kvm_booke_queue_irqprio,
         TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
         TP_ARGS(vcpu, priority),
@@ -167,8 +208,10 @@ TRACE_EVENT(kvm_booke_queue_irqprio,
                 __entry->pending        = vcpu->arch.pending_exceptions;
         ),
  
-       TP_printk("vcpu=%x prio=%x pending=%lx",
-               __entry->cpu_nr, __entry->priority, __entry->pending)
+       TP_printk("vcpu=%x prio=%s pending=%lx",
+               __entry->cpu_nr,
+               __print_symbolic(__entry->priority, kvm_trace_symbol_irqprio),
+               __entry->pending)
  );
  
  #endif
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h

new file mode 100644 (file)

index 0000000..33d9daf
--- /dev/null
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -0,0 +1,477 @@
+#if !defined(_TRACE_KVM_HV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_HV_H
+
+#include <linux/tracepoint.h>
+#include "trace_book3s.h"
+#include <asm/hvcall.h>
+#include <asm/kvm_asm.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm_hv
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_hv
+
+#define kvm_trace_symbol_hcall /* hcall number -> name pairs; passed to __print_symbolic() by kvm_hcall_enter */ \
+       {H_REMOVE,                      "H_REMOVE"}, \
+       {H_ENTER,                       "H_ENTER"}, \
+       {H_READ,                        "H_READ"}, \
+       {H_CLEAR_MOD,                   "H_CLEAR_MOD"}, \
+       {H_CLEAR_REF,                   "H_CLEAR_REF"}, \
+       {H_PROTECT,                     "H_PROTECT"}, \
+       {H_GET_TCE,                     "H_GET_TCE"}, \
+       {H_PUT_TCE,                     "H_PUT_TCE"}, \
+       {H_SET_SPRG0,                   "H_SET_SPRG0"}, \
+       {H_SET_DABR,                    "H_SET_DABR"}, \
+       {H_PAGE_INIT,                   "H_PAGE_INIT"}, \
+       {H_SET_ASR,                     "H_SET_ASR"}, \
+       {H_ASR_ON,                      "H_ASR_ON"}, \
+       {H_ASR_OFF,                     "H_ASR_OFF"}, \
+       {H_LOGICAL_CI_LOAD,             "H_LOGICAL_CI_LOAD"}, \
+       {H_LOGICAL_CI_STORE,            "H_LOGICAL_CI_STORE"}, \
+       {H_LOGICAL_CACHE_LOAD,          "H_LOGICAL_CACHE_LOAD"}, \
+       {H_LOGICAL_CACHE_STORE,         "H_LOGICAL_CACHE_STORE"}, \
+       {H_LOGICAL_ICBI,                "H_LOGICAL_ICBI"}, \
+       {H_LOGICAL_DCBF,                "H_LOGICAL_DCBF"}, \
+       {H_GET_TERM_CHAR,               "H_GET_TERM_CHAR"}, \
+       {H_PUT_TERM_CHAR,               "H_PUT_TERM_CHAR"}, \
+       {H_REAL_TO_LOGICAL,             "H_REAL_TO_LOGICAL"}, \
+       {H_HYPERVISOR_DATA,             "H_HYPERVISOR_DATA"}, \
+       {H_EOI,                         "H_EOI"}, \
+       {H_CPPR,                        "H_CPPR"}, \
+       {H_IPI,                         "H_IPI"}, \
+       {H_IPOLL,                       "H_IPOLL"}, \
+       {H_XIRR,                        "H_XIRR"}, \
+       {H_PERFMON,                     "H_PERFMON"}, \
+       {H_MIGRATE_DMA,                 "H_MIGRATE_DMA"}, \
+       {H_REGISTER_VPA,                "H_REGISTER_VPA"}, \
+       {H_CEDE,                        "H_CEDE"}, \
+       {H_CONFER,                      "H_CONFER"}, \
+       {H_PROD,                        "H_PROD"}, \
+       {H_GET_PPP,                     "H_GET_PPP"}, \
+       {H_SET_PPP,                     "H_SET_PPP"}, \
+       {H_PURR,                        "H_PURR"}, \
+       {H_PIC,                         "H_PIC"}, \
+       {H_REG_CRQ,                     "H_REG_CRQ"}, \
+       {H_FREE_CRQ,                    "H_FREE_CRQ"}, \
+       {H_VIO_SIGNAL,                  "H_VIO_SIGNAL"}, \
+       {H_SEND_CRQ,                    "H_SEND_CRQ"}, \
+       {H_COPY_RDMA,                   "H_COPY_RDMA"}, \
+       {H_REGISTER_LOGICAL_LAN,        "H_REGISTER_LOGICAL_LAN"}, \
+       {H_FREE_LOGICAL_LAN,            "H_FREE_LOGICAL_LAN"}, \
+       {H_ADD_LOGICAL_LAN_BUFFER,      "H_ADD_LOGICAL_LAN_BUFFER"}, \
+       {H_SEND_LOGICAL_LAN,            "H_SEND_LOGICAL_LAN"}, \
+       {H_BULK_REMOVE,                 "H_BULK_REMOVE"}, \
+       {H_MULTICAST_CTRL,              "H_MULTICAST_CTRL"}, \
+       {H_SET_XDABR,                   "H_SET_XDABR"}, \
+       {H_STUFF_TCE,                   "H_STUFF_TCE"}, \
+       {H_PUT_TCE_INDIRECT,            "H_PUT_TCE_INDIRECT"}, \
+       {H_CHANGE_LOGICAL_LAN_MAC,      "H_CHANGE_LOGICAL_LAN_MAC"}, \
+       {H_VTERM_PARTNER_INFO,          "H_VTERM_PARTNER_INFO"}, \
+       {H_REGISTER_VTERM,              "H_REGISTER_VTERM"}, \
+       {H_FREE_VTERM,                  "H_FREE_VTERM"}, \
+       {H_RESET_EVENTS,                "H_RESET_EVENTS"}, \
+       {H_ALLOC_RESOURCE,              "H_ALLOC_RESOURCE"}, \
+       {H_FREE_RESOURCE,               "H_FREE_RESOURCE"}, \
+       {H_MODIFY_QP,                   "H_MODIFY_QP"}, \
+       {H_QUERY_QP,                    "H_QUERY_QP"}, \
+       {H_REREGISTER_PMR,              "H_REREGISTER_PMR"}, \
+       {H_REGISTER_SMR,                "H_REGISTER_SMR"}, \
+       {H_QUERY_MR,                    "H_QUERY_MR"}, \
+       {H_QUERY_MW,                    "H_QUERY_MW"}, \
+       {H_QUERY_HCA,                   "H_QUERY_HCA"}, \
+       {H_QUERY_PORT,                  "H_QUERY_PORT"}, \
+       {H_MODIFY_PORT,                 "H_MODIFY_PORT"}, \
+       {H_DEFINE_AQP1,                 "H_DEFINE_AQP1"}, \
+       {H_GET_TRACE_BUFFER,            "H_GET_TRACE_BUFFER"}, \
+       {H_DEFINE_AQP0,                 "H_DEFINE_AQP0"}, \
+       {H_RESIZE_MR,                   "H_RESIZE_MR"}, \
+       {H_ATTACH_MCQP,                 "H_ATTACH_MCQP"}, \
+       {H_DETACH_MCQP,                 "H_DETACH_MCQP"}, \
+       {H_CREATE_RPT,                  "H_CREATE_RPT"}, \
+       {H_REMOVE_RPT,                  "H_REMOVE_RPT"}, \
+       {H_REGISTER_RPAGES,             "H_REGISTER_RPAGES"}, \
+       {H_DISABLE_AND_GETC,            "H_DISABLE_AND_GETC"}, \
+       {H_ERROR_DATA,                  "H_ERROR_DATA"}, \
+       {H_GET_HCA_INFO,                "H_GET_HCA_INFO"}, \
+       {H_GET_PERF_COUNT,              "H_GET_PERF_COUNT"}, \
+       {H_MANAGE_TRACE,                "H_MANAGE_TRACE"}, \
+       {H_FREE_LOGICAL_LAN_BUFFER,     "H_FREE_LOGICAL_LAN_BUFFER"}, \
+       {H_QUERY_INT_STATE,             "H_QUERY_INT_STATE"}, \
+       {H_POLL_PENDING,                "H_POLL_PENDING"}, \
+       {H_ILLAN_ATTRIBUTES,            "H_ILLAN_ATTRIBUTES"}, \
+       {H_MODIFY_HEA_QP,               "H_MODIFY_HEA_QP"}, \
+       {H_QUERY_HEA_QP,                "H_QUERY_HEA_QP"}, \
+       {H_QUERY_HEA,                   "H_QUERY_HEA"}, \
+       {H_QUERY_HEA_PORT,              "H_QUERY_HEA_PORT"}, \
+       {H_MODIFY_HEA_PORT,             "H_MODIFY_HEA_PORT"}, \
+       {H_REG_BCMC,                    "H_REG_BCMC"}, \
+       {H_DEREG_BCMC,                  "H_DEREG_BCMC"}, \
+       {H_REGISTER_HEA_RPAGES,         "H_REGISTER_HEA_RPAGES"}, \
+       {H_DISABLE_AND_GET_HEA,         "H_DISABLE_AND_GET_HEA"}, \
+       {H_GET_HEA_INFO,                "H_GET_HEA_INFO"}, \
+       {H_ALLOC_HEA_RESOURCE,          "H_ALLOC_HEA_RESOURCE"}, \
+       {H_ADD_CONN,                    "H_ADD_CONN"}, \
+       {H_DEL_CONN,                    "H_DEL_CONN"}, \
+       {H_JOIN,                        "H_JOIN"}, \
+       {H_VASI_STATE,                  "H_VASI_STATE"}, \
+       {H_ENABLE_CRQ,                  "H_ENABLE_CRQ"}, \
+       {H_GET_EM_PARMS,                "H_GET_EM_PARMS"}, \
+       {H_SET_MPP,                     "H_SET_MPP"}, \
+       {H_GET_MPP,                     "H_GET_MPP"}, \
+       {H_HOME_NODE_ASSOCIATIVITY,     "H_HOME_NODE_ASSOCIATIVITY"}, \
+       {H_BEST_ENERGY,                 "H_BEST_ENERGY"}, \
+       {H_XIRR_X,                      "H_XIRR_X"}, \
+       {H_RANDOM,                      "H_RANDOM"}, \
+       {H_COP,                         "H_COP"}, \
+       {H_GET_MPP_X,                   "H_GET_MPP_X"}, \
+       {H_SET_MODE,                    "H_SET_MODE"}, \
+       {H_RTAS,                        "H_RTAS"}
+
+#define kvm_trace_symbol_kvmret /* RESUME_* code -> name pairs; printed as "ret=" by kvm_hcall_exit */ \
+       {RESUME_GUEST,                  "RESUME_GUEST"}, \
+       {RESUME_GUEST_NV,               "RESUME_GUEST_NV"}, \
+       {RESUME_HOST,                   "RESUME_HOST"}, \
+       {RESUME_HOST_NV,                "RESUME_HOST_NV"}
+
+#define kvm_trace_symbol_hcall_rc /* hcall return code -> name pairs; printed as "hcall_rc=" by kvm_hcall_exit */ \
+       {H_SUCCESS,                     "H_SUCCESS"}, \
+       {H_BUSY,                        "H_BUSY"}, \
+       {H_CLOSED,                      "H_CLOSED"}, \
+       {H_NOT_AVAILABLE,               "H_NOT_AVAILABLE"}, \
+       {H_CONSTRAINED,                 "H_CONSTRAINED"}, \
+       {H_PARTIAL,                     "H_PARTIAL"}, \
+       {H_IN_PROGRESS,                 "H_IN_PROGRESS"}, \
+       {H_PAGE_REGISTERED,             "H_PAGE_REGISTERED"}, \
+       {H_PARTIAL_STORE,               "H_PARTIAL_STORE"}, \
+       {H_PENDING,                     "H_PENDING"}, \
+       {H_CONTINUE,                    "H_CONTINUE"}, \
+       {H_LONG_BUSY_START_RANGE,       "H_LONG_BUSY_START_RANGE"}, \
+       {H_LONG_BUSY_ORDER_1_MSEC,      "H_LONG_BUSY_ORDER_1_MSEC"}, \
+       {H_LONG_BUSY_ORDER_10_MSEC,     "H_LONG_BUSY_ORDER_10_MSEC"}, \
+       {H_LONG_BUSY_ORDER_100_MSEC,    "H_LONG_BUSY_ORDER_100_MSEC"}, \
+       {H_LONG_BUSY_ORDER_1_SEC,       "H_LONG_BUSY_ORDER_1_SEC"}, \
+       {H_LONG_BUSY_ORDER_10_SEC,      "H_LONG_BUSY_ORDER_10_SEC"}, \
+       {H_LONG_BUSY_ORDER_100_SEC,     "H_LONG_BUSY_ORDER_100_SEC"}, \
+       {H_LONG_BUSY_END_RANGE,         "H_LONG_BUSY_END_RANGE"}, \
+       {H_TOO_HARD,                    "H_TOO_HARD"}, \
+       {H_HARDWARE,                    "H_HARDWARE"}, \
+       {H_FUNCTION,                    "H_FUNCTION"}, \
+       {H_PRIVILEGE,                   "H_PRIVILEGE"}, \
+       {H_PARAMETER,                   "H_PARAMETER"}, \
+       {H_BAD_MODE,                    "H_BAD_MODE"}, \
+       {H_PTEG_FULL,                   "H_PTEG_FULL"}, \
+       {H_NOT_FOUND,                   "H_NOT_FOUND"}, \
+       {H_RESERVED_DABR,               "H_RESERVED_DABR"}, \
+       {H_NO_MEM,                      "H_NO_MEM"}, \
+       {H_AUTHORITY,                   "H_AUTHORITY"}, \
+       {H_PERMISSION,                  "H_PERMISSION"}, \
+       {H_DROPPED,                     "H_DROPPED"}, \
+       {H_SOURCE_PARM,                 "H_SOURCE_PARM"}, \
+       {H_DEST_PARM,                   "H_DEST_PARM"}, \
+       {H_REMOTE_PARM,                 "H_REMOTE_PARM"}, \
+       {H_RESOURCE,                    "H_RESOURCE"}, \
+       {H_ADAPTER_PARM,                "H_ADAPTER_PARM"}, \
+       {H_RH_PARM,                     "H_RH_PARM"}, \
+       {H_RCQ_PARM,                    "H_RCQ_PARM"}, \
+       {H_SCQ_PARM,                    "H_SCQ_PARM"}, \
+       {H_EQ_PARM,                     "H_EQ_PARM"}, \
+       {H_RT_PARM,                     "H_RT_PARM"}, \
+       {H_ST_PARM,                     "H_ST_PARM"}, \
+       {H_SIGT_PARM,                   "H_SIGT_PARM"}, \
+       {H_TOKEN_PARM,                  "H_TOKEN_PARM"}, \
+       {H_MLENGTH_PARM,                "H_MLENGTH_PARM"}, \
+       {H_MEM_PARM,                    "H_MEM_PARM"}, \
+       {H_MEM_ACCESS_PARM,             "H_MEM_ACCESS_PARM"}, \
+       {H_ATTR_PARM,                   "H_ATTR_PARM"}, \
+       {H_PORT_PARM,                   "H_PORT_PARM"}, \
+       {H_MCG_PARM,                    "H_MCG_PARM"}, \
+       {H_VL_PARM,                     "H_VL_PARM"}, \
+       {H_TSIZE_PARM,                  "H_TSIZE_PARM"}, \
+       {H_TRACE_PARM,                  "H_TRACE_PARM"}, \
+       {H_MASK_PARM,                   "H_MASK_PARM"}, \
+       {H_MCG_FULL,                    "H_MCG_FULL"}, \
+       {H_ALIAS_EXIST,                 "H_ALIAS_EXIST"}, \
+       {H_P_COUNTER,                   "H_P_COUNTER"}, \
+       {H_TABLE_FULL,                  "H_TABLE_FULL"}, \
+       {H_ALT_TABLE,                   "H_ALT_TABLE"}, \
+       {H_MR_CONDITION,                "H_MR_CONDITION"}, \
+       {H_NOT_ENOUGH_RESOURCES,        "H_NOT_ENOUGH_RESOURCES"}, \
+       {H_R_STATE,                     "H_R_STATE"}, \
+       {H_RESCINDED,                   "H_RESCINDED"}, \
+       {H_P2,                          "H_P2"}, \
+       {H_P3,                          "H_P3"}, \
+       {H_P4,                          "H_P4"}, \
+       {H_P5,                          "H_P5"}, \
+       {H_P6,                          "H_P6"}, \
+       {H_P7,                          "H_P7"}, \
+       {H_P8,                          "H_P8"}, \
+       {H_P9,                          "H_P9"}, \
+       {H_TOO_BIG,                     "H_TOO_BIG"}, \
+       {H_OVERLAP,                     "H_OVERLAP"}, \
+       {H_INTERRUPT,                   "H_INTERRUPT"}, \
+       {H_BAD_DATA,                    "H_BAD_DATA"}, \
+       {H_NOT_ACTIVE,                  "H_NOT_ACTIVE"}, \
+       {H_SG_LIST,                     "H_SG_LIST"}, \
+       {H_OP_MODE,                     "H_OP_MODE"}, \
+       {H_COP_HW,                      "H_COP_HW"}, \
+       {H_UNSUPPORTED_FLAG_START,      "H_UNSUPPORTED_FLAG_START"}, \
+       {H_UNSUPPORTED_FLAG_END,        "H_UNSUPPORTED_FLAG_END"}, \
+       {H_MULTI_THREADS_ACTIVE,        "H_MULTI_THREADS_ACTIVE"}, \
+       {H_OUTSTANDING_COP_OPS,         "H_OUTSTANDING_COP_OPS"}
+
+TRACE_EVENT(kvm_guest_enter, /* records vcpu id, guest pc, pending exceptions and the ceded flag */
+       TP_PROTO(struct kvm_vcpu *vcpu),
+       TP_ARGS(vcpu),
+
+       TP_STRUCT__entry(
+               __field(int,            vcpu_id)
+               __field(unsigned long,  pc)
+               __field(unsigned long,  pending_exceptions)
+               __field(u8,             ceded)
+       ),
+
+       TP_fast_assign( /* copy the values out of *vcpu into the trace record */
+               __entry->vcpu_id        = vcpu->vcpu_id;
+               __entry->pc             = kvmppc_get_pc(vcpu);
+               __entry->ceded          = vcpu->arch.ceded;
+               __entry->pending_exceptions  = vcpu->arch.pending_exceptions;
+       ),
+
+       TP_printk("VCPU %d: pc=0x%lx pexcp=0x%lx ceded=%d", /* pexcp = pending_exceptions */
+                       __entry->vcpu_id,
+                       __entry->pc,
+                       __entry->pending_exceptions, __entry->ceded)
+);
+
+TRACE_EVENT(kvm_guest_exit, /* records vcpu id, trap number, guest pc, msr and the ceded flag */
+       TP_PROTO(struct kvm_vcpu *vcpu),
+       TP_ARGS(vcpu),
+
+       TP_STRUCT__entry(
+               __field(int,            vcpu_id)
+               __field(int,            trap)
+               __field(unsigned long,  pc)
+               __field(unsigned long,  msr)
+               __field(u8,             ceded)
+       ),
+
+       TP_fast_assign( /* copy the values out of *vcpu into the trace record */
+               __entry->vcpu_id = vcpu->vcpu_id;
+               __entry->trap    = vcpu->arch.trap;
+               __entry->ceded   = vcpu->arch.ceded;
+               __entry->pc      = kvmppc_get_pc(vcpu);
+               __entry->msr     = vcpu->arch.shregs.msr;
+       ),
+
+       TP_printk("VCPU %d: trap=%s pc=0x%lx msr=0x%lx, ceded=%d",
+               __entry->vcpu_id,
+               __print_symbolic(__entry->trap, kvm_trace_symbol_exit), /* table not defined in this file; presumably from trace_book3s.h - confirm */
+               __entry->pc, __entry->msr, __entry->ceded
+       )
+);
+
+TRACE_EVENT(kvm_page_fault_enter, /* records three words from hptep, the faulting ea/dsisr and memslot info */
+       TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep,
+                struct kvm_memory_slot *memslot, unsigned long ea,
+                unsigned long dsisr),
+
+       TP_ARGS(vcpu, hptep, memslot, ea, dsisr),
+
+       TP_STRUCT__entry(
+               __field(int,            vcpu_id)
+               __field(unsigned long,  hpte_v)
+               __field(unsigned long,  hpte_r)
+               __field(unsigned long,  gpte_r)
+               __field(unsigned long,  ea)
+               __field(u64,            base_gfn)
+               __field(u32,            slot_flags)
+               __field(u32,            dsisr)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id  = vcpu->vcpu_id;
+               __entry->hpte_v   = hptep[0];
+               __entry->hpte_r   = hptep[1];
+               __entry->gpte_r   = hptep[2]; /* hptep must point at (at least) three words */
+               __entry->ea       = ea;
+               __entry->dsisr    = dsisr; /* stored in a u32 field: the unsigned long argument is narrowed */
+               __entry->base_gfn = memslot ? memslot->base_gfn : -1UL; /* memslot may be NULL; -1UL is recorded then */
+               __entry->slot_flags = memslot ? memslot->flags : 0; /* 0 is recorded when memslot is NULL */
+       ),
+
+       TP_printk("VCPU %d: hpte=0x%lx:0x%lx guest=0x%lx ea=0x%lx,%x slot=0x%llx,0x%x", /* ea=<ea>,<dsisr> slot=<base_gfn>,<flags> */
+                  __entry->vcpu_id,
+                  __entry->hpte_v, __entry->hpte_r, __entry->gpte_r,
+                  __entry->ea, __entry->dsisr,
+                  __entry->base_gfn, __entry->slot_flags)
+);
+
+TRACE_EVENT(kvm_page_fault_exit, /* records the first two words at hptep and the ret value passed in */
+       TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep, long ret),
+
+       TP_ARGS(vcpu, hptep, ret),
+
+       TP_STRUCT__entry(
+               __field(int,            vcpu_id)
+               __field(unsigned long,  hpte_v)
+               __field(unsigned long,  hpte_r)
+               __field(long,           ret)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id  = vcpu->vcpu_id;
+               __entry->hpte_v = hptep[0];
+               __entry->hpte_r = hptep[1];
+               __entry->ret = ret;
+       ),
+
+       TP_printk("VCPU %d: hpte=0x%lx:0x%lx ret=0x%lx", /* ret is a signed long but is printed as hex */
+                  __entry->vcpu_id,
+                  __entry->hpte_v, __entry->hpte_r, __entry->ret)
+);
+
+TRACE_EVENT(kvm_hcall_enter, /* records the hcall number (GPR3) and GPR4-GPR7 of the vcpu */
+       TP_PROTO(struct kvm_vcpu *vcpu),
+
+       TP_ARGS(vcpu),
+
+       TP_STRUCT__entry(
+               __field(int,            vcpu_id)
+               __field(unsigned long,  req)
+               __field(unsigned long,  gpr4)
+               __field(unsigned long,  gpr5)
+               __field(unsigned long,  gpr6)
+               __field(unsigned long,  gpr7)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id  = vcpu->vcpu_id;
+               __entry->req   = kvmppc_get_gpr(vcpu, 3); /* GPR3 is printed as the hcall name below */
+               __entry->gpr4  = kvmppc_get_gpr(vcpu, 4);
+               __entry->gpr5  = kvmppc_get_gpr(vcpu, 5);
+               __entry->gpr6  = kvmppc_get_gpr(vcpu, 6);
+               __entry->gpr7  = kvmppc_get_gpr(vcpu, 7);
+       ),
+
+       TP_printk("VCPU %d: hcall=%s GPR4-7=0x%lx,0x%lx,0x%lx,0x%lx",
+                  __entry->vcpu_id,
+                  __print_symbolic(__entry->req, kvm_trace_symbol_hcall), /* name looked up in the table defined in this file */
+                  __entry->gpr4, __entry->gpr5, __entry->gpr6, __entry->gpr7)
+);
+
+TRACE_EVENT(kvm_hcall_exit, /* records the ret argument and the hcall return code read from GPR3 */
+       TP_PROTO(struct kvm_vcpu *vcpu, int ret),
+
+       TP_ARGS(vcpu, ret),
+
+       TP_STRUCT__entry(
+               __field(int,            vcpu_id)
+               __field(unsigned long,  ret) /* wider than the int argument it is assigned from */
+               __field(unsigned long,  hcall_rc)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id  = vcpu->vcpu_id;
+               __entry->ret      = ret;
+               __entry->hcall_rc = kvmppc_get_gpr(vcpu, 3);
+       ),
+
+       TP_printk("VCPU %d: ret=%s hcall_rc=%s",
+                  __entry->vcpu_id,
+                  __print_symbolic(__entry->ret, kvm_trace_symbol_kvmret),
+                  __print_symbolic(__entry->ret & RESUME_FLAG_HOST ? /* parsed as (ret & RESUME_FLAG_HOST) ? ... : ... */
+                                       H_TOO_HARD : __entry->hcall_rc, /* flag set: show H_TOO_HARD instead of the GPR3 value */
+                                       kvm_trace_symbol_hcall_rc))
+);
+
+TRACE_EVENT(kvmppc_run_core, /* records runner vcpu id, runnable count and tgid; 'where' picks the Enter/Exit label */
+       TP_PROTO(struct kvmppc_vcore *vc, int where),
+
+       TP_ARGS(vc, where),
+
+       TP_STRUCT__entry(
+               __field(int,    n_runnable)
+               __field(int,    runner_vcpu)
+               __field(int,    where)
+               __field(pid_t,  tgid)
+       ),
+
+       TP_fast_assign(
+               __entry->runner_vcpu    = vc->runner->vcpu_id; /* dereferences vc->runner, so it must be non-NULL here */
+               __entry->n_runnable     = vc->n_runnable;
+               __entry->where          = where;
+               __entry->tgid           = current->tgid;
+       ),
+
+       TP_printk("%s runner_vcpu=%d runnable=%d tgid=%d", /* key=value, same layout as kvmppc_vcore_blocked */
+                   __entry->where ? "Exit" : "Enter", /* where == 0 prints "Enter", anything else "Exit" */
+                   __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_vcore_blocked, /* records runner vcpu id, runnable count and tgid; 'where' picks the Enter/Exit label */
+       TP_PROTO(struct kvmppc_vcore *vc, int where),
+
+       TP_ARGS(vc, where),
+
+       TP_STRUCT__entry(
+               __field(int,    n_runnable)
+               __field(int,    runner_vcpu)
+               __field(int,    where)
+               __field(pid_t,  tgid)
+       ),
+
+       TP_fast_assign(
+               __entry->runner_vcpu = vc->runner->vcpu_id; /* dereferences vc->runner, so it must be non-NULL here */
+               __entry->n_runnable  = vc->n_runnable;
+               __entry->where       = where;
+               __entry->tgid        = current->tgid;
+       ),
+
+       TP_printk("%s runner_vcpu=%d runnable=%d tgid=%d",
+                  __entry->where ? "Exit" : "Enter", /* where == 0 prints "Enter", anything else "Exit" */
+                  __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_run_vcpu_enter, /* records the vcpu id and the tgid of the current task */
+       TP_PROTO(struct kvm_vcpu *vcpu),
+
+       TP_ARGS(vcpu),
+
+       TP_STRUCT__entry(
+               __field(int,            vcpu_id)
+               __field(pid_t,          tgid)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id  = vcpu->vcpu_id;
+               __entry->tgid     = current->tgid; /* taken from 'current', not from the vcpu */
+       ),
+
+       TP_printk("VCPU %d: tgid=%d", __entry->vcpu_id, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_run_vcpu_exit, /* records the vcpu id, run->exit_reason and vcpu->arch.ret */
+       TP_PROTO(struct kvm_vcpu *vcpu, struct kvm_run *run),
+
+       TP_ARGS(vcpu, run),
+
+       TP_STRUCT__entry(
+               __field(int,            vcpu_id)
+               __field(int,            exit)
+               __field(int,            ret)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id  = vcpu->vcpu_id;
+               __entry->exit     = run->exit_reason;
+               __entry->ret      = vcpu->arch.ret;
+       ),
+
+       TP_printk("VCPU %d: exit=%d, ret=%d", /* both values are printed as plain decimal, not symbolic names */
+                       __entry->vcpu_id, __entry->exit, __entry->ret)
+);
+
+#endif /* _TRACE_KVM_HV_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h

index e1357cd8dc1f48a50eb69a83efd209ce586af002..810507cb688aaa3aa6d3b01c8e0f39e8df82a09f 100644 (file)
--- a/arch/powerpc/kvm/trace_pr.h
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -3,36 +3,13 @@
  #define _TRACE_KVM_PR_H
  
  #include <linux/tracepoint.h>
+#include "trace_book3s.h"
  
  #undef TRACE_SYSTEM
  #define TRACE_SYSTEM kvm_pr
  #define TRACE_INCLUDE_PATH .
  #define TRACE_INCLUDE_FILE trace_pr
  
-#define kvm_trace_symbol_exit \
-       {0x100, "SYSTEM_RESET"}, \
-       {0x200, "MACHINE_CHECK"}, \
-       {0x300, "DATA_STORAGE"}, \
-       {0x380, "DATA_SEGMENT"}, \
-       {0x400, "INST_STORAGE"}, \
-       {0x480, "INST_SEGMENT"}, \
-       {0x500, "EXTERNAL"}, \
-       {0x501, "EXTERNAL_LEVEL"}, \
-       {0x502, "EXTERNAL_HV"}, \
-       {0x600, "ALIGNMENT"}, \
-       {0x700, "PROGRAM"}, \
-       {0x800, "FP_UNAVAIL"}, \
-       {0x900, "DECREMENTER"}, \
-       {0x980, "HV_DECREMENTER"}, \
-       {0xc00, "SYSCALL"}, \
-       {0xd00, "TRACE"}, \
-       {0xe00, "H_DATA_STORAGE"}, \
-       {0xe20, "H_INST_STORAGE"}, \
-       {0xe40, "H_EMUL_ASSIST"}, \
-       {0xf00, "PERFMON"}, \
-       {0xf20, "ALTIVEC"}, \
-       {0xf40, "VSX"}
-
  TRACE_EVENT(kvm_book3s_reenter,
         TP_PROTO(int r, struct kvm_vcpu *vcpu),
         TP_ARGS(r, vcpu),
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h

index 2175f911a73a0606fda558f7417243fcedaf07e1..9cba74d5d8533da4f2e4275897608b455486bb24 100644 (file)
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -123,7 +123,7 @@ struct kvm_s390_sie_block {
  #define ICPT_PARTEXEC  0x38
  #define ICPT_IOINST    0x40
         __u8    icptcode;               /* 0x0050 */
-       __u8    reserved51;             /* 0x0051 */
+       __u8    icptstatus;             /* 0x0051 */
         __u16   ihcpu;                  /* 0x0052 */
         __u8    reserved54[2];          /* 0x0054 */
         __u16   ipa;                    /* 0x0056 */
@@ -226,10 +226,17 @@ struct kvm_vcpu_stat {
         u32 instruction_sigp_sense_running;
         u32 instruction_sigp_external_call;
         u32 instruction_sigp_emergency;
+       u32 instruction_sigp_cond_emergency;
+       u32 instruction_sigp_start;
         u32 instruction_sigp_stop;
+       u32 instruction_sigp_stop_store_status;
+       u32 instruction_sigp_store_status;
         u32 instruction_sigp_arch;
         u32 instruction_sigp_prefix;
         u32 instruction_sigp_restart;
+       u32 instruction_sigp_init_cpu_reset;
+       u32 instruction_sigp_cpu_reset;
+       u32 instruction_sigp_unknown;
         u32 diagnose_10;
         u32 diagnose_44;
         u32 diagnose_9c;
@@ -288,6 +295,79 @@ struct kvm_vcpu_stat {
  #define PGM_PER                                0x80
  #define PGM_CRYPTO_OPERATION           0x119
  
+/* irq types in order of priority */
+enum irq_types { /* values are used as bit numbers by the IRQ_PEND_*_MASK macros in this header */
+       IRQ_PEND_MCHK_EX = 0,
+       IRQ_PEND_SVC,
+       IRQ_PEND_PROG,
+       IRQ_PEND_MCHK_REP,
+       IRQ_PEND_EXT_IRQ_KEY,
+       IRQ_PEND_EXT_MALFUNC,
+       IRQ_PEND_EXT_EMERGENCY,
+       IRQ_PEND_EXT_EXTERNAL,
+       IRQ_PEND_EXT_CLOCK_COMP,
+       IRQ_PEND_EXT_CPU_TIMER,
+       IRQ_PEND_EXT_TIMING,
+       IRQ_PEND_EXT_SERVICE,
+       IRQ_PEND_EXT_HOST,
+       IRQ_PEND_PFAULT_INIT,
+       IRQ_PEND_PFAULT_DONE,
+       IRQ_PEND_VIRTIO,
+       IRQ_PEND_IO_ISC_0,
+       IRQ_PEND_IO_ISC_1,
+       IRQ_PEND_IO_ISC_2,
+       IRQ_PEND_IO_ISC_3,
+       IRQ_PEND_IO_ISC_4,
+       IRQ_PEND_IO_ISC_5,
+       IRQ_PEND_IO_ISC_6,
+       IRQ_PEND_IO_ISC_7,
+       IRQ_PEND_SIGP_STOP,
+       IRQ_PEND_RESTART,
+       IRQ_PEND_SET_PREFIX,
+       IRQ_PEND_COUNT /* number of irq types listed above */
+};
+
+/*
+ * Repressible (non-floating) machine check interrupts
+ * subclass bits in MCIC
+ */
+#define MCHK_EXTD_BIT 58
+#define MCHK_DEGR_BIT 56
+#define MCHK_WARN_BIT 55
+#define MCHK_REP_MASK ((1UL << MCHK_DEGR_BIT) | \
+                      (1UL << MCHK_EXTD_BIT) | \
+                      (1UL << MCHK_WARN_BIT)) /* the *_BIT values are used as left-shift counts of 1UL */
+
+/* Exigent machine check interrupts subclass bits in MCIC */
+#define MCHK_SD_BIT 63
+#define MCHK_PD_BIT 62
+#define MCHK_EX_MASK ((1UL << MCHK_SD_BIT) | (1UL << MCHK_PD_BIT))
+
+#define IRQ_PEND_EXT_MASK ((1UL << IRQ_PEND_EXT_IRQ_KEY)    | \
+                          (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \
+                          (1UL << IRQ_PEND_EXT_CPU_TIMER)  | \
+                          (1UL << IRQ_PEND_EXT_MALFUNC)    | \
+                          (1UL << IRQ_PEND_EXT_EMERGENCY)  | \
+                          (1UL << IRQ_PEND_EXT_EXTERNAL)   | \
+                          (1UL << IRQ_PEND_EXT_TIMING)     | \
+                          (1UL << IRQ_PEND_EXT_HOST)       | \
+                          (1UL << IRQ_PEND_EXT_SERVICE)    | \
+                          (1UL << IRQ_PEND_VIRTIO)         | \
+                          (1UL << IRQ_PEND_PFAULT_INIT)    | \
+                          (1UL << IRQ_PEND_PFAULT_DONE)) /* all IRQ_PEND_EXT_* bits plus VIRTIO and both PFAULT bits */
+
+#define IRQ_PEND_IO_MASK ((1UL << IRQ_PEND_IO_ISC_0) | \
+                         (1UL << IRQ_PEND_IO_ISC_1) | \
+                         (1UL << IRQ_PEND_IO_ISC_2) | \
+                         (1UL << IRQ_PEND_IO_ISC_3) | \
+                         (1UL << IRQ_PEND_IO_ISC_4) | \
+                         (1UL << IRQ_PEND_IO_ISC_5) | \
+                         (1UL << IRQ_PEND_IO_ISC_6) | \
+                         (1UL << IRQ_PEND_IO_ISC_7)) /* bits for all eight IRQ_PEND_IO_ISC_* entries */
+
+#define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \
+                           (1UL << IRQ_PEND_MCHK_EX)) /* both IRQ_PEND_MCHK_* entries */
+
  struct kvm_s390_interrupt_info {
         struct list_head list;
         u64     type;
@@ -306,14 +386,25 @@ struct kvm_s390_interrupt_info {
  #define ACTION_STORE_ON_STOP           (1<<0)
  #define ACTION_STOP_ON_STOP            (1<<1)
  
+struct kvm_s390_irq_payload { /* one info member per interrupt kind; a struct, so all members coexist */
+       struct kvm_s390_io_info io;
+       struct kvm_s390_ext_info ext;
+       struct kvm_s390_pgm_info pgm;
+       struct kvm_s390_emerg_info emerg;
+       struct kvm_s390_extcall_info extcall;
+       struct kvm_s390_prefix_info prefix;
+       struct kvm_s390_mchk_info mchk;
+}; /* embedded as the 'irq' member of struct kvm_s390_local_interrupt */
+
  struct kvm_s390_local_interrupt {
         spinlock_t lock;
-       struct list_head list;
-       atomic_t active;
         struct kvm_s390_float_interrupt *float_int;
         wait_queue_head_t *wq;
         atomic_t *cpuflags;
         unsigned int action_bits;
+       DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
+       struct kvm_s390_irq_payload irq;
+       unsigned long pending_irqs;
  };
  
  struct kvm_s390_float_interrupt {
@@ -434,6 +525,8 @@ struct kvm_arch{
         int user_cpu_state_ctrl;
         struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
         wait_queue_head_t ipte_wq;
+       int ipte_lock_count;
+       struct mutex ipte_mutex;
         spinlock_t start_stop_lock;
         struct kvm_s390_crypto crypto;
  };
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h

index e510b9460efa40bd165d2a254fa283d1d72a20b3..3009c2ba46d227f9c6f0ffb14832837476ee72fb 100644 (file)
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -24,6 +24,7 @@ void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
  
  int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
                           unsigned long key, bool nq);
+unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
  
  static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
  {
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h

index 49576115dbb76da7659bdcceffca7f1452e37fdf..fad4ae23ece05a6e7d922d75033c4fb347c6357d 100644 (file)
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -10,6 +10,7 @@
  #define SIGP_RESTART                 6
  #define SIGP_STOP_AND_STORE_STATUS    9
  #define SIGP_INITIAL_CPU_RESET      11
+#define SIGP_CPU_RESET              12
  #define SIGP_SET_PREFIX                     13
  #define SIGP_STORE_STATUS_AT_ADDRESS 14
  #define SIGP_SET_ARCHITECTURE       18
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c

index 0f961a1c64b35e24d182375dbaac8c80d15e2e2d..8b9ccf02a2c5d9b132d06a2c9c7a1916f64084bb 100644 (file)
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -207,8 +207,6 @@ union raddress {
         unsigned long pfra : 52; /* Page-Frame Real Address */
  };
  
-static int ipte_lock_count;
-static DEFINE_MUTEX(ipte_mutex);
  
  int ipte_lock_held(struct kvm_vcpu *vcpu)
  {
@@ -216,47 +214,51 @@ int ipte_lock_held(struct kvm_vcpu *vcpu)
  
         if (vcpu->arch.sie_block->eca & 1)
                 return ic->kh != 0;
-       return ipte_lock_count != 0;
+       return vcpu->kvm->arch.ipte_lock_count != 0;
  }
  
  static void ipte_lock_simple(struct kvm_vcpu *vcpu)
  {
         union ipte_control old, new, *ic;
  
-       mutex_lock(&ipte_mutex);
-       ipte_lock_count++;
-       if (ipte_lock_count > 1)
+       mutex_lock(&vcpu->kvm->arch.ipte_mutex);
+       vcpu->kvm->arch.ipte_lock_count++;
+       if (vcpu->kvm->arch.ipte_lock_count > 1)
                 goto out;
         ic = &vcpu->kvm->arch.sca->ipte_control;
         do {
-               old = ACCESS_ONCE(*ic);
+               old = *ic;
+               barrier();
                 while (old.k) {
                         cond_resched();
-                       old = ACCESS_ONCE(*ic);
+                       old = *ic;
+                       barrier();
                 }
                 new = old;
                 new.k = 1;
         } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
  out:
-       mutex_unlock(&ipte_mutex);
+       mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
  }
  
  static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
  {
         union ipte_control old, new, *ic;
  
-       mutex_lock(&ipte_mutex);
-       ipte_lock_count--;
-       if (ipte_lock_count)
+       mutex_lock(&vcpu->kvm->arch.ipte_mutex);
+       vcpu->kvm->arch.ipte_lock_count--;
+       if (vcpu->kvm->arch.ipte_lock_count)
                 goto out;
         ic = &vcpu->kvm->arch.sca->ipte_control;
         do {
-               new = old = ACCESS_ONCE(*ic);
+               old = *ic;
+               barrier();
+               new = old;
                 new.k = 0;
         } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
         wake_up(&vcpu->kvm->arch.ipte_wq);
  out:
-       mutex_unlock(&ipte_mutex);
+       mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
  }
  
  static void ipte_lock_siif(struct kvm_vcpu *vcpu)
@@ -265,10 +267,12 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu)
  
         ic = &vcpu->kvm->arch.sca->ipte_control;
         do {
-               old = ACCESS_ONCE(*ic);
+               old = *ic;
+               barrier();
                 while (old.kg) {
                         cond_resched();
-                       old = ACCESS_ONCE(*ic);
+                       old = *ic;
+                       barrier();
                 }
                 new = old;
                 new.k = 1;
@@ -282,7 +286,9 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
  
         ic = &vcpu->kvm->arch.sca->ipte_control;
         do {
-               new = old = ACCESS_ONCE(*ic);
+               old = *ic;
+               barrier();
+               new = old;
                 new.kh--;
                 if (!new.kh)
                         new.k = 0;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c

index eaf46291d3619ab063f1162f54e82c3985db7d09..81c77ab8102ee9f754ed133f76819c8a649bda3b 100644 (file)
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -38,6 +38,19 @@ static const intercept_handler_t instruction_handlers[256] = {
         [0xeb] = kvm_s390_handle_eb,
  };
  
+void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc)
+{
+       struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
+
+       /* Use the length of the EXECUTE instruction if necessary */
+       if (sie_block->icptstatus & 1) {
+               ilc = (sie_block->icptstatus >> 4) & 0x6;
+               if (!ilc)
+                       ilc = 4;
+       }
+       sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc);
+}
+
  static int handle_noop(struct kvm_vcpu *vcpu)
  {
         switch (vcpu->arch.sie_block->icptcode) {
@@ -244,7 +257,7 @@ static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
  static int handle_external_interrupt(struct kvm_vcpu *vcpu)
  {
         u16 eic = vcpu->arch.sie_block->eic;
-       struct kvm_s390_interrupt irq;
+       struct kvm_s390_irq irq;
         psw_t newpsw;
         int rc;
  
@@ -269,7 +282,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
                 if (kvm_s390_si_ext_call_pending(vcpu))
                         return 0;
                 irq.type = KVM_S390_INT_EXTERNAL_CALL;
-               irq.parm = vcpu->arch.sie_block->extcpuaddr;
+               irq.u.extcall.code = vcpu->arch.sie_block->extcpuaddr;
                 break;
         default:
                 return -EOPNOTSUPP;
@@ -288,7 +301,6 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
   */
  static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
  {
-       psw_t *psw = &vcpu->arch.sie_block->gpsw;
         unsigned long srcaddr, dstaddr;
         int reg1, reg2, rc;
  
@@ -310,7 +322,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
         if (rc != 0)
                 return rc;
  
-       psw->addr = __rewind_psw(*psw, 4);
+       kvm_s390_rewind_psw(vcpu, 4);
  
         return 0;
  }
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c

index a39838457f01778d81dd2827348ab0e8732936a0..f00f31e66cd8312a00c4c8c5292ae5752fed5fdc 100644 (file)
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -16,6 +16,7 @@
  #include <linux/mmu_context.h>
  #include <linux/signal.h>
  #include <linux/slab.h>
+#include <linux/bitmap.h>
  #include <asm/asm-offsets.h>
  #include <asm/uaccess.h>
  #include "kvm-s390.h"
@@ -27,8 +28,8 @@
  #define IOINT_CSSID_MASK 0x03fc0000
  #define IOINT_AI_MASK 0x04000000
  #define PFAULT_INIT 0x0600
-
-static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
+#define PFAULT_DONE 0x0680
+#define VIRTIO_PARAM 0x0d00
  
  static int is_ioint(u64 type)
  {
@@ -136,6 +137,31 @@ static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
         return 0;
  }
  
+static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.local_int.pending_irqs;
+}
+
+static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu)
+{
+       unsigned long active_mask = pending_local_irqs(vcpu);
+
+       if (psw_extint_disabled(vcpu))
+               active_mask &= ~IRQ_PEND_EXT_MASK;
+       if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul))
+               __clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask);
+       if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul))
+               __clear_bit(IRQ_PEND_EXT_EMERGENCY, &active_mask);
+       if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+               __clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
+       if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul))
+               __clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
+       if (psw_mchk_disabled(vcpu))
+               active_mask &= ~IRQ_PEND_MCHK_MASK;
+
+       return active_mask;
+}
+
  static void __set_cpu_idle(struct kvm_vcpu *vcpu)
  {
         atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
@@ -170,26 +196,45 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
         atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
  }
  
+static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
+{
+       if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK))
+               return;
+       if (psw_extint_disabled(vcpu))
+               __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
+       else
+               vcpu->arch.sie_block->lctl |= LCTL_CR0;
+}
+
+static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
+{
+       if (!(pending_local_irqs(vcpu) & IRQ_PEND_MCHK_MASK))
+               return;
+       if (psw_mchk_disabled(vcpu))
+               vcpu->arch.sie_block->ictl |= ICTL_LPSW;
+       else
+               vcpu->arch.sie_block->lctl |= LCTL_CR14;
+}
+
+/* Set interception request for non-deliverable local interrupts */
+static void set_intercept_indicators_local(struct kvm_vcpu *vcpu)
+{
+       set_intercept_indicators_ext(vcpu);
+       set_intercept_indicators_mchk(vcpu);
+}
+
  static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
                                       struct kvm_s390_interrupt_info *inti)
  {
         switch (inti->type) {
-       case KVM_S390_INT_EXTERNAL_CALL:
-       case KVM_S390_INT_EMERGENCY:
         case KVM_S390_INT_SERVICE:
-       case KVM_S390_INT_PFAULT_INIT:
         case KVM_S390_INT_PFAULT_DONE:
         case KVM_S390_INT_VIRTIO:
-       case KVM_S390_INT_CLOCK_COMP:
-       case KVM_S390_INT_CPU_TIMER:
                 if (psw_extint_disabled(vcpu))
                         __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
                 else
                         vcpu->arch.sie_block->lctl |= LCTL_CR0;
                 break;
-       case KVM_S390_SIGP_STOP:
-               __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
-               break;
         case KVM_S390_MCHK:
                 if (psw_mchk_disabled(vcpu))
                         vcpu->arch.sie_block->ictl |= ICTL_LPSW;
@@ -226,13 +271,236 @@ static u16 get_ilc(struct kvm_vcpu *vcpu)
         }
  }
  
-static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
-                             struct kvm_s390_pgm_info *pgm_info)
+static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       int rc;
+
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
+                                        0, 0);
+
+       rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
+                          (u16 *)__LC_EXT_INT_CODE);
+       rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       int rc;
+
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
+                                        0, 0);
+
+       rc  = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
+                          (u16 __user *)__LC_EXT_INT_CODE);
+       rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_ext_info ext;
+       int rc;
+
+       spin_lock(&li->lock);
+       ext = li->irq.ext;
+       clear_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
+       li->irq.ext.ext_params2 = 0;
+       spin_unlock(&li->lock);
+
+       VCPU_EVENT(vcpu, 4, "interrupt: pfault init parm:%x,parm64:%llx",
+                  0, ext.ext_params2);
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+                                        KVM_S390_INT_PFAULT_INIT,
+                                        0, ext.ext_params2);
+
+       rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *) __LC_EXT_INT_CODE);
+       rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= put_guest_lc(vcpu, ext.ext_params2, (u64 *) __LC_EXT_PARAMS2);
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_mchk_info mchk;
+       int rc;
+
+       spin_lock(&li->lock);
+       mchk = li->irq.mchk;
+       /*
+        * If there was an exigent machine check pending, then any repressible
+        * machine checks that might have been pending are indicated along
+        * with it, so always clear both bits
+        */
+       clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+       clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
+       memset(&li->irq.mchk, 0, sizeof(mchk));
+       spin_unlock(&li->lock);
+
+       VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+                  mchk.mcic);
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
+                                        mchk.cr14, mchk.mcic);
+
+       rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
+       rc |= put_guest_lc(vcpu, mchk.mcic,
+                          (u64 __user *) __LC_MCCK_CODE);
+       rc |= put_guest_lc(vcpu, mchk.failing_storage_address,
+                          (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
+                            &mchk.fixed_logout, sizeof(mchk.fixed_logout));
+       rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       int rc;
+
+       VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
+       vcpu->stat.deliver_restart_signal++;
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
+
+       rc  = write_guest_lc(vcpu,
+                            offsetof(struct _lowcore, restart_old_psw),
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       clear_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_stop(struct kvm_vcpu *vcpu)
+{
+       VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
+       vcpu->stat.deliver_stop_signal++;
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_SIGP_STOP,
+                                        0, 0);
+
+       __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+       clear_bit(IRQ_PEND_SIGP_STOP, &vcpu->arch.local_int.pending_irqs);
+       return 0;
+}
+
+static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_prefix_info prefix;
+
+       spin_lock(&li->lock);
+       prefix = li->irq.prefix;
+       li->irq.prefix.address = 0;
+       clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
+       spin_unlock(&li->lock);
+
+       VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", prefix.address);
+       vcpu->stat.deliver_prefix_signal++;
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+                                        KVM_S390_SIGP_SET_PREFIX,
+                                        prefix.address, 0);
+
+       kvm_s390_set_prefix(vcpu, prefix.address);
+       return 0;
+}
+
+static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       int rc;
+       int cpu_addr;
+
+       spin_lock(&li->lock);
+       cpu_addr = find_first_bit(li->sigp_emerg_pending, KVM_MAX_VCPUS);
+       clear_bit(cpu_addr, li->sigp_emerg_pending);
+       if (bitmap_empty(li->sigp_emerg_pending, KVM_MAX_VCPUS))
+               clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
+       spin_unlock(&li->lock);
+
+       VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
+       vcpu->stat.deliver_emergency_signal++;
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
+                                        cpu_addr, 0);
+
+       rc  = put_guest_lc(vcpu, EXT_IRQ_EMERGENCY_SIG,
+                          (u16 *)__LC_EXT_INT_CODE);
+       rc |= put_guest_lc(vcpu, cpu_addr, (u16 *)__LC_EXT_CPU_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_extcall_info extcall;
+       int rc;
+
+       spin_lock(&li->lock);
+       extcall = li->irq.extcall;
+       li->irq.extcall.code = 0;
+       clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
+       spin_unlock(&li->lock);
+
+       VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
+       vcpu->stat.deliver_external_call++;
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+                                        KVM_S390_INT_EXTERNAL_CALL,
+                                        extcall.code, 0);
+
+       rc  = put_guest_lc(vcpu, EXT_IRQ_EXTERNAL_CALL,
+                          (u16 *)__LC_EXT_INT_CODE);
+       rc |= put_guest_lc(vcpu, extcall.code, (u16 *)__LC_EXT_CPU_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &vcpu->arch.sie_block->gpsw,
+                           sizeof(psw_t));
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
  {
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_pgm_info pgm_info;
         int rc = 0;
         u16 ilc = get_ilc(vcpu);
  
-       switch (pgm_info->code & ~PGM_PER) {
+       spin_lock(&li->lock);
+       pgm_info = li->irq.pgm;
+       clear_bit(IRQ_PEND_PROG, &li->pending_irqs);
+       memset(&li->irq.pgm, 0, sizeof(pgm_info));
+       spin_unlock(&li->lock);
+
+       VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
+                  pgm_info.code, ilc);
+       vcpu->stat.deliver_program_int++;
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+                                        pgm_info.code, 0);
+
+       switch (pgm_info.code & ~PGM_PER) {
         case PGM_AFX_TRANSLATION:
         case PGM_ASX_TRANSLATION:
         case PGM_EX_TRANSLATION:
@@ -243,7 +511,7 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
         case PGM_PRIMARY_AUTHORITY:
         case PGM_SECONDARY_AUTHORITY:
         case PGM_SPACE_SWITCH:
-               rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+               rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
                                   (u64 *)__LC_TRANS_EXC_CODE);
                 break;
         case PGM_ALEN_TRANSLATION:
@@ -252,7 +520,7 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
         case PGM_ASTE_SEQUENCE:
         case PGM_ASTE_VALIDITY:
         case PGM_EXTENDED_AUTHORITY:
-               rc = put_guest_lc(vcpu, pgm_info->exc_access_id,
+               rc = put_guest_lc(vcpu, pgm_info.exc_access_id,
                                   (u8 *)__LC_EXC_ACCESS_ID);
                 break;
         case PGM_ASCE_TYPE:
@@ -261,247 +529,208 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
         case PGM_REGION_SECOND_TRANS:
         case PGM_REGION_THIRD_TRANS:
         case PGM_SEGMENT_TRANSLATION:
-               rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+               rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
                                   (u64 *)__LC_TRANS_EXC_CODE);
-               rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+               rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
                                    (u8 *)__LC_EXC_ACCESS_ID);
-               rc |= put_guest_lc(vcpu, pgm_info->op_access_id,
+               rc |= put_guest_lc(vcpu, pgm_info.op_access_id,
                                    (u8 *)__LC_OP_ACCESS_ID);
                 break;
         case PGM_MONITOR:
-               rc = put_guest_lc(vcpu, pgm_info->mon_class_nr,
-                                 (u64 *)__LC_MON_CLASS_NR);
-               rc |= put_guest_lc(vcpu, pgm_info->mon_code,
+               rc = put_guest_lc(vcpu, pgm_info.mon_class_nr,
+                                 (u16 *)__LC_MON_CLASS_NR);
+               rc |= put_guest_lc(vcpu, pgm_info.mon_code,
                                    (u64 *)__LC_MON_CODE);
                 break;
         case PGM_DATA:
-               rc = put_guest_lc(vcpu, pgm_info->data_exc_code,
+               rc = put_guest_lc(vcpu, pgm_info.data_exc_code,
                                   (u32 *)__LC_DATA_EXC_CODE);
                 break;
         case PGM_PROTECTION:
-               rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+               rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
                                   (u64 *)__LC_TRANS_EXC_CODE);
-               rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+               rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
                                    (u8 *)__LC_EXC_ACCESS_ID);
                 break;
         }
  
-       if (pgm_info->code & PGM_PER) {
-               rc |= put_guest_lc(vcpu, pgm_info->per_code,
+       if (pgm_info.code & PGM_PER) {
+               rc |= put_guest_lc(vcpu, pgm_info.per_code,
                                    (u8 *) __LC_PER_CODE);
-               rc |= put_guest_lc(vcpu, pgm_info->per_atmid,
+               rc |= put_guest_lc(vcpu, pgm_info.per_atmid,
                                    (u8 *)__LC_PER_ATMID);
-               rc |= put_guest_lc(vcpu, pgm_info->per_address,
+               rc |= put_guest_lc(vcpu, pgm_info.per_address,
                                    (u64 *) __LC_PER_ADDRESS);
-               rc |= put_guest_lc(vcpu, pgm_info->per_access_id,
+               rc |= put_guest_lc(vcpu, pgm_info.per_access_id,
                                    (u8 *) __LC_PER_ACCESS_ID);
         }
  
         rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
-       rc |= put_guest_lc(vcpu, pgm_info->code,
+       rc |= put_guest_lc(vcpu, pgm_info.code,
                            (u16 *)__LC_PGM_INT_CODE);
         rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
                              &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
         rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
                             &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       return rc ? -EFAULT : 0;
+}
  
-       return rc;
+static int __must_check __deliver_service(struct kvm_vcpu *vcpu,
+                                         struct kvm_s390_interrupt_info *inti)
+{
+       int rc;
+
+       VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
+                  inti->ext.ext_params);
+       vcpu->stat.deliver_service_signal++;
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                                        inti->ext.ext_params, 0);
+
+       rc  = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
+       rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+                          (u32 *)__LC_EXT_PARAMS);
+       return rc ? -EFAULT : 0;
  }
  
-static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu,
-                                  struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu,
+                                          struct kvm_s390_interrupt_info *inti)
  {
-       const unsigned short table[] = { 2, 4, 4, 6 };
-       int rc = 0;
+       int rc;
+
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+                                        KVM_S390_INT_PFAULT_DONE, 0,
+                                        inti->ext.ext_params2);
+
+       rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
+       rc |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+                          (u64 *)__LC_EXT_PARAMS2);
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu,
+                                        struct kvm_s390_interrupt_info *inti)
+{
+       int rc;
+
+       VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
+                  inti->ext.ext_params, inti->ext.ext_params2);
+       vcpu->stat.deliver_virtio_interrupt++;
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                                        inti->ext.ext_params,
+                                        inti->ext.ext_params2);
+
+       rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
+       rc |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 *)__LC_EXT_CPU_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+                          (u32 *)__LC_EXT_PARAMS);
+       rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+                          (u64 *)__LC_EXT_PARAMS2);
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
+                                    struct kvm_s390_interrupt_info *inti)
+{
+       int rc;
+
+       VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
+       vcpu->stat.deliver_io_int++;
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                                        ((__u32)inti->io.subchannel_id << 16) |
+                                               inti->io.subchannel_nr,
+                                        ((__u64)inti->io.io_int_parm << 32) |
+                                               inti->io.io_int_word);
+
+       rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
+                          (u16 *)__LC_SUBCHANNEL_ID);
+       rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
+                          (u16 *)__LC_SUBCHANNEL_NR);
+       rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
+                          (u32 *)__LC_IO_INT_PARM);
+       rc |= put_guest_lc(vcpu, inti->io.io_int_word,
+                          (u32 *)__LC_IO_INT_WORD);
+       rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_mchk_floating(struct kvm_vcpu *vcpu,
+                                          struct kvm_s390_interrupt_info *inti)
+{
+       struct kvm_s390_mchk_info *mchk = &inti->mchk;
+       int rc;
+
+       VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+                  mchk->mcic);
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
+                                        mchk->cr14, mchk->mcic);
+
+       rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
+       rc |= put_guest_lc(vcpu, mchk->mcic,
+                       (u64 __user *) __LC_MCCK_CODE);
+       rc |= put_guest_lc(vcpu, mchk->failing_storage_address,
+                       (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
+                            &mchk->fixed_logout, sizeof(mchk->fixed_logout));
+       rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       return rc ? -EFAULT : 0;
+}
+
+typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
+
+static const deliver_irq_t deliver_irq_funcs[] = {
+       [IRQ_PEND_MCHK_EX]        = __deliver_machine_check,
+       [IRQ_PEND_PROG]           = __deliver_prog,
+       [IRQ_PEND_EXT_EMERGENCY]  = __deliver_emergency_signal,
+       [IRQ_PEND_EXT_EXTERNAL]   = __deliver_external_call,
+       [IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc,
+       [IRQ_PEND_EXT_CPU_TIMER]  = __deliver_cpu_timer,
+       [IRQ_PEND_RESTART]        = __deliver_restart,
+       [IRQ_PEND_SIGP_STOP]      = __deliver_stop,
+       [IRQ_PEND_SET_PREFIX]     = __deliver_set_prefix,
+       [IRQ_PEND_PFAULT_INIT]    = __deliver_pfault_init,
+};
+
+static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu,
+                                          struct kvm_s390_interrupt_info *inti)
+{
+       int rc;
  
         switch (inti->type) {
-       case KVM_S390_INT_EMERGENCY:
-               VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
-               vcpu->stat.deliver_emergency_signal++;
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                inti->emerg.code, 0);
-               rc  = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE);
-               rc |= put_guest_lc(vcpu, inti->emerg.code,
-                                  (u16 *)__LC_EXT_CPU_ADDR);
-               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               break;
-       case KVM_S390_INT_EXTERNAL_CALL:
-               VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
-               vcpu->stat.deliver_external_call++;
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                inti->extcall.code, 0);
-               rc  = put_guest_lc(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE);
-               rc |= put_guest_lc(vcpu, inti->extcall.code,
-                                  (u16 *)__LC_EXT_CPU_ADDR);
-               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw,
-                                    sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw,
-                                   sizeof(psw_t));
-               break;
-       case KVM_S390_INT_CLOCK_COMP:
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                inti->ext.ext_params, 0);
-               rc = deliver_ckc_interrupt(vcpu);
-               break;
-       case KVM_S390_INT_CPU_TIMER:
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                inti->ext.ext_params, 0);
-               rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
-                                  (u16 *)__LC_EXT_INT_CODE);
-               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw,
-                                    sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-                                  (u32 *)__LC_EXT_PARAMS);
-               break;
         case KVM_S390_INT_SERVICE:
-               VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
-                          inti->ext.ext_params);
-               vcpu->stat.deliver_service_signal++;
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                inti->ext.ext_params, 0);
-               rc  = put_guest_lc(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE);
-               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw,
-                                    sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-                                  (u32 *)__LC_EXT_PARAMS);
-               break;
-       case KVM_S390_INT_PFAULT_INIT:
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
-                                                inti->ext.ext_params2);
-               rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
-                                  (u16 *) __LC_EXT_INT_CODE);
-               rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
-               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-                                  (u64 *) __LC_EXT_PARAMS2);
+               rc = __deliver_service(vcpu, inti);
                 break;
         case KVM_S390_INT_PFAULT_DONE:
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
-                                                inti->ext.ext_params2);
-               rc  = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
-               rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR);
-               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw,
-                                    sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-                                  (u64 *)__LC_EXT_PARAMS2);
+               rc = __deliver_pfault_done(vcpu, inti);
                 break;
         case KVM_S390_INT_VIRTIO:
-               VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
-                          inti->ext.ext_params, inti->ext.ext_params2);
-               vcpu->stat.deliver_virtio_interrupt++;
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                inti->ext.ext_params,
-                                                inti->ext.ext_params2);
-               rc  = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
-               rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR);
-               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw,
-                                    sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-                                  (u32 *)__LC_EXT_PARAMS);
-               rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-                                  (u64 *)__LC_EXT_PARAMS2);
-               break;
-       case KVM_S390_SIGP_STOP:
-               VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
-               vcpu->stat.deliver_stop_signal++;
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                0, 0);
-               __set_intercept_indicator(vcpu, inti);
-               break;
-
-       case KVM_S390_SIGP_SET_PREFIX:
-               VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
-                          inti->prefix.address);
-               vcpu->stat.deliver_prefix_signal++;
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                inti->prefix.address, 0);
-               kvm_s390_set_prefix(vcpu, inti->prefix.address);
-               break;
-
-       case KVM_S390_RESTART:
-               VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
-               vcpu->stat.deliver_restart_signal++;
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                0, 0);
-               rc  = write_guest_lc(vcpu,
-                                    offsetof(struct _lowcore, restart_old_psw),
-                                    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
-                                   &vcpu->arch.sie_block->gpsw,
-                                   sizeof(psw_t));
+               rc = __deliver_virtio(vcpu, inti);
                 break;
-       case KVM_S390_PROGRAM_INT:
-               VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
-                          inti->pgm.code,
-                          table[vcpu->arch.sie_block->ipa >> 14]);
-               vcpu->stat.deliver_program_int++;
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                inti->pgm.code, 0);
-               rc = __deliver_prog_irq(vcpu, &inti->pgm);
-               break;
-
         case KVM_S390_MCHK:
-               VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
-                          inti->mchk.mcic);
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                inti->mchk.cr14,
-                                                inti->mchk.mcic);
-               rc  = kvm_s390_vcpu_store_status(vcpu,
-                                                KVM_S390_STORE_STATUS_PREFIXED);
-               rc |= put_guest_lc(vcpu, inti->mchk.mcic, (u64 *)__LC_MCCK_CODE);
-               rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw,
-                                    sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+               rc = __deliver_mchk_floating(vcpu, inti);
                 break;
-
         case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-       {
-               __u32 param0 = ((__u32)inti->io.subchannel_id << 16) |
-                       inti->io.subchannel_nr;
-               __u64 param1 = ((__u64)inti->io.io_int_parm << 32) |
-                       inti->io.io_int_word;
-               VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
-               vcpu->stat.deliver_io_int++;
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                param0, param1);
-               rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
-                                  (u16 *)__LC_SUBCHANNEL_ID);
-               rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
-                                  (u16 *)__LC_SUBCHANNEL_NR);
-               rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
-                                  (u32 *)__LC_IO_INT_PARM);
-               rc |= put_guest_lc(vcpu, inti->io.io_int_word,
-                                  (u32 *)__LC_IO_INT_WORD);
-               rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw,
-                                    sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw,
-                                   sizeof(psw_t));
+               rc = __deliver_io(vcpu, inti);
                 break;
-       }
         default:
                 BUG();
         }
@@ -509,19 +738,6 @@ static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu,
         return rc;
  }
  
-static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
-{
-       int rc;
-
-       rc  = put_guest_lc(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
-       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                           &vcpu->arch.sie_block->gpsw,
-                           sizeof(psw_t));
-       return rc;
-}
-
  /* Check whether SIGP interpretation facility has an external call pending */
  int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu)
  {
@@ -538,20 +754,11 @@ int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu)
  
  int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
  {
-       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
         struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
         struct kvm_s390_interrupt_info  *inti;
-       int rc = 0;
+       int rc;
  
-       if (atomic_read(&li->active)) {
-               spin_lock(&li->lock);
-               list_for_each_entry(inti, &li->list, list)
-                       if (__interrupt_is_deliverable(vcpu, inti)) {
-                               rc = 1;
-                               break;
-                       }
-               spin_unlock(&li->lock);
-       }
+       rc = !!deliverable_local_irqs(vcpu);
  
         if ((!rc) && atomic_read(&fi->active)) {
                 spin_lock(&fi->lock);
@@ -643,18 +850,15 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
  void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
  {
         struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-       struct kvm_s390_interrupt_info  *n, *inti = NULL;
  
         spin_lock(&li->lock);
-       list_for_each_entry_safe(inti, n, &li->list, list) {
-               list_del(&inti->list);
-               kfree(inti);
-       }
-       atomic_set(&li->active, 0);
+       li->pending_irqs = 0;
+       bitmap_zero(li->sigp_emerg_pending, KVM_MAX_VCPUS);
+       memset(&li->irq, 0, sizeof(li->irq));
         spin_unlock(&li->lock);
  
         /* clear pending external calls set by sigp interpretation facility */
-       atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+       atomic_clear_mask(CPUSTAT_ECALL_PEND, li->cpuflags);
         atomic_clear_mask(SIGP_CTRL_C,
                           &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
  }
@@ -664,34 +868,35 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
         struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
         struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
         struct kvm_s390_interrupt_info  *n, *inti = NULL;
+       deliver_irq_t func;
         int deliver;
         int rc = 0;
+       unsigned long irq_type;
+       unsigned long deliverable_irqs;
  
         __reset_intercept_indicators(vcpu);
-       if (atomic_read(&li->active)) {
-               do {
-                       deliver = 0;
-                       spin_lock(&li->lock);
-                       list_for_each_entry_safe(inti, n, &li->list, list) {
-                               if (__interrupt_is_deliverable(vcpu, inti)) {
-                                       list_del(&inti->list);
-                                       deliver = 1;
-                                       break;
-                               }
-                               __set_intercept_indicator(vcpu, inti);
-                       }
-                       if (list_empty(&li->list))
-                               atomic_set(&li->active, 0);
-                       spin_unlock(&li->lock);
-                       if (deliver) {
-                               rc = __do_deliver_interrupt(vcpu, inti);
-                               kfree(inti);
-                       }
-               } while (!rc && deliver);
-       }
  
-       if (!rc && kvm_cpu_has_pending_timer(vcpu))
-               rc = deliver_ckc_interrupt(vcpu);
+       /* pending ckc conditions might have been invalidated */
+       clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+       if (kvm_cpu_has_pending_timer(vcpu))
+               set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+
+       do {
+               deliverable_irqs = deliverable_local_irqs(vcpu);
+               /* bits are in the order of interrupt priority */
+               irq_type = find_first_bit(&deliverable_irqs, IRQ_PEND_COUNT);
+               if (irq_type == IRQ_PEND_COUNT)
+                       break;
+               func = deliver_irq_funcs[irq_type];
+               if (!func) {
+                       WARN_ON_ONCE(func == NULL);
+                       clear_bit(irq_type, &li->pending_irqs);
+                       continue;
+               }
+               rc = func(vcpu);
+       } while (!rc && irq_type != IRQ_PEND_COUNT);
+
+       set_intercept_indicators_local(vcpu);
  
         if (!rc && atomic_read(&fi->active)) {
                 do {
@@ -710,7 +915,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
                                 atomic_set(&fi->active, 0);
                         spin_unlock(&fi->lock);
                         if (deliver) {
-                               rc = __do_deliver_interrupt(vcpu, inti);
+                               rc = __deliver_floating_interrupt(vcpu, inti);
                                 kfree(inti);
                         }
                 } while (!rc && deliver);
@@ -719,23 +924,26 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
         return rc;
  }
  
-int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
  {
         struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-       struct kvm_s390_interrupt_info *inti;
  
-       inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-       if (!inti)
-               return -ENOMEM;
+       li->irq.pgm = irq->u.pgm;
+       set_bit(IRQ_PEND_PROG, &li->pending_irqs);
+       return 0;
+}
  
-       inti->type = KVM_S390_PROGRAM_INT;
-       inti->pgm.code = code;
+int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_irq irq;
  
         VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
-       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, code,
+                                  0, 1);
         spin_lock(&li->lock);
-       list_add(&inti->list, &li->list);
-       atomic_set(&li->active, 1);
+       irq.u.pgm.code = code;
+       __inject_prog(vcpu, &irq);
         BUG_ON(waitqueue_active(li->wq));
         spin_unlock(&li->lock);
         return 0;
@@ -745,27 +953,166 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
                              struct kvm_s390_pgm_info *pgm_info)
  {
         struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-       struct kvm_s390_interrupt_info *inti;
-
-       inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-       if (!inti)
-               return -ENOMEM;
+       struct kvm_s390_irq irq;
+       int rc;
  
         VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
                    pgm_info->code);
         trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
                                    pgm_info->code, 0, 1);
-
-       inti->type = KVM_S390_PROGRAM_INT;
-       memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm));
         spin_lock(&li->lock);
-       list_add(&inti->list, &li->list);
-       atomic_set(&li->active, 1);
+       irq.u.pgm = *pgm_info;
+       rc = __inject_prog(vcpu, &irq);
         BUG_ON(waitqueue_active(li->wq));
         spin_unlock(&li->lock);
+       return rc;
+}
+
+static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+       VCPU_EVENT(vcpu, 3, "inject: external irq params:%x, params2:%llx",
+                  irq->u.ext.ext_params, irq->u.ext.ext_params2);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT,
+                                  irq->u.ext.ext_params,
+                                  irq->u.ext.ext_params2, 2);
+
+       li->irq.ext = irq->u.ext;
+       set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
+       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
         return 0;
  }
  
+int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_extcall_info *extcall = &li->irq.extcall;
+
+       VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
+                  irq->u.extcall.code);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
+                                  irq->u.extcall.code, 0, 2);
+
+       *extcall = irq->u.extcall;
+       set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
+       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+       return 0;
+}
+
+static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_prefix_info *prefix = &li->irq.prefix;
+
+       VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
+                  prefix->address);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
+                                  prefix->address, 0, 2);
+
+       *prefix = irq->u.prefix;
+       set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
+       return 0;
+}
+
+static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2);
+
+       li->action_bits |= ACTION_STOP_ON_STOP;
+       set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
+       return 0;
+}
+
+static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
+                                struct kvm_s390_irq *irq)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+       VCPU_EVENT(vcpu, 3, "inject: restart type %llx", irq->type);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0, 2);
+
+       set_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+       return 0;
+}
+
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+                                  struct kvm_s390_irq *irq)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_emerg_info *emerg = &li->irq.emerg;
+
+       VCPU_EVENT(vcpu, 3, "inject: emergency %u\n",
+                  irq->u.emerg.code);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
+                                  emerg->code, 0, 2);
+
+       set_bit(emerg->code, li->sigp_emerg_pending);
+       set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
+       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+       return 0;
+}
+
+static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_mchk_info *mchk = &li->irq.mchk;
+
+       VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
+                  mchk->mcic);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
+                                  mchk->mcic, 2);
+
+       /*
+        * Because repressible machine checks can be indicated along with
+        * exigent machine checks (PoP, Chapter 11, Interruption action)
+        * we need to combine cr14, mcic and external damage code.
+        * Failing storage address and the logout area should not be or'ed
+        * together, we just indicate the last occurrence of the corresponding
+        * machine check
+        */
+       mchk->cr14 |= irq->u.mchk.cr14;
+       mchk->mcic |= irq->u.mchk.mcic;
+       mchk->ext_damage_code |= irq->u.mchk.ext_damage_code;
+       mchk->failing_storage_address = irq->u.mchk.failing_storage_address;
+       memcpy(&mchk->fixed_logout, &irq->u.mchk.fixed_logout,
+              sizeof(mchk->fixed_logout));
+       if (mchk->mcic & MCHK_EX_MASK)
+               set_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+       else if (mchk->mcic & MCHK_REP_MASK)
+               set_bit(IRQ_PEND_MCHK_REP,  &li->pending_irqs);
+       return 0;
+}
+
+static int __inject_ckc(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+       VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
+                                  0, 0, 2);
+
+       set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+       return 0;
+}
+
+static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+       VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
+                                  0, 0, 2);
+
+       set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+       return 0;
+}
+
+
  struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
                                                     u64 cr6, u64 schid)
  {
@@ -851,7 +1198,17 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
         dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
         li = &dst_vcpu->arch.local_int;
         spin_lock(&li->lock);
-       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+       switch (inti->type) {
+       case KVM_S390_MCHK:
+               atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+               break;
+       case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+               atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
+               break;
+       default:
+               atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+               break;
+       }
         spin_unlock(&li->lock);
         kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
  unlock_fi:
@@ -920,92 +1277,85 @@ void kvm_s390_reinject_io_int(struct kvm *kvm,
         __inject_vm(kvm, inti);
  }
  
-int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
-                        struct kvm_s390_interrupt *s390int)
+int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
+                      struct kvm_s390_irq *irq)
  {
-       struct kvm_s390_local_interrupt *li;
-       struct kvm_s390_interrupt_info *inti;
+       irq->type = s390int->type;
+       switch (irq->type) {
+       case KVM_S390_PROGRAM_INT:
+               if (s390int->parm & 0xffff0000)
+                       return -EINVAL;
+               irq->u.pgm.code = s390int->parm;
+               break;
+       case KVM_S390_SIGP_SET_PREFIX:
+               irq->u.prefix.address = s390int->parm;
+               break;
+       case KVM_S390_INT_EXTERNAL_CALL:
+               if (irq->u.extcall.code & 0xffff0000)
+                       return -EINVAL;
+               irq->u.extcall.code = s390int->parm;
+               break;
+       case KVM_S390_INT_EMERGENCY:
+               if (irq->u.emerg.code & 0xffff0000)
+                       return -EINVAL;
+               irq->u.emerg.code = s390int->parm;
+               break;
+       case KVM_S390_MCHK:
+               irq->u.mchk.mcic = s390int->parm64;
+               break;
+       }
+       return 0;
+}
  
-       inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-       if (!inti)
-               return -ENOMEM;
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       int rc;
  
-       switch (s390int->type) {
+       spin_lock(&li->lock);
+       switch (irq->type) {
         case KVM_S390_PROGRAM_INT:
-               if (s390int->parm & 0xffff0000) {
-                       kfree(inti);
-                       return -EINVAL;
-               }
-               inti->type = s390int->type;
-               inti->pgm.code = s390int->parm;
                 VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
-                          s390int->parm);
+                          irq->u.pgm.code);
+               rc = __inject_prog(vcpu, irq);
                 break;
         case KVM_S390_SIGP_SET_PREFIX:
-               inti->prefix.address = s390int->parm;
-               inti->type = s390int->type;
-               VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
-                          s390int->parm);
+               rc = __inject_set_prefix(vcpu, irq);
                 break;
         case KVM_S390_SIGP_STOP:
+               rc = __inject_sigp_stop(vcpu, irq);
+               break;
         case KVM_S390_RESTART:
+               rc = __inject_sigp_restart(vcpu, irq);
+               break;
         case KVM_S390_INT_CLOCK_COMP:
+               rc = __inject_ckc(vcpu);
+               break;
         case KVM_S390_INT_CPU_TIMER:
-               VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
-               inti->type = s390int->type;
+               rc = __inject_cpu_timer(vcpu);
                 break;
         case KVM_S390_INT_EXTERNAL_CALL:
-               if (s390int->parm & 0xffff0000) {
-                       kfree(inti);
-                       return -EINVAL;
-               }
-               VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
-                          s390int->parm);
-               inti->type = s390int->type;
-               inti->extcall.code = s390int->parm;
+               rc = __inject_extcall(vcpu, irq);
                 break;
         case KVM_S390_INT_EMERGENCY:
-               if (s390int->parm & 0xffff0000) {
-                       kfree(inti);
-                       return -EINVAL;
-               }
-               VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", s390int->parm);
-               inti->type = s390int->type;
-               inti->emerg.code = s390int->parm;
+               rc = __inject_sigp_emergency(vcpu, irq);
                 break;
         case KVM_S390_MCHK:
-               VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
-                          s390int->parm64);
-               inti->type = s390int->type;
-               inti->mchk.mcic = s390int->parm64;
+               rc = __inject_mchk(vcpu, irq);
                 break;
         case KVM_S390_INT_PFAULT_INIT:
-               inti->type = s390int->type;
-               inti->ext.ext_params2 = s390int->parm64;
+               rc = __inject_pfault_init(vcpu, irq);
                 break;
         case KVM_S390_INT_VIRTIO:
         case KVM_S390_INT_SERVICE:
         case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
         default:
-               kfree(inti);
-               return -EINVAL;
+               rc = -EINVAL;
         }
-       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm,
-                                  s390int->parm64, 2);
-
-       li = &vcpu->arch.local_int;
-       spin_lock(&li->lock);
-       if (inti->type == KVM_S390_PROGRAM_INT)
-               list_add(&inti->list, &li->list);
-       else
-               list_add_tail(&inti->list, &li->list);
-       atomic_set(&li->active, 1);
-       if (inti->type == KVM_S390_SIGP_STOP)
-               li->action_bits |= ACTION_STOP_ON_STOP;
-       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
         spin_unlock(&li->lock);
-       kvm_s390_vcpu_wakeup(vcpu);
-       return 0;
+       if (!rc)
+               kvm_s390_vcpu_wakeup(vcpu);
+       return rc;
  }
  
  void kvm_s390_clear_float_irqs(struct kvm *kvm)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c

index 6b049ee75a5694d74cce4f6402b92ae7879f8c4e..3e09801e310461ff00e3975080eb9bd3b75de52f 100644 (file)
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -81,10 +81,17 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
+       { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
+       { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
+       { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
+       { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
+       { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
+       { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
+       { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
         { "diagnose_10", VCPU_STAT(diagnose_10) },
         { "diagnose_44", VCPU_STAT(diagnose_44) },
         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
@@ -453,6 +460,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
         spin_lock_init(&kvm->arch.float_int.lock);
         INIT_LIST_HEAD(&kvm->arch.float_int.list);
         init_waitqueue_head(&kvm->arch.ipte_wq);
+       mutex_init(&kvm->arch.ipte_mutex);
  
         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
         VM_EVENT(kvm, 3, "%s", "vm created");
@@ -711,7 +719,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
         }
  
         spin_lock_init(&vcpu->arch.local_int.lock);
-       INIT_LIST_HEAD(&vcpu->arch.local_int.list);
         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
         vcpu->arch.local_int.wq = &vcpu->wq;
         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
@@ -1114,13 +1121,15 @@ static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
                                       unsigned long token)
  {
         struct kvm_s390_interrupt inti;
-       inti.parm64 = token;
+       struct kvm_s390_irq irq;
  
         if (start_token) {
-               inti.type = KVM_S390_INT_PFAULT_INIT;
-               WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
+               irq.u.ext.ext_params2 = token;
+               irq.type = KVM_S390_INT_PFAULT_INIT;
+               WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
         } else {
                 inti.type = KVM_S390_INT_PFAULT_DONE;
+               inti.parm64 = token;
                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
         }
  }
@@ -1614,11 +1623,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
         switch (ioctl) {
         case KVM_S390_INTERRUPT: {
                 struct kvm_s390_interrupt s390int;
+               struct kvm_s390_irq s390irq;
  
                 r = -EFAULT;
                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
                         break;
-               r = kvm_s390_inject_vcpu(vcpu, &s390int);
+               if (s390int_to_s390irq(&s390int, &s390irq))
+                       return -EINVAL;
+               r = kvm_s390_inject_vcpu(vcpu, &s390irq);
                 break;
         }
         case KVM_S390_STORE_STATUS:
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h

index 244d02303182804540cfd2f800edff6527615677..a8f3d9b71c1159aeb35659b2a3dbbdb3cf531ee1 100644 (file)
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -24,8 +24,6 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
  /* declare vfacilities extern */
  extern unsigned long *vfacilities;
  
-int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
-
  /* Transactional Memory Execution related macros */
  #define IS_TE_ENABLED(vcpu)    ((vcpu->arch.sie_block->ecb & 0x10))
  #define TDB_FORMAT1            1
@@ -144,7 +142,7 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm);
  int __must_check kvm_s390_inject_vm(struct kvm *kvm,
                                     struct kvm_s390_interrupt *s390int);
  int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
-                                     struct kvm_s390_interrupt *s390int);
+                                     struct kvm_s390_irq *irq);
  int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
  struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
                                                     u64 cr6, u64 schid);
@@ -152,6 +150,10 @@ void kvm_s390_reinject_io_int(struct kvm *kvm,
                               struct kvm_s390_interrupt_info *inti);
  int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
  
+/* implemented in intercept.c */
+void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc);
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+
  /* implemented in priv.c */
  int is_valid_psw(psw_t *psw);
  int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
@@ -222,6 +224,9 @@ static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc)
         return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
  }
  
+int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
+                       struct kvm_s390_irq *s390irq);
+
  /* implemented in interrupt.c */
  int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
  int psw_extint_disabled(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c

index f47cb0c6d906d11655ed88154b188406092c6f13..1be578d64dfc785df2d298e2e9c16ace3c437615 100644 (file)
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -180,21 +180,18 @@ static int handle_skey(struct kvm_vcpu *vcpu)
         if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
  
-       vcpu->arch.sie_block->gpsw.addr =
-               __rewind_psw(vcpu->arch.sie_block->gpsw, 4);
+       kvm_s390_rewind_psw(vcpu, 4);
         VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
         return 0;
  }
  
  static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
  {
-       psw_t *psw = &vcpu->arch.sie_block->gpsw;
-
         vcpu->stat.instruction_ipte_interlock++;
-       if (psw_bits(*psw).p)
+       if (psw_bits(vcpu->arch.sie_block->gpsw).p)
                 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
         wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
-       psw->addr = __rewind_psw(*psw, 4);
+       kvm_s390_rewind_psw(vcpu, 4);
         VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
         return 0;
  }
@@ -650,10 +647,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
                 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
  
         start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
-       if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
-               if (kvm_s390_check_low_addr_protection(vcpu, start))
-                       return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
-       }
+       start = kvm_s390_logical_to_effective(vcpu, start);
  
         switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
         case 0x00000000:
@@ -669,6 +663,12 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
         default:
                 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
         }
+
+       if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+               if (kvm_s390_check_low_addr_protection(vcpu, start))
+                       return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+       }
+
         while (start < end) {
                 unsigned long useraddr, abs_addr;
  
@@ -725,8 +725,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
                 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
  
         /* Rewind PSW to repeat the ESSA instruction */
-       vcpu->arch.sie_block->gpsw.addr =
-               __rewind_psw(vcpu->arch.sie_block->gpsw, 4);
+       kvm_s390_rewind_psw(vcpu, 4);
         vcpu->arch.sie_block->cbrlo &= PAGE_MASK;       /* reset nceo */
         cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
         down_read(&gmap->mm->mmap_sem);
@@ -769,8 +768,8 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
  {
         int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
         int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-       u32 val = 0;
-       int reg, rc;
+       int reg, rc, nr_regs;
+       u32 ctl_array[16];
         u64 ga;
  
         vcpu->stat.instruction_lctl++;
@@ -786,19 +785,20 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
         VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
         trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
  
+       nr_regs = ((reg3 - reg1) & 0xf) + 1;
+       rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+       if (rc)
+               return kvm_s390_inject_prog_cond(vcpu, rc);
         reg = reg1;
+       nr_regs = 0;
         do {
-               rc = read_guest(vcpu, ga, &val, sizeof(val));
-               if (rc)
-                       return kvm_s390_inject_prog_cond(vcpu, rc);
                 vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
-               vcpu->arch.sie_block->gcr[reg] |= val;
-               ga += 4;
+               vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++];
                 if (reg == reg3)
                         break;
                 reg = (reg + 1) % 16;
         } while (1);
-
+       kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
         return 0;
  }
  
@@ -806,9 +806,9 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
  {
         int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
         int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+       int reg, rc, nr_regs;
+       u32 ctl_array[16];
         u64 ga;
-       u32 val;
-       int reg, rc;
  
         vcpu->stat.instruction_stctl++;
  
@@ -824,26 +824,24 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
         trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
  
         reg = reg1;
+       nr_regs = 0;
         do {
-               val = vcpu->arch.sie_block->gcr[reg] &  0x00000000fffffffful;
-               rc = write_guest(vcpu, ga, &val, sizeof(val));
-               if (rc)
-                       return kvm_s390_inject_prog_cond(vcpu, rc);
-               ga += 4;
+               ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
                 if (reg == reg3)
                         break;
                 reg = (reg + 1) % 16;
         } while (1);
-
-       return 0;
+       rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+       return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
  }
  
  static int handle_lctlg(struct kvm_vcpu *vcpu)
  {
         int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
         int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-       u64 ga, val;
-       int reg, rc;
+       int reg, rc, nr_regs;
+       u64 ctl_array[16];
+       u64 ga;
  
         vcpu->stat.instruction_lctlg++;
  
@@ -855,22 +853,22 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
         if (ga & 7)
                 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
  
-       reg = reg1;
-
         VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
         trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
  
+       nr_regs = ((reg3 - reg1) & 0xf) + 1;
+       rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+       if (rc)
+               return kvm_s390_inject_prog_cond(vcpu, rc);
+       reg = reg1;
+       nr_regs = 0;
         do {
-               rc = read_guest(vcpu, ga, &val, sizeof(val));
-               if (rc)
-                       return kvm_s390_inject_prog_cond(vcpu, rc);
-               vcpu->arch.sie_block->gcr[reg] = val;
-               ga += 8;
+               vcpu->arch.sie_block->gcr[reg] = ctl_array[nr_regs++];
                 if (reg == reg3)
                         break;
                 reg = (reg + 1) % 16;
         } while (1);
-
+       kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
         return 0;
  }
  
@@ -878,8 +876,9 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
  {
         int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
         int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-       u64 ga, val;
-       int reg, rc;
+       int reg, rc, nr_regs;
+       u64 ctl_array[16];
+       u64 ga;
  
         vcpu->stat.instruction_stctg++;
  
@@ -891,23 +890,19 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
         if (ga & 7)
                 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
  
-       reg = reg1;
-
         VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
         trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
  
+       reg = reg1;
+       nr_regs = 0;
         do {
-               val = vcpu->arch.sie_block->gcr[reg];
-               rc = write_guest(vcpu, ga, &val, sizeof(val));
-               if (rc)
-                       return kvm_s390_inject_prog_cond(vcpu, rc);
-               ga += 8;
+               ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
                 if (reg == reg3)
                         break;
                 reg = (reg + 1) % 16;
         } while (1);
-
-       return 0;
+       rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+       return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
  }
  
  static const intercept_handler_t eb_handlers[256] = {
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c

index cf243ba3d50f2907277ce18a1af88944ff5efc55..6651f9f73973806e390a8d649e256400383c0711 100644 (file)
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -20,20 +20,13 @@
  #include "kvm-s390.h"
  #include "trace.h"
  
-static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
+static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
                         u64 *reg)
  {
         struct kvm_s390_local_interrupt *li;
-       struct kvm_vcpu *dst_vcpu = NULL;
         int cpuflags;
         int rc;
  
-       if (cpu_addr >= KVM_MAX_VCPUS)
-               return SIGP_CC_NOT_OPERATIONAL;
-
-       dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-       if (!dst_vcpu)
-               return SIGP_CC_NOT_OPERATIONAL;
         li = &dst_vcpu->arch.local_int;
  
         cpuflags = atomic_read(li->cpuflags);
@@ -48,55 +41,53 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
                 rc = SIGP_CC_STATUS_STORED;
         }
  
-       VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
+       VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", dst_vcpu->vcpu_id,
+                  rc);
         return rc;
  }
  
-static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+                                   struct kvm_vcpu *dst_vcpu)
  {
-       struct kvm_s390_interrupt s390int = {
+       struct kvm_s390_irq irq = {
                 .type = KVM_S390_INT_EMERGENCY,
-               .parm = vcpu->vcpu_id,
+               .u.emerg.code = vcpu->vcpu_id,
         };
-       struct kvm_vcpu *dst_vcpu = NULL;
         int rc = 0;
  
-       if (cpu_addr < KVM_MAX_VCPUS)
-               dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-       if (!dst_vcpu)
-               return SIGP_CC_NOT_OPERATIONAL;
-
-       rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
+       rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
         if (!rc)
-               VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
+               VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x",
+                          dst_vcpu->vcpu_id);
  
         return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
  }
  
-static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
+static int __sigp_emergency(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
+{
+       return __inject_sigp_emergency(vcpu, dst_vcpu);
+}
+
+static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu,
+                                       struct kvm_vcpu *dst_vcpu,
                                         u16 asn, u64 *reg)
  {
-       struct kvm_vcpu *dst_vcpu = NULL;
         const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
         u16 p_asn, s_asn;
         psw_t *psw;
         u32 flags;
  
-       if (cpu_addr < KVM_MAX_VCPUS)
-               dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-       if (!dst_vcpu)
-               return SIGP_CC_NOT_OPERATIONAL;
         flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags);
         psw = &dst_vcpu->arch.sie_block->gpsw;
         p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff;  /* Primary ASN */
         s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff;  /* Secondary ASN */
  
-       /* Deliver the emergency signal? */
+       /* Inject the emergency signal? */
         if (!(flags & CPUSTAT_STOPPED)
             || (psw->mask & psw_int_mask) != psw_int_mask
             || ((flags & CPUSTAT_WAIT) && psw->addr != 0)
             || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) {
-               return __sigp_emergency(vcpu, cpu_addr);
+               return __inject_sigp_emergency(vcpu, dst_vcpu);
         } else {
                 *reg &= 0xffffffff00000000UL;
                 *reg |= SIGP_STATUS_INCORRECT_STATE;
@@ -104,23 +95,19 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
         }
  }
  
-static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __sigp_external_call(struct kvm_vcpu *vcpu,
+                               struct kvm_vcpu *dst_vcpu)
  {
-       struct kvm_s390_interrupt s390int = {
+       struct kvm_s390_irq irq = {
                 .type = KVM_S390_INT_EXTERNAL_CALL,
-               .parm = vcpu->vcpu_id,
+               .u.extcall.code = vcpu->vcpu_id,
         };
-       struct kvm_vcpu *dst_vcpu = NULL;
         int rc;
  
-       if (cpu_addr < KVM_MAX_VCPUS)
-               dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-       if (!dst_vcpu)
-               return SIGP_CC_NOT_OPERATIONAL;
-
-       rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
+       rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
         if (!rc)
-               VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
+               VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x",
+                          dst_vcpu->vcpu_id);
  
         return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
  }
@@ -128,29 +115,20 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
  static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action)
  {
         struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
-       struct kvm_s390_interrupt_info *inti;
         int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
  
-       inti = kzalloc(sizeof(*inti), GFP_ATOMIC);
-       if (!inti)
-               return -ENOMEM;
-       inti->type = KVM_S390_SIGP_STOP;
-
         spin_lock(&li->lock);
         if (li->action_bits & ACTION_STOP_ON_STOP) {
                 /* another SIGP STOP is pending */
-               kfree(inti);
                 rc = SIGP_CC_BUSY;
                 goto out;
         }
         if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
-               kfree(inti);
                 if ((action & ACTION_STORE_ON_STOP) != 0)
                         rc = -ESHUTDOWN;
                 goto out;
         }
-       list_add_tail(&inti->list, &li->list);
-       atomic_set(&li->active, 1);
+       set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
         li->action_bits |= action;
         atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
         kvm_s390_vcpu_wakeup(dst_vcpu);
@@ -160,23 +138,27 @@ out:
         return rc;
  }
  
-static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
+static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
  {
-       struct kvm_vcpu *dst_vcpu = NULL;
         int rc;
  
-       if (cpu_addr >= KVM_MAX_VCPUS)
-               return SIGP_CC_NOT_OPERATIONAL;
+       rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP);
+       VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", dst_vcpu->vcpu_id);
  
-       dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-       if (!dst_vcpu)
-               return SIGP_CC_NOT_OPERATIONAL;
+       return rc;
+}
  
-       rc = __inject_sigp_stop(dst_vcpu, action);
+static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
+                                       struct kvm_vcpu *dst_vcpu, u64 *reg)
+{
+       int rc;
  
-       VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
+       rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP |
+                                             ACTION_STORE_ON_STOP);
+       VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x",
+                  dst_vcpu->vcpu_id);
  
-       if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) {
+       if (rc == -ESHUTDOWN) {
                 /* If the CPU has already been stopped, we still have
                  * to save the status when doing stop-and-store. This
                  * has to be done after unlocking all spinlocks. */
@@ -212,18 +194,12 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
         return rc;
  }
  
-static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
-                            u64 *reg)
+static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
+                            u32 address, u64 *reg)
  {
         struct kvm_s390_local_interrupt *li;
-       struct kvm_vcpu *dst_vcpu = NULL;
-       struct kvm_s390_interrupt_info *inti;
         int rc;
  
-       if (cpu_addr < KVM_MAX_VCPUS)
-               dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-       if (!dst_vcpu)
-               return SIGP_CC_NOT_OPERATIONAL;
         li = &dst_vcpu->arch.local_int;
  
         /*
@@ -238,46 +214,34 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
                 return SIGP_CC_STATUS_STORED;
         }
  
-       inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-       if (!inti)
-               return SIGP_CC_BUSY;
-
         spin_lock(&li->lock);
         /* cpu must be in stopped state */
         if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
                 *reg &= 0xffffffff00000000UL;
                 *reg |= SIGP_STATUS_INCORRECT_STATE;
                 rc = SIGP_CC_STATUS_STORED;
-               kfree(inti);
                 goto out_li;
         }
  
-       inti->type = KVM_S390_SIGP_SET_PREFIX;
-       inti->prefix.address = address;
-
-       list_add_tail(&inti->list, &li->list);
-       atomic_set(&li->active, 1);
+       li->irq.prefix.address = address;
+       set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
         kvm_s390_vcpu_wakeup(dst_vcpu);
         rc = SIGP_CC_ORDER_CODE_ACCEPTED;
  
-       VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
+       VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", dst_vcpu->vcpu_id,
+                  address);
  out_li:
         spin_unlock(&li->lock);
         return rc;
  }
  
-static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
-                                       u32 addr, u64 *reg)
+static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu,
+                                      struct kvm_vcpu *dst_vcpu,
+                                      u32 addr, u64 *reg)
  {
-       struct kvm_vcpu *dst_vcpu = NULL;
         int flags;
         int rc;
  
-       if (cpu_id < KVM_MAX_VCPUS)
-               dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id);
-       if (!dst_vcpu)
-               return SIGP_CC_NOT_OPERATIONAL;
-
         spin_lock(&dst_vcpu->arch.local_int.lock);
         flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
         spin_unlock(&dst_vcpu->arch.local_int.lock);
@@ -297,19 +261,12 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
         return rc;
  }
  
-static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
-                               u64 *reg)
+static int __sigp_sense_running(struct kvm_vcpu *vcpu,
+                               struct kvm_vcpu *dst_vcpu, u64 *reg)
  {
         struct kvm_s390_local_interrupt *li;
-       struct kvm_vcpu *dst_vcpu = NULL;
         int rc;
  
-       if (cpu_addr >= KVM_MAX_VCPUS)
-               return SIGP_CC_NOT_OPERATIONAL;
-
-       dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-       if (!dst_vcpu)
-               return SIGP_CC_NOT_OPERATIONAL;
         li = &dst_vcpu->arch.local_int;
         if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
                 /* running */
@@ -321,26 +278,19 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
                 rc = SIGP_CC_STATUS_STORED;
         }
  
-       VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr,
-                  rc);
+       VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x",
+                  dst_vcpu->vcpu_id, rc);
  
         return rc;
  }
  
-/* Test whether the destination CPU is available and not busy */
-static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __prepare_sigp_re_start(struct kvm_vcpu *vcpu,
+                                  struct kvm_vcpu *dst_vcpu, u8 order_code)
  {
-       struct kvm_s390_local_interrupt *li;
-       int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
-       struct kvm_vcpu *dst_vcpu = NULL;
-
-       if (cpu_addr >= KVM_MAX_VCPUS)
-               return SIGP_CC_NOT_OPERATIONAL;
+       struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
+       /* handle (RE)START in user space */
+       int rc = -EOPNOTSUPP;
  
-       dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-       if (!dst_vcpu)
-               return SIGP_CC_NOT_OPERATIONAL;
-       li = &dst_vcpu->arch.local_int;
         spin_lock(&li->lock);
         if (li->action_bits & ACTION_STOP_ON_STOP)
                 rc = SIGP_CC_BUSY;
@@ -349,90 +299,131 @@ static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
         return rc;
  }
  
-int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
+static int __prepare_sigp_cpu_reset(struct kvm_vcpu *vcpu,
+                                   struct kvm_vcpu *dst_vcpu, u8 order_code)
  {
-       int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
-       int r3 = vcpu->arch.sie_block->ipa & 0x000f;
-       u32 parameter;
-       u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
-       u8 order_code;
-       int rc;
+       /* handle (INITIAL) CPU RESET in user space */
+       return -EOPNOTSUPP;
+}
  
-       /* sigp in userspace can exit */
-       if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
-               return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+static int __prepare_sigp_unknown(struct kvm_vcpu *vcpu,
+                                 struct kvm_vcpu *dst_vcpu)
+{
+       /* handle unknown orders in user space */
+       return -EOPNOTSUPP;
+}
  
-       order_code = kvm_s390_get_base_disp_rs(vcpu);
+static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
+                          u16 cpu_addr, u32 parameter, u64 *status_reg)
+{
+       int rc;
+       struct kvm_vcpu *dst_vcpu;
  
-       if (r1 % 2)
-               parameter = vcpu->run->s.regs.gprs[r1];
-       else
-               parameter = vcpu->run->s.regs.gprs[r1 + 1];
+       if (cpu_addr >= KVM_MAX_VCPUS)
+               return SIGP_CC_NOT_OPERATIONAL;
+
+       dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+       if (!dst_vcpu)
+               return SIGP_CC_NOT_OPERATIONAL;
  
-       trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
         switch (order_code) {
         case SIGP_SENSE:
                 vcpu->stat.instruction_sigp_sense++;
-               rc = __sigp_sense(vcpu, cpu_addr,
-                                 &vcpu->run->s.regs.gprs[r1]);
+               rc = __sigp_sense(vcpu, dst_vcpu, status_reg);
                 break;
         case SIGP_EXTERNAL_CALL:
                 vcpu->stat.instruction_sigp_external_call++;
-               rc = __sigp_external_call(vcpu, cpu_addr);
+               rc = __sigp_external_call(vcpu, dst_vcpu);
                 break;
         case SIGP_EMERGENCY_SIGNAL:
                 vcpu->stat.instruction_sigp_emergency++;
-               rc = __sigp_emergency(vcpu, cpu_addr);
+               rc = __sigp_emergency(vcpu, dst_vcpu);
                 break;
         case SIGP_STOP:
                 vcpu->stat.instruction_sigp_stop++;
-               rc = __sigp_stop(vcpu, cpu_addr, ACTION_STOP_ON_STOP);
+               rc = __sigp_stop(vcpu, dst_vcpu);
                 break;
         case SIGP_STOP_AND_STORE_STATUS:
-               vcpu->stat.instruction_sigp_stop++;
-               rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP |
-                                                ACTION_STOP_ON_STOP);
+               vcpu->stat.instruction_sigp_stop_store_status++;
+               rc = __sigp_stop_and_store_status(vcpu, dst_vcpu, status_reg);
                 break;
         case SIGP_STORE_STATUS_AT_ADDRESS:
-               rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter,
-                                                &vcpu->run->s.regs.gprs[r1]);
-               break;
-       case SIGP_SET_ARCHITECTURE:
-               vcpu->stat.instruction_sigp_arch++;
-               rc = __sigp_set_arch(vcpu, parameter);
+               vcpu->stat.instruction_sigp_store_status++;
+               rc = __sigp_store_status_at_addr(vcpu, dst_vcpu, parameter,
+                                                status_reg);
                 break;
         case SIGP_SET_PREFIX:
                 vcpu->stat.instruction_sigp_prefix++;
-               rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
-                                      &vcpu->run->s.regs.gprs[r1]);
+               rc = __sigp_set_prefix(vcpu, dst_vcpu, parameter, status_reg);
                 break;
         case SIGP_COND_EMERGENCY_SIGNAL:
-               rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter,
-                                                 &vcpu->run->s.regs.gprs[r1]);
+               vcpu->stat.instruction_sigp_cond_emergency++;
+               rc = __sigp_conditional_emergency(vcpu, dst_vcpu, parameter,
+                                                 status_reg);
                 break;
         case SIGP_SENSE_RUNNING:
                 vcpu->stat.instruction_sigp_sense_running++;
-               rc = __sigp_sense_running(vcpu, cpu_addr,
-                                         &vcpu->run->s.regs.gprs[r1]);
+               rc = __sigp_sense_running(vcpu, dst_vcpu, status_reg);
                 break;
         case SIGP_START:
-               rc = sigp_check_callable(vcpu, cpu_addr);
-               if (rc == SIGP_CC_ORDER_CODE_ACCEPTED)
-                       rc = -EOPNOTSUPP;    /* Handle START in user space */
+               vcpu->stat.instruction_sigp_start++;
+               rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
                 break;
         case SIGP_RESTART:
                 vcpu->stat.instruction_sigp_restart++;
-               rc = sigp_check_callable(vcpu, cpu_addr);
-               if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) {
-                       VCPU_EVENT(vcpu, 4,
-                                  "sigp restart %x to handle userspace",
-                                  cpu_addr);
-                       /* user space must know about restart */
-                       rc = -EOPNOTSUPP;
-               }
+               rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
+               break;
+       case SIGP_INITIAL_CPU_RESET:
+               vcpu->stat.instruction_sigp_init_cpu_reset++;
+               rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+               break;
+       case SIGP_CPU_RESET:
+               vcpu->stat.instruction_sigp_cpu_reset++;
+               rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+               break;
+       default:
+               vcpu->stat.instruction_sigp_unknown++;
+               rc = __prepare_sigp_unknown(vcpu, dst_vcpu);
+       }
+
+       if (rc == -EOPNOTSUPP)
+               VCPU_EVENT(vcpu, 4,
+                          "sigp order %u -> cpu %x: handled in user space",
+                          order_code, dst_vcpu->vcpu_id);
+
+       return rc;
+}
+
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
+{
+       int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+       int r3 = vcpu->arch.sie_block->ipa & 0x000f;
+       u32 parameter;
+       u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
+       u8 order_code;
+       int rc;
+
+       /* sigp in userspace can exit */
+       if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+               return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+       order_code = kvm_s390_get_base_disp_rs(vcpu);
+
+       if (r1 % 2)
+               parameter = vcpu->run->s.regs.gprs[r1];
+       else
+               parameter = vcpu->run->s.regs.gprs[r1 + 1];
+
+       trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
+       switch (order_code) {
+       case SIGP_SET_ARCHITECTURE:
+               vcpu->stat.instruction_sigp_arch++;
+               rc = __sigp_set_arch(vcpu, parameter);
                 break;
         default:
-               return -EOPNOTSUPP;
+               rc = handle_sigp_dst(vcpu, order_code, cpu_addr,
+                                    parameter,
+                                    &vcpu->run->s.regs.gprs[r1]);
         }
  
         if (rc < 0)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c

index 71c7eff2c89f0a6489baf3edfd28bdc0e1c228ef..be99357d238c68e34dee133c52053e23c88a34d4 100644 (file)
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -844,7 +844,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
  
         down_read(&mm->mmap_sem);
  retry:
-       ptep = get_locked_pte(current->mm, addr, &ptl);
+       ptep = get_locked_pte(mm, addr, &ptl);
         if (unlikely(!ptep)) {
                 up_read(&mm->mmap_sem);
                 return -EFAULT;
@@ -888,6 +888,45 @@ retry:
  }
  EXPORT_SYMBOL(set_guest_storage_key);
  
+unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
+{
+       spinlock_t *ptl;
+       pgste_t pgste;
+       pte_t *ptep;
+       uint64_t physaddr;
+       unsigned long key = 0;
+
+       down_read(&mm->mmap_sem);
+       ptep = get_locked_pte(mm, addr, &ptl);
+       if (unlikely(!ptep)) {
+               up_read(&mm->mmap_sem);
+               return -EFAULT;
+       }
+       pgste = pgste_get_lock(ptep);
+
+       if (pte_val(*ptep) & _PAGE_INVALID) {
+               key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
+               key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
+               key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
+               key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
+       } else {
+               physaddr = pte_val(*ptep) & PAGE_MASK;
+               key = page_get_storage_key(physaddr);
+
+               /* Reflect guest's logical view, not physical */
+               if (pgste_val(pgste) & PGSTE_GR_BIT)
+                       key |= _PAGE_REFERENCED;
+               if (pgste_val(pgste) & PGSTE_GC_BIT)
+                       key |= _PAGE_CHANGED;
+       }
+
+       pgste_set_unlock(ptep, pgste);
+       pte_unmap_unlock(ptep, ptl);
+       up_read(&mm->mmap_sem);
+       return key;
+}
+EXPORT_SYMBOL(get_guest_storage_key);
+
  #else /* CONFIG_PGSTE */
  
  static inline int page_table_with_pgste(struct page *page)
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c

index be65f035d18a11bbfeb40ded7f7ae8854889db58..5cbc96d801ff9cefc38a09cdea943121dc195a0e 100644 (file)
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -460,10 +460,12 @@ static void __init sparc_context_init(int numctx)
  void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm,
                struct task_struct *tsk)
  {
+       unsigned long flags;
+
         if (mm->context == NO_CONTEXT) {
-               spin_lock(&srmmu_context_spinlock);
+               spin_lock_irqsave(&srmmu_context_spinlock, flags);
                 alloc_context(old_mm, mm);
-               spin_unlock(&srmmu_context_spinlock);
+               spin_unlock_irqrestore(&srmmu_context_spinlock, flags);
                 srmmu_ctxd_set(&srmmu_context_table[mm->context], mm->pgd);
         }
  
@@ -986,14 +988,15 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
  
  void destroy_context(struct mm_struct *mm)
  {
+       unsigned long flags;
  
         if (mm->context != NO_CONTEXT) {
                 flush_cache_mm(mm);
                 srmmu_ctxd_set(&srmmu_context_table[mm->context], srmmu_swapper_pg_dir);
                 flush_tlb_mm(mm);
-               spin_lock(&srmmu_context_spinlock);
+               spin_lock_irqsave(&srmmu_context_spinlock, flags);
                 free_context(mm->context);
-               spin_unlock(&srmmu_context_spinlock);
+               spin_unlock_irqrestore(&srmmu_context_spinlock, flags);
                 mm->context = NO_CONTEXT;
         }
  }
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h

index 6ed0c30d6a0c347e6dd4c9ac0cbd26acfb9ec53b..d89c6b828c96492a414fe9a3d92ca7ec3791b53c 100644 (file)
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -33,7 +33,7 @@
  
  #define KVM_MAX_VCPUS 255
  #define KVM_SOFT_MAX_VCPUS 160
-#define KVM_USER_MEM_SLOTS 125
+#define KVM_USER_MEM_SLOTS 509
  /* memory slots that are not exposed to userspace */
  #define KVM_PRIVATE_MEM_SLOTS 3
  #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
@@ -51,6 +51,7 @@
                           | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
  
  #define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
+#define CR3_PCID_INVD           (1UL << 63)
  #define CR4_RESERVED_BITS                                               \
         (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
                           | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
@@ -361,6 +362,7 @@ struct kvm_vcpu_arch {
         int mp_state;
         u64 ia32_misc_enable_msr;
         bool tpr_access_reporting;
+       u64 ia32_xss;
  
         /*
          * Paging state of the vcpu
@@ -542,7 +544,7 @@ struct kvm_apic_map {
         struct rcu_head rcu;
         u8 ldr_bits;
         /* fields bellow are used to decode ldr values in different modes */
-       u32 cid_shift, cid_mask, lid_mask;
+       u32 cid_shift, cid_mask, lid_mask, broadcast;
         struct kvm_lapic *phys_map[256];
         /* first index is cluster id second is cpu id in a cluster */
         struct kvm_lapic *logical_map[16][16];
@@ -602,6 +604,9 @@ struct kvm_arch {
  
         struct kvm_xen_hvm_config xen_hvm_config;
  
+       /* reads protected by irq_srcu, writes by irq_lock */
+       struct hlist_head mask_notifier_list;
+
         /* fields used by HYPER-V emulation */
         u64 hv_guest_os_id;
         u64 hv_hypercall;
@@ -659,6 +664,16 @@ struct msr_data {
         u64 data;
  };
  
+struct kvm_lapic_irq {
+       u32 vector;
+       u32 delivery_mode;
+       u32 dest_mode;
+       u32 level;
+       u32 trig_mode;
+       u32 shorthand;
+       u32 dest_id;
+};
+
  struct kvm_x86_ops {
         int (*cpu_has_kvm_support)(void);          /* __init */
         int (*disabled_by_bios)(void);             /* __init */
@@ -767,6 +782,7 @@ struct kvm_x86_ops {
                                enum x86_intercept_stage stage);
         void (*handle_external_intr)(struct kvm_vcpu *vcpu);
         bool (*mpx_supported)(void);
+       bool (*xsaves_supported)(void);
  
         int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
  
@@ -818,6 +834,19 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
                           const void *val, int bytes);
  u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
  
+struct kvm_irq_mask_notifier {
+       void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
+       int irq;
+       struct hlist_node link;
+};
+
+void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
+                                   struct kvm_irq_mask_notifier *kimn);
+void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
+                                     struct kvm_irq_mask_notifier *kimn);
+void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
+                            bool mask);
+
  extern bool tdp_enabled;
  
  u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
@@ -863,7 +892,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
  
  void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
  int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
-void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector);
+void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
  
  int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
                     int reason, bool has_error_code, u32 error_code);
@@ -895,6 +924,7 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
                             gfn_t gfn, void *data, int offset, int len,
                             u32 access);
  bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
+bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr);
  
  static inline int __kvm_irq_line_state(unsigned long *irq_state,
                                        int irq_source_id, int level)
@@ -1066,6 +1096,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
  void kvm_define_shared_msr(unsigned index, u32 msr);
  int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
  
+unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
  bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
  
  void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h

index bcbfade26d8d587835124db66c968a09fd5b7d5b..45afaee9555c8b1285082b51dde7a9762df9a571 100644 (file)
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -69,6 +69,7 @@
  #define SECONDARY_EXEC_PAUSE_LOOP_EXITING      0x00000400
  #define SECONDARY_EXEC_ENABLE_INVPCID          0x00001000
  #define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
+#define SECONDARY_EXEC_XSAVES                  0x00100000
  
  
  #define PIN_BASED_EXT_INTR_MASK                 0x00000001
@@ -159,6 +160,8 @@ enum vmcs_field {
         EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
         VMREAD_BITMAP                   = 0x00002026,
         VMWRITE_BITMAP                  = 0x00002028,
+       XSS_EXIT_BITMAP                 = 0x0000202C,
+       XSS_EXIT_BITMAP_HIGH            = 0x0000202D,
         GUEST_PHYSICAL_ADDRESS          = 0x00002400,
         GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
         VMCS_LINK_POINTER               = 0x00002800,
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h

index 7e7a79ada6584fa4161e285f1f32181a29baa70e..5fa9770035dc935c0a90899f470e668ab889ec7f 100644 (file)
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -16,6 +16,7 @@
  #define XSTATE_Hi16_ZMM                0x80
  
  #define XSTATE_FPSSE   (XSTATE_FP | XSTATE_SSE)
+#define XSTATE_AVX512  (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
  /* Bit 63 of XCR0 is reserved for future expansion */
  #define XSTATE_EXTEND_MASK     (~(XSTATE_FPSSE | (1ULL << 63)))
  
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h

index 990a2fe1588d53d1c101394e5f9827f9fc63c557..b813bf9da1e2794788087fc69e6a8cddb535ed38 100644 (file)
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -72,6 +72,8 @@
  #define EXIT_REASON_XSETBV              55
  #define EXIT_REASON_APIC_WRITE          56
  #define EXIT_REASON_INVPCID             58
+#define EXIT_REASON_XSAVES              63
+#define EXIT_REASON_XRSTORS             64
  
  #define VMX_EXIT_REASONS \
         { EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
@@ -116,6 +118,8 @@
         { EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
         { EXIT_REASON_INVD,                  "INVD" }, \
         { EXIT_REASON_INVVPID,               "INVVPID" }, \
-       { EXIT_REASON_INVPCID,               "INVPCID" }
+       { EXIT_REASON_INVPCID,               "INVPCID" }, \
+       { EXIT_REASON_XSAVES,                "XSAVES" }, \
+       { EXIT_REASON_XRSTORS,               "XRSTORS" }
  
  #endif /* _UAPIVMX_H */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c

index f6945bef2cd10e7b1cbf439cb5ae89cc614bf47c..94f6434843008c38b8a091c077e0c94fc36c9a07 100644 (file)
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -283,7 +283,14 @@ NOKPROBE_SYMBOL(do_async_page_fault);
  static void __init paravirt_ops_setup(void)
  {
         pv_info.name = "KVM";
-       pv_info.paravirt_enabled = 1;
+
+       /*
+        * KVM isn't paravirt in the sense of paravirt_enabled.  A KVM
+        * guest kernel works like a bare metal kernel with additional
+        * features, and paravirt_enabled is about features that are
+        * missing.
+        */
+       pv_info.paravirt_enabled = 0;
  
         if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
                 pv_cpu_ops.io_delay = kvm_io_delay;
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c

index d9156ceecdff03ca0d55ab2834df0d4f7e828203..42caaef897c86987a75796c1b6edea79d7f164cb 100644 (file)
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -59,13 +59,12 @@ static void kvm_get_wallclock(struct timespec *now)
  
         native_write_msr(msr_kvm_wall_clock, low, high);
  
-       preempt_disable();
-       cpu = smp_processor_id();
+       cpu = get_cpu();
  
         vcpu_time = &hv_clock[cpu].pvti;
         pvclock_read_wallclock(&wall_clock, vcpu_time, now);
  
-       preempt_enable();
+       put_cpu();
  }
  
  static int kvm_set_wallclock(const struct timespec *now)
@@ -107,11 +106,10 @@ static unsigned long kvm_get_tsc_khz(void)
         int cpu;
         unsigned long tsc_khz;
  
-       preempt_disable();
-       cpu = smp_processor_id();
+       cpu = get_cpu();
         src = &hv_clock[cpu].pvti;
         tsc_khz = pvclock_tsc_khz(src);
-       preempt_enable();
+       put_cpu();
         return tsc_khz;
  }
  
@@ -263,7 +261,6 @@ void __init kvmclock_init(void)
  #endif
         kvm_get_preset_lpj();
         clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
-       pv_info.paravirt_enabled = 1;
         pv_info.name = "KVM";
  
         if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
@@ -284,23 +281,22 @@ int __init kvm_setup_vsyscall_timeinfo(void)
  
         size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
  
-       preempt_disable();
-       cpu = smp_processor_id();
+       cpu = get_cpu();
  
         vcpu_time = &hv_clock[cpu].pvti;
         flags = pvclock_read_flags(vcpu_time);
  
         if (!(flags & PVCLOCK_TSC_STABLE_BIT)) {
-               preempt_enable();
+               put_cpu();
                 return 1;
         }
  
         if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
-               preempt_enable();
+               put_cpu();
                 return ret;
         }
  
-       preempt_enable();
+       put_cpu();
  
         kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
  #endif
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c

index 4c540c4719d80d14a9797648d2d65cfc768c2e10..0de1fae2bdf000b5ed6cfae68d76ca21d5c246b9 100644 (file)
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -738,3 +738,4 @@ void *get_xsave_addr(struct xsave_struct *xsave, int xstate)
  
         return (void *)xsave + xstate_comp_offsets[feature];
  }
+EXPORT_SYMBOL_GPL(get_xsave_addr);
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile

index 25d22b2d6509e3e0924c39641214085de2aa2b52..08f790dfadc9fb90dc3591687aaa93cc73fe1f88 100644 (file)
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,14 +7,13 @@ CFLAGS_vmx.o := -I.
  
  KVM := ../../../virt/kvm
  
-kvm-y                  += $(KVM)/kvm_main.o $(KVM)/ioapic.o \
-                               $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
+kvm-y                  += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
                                 $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
-kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)    += $(KVM)/assigned-dev.o $(KVM)/iommu.o
  kvm-$(CONFIG_KVM_ASYNC_PF)     += $(KVM)/async_pf.o
  
  kvm-y                  += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-                          i8254.o cpuid.o pmu.o
+                          i8254.o ioapic.o irq_comm.o cpuid.o pmu.o
+kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)    += assigned-dev.o iommu.o
  kvm-intel-y            += vmx.o
  kvm-amd-y              += svm.o
  
diff --git a/virt/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c

similarity index 97%

rename from virt/kvm/assigned-dev.c

rename to arch/x86/kvm/assigned-dev.c

index e05000e200d22bf7e7edf1ba6a3d8dbc1d1c9a6b..6eb5c20ee3739855a5d3fe52aa9a253b677552fd 100644 (file)
--- a/virt/kvm/assigned-dev.c
+++ b/arch/x86/kvm/assigned-dev.c
@@ -20,6 +20,32 @@
  #include <linux/namei.h>
  #include <linux/fs.h>
  #include "irq.h"
+#include "assigned-dev.h"
+
+struct kvm_assigned_dev_kernel {
+       struct kvm_irq_ack_notifier ack_notifier;
+       struct list_head list;
+       int assigned_dev_id;
+       int host_segnr;
+       int host_busnr;
+       int host_devfn;
+       unsigned int entries_nr;
+       int host_irq;
+       bool host_irq_disabled;
+       bool pci_2_3;
+       struct msix_entry *host_msix_entries;
+       int guest_irq;
+       struct msix_entry *guest_msix_entries;
+       unsigned long irq_requested_type;
+       int irq_source_id;
+       int flags;
+       struct pci_dev *dev;
+       struct kvm *kvm;
+       spinlock_t intx_lock;
+       spinlock_t intx_mask_lock;
+       char irq_name[32];
+       struct pci_saved_state *pci_saved_state;
+};
  
  static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
                                                       int assigned_dev_id)
@@ -748,7 +774,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
                 if (r)
                         goto out_list_del;
         }
-       r = kvm_assign_device(kvm, match);
+       r = kvm_assign_device(kvm, match->dev);
         if (r)
                 goto out_list_del;
  
@@ -790,7 +816,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
                 goto out;
         }
  
-       kvm_deassign_device(kvm, match);
+       kvm_deassign_device(kvm, match->dev);
  
         kvm_free_assigned_device(kvm, match);
  
diff --git a/arch/x86/kvm/assigned-dev.h b/arch/x86/kvm/assigned-dev.h

new file mode 100644 (file)

index 0000000..a428c1a
--- /dev/null
+++ b/arch/x86/kvm/assigned-dev.h
@@ -0,0 +1,32 @@
+#ifndef ARCH_X86_KVM_ASSIGNED_DEV_H
+#define ARCH_X86_KVM_ASSIGNED_DEV_H
+
+#include <linux/kvm_host.h>
+
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
+int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev);
+int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev);
+
+int kvm_iommu_map_guest(struct kvm *kvm);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+
+long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
+                                 unsigned long arg);
+
+void kvm_free_all_assigned_devices(struct kvm *kvm);
+#else
+static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+       return 0;
+}
+
+static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
+                                               unsigned long arg)
+{
+       return -ENOTTY;
+}
+
+static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
+#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */
+
+#endif /* ARCH_X86_KVM_ASSIGNED_DEV_H */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c

index 976e3a57f9eaa0bc178e4a65b45531507f0416b4..8a80737ee6e6ec14bc7d9a6ffe08d9f580d3c890 100644 (file)
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -23,7 +23,7 @@
  #include "mmu.h"
  #include "trace.h"
  
-static u32 xstate_required_size(u64 xstate_bv)
+static u32 xstate_required_size(u64 xstate_bv, bool compacted)
  {
         int feature_bit = 0;
         u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
@@ -31,9 +31,10 @@ static u32 xstate_required_size(u64 xstate_bv)
         xstate_bv &= XSTATE_EXTEND_MASK;
         while (xstate_bv) {
                 if (xstate_bv & 0x1) {
-                       u32 eax, ebx, ecx, edx;
+                       u32 eax, ebx, ecx, edx, offset;
                         cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
-                       ret = max(ret, eax + ebx);
+                       offset = compacted ? ret : ebx;
+                       ret = max(ret, offset + eax);
                 }
  
                 xstate_bv >>= 1;
@@ -53,6 +54,8 @@ u64 kvm_supported_xcr0(void)
         return xcr0;
  }
  
+#define F(x) bit(X86_FEATURE_##x)
+
  int kvm_update_cpuid(struct kvm_vcpu *vcpu)
  {
         struct kvm_cpuid_entry2 *best;
@@ -64,13 +67,13 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
  
         /* Update OSXSAVE bit */
         if (cpu_has_xsave && best->function == 0x1) {
-               best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
+               best->ecx &= ~F(OSXSAVE);
                 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
-                       best->ecx |= bit(X86_FEATURE_OSXSAVE);
+                       best->ecx |= F(OSXSAVE);
         }
  
         if (apic) {
-               if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER))
+               if (best->ecx & F(TSC_DEADLINE_TIMER))
                         apic->lapic_timer.timer_mode_mask = 3 << 17;
                 else
                         apic->lapic_timer.timer_mode_mask = 1 << 17;
@@ -85,9 +88,13 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
                         (best->eax | ((u64)best->edx << 32)) &
                         kvm_supported_xcr0();
                 vcpu->arch.guest_xstate_size = best->ebx =
-                       xstate_required_size(vcpu->arch.xcr0);
+                       xstate_required_size(vcpu->arch.xcr0, false);
         }
  
+       best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
+       if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
+               best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
+
         /*
          * The existing code assumes virtual address is 48-bit in the canonical
          * address checks; exit if it is ever changed.
@@ -122,8 +129,8 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
                         break;
                 }
         }
-       if (entry && (entry->edx & bit(X86_FEATURE_NX)) && !is_efer_nx()) {
-               entry->edx &= ~bit(X86_FEATURE_NX);
+       if (entry && (entry->edx & F(NX)) && !is_efer_nx()) {
+               entry->edx &= ~F(NX);
                 printk(KERN_INFO "kvm: guest NX capability removed\n");
         }
  }
@@ -227,8 +234,6 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
         entry->flags = 0;
  }
  
-#define F(x) bit(X86_FEATURE_##x)
-
  static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
                                    u32 func, u32 index, int *nent, int maxnent)
  {
@@ -267,6 +272,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
         unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
         unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
         unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0;
+       unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
  
         /* cpuid 1.edx */
         const u32 kvm_supported_word0_x86_features =
@@ -317,7 +323,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
         const u32 kvm_supported_word9_x86_features =
                 F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
                 F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
-               F(ADX) | F(SMAP);
+               F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
+               F(AVX512CD);
+
+       /* cpuid 0xD.1.eax */
+       const u32 kvm_supported_word10_x86_features =
+               F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
  
         /* all calls to cpuid_count() should be made on the same cpu */
         get_cpu();
@@ -453,16 +464,34 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                 u64 supported = kvm_supported_xcr0();
  
                 entry->eax &= supported;
+               entry->ebx = xstate_required_size(supported, false);
+               entry->ecx = entry->ebx;
                 entry->edx &= supported >> 32;
                 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+               if (!supported)
+                       break;
+
                 for (idx = 1, i = 1; idx < 64; ++idx) {
                         u64 mask = ((u64)1 << idx);
                         if (*nent >= maxnent)
                                 goto out;
  
                         do_cpuid_1_ent(&entry[i], function, idx);
-                       if (entry[i].eax == 0 || !(supported & mask))
-                               continue;
+                       if (idx == 1) {
+                               entry[i].eax &= kvm_supported_word10_x86_features;
+                               entry[i].ebx = 0;
+                               if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
+                                       entry[i].ebx =
+                                               xstate_required_size(supported,
+                                                                    true);
+                       } else {
+                               if (entry[i].eax == 0 || !(supported & mask))
+                                       continue;
+                               if (WARN_ON_ONCE(entry[i].ecx & 1))
+                                       continue;
+                       }
+                       entry[i].ecx = 0;
+                       entry[i].edx = 0;
                         entry[i].flags |=
                                KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                         ++*nent;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c

index 9f8a2faf50407b6212f71a5de81aea8680855f87..169b09d76ddd83d3033d93d2b7eace6fada2331e 100644 (file)
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -123,6 +123,7 @@
  #define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */
  #define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
  #define Escape      (5<<15)     /* Escape to coprocessor instruction */
+#define InstrDual   (6<<15)     /* Alternate instruction decoding of mod == 3 */
  #define Sse         (1<<18)     /* SSE Vector instruction */
  /* Generic ModRM decode. */
  #define ModRM       (1<<19)
@@ -166,6 +167,8 @@
  #define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
  #define NoBigReal   ((u64)1 << 50)  /* No big real mode */
  #define PrivUD      ((u64)1 << 51)  /* #UD instead of #GP on CPL > 0 */
+#define NearBranch  ((u64)1 << 52)  /* Near branches */
+#define No16       ((u64)1 << 53)  /* No 16 bit operand */
  
  #define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
  
@@ -209,6 +212,7 @@ struct opcode {
                 const struct group_dual *gdual;
                 const struct gprefix *gprefix;
                 const struct escape *esc;
+               const struct instr_dual *idual;
                 void (*fastop)(struct fastop *fake);
         } u;
         int (*check_perm)(struct x86_emulate_ctxt *ctxt);
@@ -231,6 +235,11 @@ struct escape {
         struct opcode high[64];
  };
  
+struct instr_dual {
+       struct opcode mod012;
+       struct opcode mod3;
+};
+
  /* EFLAGS bit definitions. */
  #define EFLG_ID (1<<21)
  #define EFLG_VIP (1<<20)
@@ -379,6 +388,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
         ON64(FOP2E(op##q, rax, cl)) \
         FOP_END
  
+/* 2 operand, src and dest are reversed */
+#define FASTOP2R(op, name) \
+       FOP_START(name) \
+       FOP2E(op##b, dl, al) \
+       FOP2E(op##w, dx, ax) \
+       FOP2E(op##l, edx, eax) \
+       ON64(FOP2E(op##q, rdx, rax)) \
+       FOP_END
+
  #define FOP3E(op,  dst, src, src2) \
         FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET
  
@@ -477,9 +495,9 @@ address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
  }
  
  static inline unsigned long
-register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg)
+register_address(struct x86_emulate_ctxt *ctxt, int reg)
  {
-       return address_mask(ctxt, reg);
+       return address_mask(ctxt, reg_read(ctxt, reg));
  }
  
  static void masked_increment(ulong *reg, ulong mask, int inc)
@@ -488,7 +506,7 @@ static void masked_increment(ulong *reg, ulong mask, int inc)
  }
  
  static inline void
-register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc)
+register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
  {
         ulong mask;
  
@@ -496,7 +514,7 @@ register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, in
                 mask = ~0UL;
         else
                 mask = ad_mask(ctxt);
-       masked_increment(reg, mask, inc);
+       masked_increment(reg_rmw(ctxt, reg), mask, inc);
  }
  
  static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
@@ -564,40 +582,6 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
         return emulate_exception(ctxt, NM_VECTOR, 0, false);
  }
  
-static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
-                              int cs_l)
-{
-       switch (ctxt->op_bytes) {
-       case 2:
-               ctxt->_eip = (u16)dst;
-               break;
-       case 4:
-               ctxt->_eip = (u32)dst;
-               break;
-#ifdef CONFIG_X86_64
-       case 8:
-               if ((cs_l && is_noncanonical_address(dst)) ||
-                   (!cs_l && (dst >> 32) != 0))
-                       return emulate_gp(ctxt, 0);
-               ctxt->_eip = dst;
-               break;
-#endif
-       default:
-               WARN(1, "unsupported eip assignment size\n");
-       }
-       return X86EMUL_CONTINUE;
-}
-
-static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
-{
-       return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
-}
-
-static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
-       return assign_eip_near(ctxt, ctxt->_eip + rel);
-}
-
  static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
  {
         u16 selector;
@@ -641,25 +625,24 @@ static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size)
                 return true;
  }
  
-static int __linearize(struct x86_emulate_ctxt *ctxt,
-                    struct segmented_address addr,
-                    unsigned *max_size, unsigned size,
-                    bool write, bool fetch,
-                    ulong *linear)
+static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
+                                      struct segmented_address addr,
+                                      unsigned *max_size, unsigned size,
+                                      bool write, bool fetch,
+                                      enum x86emul_mode mode, ulong *linear)
  {
         struct desc_struct desc;
         bool usable;
         ulong la;
         u32 lim;
         u16 sel;
-       unsigned cpl;
  
         la = seg_base(ctxt, addr.seg) + addr.ea;
         *max_size = 0;
-       switch (ctxt->mode) {
+       switch (mode) {
         case X86EMUL_MODE_PROT64:
-               if (((signed long)la << 16) >> 16 != la)
-                       return emulate_gp(ctxt, 0);
+               if (is_noncanonical_address(la))
+                       goto bad;
  
                 *max_size = min_t(u64, ~0u, (1ull << 48) - la);
                 if (size > *max_size)
@@ -678,46 +661,20 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
                 if (!fetch && (desc.type & 8) && !(desc.type & 2))
                         goto bad;
                 lim = desc_limit_scaled(&desc);
-               if ((ctxt->mode == X86EMUL_MODE_REAL) && !fetch &&
-                   (ctxt->d & NoBigReal)) {
-                       /* la is between zero and 0xffff */
-                       if (la > 0xffff)
-                               goto bad;
-                       *max_size = 0x10000 - la;
-               } else if ((desc.type & 8) || !(desc.type & 4)) {
-                       /* expand-up segment */
-                       if (addr.ea > lim)
-                               goto bad;
-                       *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
-               } else {
+               if (!(desc.type & 8) && (desc.type & 4)) {
                         /* expand-down segment */
                         if (addr.ea <= lim)
                                 goto bad;
                         lim = desc.d ? 0xffffffff : 0xffff;
-                       if (addr.ea > lim)
-                               goto bad;
-                       *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
                 }
+               if (addr.ea > lim)
+                       goto bad;
+               *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
                 if (size > *max_size)
                         goto bad;
-               cpl = ctxt->ops->cpl(ctxt);
-               if (!(desc.type & 8)) {
-                       /* data segment */
-                       if (cpl > desc.dpl)
-                               goto bad;
-               } else if ((desc.type & 8) && !(desc.type & 4)) {
-                       /* nonconforming code segment */
-                       if (cpl != desc.dpl)
-                               goto bad;
-               } else if ((desc.type & 8) && (desc.type & 4)) {
-                       /* conforming code segment */
-                       if (cpl < desc.dpl)
-                               goto bad;
-               }
+               la &= (u32)-1;
                 break;
         }
-       if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8)
-               la &= (u32)-1;
         if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
                 return emulate_gp(ctxt, 0);
         *linear = la;
@@ -735,9 +692,55 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
                      ulong *linear)
  {
         unsigned max_size;
-       return __linearize(ctxt, addr, &max_size, size, write, false, linear);
+       return __linearize(ctxt, addr, &max_size, size, write, false,
+                          ctxt->mode, linear);
+}
+
+static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
+                            enum x86emul_mode mode)
+{
+       ulong linear;
+       int rc;
+       unsigned max_size;
+       struct segmented_address addr = { .seg = VCPU_SREG_CS,
+                                          .ea = dst };
+
+       if (ctxt->op_bytes != sizeof(unsigned long))
+               addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
+       rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
+       if (rc == X86EMUL_CONTINUE)
+               ctxt->_eip = addr.ea;
+       return rc;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+       return assign_eip(ctxt, dst, ctxt->mode);
  }
  
+static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+                         const struct desc_struct *cs_desc)
+{
+       enum x86emul_mode mode = ctxt->mode;
+
+#ifdef CONFIG_X86_64
+       if (ctxt->mode >= X86EMUL_MODE_PROT32 && cs_desc->l) {
+               u64 efer = 0;
+
+               ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+               if (efer & EFER_LMA)
+                       mode = X86EMUL_MODE_PROT64;
+       }
+#endif
+       if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
+               mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+       return assign_eip(ctxt, dst, mode);
+}
+
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+       return assign_eip_near(ctxt, ctxt->_eip + rel);
+}
  
  static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
                               struct segmented_address addr,
@@ -776,7 +779,8 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
          * boundary check itself.  Instead, we use max_size to check
          * against op_size.
          */
-       rc = __linearize(ctxt, addr, &max_size, 0, false, true, &linear);
+       rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
+                        &linear);
         if (unlikely(rc != X86EMUL_CONTINUE))
                 return rc;
  
@@ -911,6 +915,8 @@ FASTOP2W(btc);
  
  FASTOP2(xadd);
  
+FASTOP2R(cmp, cmp_r);
+
  static u8 test_cc(unsigned int condition, unsigned long flags)
  {
         u8 rc;
@@ -1221,6 +1227,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
                         if (index_reg != 4)
                                 modrm_ea += reg_read(ctxt, index_reg) << scale;
                 } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
+                       modrm_ea += insn_fetch(s32, ctxt);
                         if (ctxt->mode == X86EMUL_MODE_PROT64)
                                 ctxt->rip_relative = 1;
                 } else {
@@ -1229,10 +1236,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
                         adjust_modrm_seg(ctxt, base_reg);
                 }
                 switch (ctxt->modrm_mod) {
-               case 0:
-                       if (ctxt->modrm_rm == 5)
-                               modrm_ea += insn_fetch(s32, ctxt);
-                       break;
                 case 1:
                         modrm_ea += insn_fetch(s8, ctxt);
                         break;
@@ -1284,7 +1287,8 @@ static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
                 else
                         sv = (s64)ctxt->src.val & (s64)mask;
  
-               ctxt->dst.addr.mem.ea += (sv >> 3);
+               ctxt->dst.addr.mem.ea = address_mask(ctxt,
+                                          ctxt->dst.addr.mem.ea + (sv >> 3));
         }
  
         /* only subword offset */
@@ -1610,6 +1614,9 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                 sizeof(base3), &ctxt->exception);
                 if (ret != X86EMUL_CONTINUE)
                         return ret;
+               if (is_noncanonical_address(get_desc_base(&seg_desc) |
+                                            ((u64)base3 << 32)))
+                       return emulate_gp(ctxt, 0);
         }
  load:
         ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
@@ -1807,6 +1814,10 @@ static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
         int seg = ctxt->src2.val;
  
         ctxt->src.val = get_segment_selector(ctxt, seg);
+       if (ctxt->op_bytes == 4) {
+               rsp_increment(ctxt, -2);
+               ctxt->op_bytes = 2;
+       }
  
         return em_push(ctxt);
  }
@@ -1850,7 +1861,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt)
  
  static int em_pushf(struct x86_emulate_ctxt *ctxt)
  {
-       ctxt->src.val =  (unsigned long)ctxt->eflags;
+       ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM;
         return em_push(ctxt);
  }
  
@@ -2035,7 +2046,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
         if (rc != X86EMUL_CONTINUE)
                 return rc;
  
-       rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+       rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
         if (rc != X86EMUL_CONTINUE) {
                 WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
                 /* assigning eip failed; restore the old cs */
@@ -2045,31 +2056,22 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
         return rc;
  }
  
-static int em_grp45(struct x86_emulate_ctxt *ctxt)
+static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
  {
-       int rc = X86EMUL_CONTINUE;
+       return assign_eip_near(ctxt, ctxt->src.val);
+}
  
-       switch (ctxt->modrm_reg) {
-       case 2: /* call near abs */ {
-               long int old_eip;
-               old_eip = ctxt->_eip;
-               rc = assign_eip_near(ctxt, ctxt->src.val);
-               if (rc != X86EMUL_CONTINUE)
-                       break;
-               ctxt->src.val = old_eip;
-               rc = em_push(ctxt);
-               break;
-       }
-       case 4: /* jmp abs */
-               rc = assign_eip_near(ctxt, ctxt->src.val);
-               break;
-       case 5: /* jmp far */
-               rc = em_jmp_far(ctxt);
-               break;
-       case 6: /* push */
-               rc = em_push(ctxt);
-               break;
-       }
+static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
+{
+       int rc;
+       long int old_eip;
+
+       old_eip = ctxt->_eip;
+       rc = assign_eip_near(ctxt, ctxt->src.val);
+       if (rc != X86EMUL_CONTINUE)
+               return rc;
+       ctxt->src.val = old_eip;
+       rc = em_push(ctxt);
         return rc;
  }
  
@@ -2128,11 +2130,11 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
         /* Outer-privilege level return is not implemented */
         if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
                 return X86EMUL_UNHANDLEABLE;
-       rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
+       rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl, false,
                                        &new_desc);
         if (rc != X86EMUL_CONTINUE)
                 return rc;
-       rc = assign_eip_far(ctxt, eip, new_desc.l);
+       rc = assign_eip_far(ctxt, eip, &new_desc);
         if (rc != X86EMUL_CONTINUE) {
                 WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
                 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
@@ -2316,6 +2318,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
  
                 ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
                 ctxt->eflags &= ~msr_data;
+               ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
  #endif
         } else {
                 /* legacy mode */
@@ -2349,11 +2352,9 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
             && !vendor_intel(ctxt))
                 return emulate_ud(ctxt);
  
-       /* XXX sysenter/sysexit have not been tested in 64bit mode.
-       * Therefore, we inject an #UD.
-       */
+       /* sysenter/sysexit have not been tested in 64bit mode. */
         if (ctxt->mode == X86EMUL_MODE_PROT64)
-               return emulate_ud(ctxt);
+               return X86EMUL_UNHANDLEABLE;
  
         setup_syscalls_segments(ctxt, &cs, &ss);
  
@@ -2425,6 +2426,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
                 if ((msr_data & 0xfffc) == 0x0)
                         return emulate_gp(ctxt, 0);
                 ss_sel = (u16)(msr_data + 24);
+               rcx = (u32)rcx;
+               rdx = (u32)rdx;
                 break;
         case X86EMUL_MODE_PROT64:
                 cs_sel = (u16)(msr_data + 32);
@@ -2599,7 +2602,6 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
         ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
                             &ctxt->exception);
         if (ret != X86EMUL_CONTINUE)
-               /* FIXME: need to provide precise fault address */
                 return ret;
  
         save_state_to_tss16(ctxt, &tss_seg);
@@ -2607,13 +2609,11 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
         ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
                              &ctxt->exception);
         if (ret != X86EMUL_CONTINUE)
-               /* FIXME: need to provide precise fault address */
                 return ret;
  
         ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
                             &ctxt->exception);
         if (ret != X86EMUL_CONTINUE)
-               /* FIXME: need to provide precise fault address */
                 return ret;
  
         if (old_tss_sel != 0xffff) {
@@ -2624,7 +2624,6 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
                                      sizeof tss_seg.prev_task_link,
                                      &ctxt->exception);
                 if (ret != X86EMUL_CONTINUE)
-                       /* FIXME: need to provide precise fault address */
                         return ret;
         }
  
@@ -2813,7 +2812,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
          *
          * 1. jmp/call/int to task gate: Check against DPL of the task gate
          * 2. Exception/IRQ/iret: No check is performed
-        * 3. jmp/call to TSS: Check against DPL of the TSS
+        * 3. jmp/call to TSS/task-gate: No check is performed since the
+        *    hardware checks it before exiting.
          */
         if (reason == TASK_SWITCH_GATE) {
                 if (idt_index != -1) {
@@ -2830,13 +2830,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
                         if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
                                 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
                 }
-       } else if (reason != TASK_SWITCH_IRET) {
-               int dpl = next_tss_desc.dpl;
-               if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
-                       return emulate_gp(ctxt, tss_selector);
         }
  
-
         desc_limit = desc_limit_scaled(&next_tss_desc);
         if (!next_tss_desc.p ||
             ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
@@ -2913,8 +2908,8 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
  {
         int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
  
-       register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes);
-       op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg));
+       register_address_increment(ctxt, reg, df * op->bytes);
+       op->addr.mem.ea = register_address(ctxt, reg);
  }
  
  static int em_das(struct x86_emulate_ctxt *ctxt)
@@ -3025,7 +3020,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
         if (rc != X86EMUL_CONTINUE)
                 return X86EMUL_CONTINUE;
  
-       rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+       rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
         if (rc != X86EMUL_CONTINUE)
                 goto fail;
  
@@ -3215,6 +3210,8 @@ static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
                 return emulate_ud(ctxt);
  
         ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg);
+       if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
+               ctxt->dst.bytes = 2;
         return X86EMUL_CONTINUE;
  }
  
@@ -3317,7 +3314,7 @@ static int em_sidt(struct x86_emulate_ctxt *ctxt)
         return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
  }
  
-static int em_lgdt(struct x86_emulate_ctxt *ctxt)
+static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
  {
         struct desc_ptr desc_ptr;
         int rc;
@@ -3329,12 +3326,23 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt)
                              ctxt->op_bytes);
         if (rc != X86EMUL_CONTINUE)
                 return rc;
-       ctxt->ops->set_gdt(ctxt, &desc_ptr);
+       if (ctxt->mode == X86EMUL_MODE_PROT64 &&
+           is_noncanonical_address(desc_ptr.address))
+               return emulate_gp(ctxt, 0);
+       if (lgdt)
+               ctxt->ops->set_gdt(ctxt, &desc_ptr);
+       else
+               ctxt->ops->set_idt(ctxt, &desc_ptr);
         /* Disable writeback. */
         ctxt->dst.type = OP_NONE;
         return X86EMUL_CONTINUE;
  }
  
+static int em_lgdt(struct x86_emulate_ctxt *ctxt)
+{
+       return em_lgdt_lidt(ctxt, true);
+}
+
  static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
  {
         int rc;
@@ -3348,20 +3356,7 @@ static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
  
  static int em_lidt(struct x86_emulate_ctxt *ctxt)
  {
-       struct desc_ptr desc_ptr;
-       int rc;
-
-       if (ctxt->mode == X86EMUL_MODE_PROT64)
-               ctxt->op_bytes = 8;
-       rc = read_descriptor(ctxt, ctxt->src.addr.mem,
-                            &desc_ptr.size, &desc_ptr.address,
-                            ctxt->op_bytes);
-       if (rc != X86EMUL_CONTINUE)
-               return rc;
-       ctxt->ops->set_idt(ctxt, &desc_ptr);
-       /* Disable writeback. */
-       ctxt->dst.type = OP_NONE;
-       return X86EMUL_CONTINUE;
+       return em_lgdt_lidt(ctxt, false);
  }
  
  static int em_smsw(struct x86_emulate_ctxt *ctxt)
@@ -3384,7 +3379,7 @@ static int em_loop(struct x86_emulate_ctxt *ctxt)
  {
         int rc = X86EMUL_CONTINUE;
  
-       register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
+       register_address_increment(ctxt, VCPU_REGS_RCX, -1);
         if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
             (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
                 rc = jmp_rel(ctxt, ctxt->src.val);
@@ -3554,7 +3549,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
  
                 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
                 if (efer & EFER_LMA)
-                       rsvd = CR3_L_MODE_RESERVED_BITS;
+                       rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
  
                 if (new_val & rsvd)
                         return emulate_gp(ctxt, 0);
@@ -3596,8 +3591,15 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt)
         if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
                 return emulate_ud(ctxt);
  
-       if (check_dr7_gd(ctxt))
+       if (check_dr7_gd(ctxt)) {
+               ulong dr6;
+
+               ctxt->ops->get_dr(ctxt, 6, &dr6);
+               dr6 &= ~15;
+               dr6 |= DR6_BD | DR6_RTM;
+               ctxt->ops->set_dr(ctxt, 6, dr6);
                 return emulate_db(ctxt);
+       }
  
         return X86EMUL_CONTINUE;
  }
@@ -3684,6 +3686,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
  #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
  #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
  #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
+#define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
  #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
  #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
  #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
@@ -3780,11 +3783,11 @@ static const struct opcode group4[] = {
  static const struct opcode group5[] = {
         F(DstMem | SrcNone | Lock,              em_inc),
         F(DstMem | SrcNone | Lock,              em_dec),
-       I(SrcMem | Stack,                       em_grp45),
+       I(SrcMem | NearBranch,                  em_call_near_abs),
         I(SrcMemFAddr | ImplicitOps | Stack,    em_call_far),
-       I(SrcMem | Stack,                       em_grp45),
-       I(SrcMemFAddr | ImplicitOps,            em_grp45),
-       I(SrcMem | Stack,                       em_grp45), D(Undefined),
+       I(SrcMem | NearBranch,                  em_jmp_abs),
+       I(SrcMemFAddr | ImplicitOps,            em_jmp_far),
+       I(SrcMem | Stack,                       em_push), D(Undefined),
  };
  
  static const struct opcode group6[] = {
@@ -3845,8 +3848,12 @@ static const struct gprefix pfx_0f_6f_0f_7f = {
         I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
  };
  
+static const struct instr_dual instr_dual_0f_2b = {
+       I(0, em_mov), N
+};
+
  static const struct gprefix pfx_0f_2b = {
-       I(0, em_mov), I(0, em_mov), N, N,
+       ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
  };
  
  static const struct gprefix pfx_0f_28_0f_29 = {
@@ -3920,6 +3927,10 @@ static const struct escape escape_dd = { {
         N, N, N, N, N, N, N, N,
  } };
  
+static const struct instr_dual instr_dual_0f_c3 = {
+       I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
+};
+
  static const struct opcode opcode_table[256] = {
         /* 0x00 - 0x07 */
         F6ALU(Lock, em_add),
@@ -3964,7 +3975,7 @@ static const struct opcode opcode_table[256] = {
         I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
         I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
         /* 0x70 - 0x7F */
-       X16(D(SrcImmByte)),
+       X16(D(SrcImmByte | NearBranch)),
         /* 0x80 - 0x87 */
         G(ByteOp | DstMem | SrcImm, group1),
         G(DstMem | SrcImm, group1),
@@ -3991,20 +4002,20 @@ static const struct opcode opcode_table[256] = {
         I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
         I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
         I2bv(SrcSI | DstDI | Mov | String, em_mov),
-       F2bv(SrcSI | DstDI | String | NoWrite, em_cmp),
+       F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r),
         /* 0xA8 - 0xAF */
         F2bv(DstAcc | SrcImm | NoWrite, em_test),
         I2bv(SrcAcc | DstDI | Mov | String, em_mov),
         I2bv(SrcSI | DstAcc | Mov | String, em_mov),
-       F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp),
+       F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
         /* 0xB0 - 0xB7 */
         X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
         /* 0xB8 - 0xBF */
         X8(I(DstReg | SrcImm64 | Mov, em_mov)),
         /* 0xC0 - 0xC7 */
         G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
-       I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),
-       I(ImplicitOps | Stack, em_ret),
+       I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
+       I(ImplicitOps | NearBranch, em_ret),
         I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
         I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
         G(ByteOp, group11), G(0, group11),
@@ -4024,13 +4035,14 @@ static const struct opcode opcode_table[256] = {
         /* 0xD8 - 0xDF */
         N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
         /* 0xE0 - 0xE7 */
-       X3(I(SrcImmByte, em_loop)),
-       I(SrcImmByte, em_jcxz),
+       X3(I(SrcImmByte | NearBranch, em_loop)),
+       I(SrcImmByte | NearBranch, em_jcxz),
         I2bvIP(SrcImmUByte | DstAcc, em_in,  in,  check_perm_in),
         I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
         /* 0xE8 - 0xEF */
-       I(SrcImm | Stack, em_call), D(SrcImm | ImplicitOps),
-       I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps),
+       I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
+       I(SrcImmFAddr | No64, em_jmp_far),
+       D(SrcImmByte | ImplicitOps | NearBranch),
         I2bvIP(SrcDX | DstAcc, em_in,  in,  check_perm_in),
         I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
         /* 0xF0 - 0xF7 */
@@ -4090,7 +4102,7 @@ static const struct opcode twobyte_table[256] = {
         N, N, N, N,
         N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
         /* 0x80 - 0x8F */
-       X16(D(SrcImm)),
+       X16(D(SrcImm | NearBranch)),
         /* 0x90 - 0x9F */
         X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
         /* 0xA0 - 0xA7 */
@@ -4121,7 +4133,7 @@ static const struct opcode twobyte_table[256] = {
         D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
         /* 0xC0 - 0xC7 */
         F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
-       N, D(DstMem | SrcReg | ModRM | Mov),
+       N, ID(0, &instr_dual_0f_c3),
         N, N, N, GD(0, &group9),
         /* 0xC8 - 0xCF */
         X8(I(DstReg, em_bswap)),
@@ -4134,12 +4146,20 @@ static const struct opcode twobyte_table[256] = {
         N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
  };
  
+static const struct instr_dual instr_dual_0f_38_f0 = {
+       I(DstReg | SrcMem | Mov, em_movbe), N
+};
+
+static const struct instr_dual instr_dual_0f_38_f1 = {
+       I(DstMem | SrcReg | Mov, em_movbe), N
+};
+
  static const struct gprefix three_byte_0f_38_f0 = {
-       I(DstReg | SrcMem | Mov, em_movbe), N, N, N
+       ID(0, &instr_dual_0f_38_f0), N, N, N
  };
  
  static const struct gprefix three_byte_0f_38_f1 = {
-       I(DstMem | SrcReg | Mov, em_movbe), N, N, N
+       ID(0, &instr_dual_0f_38_f1), N, N, N
  };
  
  /*
@@ -4152,8 +4172,8 @@ static const struct opcode opcode_map_0f_38[256] = {
         /* 0x80 - 0xef */
         X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
         /* 0xf0 - 0xf1 */
-       GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0),
-       GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1),
+       GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
+       GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
         /* 0xf2 - 0xff */
         N, N, X4(N), X8(N)
  };
@@ -4275,7 +4295,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
                 op->type = OP_MEM;
                 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
                 op->addr.mem.ea =
-                       register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI));
+                       register_address(ctxt, VCPU_REGS_RDI);
                 op->addr.mem.seg = VCPU_SREG_ES;
                 op->val = 0;
                 op->count = 1;
@@ -4329,7 +4349,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
                 op->type = OP_MEM;
                 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
                 op->addr.mem.ea =
-                       register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
+                       register_address(ctxt, VCPU_REGS_RSI);
                 op->addr.mem.seg = ctxt->seg_override;
                 op->val = 0;
                 op->count = 1;
@@ -4338,7 +4358,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
                 op->type = OP_MEM;
                 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
                 op->addr.mem.ea =
-                       register_address(ctxt,
+                       address_mask(ctxt,
                                 reg_read(ctxt, VCPU_REGS_RBX) +
                                 (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
                 op->addr.mem.seg = ctxt->seg_override;
@@ -4510,8 +4530,7 @@ done_prefixes:
  
         /* vex-prefix instructions are not implemented */
         if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
-           (mode == X86EMUL_MODE_PROT64 ||
-           (mode >= X86EMUL_MODE_PROT16 && (ctxt->modrm & 0x80)))) {
+           (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
                 ctxt->d = NotImpl;
         }
  
@@ -4549,6 +4568,12 @@ done_prefixes:
                         else
                                 opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
                         break;
+               case InstrDual:
+                       if ((ctxt->modrm >> 6) == 3)
+                               opcode = opcode.u.idual->mod3;
+                       else
+                               opcode = opcode.u.idual->mod012;
+                       break;
                 default:
                         return EMULATION_FAILED;
                 }
@@ -4567,7 +4592,8 @@ done_prefixes:
                 return EMULATION_FAILED;
  
         if (unlikely(ctxt->d &
-                    (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
+           (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
+            No16))) {
                 /*
                  * These are copied unconditionally here, and checked unconditionally
                  * in x86_emulate_insn.
@@ -4578,8 +4604,12 @@ done_prefixes:
                 if (ctxt->d & NotImpl)
                         return EMULATION_FAILED;
  
-               if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
-                       ctxt->op_bytes = 8;
+               if (mode == X86EMUL_MODE_PROT64) {
+                       if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
+                               ctxt->op_bytes = 8;
+                       else if (ctxt->d & NearBranch)
+                               ctxt->op_bytes = 8;
+               }
  
                 if (ctxt->d & Op3264) {
                         if (mode == X86EMUL_MODE_PROT64)
@@ -4588,6 +4618,9 @@ done_prefixes:
                                 ctxt->op_bytes = 4;
                 }
  
+               if ((ctxt->d & No16) && ctxt->op_bytes == 2)
+                       ctxt->op_bytes = 4;
+
                 if (ctxt->d & Sse)
                         ctxt->op_bytes = 16;
                 else if (ctxt->d & Mmx)
@@ -4631,7 +4664,8 @@ done_prefixes:
         rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
  
         if (ctxt->rip_relative)
-               ctxt->memopp->addr.mem.ea += ctxt->_eip;
+               ctxt->memopp->addr.mem.ea = address_mask(ctxt,
+                                       ctxt->memopp->addr.mem.ea + ctxt->_eip);
  
  done:
         return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
@@ -4775,6 +4809,12 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
                                 goto done;
                 }
  
+               /* Instruction can only be executed in protected mode */
+               if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
+                       rc = emulate_ud(ctxt);
+                       goto done;
+               }
+
                 /* Privileged instruction can be executed only in CPL=0 */
                 if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
                         if (ctxt->d & PrivUD)
@@ -4784,12 +4824,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
                         goto done;
                 }
  
-               /* Instruction can only be executed in protected mode */
-               if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
-                       rc = emulate_ud(ctxt);
-                       goto done;
-               }
-
                 /* Do instruction specific permission checks */
                 if (ctxt->d & CheckPerm) {
                         rc = ctxt->check_perm(ctxt);
@@ -4974,8 +5008,7 @@ writeback:
                         count = ctxt->src.count;
                 else
                         count = ctxt->dst.count;
-               register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX),
-                               -count);
+               register_address_increment(ctxt, VCPU_REGS_RCX, -count);
  
                 if (!string_insn_completed(ctxt)) {
                         /*
@@ -5053,11 +5086,6 @@ twobyte_insn:
                 ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
                                                         (s16) ctxt->src.val;
                 break;
-       case 0xc3:              /* movnti */
-               ctxt->dst.bytes = ctxt->op_bytes;
-               ctxt->dst.val = (ctxt->op_bytes == 8) ? (u64) ctxt->src.val :
-                                                       (u32) ctxt->src.val;
-               break;
         default:
                 goto cannot_emulate;
         }
diff --git a/virt/kvm/ioapic.c b/arch/x86/kvm/ioapic.c

similarity index 98%

rename from virt/kvm/ioapic.c

rename to arch/x86/kvm/ioapic.c

index 0ba4057d271befe7f166f52f66cbd2cdaf9f4ea5..b1947e0f3e100d7552add9688125d9c86d29a8ab 100644 (file)
--- a/virt/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -270,7 +270,6 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
         spin_unlock(&ioapic->lock);
  }
  
-#ifdef CONFIG_X86
  void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
  {
         struct kvm_ioapic *ioapic = kvm->arch.vioapic;
@@ -279,12 +278,6 @@ void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
                 return;
         kvm_make_scan_ioapic_request(kvm);
  }
-#else
-void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
-{
-       return;
-}
-#endif
  
  static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
  {
@@ -586,11 +579,6 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
         case IOAPIC_REG_WINDOW:
                 ioapic_write_indirect(ioapic, data);
                 break;
-#ifdef CONFIG_IA64
-       case IOAPIC_REG_EOI:
-               __kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG);
-               break;
-#endif
  
         default:
                 break;
diff --git a/virt/kvm/ioapic.h b/arch/x86/kvm/ioapic.h

similarity index 89%

rename from virt/kvm/ioapic.h

rename to arch/x86/kvm/ioapic.h

index e23b70634f1ea88488525a6e9c7a61af0638de01..3c9195535ffc946adff3961691c3f80457b0e53c 100644 (file)
--- a/virt/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -19,7 +19,6 @@ struct kvm_vcpu;
  /* Direct registers. */
  #define IOAPIC_REG_SELECT  0x00
  #define IOAPIC_REG_WINDOW  0x10
-#define IOAPIC_REG_EOI     0x40        /* IA64 IOSAPIC only */
  
  /* Indirect registers. */
  #define IOAPIC_REG_APIC_ID 0x00        /* x86 IOAPIC only */
@@ -45,6 +44,23 @@ struct rtc_status {
         DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);
  };
  
+union kvm_ioapic_redirect_entry {
+       u64 bits;
+       struct {
+               u8 vector;
+               u8 delivery_mode:3;
+               u8 dest_mode:1;
+               u8 delivery_status:1;
+               u8 polarity:1;
+               u8 remote_irr:1;
+               u8 trig_mode:1;
+               u8 mask:1;
+               u8 reserve:7;
+               u8 reserved[4];
+               u8 dest_id;
+       } fields;
+};
+
  struct kvm_ioapic {
         u64 base_address;
         u32 ioregsel;
@@ -83,7 +99,7 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
  
  void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
  int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-               int short_hand, int dest, int dest_mode);
+               int short_hand, unsigned int dest, int dest_mode);
  int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
  void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
                         int trigger_mode);
@@ -97,7 +113,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
                 struct kvm_lapic_irq *irq, unsigned long *dest_map);
  int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
  int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
-void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
  void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
                         u32 *tmr);
  
diff --git a/virt/kvm/iommu.c b/arch/x86/kvm/iommu.c

similarity index 96%

rename from virt/kvm/iommu.c

rename to arch/x86/kvm/iommu.c

index c1e6ae989a432165f9d6871626929ed2ecb578e8..17b73eeac8a499ce1490a0c5b3a497085048378b 100644 (file)
--- a/virt/kvm/iommu.c
+++ b/arch/x86/kvm/iommu.c
@@ -31,6 +31,7 @@
  #include <linux/dmar.h>
  #include <linux/iommu.h>
  #include <linux/intel-iommu.h>
+#include "assigned-dev.h"
  
  static bool allow_unsafe_assigned_interrupts;
  module_param_named(allow_unsafe_assigned_interrupts,
@@ -169,10 +170,8 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
         return r;
  }
  
-int kvm_assign_device(struct kvm *kvm,
-                     struct kvm_assigned_dev_kernel *assigned_dev)
+int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev)
  {
-       struct pci_dev *pdev = NULL;
         struct iommu_domain *domain = kvm->arch.iommu_domain;
         int r;
         bool noncoherent;
@@ -181,7 +180,6 @@ int kvm_assign_device(struct kvm *kvm,
         if (!domain)
                 return 0;
  
-       pdev = assigned_dev->dev;
         if (pdev == NULL)
                 return -ENODEV;
  
@@ -212,17 +210,14 @@ out_unmap:
         return r;
  }
  
-int kvm_deassign_device(struct kvm *kvm,
-                       struct kvm_assigned_dev_kernel *assigned_dev)
+int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev)
  {
         struct iommu_domain *domain = kvm->arch.iommu_domain;
-       struct pci_dev *pdev = NULL;
  
         /* check if iommu exists and in use */
         if (!domain)
                 return 0;
  
-       pdev = assigned_dev->dev;
         if (pdev == NULL)
                 return -ENODEV;
  
diff --git a/virt/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c

similarity index 89%

rename from virt/kvm/irq_comm.c

rename to arch/x86/kvm/irq_comm.c

index 963b8995a9e8a8035ad4df200afb20ca55d6fa1f..72298b3ac025a8dc820f4593e5c7f2278f3bb35a 100644 (file)
--- a/virt/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -26,9 +26,6 @@
  #include <trace/events/kvm.h>
  
  #include <asm/msidef.h>
-#ifdef CONFIG_IA64
-#include <asm/iosapic.h>
-#endif
  
  #include "irq.h"
  
@@ -38,12 +35,8 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
                            struct kvm *kvm, int irq_source_id, int level,
                            bool line_status)
  {
-#ifdef CONFIG_X86
         struct kvm_pic *pic = pic_irqchip(kvm);
         return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
-#else
-       return -1;
-#endif
  }
  
  static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
@@ -57,12 +50,7 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
  
  inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
  {
-#ifdef CONFIG_IA64
-       return irq->delivery_mode ==
-               (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
-#else
         return irq->delivery_mode == APIC_DM_LOWEST;
-#endif
  }
  
  int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
@@ -202,9 +190,7 @@ int kvm_request_irq_source_id(struct kvm *kvm)
         }
  
         ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
-#ifdef CONFIG_X86
         ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
-#endif
         set_bit(irq_source_id, bitmap);
  unlock:
         mutex_unlock(&kvm->irq_lock);
@@ -215,9 +201,7 @@ unlock:
  void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
  {
         ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
-#ifdef CONFIG_X86
         ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
-#endif
  
         mutex_lock(&kvm->irq_lock);
         if (irq_source_id < 0 ||
@@ -230,9 +214,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
                 goto unlock;
  
         kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
-#ifdef CONFIG_X86
         kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id);
-#endif
  unlock:
         mutex_unlock(&kvm->irq_lock);
  }
@@ -242,7 +224,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
  {
         mutex_lock(&kvm->irq_lock);
         kimn->irq = irq;
-       hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
+       hlist_add_head_rcu(&kimn->link, &kvm->arch.mask_notifier_list);
         mutex_unlock(&kvm->irq_lock);
  }
  
@@ -264,7 +246,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
         idx = srcu_read_lock(&kvm->irq_srcu);
         gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
         if (gsi != -1)
-               hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
+               hlist_for_each_entry_rcu(kimn, &kvm->arch.mask_notifier_list, link)
                         if (kimn->irq == gsi)
                                 kimn->func(kimn, mask);
         srcu_read_unlock(&kvm->irq_srcu, idx);
@@ -322,16 +304,11 @@ out:
           .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
  #define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
  
-#ifdef CONFIG_X86
-#  define PIC_ROUTING_ENTRY(irq) \
+#define PIC_ROUTING_ENTRY(irq) \
         { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,  \
           .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } }
-#  define ROUTING_ENTRY2(irq) \
+#define ROUTING_ENTRY2(irq) \
         IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
-#else
-#  define ROUTING_ENTRY2(irq) \
-       IOAPIC_ROUTING_ENTRY(irq)
-#endif
  
  static const struct kvm_irq_routing_entry default_routing[] = {
         ROUTING_ENTRY2(0), ROUTING_ENTRY2(1),
@@ -346,20 +323,6 @@ static const struct kvm_irq_routing_entry default_routing[] = {
         ROUTING_ENTRY1(18), ROUTING_ENTRY1(19),
         ROUTING_ENTRY1(20), ROUTING_ENTRY1(21),
         ROUTING_ENTRY1(22), ROUTING_ENTRY1(23),
-#ifdef CONFIG_IA64
-       ROUTING_ENTRY1(24), ROUTING_ENTRY1(25),
-       ROUTING_ENTRY1(26), ROUTING_ENTRY1(27),
-       ROUTING_ENTRY1(28), ROUTING_ENTRY1(29),
-       ROUTING_ENTRY1(30), ROUTING_ENTRY1(31),
-       ROUTING_ENTRY1(32), ROUTING_ENTRY1(33),
-       ROUTING_ENTRY1(34), ROUTING_ENTRY1(35),
-       ROUTING_ENTRY1(36), ROUTING_ENTRY1(37),
-       ROUTING_ENTRY1(38), ROUTING_ENTRY1(39),
-       ROUTING_ENTRY1(40), ROUTING_ENTRY1(41),
-       ROUTING_ENTRY1(42), ROUTING_ENTRY1(43),
-       ROUTING_ENTRY1(44), ROUTING_ENTRY1(45),
-       ROUTING_ENTRY1(46), ROUTING_ENTRY1(47),
-#endif
  };
  
  int kvm_setup_default_irq_routing(struct kvm *kvm)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c

index b8345dd41b251e6dd81869709be30303ca2fbacf..4f0c0b954686cbf5e980f761b5b9bd4f7bc2df9a 100644 (file)
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -68,6 +68,9 @@
  #define MAX_APIC_VECTOR                        256
  #define APIC_VECTORS_PER_REG           32
  
+#define APIC_BROADCAST                 0xFF
+#define X2APIC_BROADCAST               0xFFFFFFFFul
+
  #define VEC_POS(v) ((v) & (32 - 1))
  #define REG_POS(v) (((v) >> 5) << 4)
  
@@ -129,8 +132,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
         return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
  }
  
-#define KVM_X2APIC_CID_BITS 0
-
  static void recalculate_apic_map(struct kvm *kvm)
  {
         struct kvm_apic_map *new, *old = NULL;
@@ -149,42 +150,56 @@ static void recalculate_apic_map(struct kvm *kvm)
         new->cid_shift = 8;
         new->cid_mask = 0;
         new->lid_mask = 0xff;
+       new->broadcast = APIC_BROADCAST;
  
         kvm_for_each_vcpu(i, vcpu, kvm) {
                 struct kvm_lapic *apic = vcpu->arch.apic;
-               u16 cid, lid;
-               u32 ldr;
  
                 if (!kvm_apic_present(vcpu))
                         continue;
  
+               if (apic_x2apic_mode(apic)) {
+                       new->ldr_bits = 32;
+                       new->cid_shift = 16;
+                       new->cid_mask = new->lid_mask = 0xffff;
+                       new->broadcast = X2APIC_BROADCAST;
+               } else if (kvm_apic_get_reg(apic, APIC_LDR)) {
+                       if (kvm_apic_get_reg(apic, APIC_DFR) ==
+                                                       APIC_DFR_CLUSTER) {
+                               new->cid_shift = 4;
+                               new->cid_mask = 0xf;
+                               new->lid_mask = 0xf;
+                       } else {
+                               new->cid_shift = 8;
+                               new->cid_mask = 0;
+                               new->lid_mask = 0xff;
+                       }
+               }
+
                 /*
                  * All APICs have to be configured in the same mode by an OS.
                  * We take advatage of this while building logical id loockup
-                * table. After reset APICs are in xapic/flat mode, so if we
-                * find apic with different setting we assume this is the mode
+                * table. After reset APICs are in software disabled mode, so if
+                * we find apic with different setting we assume this is the mode
                  * OS wants all apics to be in; build lookup table accordingly.
                  */
-               if (apic_x2apic_mode(apic)) {
-                       new->ldr_bits = 32;
-                       new->cid_shift = 16;
-                       new->cid_mask = (1 << KVM_X2APIC_CID_BITS) - 1;
-                       new->lid_mask = 0xffff;
-               } else if (kvm_apic_sw_enabled(apic) &&
-                               !new->cid_mask /* flat mode */ &&
-                               kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
-                       new->cid_shift = 4;
-                       new->cid_mask = 0xf;
-                       new->lid_mask = 0xf;
-               }
+               if (kvm_apic_sw_enabled(apic))
+                       break;
+       }
  
-               new->phys_map[kvm_apic_id(apic)] = apic;
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               struct kvm_lapic *apic = vcpu->arch.apic;
+               u16 cid, lid;
+               u32 ldr, aid;
  
+               aid = kvm_apic_id(apic);
                 ldr = kvm_apic_get_reg(apic, APIC_LDR);
                 cid = apic_cluster_id(new, ldr);
                 lid = apic_logical_id(new, ldr);
  
-               if (lid)
+               if (aid < ARRAY_SIZE(new->phys_map))
+                       new->phys_map[aid] = apic;
+               if (lid && cid < ARRAY_SIZE(new->logical_map))
                         new->logical_map[cid][ffs(lid) - 1] = apic;
         }
  out:
@@ -201,11 +216,13 @@ out:
  
  static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
  {
-       u32 prev = kvm_apic_get_reg(apic, APIC_SPIV);
+       bool enabled = val & APIC_SPIV_APIC_ENABLED;
  
         apic_set_reg(apic, APIC_SPIV, val);
-       if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) {
-               if (val & APIC_SPIV_APIC_ENABLED) {
+
+       if (enabled != apic->sw_enabled) {
+               apic->sw_enabled = enabled;
+               if (enabled) {
                         static_key_slow_dec_deferred(&apic_sw_disabled);
                         recalculate_apic_map(apic->vcpu->kvm);
                 } else
@@ -237,21 +254,17 @@ static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
  
  static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
  {
-       return ((kvm_apic_get_reg(apic, APIC_LVTT) &
-               apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
+       return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
  }
  
  static inline int apic_lvtt_period(struct kvm_lapic *apic)
  {
-       return ((kvm_apic_get_reg(apic, APIC_LVTT) &
-               apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
+       return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
  }
  
  static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
  {
-       return ((kvm_apic_get_reg(apic, APIC_LVTT) &
-               apic->lapic_timer.timer_mode_mask) ==
-                       APIC_LVT_TIMER_TSCDEADLINE);
+       return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
  }
  
  static inline int apic_lvt_nmi_mode(u32 lvt_val)
@@ -326,8 +339,12 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
  
  static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
  {
-       apic->irr_pending = true;
         apic_set_vector(vec, apic->regs + APIC_IRR);
+       /*
+        * irr_pending must be true if any interrupt is pending; set it after
+        * APIC_IRR to avoid race with apic_clear_irr
+        */
+       apic->irr_pending = true;
  }
  
  static inline int apic_search_irr(struct kvm_lapic *apic)
@@ -359,13 +376,15 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
  
         vcpu = apic->vcpu;
  
-       apic_clear_vector(vec, apic->regs + APIC_IRR);
-       if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+       if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) {
                 /* try to update RVI */
+               apic_clear_vector(vec, apic->regs + APIC_IRR);
                 kvm_make_request(KVM_REQ_EVENT, vcpu);
-       else {
-               vec = apic_search_irr(apic);
-               apic->irr_pending = (vec != -1);
+       } else {
+               apic->irr_pending = false;
+               apic_clear_vector(vec, apic->regs + APIC_IRR);
+               if (apic_search_irr(apic) != -1)
+                       apic->irr_pending = true;
         }
  }
  
@@ -558,16 +577,25 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
         apic_update_ppr(apic);
  }
  
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
+static int kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest)
+{
+       return dest == (apic_x2apic_mode(apic) ?
+                       X2APIC_BROADCAST : APIC_BROADCAST);
+}
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest)
  {
-       return dest == 0xff || kvm_apic_id(apic) == dest;
+       return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest);
  }
  
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
  {
         int result = 0;
         u32 logical_id;
  
+       if (kvm_apic_broadcast(apic, mda))
+               return 1;
+
         if (apic_x2apic_mode(apic)) {
                 logical_id = kvm_apic_get_reg(apic, APIC_LDR);
                 return logical_id & mda;
@@ -595,7 +623,7 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
  }
  
  int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-                          int short_hand, int dest, int dest_mode)
+                          int short_hand, unsigned int dest, int dest_mode)
  {
         int result = 0;
         struct kvm_lapic *target = vcpu->arch.apic;
@@ -657,15 +685,24 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
         if (!map)
                 goto out;
  
+       if (irq->dest_id == map->broadcast)
+               goto out;
+
+       ret = true;
+
         if (irq->dest_mode == 0) { /* physical mode */
-               if (irq->delivery_mode == APIC_DM_LOWEST ||
-                               irq->dest_id == 0xff)
+               if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
                         goto out;
-               dst = &map->phys_map[irq->dest_id & 0xff];
+
+               dst = &map->phys_map[irq->dest_id];
         } else {
                 u32 mda = irq->dest_id << (32 - map->ldr_bits);
+               u16 cid = apic_cluster_id(map, mda);
+
+               if (cid >= ARRAY_SIZE(map->logical_map))
+                       goto out;
  
-               dst = map->logical_map[apic_cluster_id(map, mda)];
+               dst = map->logical_map[cid];
  
                 bitmap = apic_logical_id(map, mda);
  
@@ -691,8 +728,6 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
                         *r = 0;
                 *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
         }
-
-       ret = true;
  out:
         rcu_read_unlock();
         return ret;
@@ -1034,6 +1069,26 @@ static void update_divide_count(struct kvm_lapic *apic)
                                    apic->divide_count);
  }
  
+static void apic_timer_expired(struct kvm_lapic *apic)
+{
+       struct kvm_vcpu *vcpu = apic->vcpu;
+       wait_queue_head_t *q = &vcpu->wq;
+
+       /*
+        * Note: KVM_REQ_PENDING_TIMER is implicitly checked in
+        * vcpu_enter_guest.
+        */
+       if (atomic_read(&apic->lapic_timer.pending))
+               return;
+
+       atomic_inc(&apic->lapic_timer.pending);
+       /* FIXME: this code should not know anything about vcpus */
+       kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+
+       if (waitqueue_active(q))
+               wake_up_interruptible(q);
+}
+
  static void start_apic_timer(struct kvm_lapic *apic)
  {
         ktime_t now;
@@ -1096,9 +1151,10 @@ static void start_apic_timer(struct kvm_lapic *apic)
                 if (likely(tscdeadline > guest_tsc)) {
                         ns = (tscdeadline - guest_tsc) * 1000000ULL;
                         do_div(ns, this_tsc_khz);
-               }
-               hrtimer_start(&apic->lapic_timer.timer,
-                       ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
+                       hrtimer_start(&apic->lapic_timer.timer,
+                               ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
+               } else
+                       apic_timer_expired(apic);
  
                 local_irq_restore(flags);
         }
@@ -1203,17 +1259,20 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
  
                 break;
  
-       case APIC_LVTT:
-               if ((kvm_apic_get_reg(apic, APIC_LVTT) &
-                   apic->lapic_timer.timer_mode_mask) !=
-                  (val & apic->lapic_timer.timer_mode_mask))
+       case APIC_LVTT: {
+               u32 timer_mode = val & apic->lapic_timer.timer_mode_mask;
+
+               if (apic->lapic_timer.timer_mode != timer_mode) {
+                       apic->lapic_timer.timer_mode = timer_mode;
                         hrtimer_cancel(&apic->lapic_timer.timer);
+               }
  
                 if (!kvm_apic_sw_enabled(apic))
                         val |= APIC_LVT_MASKED;
                 val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
                 apic_set_reg(apic, APIC_LVTT, val);
                 break;
+       }
  
         case APIC_TMICT:
                 if (apic_lvtt_tscdeadline(apic))
@@ -1320,7 +1379,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
         if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
                 static_key_slow_dec_deferred(&apic_hw_disabled);
  
-       if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
+       if (!apic->sw_enabled)
                 static_key_slow_dec_deferred(&apic_sw_disabled);
  
         if (apic->regs)
@@ -1355,9 +1414,6 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
                 return;
  
         hrtimer_cancel(&apic->lapic_timer.timer);
-       /* Inject here so clearing tscdeadline won't override new value */
-       if (apic_has_pending_timer(vcpu))
-               kvm_inject_apic_timer_irqs(vcpu);
         apic->lapic_timer.tscdeadline = data;
         start_apic_timer(apic);
  }
@@ -1422,6 +1478,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
         apic->base_address = apic->vcpu->arch.apic_base &
                              MSR_IA32_APICBASE_BASE;
  
+       if ((value & MSR_IA32_APICBASE_ENABLE) &&
+            apic->base_address != APIC_DEFAULT_PHYS_BASE)
+               pr_warn_once("APIC base relocation is unsupported by KVM");
+
         /* with FSB delivery interrupt, we can restart APIC functionality */
         apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
                    "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
@@ -1447,6 +1507,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
  
         for (i = 0; i < APIC_LVT_NUM; i++)
                 apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
+       apic->lapic_timer.timer_mode = 0;
         apic_set_reg(apic, APIC_LVT0,
                      SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
  
@@ -1538,23 +1599,8 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
  {
         struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
         struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
-       struct kvm_vcpu *vcpu = apic->vcpu;
-       wait_queue_head_t *q = &vcpu->wq;
-
-       /*
-        * There is a race window between reading and incrementing, but we do
-        * not care about potentially losing timer events in the !reinject
-        * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
-        * in vcpu_enter_guest.
-        */
-       if (!atomic_read(&ktimer->pending)) {
-               atomic_inc(&ktimer->pending);
-               /* FIXME: this code should not know anything about vcpus */
-               kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
-       }
  
-       if (waitqueue_active(q))
-               wake_up_interruptible(q);
+       apic_timer_expired(apic);
  
         if (lapic_is_periodic(apic)) {
                 hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
@@ -1693,6 +1739,9 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
         apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ?
                                 1 : count_vectors(apic->regs + APIC_ISR);
         apic->highest_isr_cache = -1;
+       if (kvm_x86_ops->hwapic_irr_update)
+               kvm_x86_ops->hwapic_irr_update(vcpu,
+                               apic_find_highest_irr(apic));
         kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
         kvm_make_request(KVM_REQ_EVENT, vcpu);
         kvm_rtc_eoi_tracking_restore_one(vcpu);
@@ -1837,8 +1886,11 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
         if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
                 return 1;
  
+       if (reg == APIC_ICR2)
+               return 1;
+
         /* if this is ICR write vector before command */
-       if (msr == 0x830)
+       if (reg == APIC_ICR)
                 apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
         return apic_reg_write(apic, reg, (u32)data);
  }
@@ -1851,9 +1903,15 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
         if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
                 return 1;
  
+       if (reg == APIC_DFR || reg == APIC_ICR2) {
+               apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n",
+                          reg);
+               return 1;
+       }
+
         if (apic_reg_read(apic, reg, 4, &low))
                 return 1;
-       if (msr == 0x830)
+       if (reg == APIC_ICR)
                 apic_reg_read(apic, APIC_ICR2, 4, &high);
  
         *data = (((u64)high) << 32) | low;
@@ -1908,7 +1966,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
  void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
  {
         struct kvm_lapic *apic = vcpu->arch.apic;
-       unsigned int sipi_vector;
+       u8 sipi_vector;
         unsigned long pe;
  
         if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h

index 6a11845fd8b94435a383823a1e559ba153dbaaca..c674fce53cf90c0defe160673d418b992065efb6 100644 (file)
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -11,6 +11,7 @@
  struct kvm_timer {
         struct hrtimer timer;
         s64 period;                             /* unit: ns */
+       u32 timer_mode;
         u32 timer_mode_mask;
         u64 tscdeadline;
         atomic_t pending;                       /* accumulated triggered timers */
@@ -22,6 +23,7 @@ struct kvm_lapic {
         struct kvm_timer lapic_timer;
         u32 divide_count;
         struct kvm_vcpu *vcpu;
+       bool sw_enabled;
         bool irr_pending;
         /* Number of bits set in ISR. */
         s16 isr_count;
@@ -55,8 +57,8 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
  
  void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
  void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda);
  int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
                 unsigned long *dest_map);
  int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
@@ -119,11 +121,11 @@ static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic)
  
  extern struct static_key_deferred apic_sw_disabled;
  
-static inline int kvm_apic_sw_enabled(struct kvm_lapic *apic)
+static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic)
  {
         if (static_key_false(&apic_sw_disabled.key))
-               return kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
-       return APIC_SPIV_APIC_ENABLED;
+               return apic->sw_enabled;
+       return true;
  }
  
  static inline bool kvm_apic_present(struct kvm_vcpu *vcpu)
@@ -152,8 +154,6 @@ static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
         ldr >>= 32 - map->ldr_bits;
         cid = (ldr >> map->cid_shift) & map->cid_mask;
  
-       BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
-
         return cid;
  }
  
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c

index 978f402006eef21ee569720a0d573a6a48e12c97..10fbed126b1121ae5fde2f7ccfbf04133b5f8771 100644 (file)
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -214,13 +214,12 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
  #define MMIO_GEN_LOW_SHIFT             10
  #define MMIO_GEN_LOW_MASK              ((1 << MMIO_GEN_LOW_SHIFT) - 2)
  #define MMIO_GEN_MASK                  ((1 << MMIO_GEN_SHIFT) - 1)
-#define MMIO_MAX_GEN                   ((1 << MMIO_GEN_SHIFT) - 1)
  
  static u64 generation_mmio_spte_mask(unsigned int gen)
  {
         u64 mask;
  
-       WARN_ON(gen > MMIO_MAX_GEN);
+       WARN_ON(gen & ~MMIO_GEN_MASK);
  
         mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
         mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
@@ -263,13 +262,13 @@ static bool is_mmio_spte(u64 spte)
  
  static gfn_t get_mmio_spte_gfn(u64 spte)
  {
-       u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask;
+       u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
         return (spte & ~mask) >> PAGE_SHIFT;
  }
  
  static unsigned get_mmio_spte_access(u64 spte)
  {
-       u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask;
+       u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
         return (spte & ~mask) & ~PAGE_MASK;
  }
  
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c

index 7527cefc5a43ac9f24a6b1bea6c1eab43299421f..41dd0387cccb639b49fcb99c99035d3691f2464a 100644 (file)
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1056,9 +1056,11 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho
  {
         struct vcpu_svm *svm = to_svm(vcpu);
  
-       WARN_ON(adjustment < 0);
-       if (host)
-               adjustment = svm_scale_tsc(vcpu, adjustment);
+       if (host) {
+               if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
+                       WARN_ON(adjustment < 0);
+               adjustment = svm_scale_tsc(vcpu, (u64)adjustment);
+       }
  
         svm->vmcb->control.tsc_offset += adjustment;
         if (is_guest_mode(vcpu))
@@ -2999,7 +3001,6 @@ static int dr_interception(struct vcpu_svm *svm)
  {
         int reg, dr;
         unsigned long val;
-       int err;
  
         if (svm->vcpu.guest_debug == 0) {
                 /*
@@ -3019,12 +3020,15 @@ static int dr_interception(struct vcpu_svm *svm)
         dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
  
         if (dr >= 16) { /* mov to DRn */
+               if (!kvm_require_dr(&svm->vcpu, dr - 16))
+                       return 1;
                 val = kvm_register_read(&svm->vcpu, reg);
                 kvm_set_dr(&svm->vcpu, dr - 16, val);
         } else {
-               err = kvm_get_dr(&svm->vcpu, dr, &val);
-               if (!err)
-                       kvm_register_write(&svm->vcpu, reg, val);
+               if (!kvm_require_dr(&svm->vcpu, dr))
+                       return 1;
+               kvm_get_dr(&svm->vcpu, dr, &val);
+               kvm_register_write(&svm->vcpu, reg, val);
         }
  
         skip_emulated_instruction(&svm->vcpu);
@@ -4123,6 +4127,11 @@ static bool svm_mpx_supported(void)
         return false;
  }
  
+static bool svm_xsaves_supported(void)
+{
+       return false;
+}
+
  static bool svm_has_wbinvd_exit(void)
  {
         return true;
@@ -4410,6 +4419,7 @@ static struct kvm_x86_ops svm_x86_ops = {
         .rdtscp_supported = svm_rdtscp_supported,
         .invpcid_supported = svm_invpcid_supported,
         .mpx_supported = svm_mpx_supported,
+       .xsaves_supported = svm_xsaves_supported,
  
         .set_supported_cpuid = svm_set_supported_cpuid,
  
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h

index 6b06ab8748dd5ca2657ebf1cb87f039aff8f4c64..c2a34bb5ad93a8a3b5336d941c889887124d494f 100644 (file)
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -5,6 +5,7 @@
  #include <asm/vmx.h>
  #include <asm/svm.h>
  #include <asm/clocksource.h>
+#include <asm/pvclock-abi.h>
  
  #undef TRACE_SYSTEM
  #define TRACE_SYSTEM kvm
@@ -877,6 +878,42 @@ TRACE_EVENT(kvm_ple_window,
  #define trace_kvm_ple_window_shrink(vcpu_id, new, old) \
         trace_kvm_ple_window(false, vcpu_id, new, old)
  
+TRACE_EVENT(kvm_pvclock_update,
+       TP_PROTO(unsigned int vcpu_id, struct pvclock_vcpu_time_info *pvclock),
+       TP_ARGS(vcpu_id, pvclock),
+
+       TP_STRUCT__entry(
+               __field(        unsigned int,   vcpu_id                 )
+               __field(        __u32,          version                 )
+               __field(        __u64,          tsc_timestamp           )
+               __field(        __u64,          system_time             )
+               __field(        __u32,          tsc_to_system_mul       )
+               __field(        __s8,           tsc_shift               )
+               __field(        __u8,           flags                   )
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id           = vcpu_id;
+               __entry->version           = pvclock->version;
+               __entry->tsc_timestamp     = pvclock->tsc_timestamp;
+               __entry->system_time       = pvclock->system_time;
+               __entry->tsc_to_system_mul = pvclock->tsc_to_system_mul;
+               __entry->tsc_shift         = pvclock->tsc_shift;
+               __entry->flags             = pvclock->flags;
+       ),
+
+       TP_printk("vcpu_id %u, pvclock { version %u, tsc_timestamp 0x%llx, "
+                 "system_time 0x%llx, tsc_to_system_mul 0x%x, tsc_shift %d, "
+                 "flags 0x%x }",
+                 __entry->vcpu_id,
+                 __entry->version,
+                 __entry->tsc_timestamp,
+                 __entry->system_time,
+                 __entry->tsc_to_system_mul,
+                 __entry->tsc_shift,
+                 __entry->flags)
+);
+
  #endif /* _TRACE_KVM_H */
  
  #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c

index 3e556c68351b7a1356c05e7fe94c6c0895d9755d..feb852b04598b63d187b0870db26d008c81a13d8 100644 (file)
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -99,13 +99,15 @@ module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
  static bool __read_mostly nested = 0;
  module_param(nested, bool, S_IRUGO);
  
+static u64 __read_mostly host_xss;
+
  #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
  #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
  #define KVM_VM_CR0_ALWAYS_ON                                           \
         (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
  #define KVM_CR4_GUEST_OWNED_BITS                                     \
         (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR      \
-        | X86_CR4_OSXMMEXCPT)
+        | X86_CR4_OSXMMEXCPT | X86_CR4_TSD)
  
  #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
  #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
@@ -214,6 +216,7 @@ struct __packed vmcs12 {
         u64 virtual_apic_page_addr;
         u64 apic_access_addr;
         u64 ept_pointer;
+       u64 xss_exit_bitmap;
         u64 guest_physical_address;
         u64 vmcs_link_pointer;
         u64 guest_ia32_debugctl;
@@ -616,6 +619,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
         FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
         FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
         FIELD64(EPT_POINTER, ept_pointer),
+       FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
         FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
         FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
         FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
@@ -720,12 +724,15 @@ static const unsigned short vmcs_field_to_offset_table[] = {
         FIELD(HOST_RSP, host_rsp),
         FIELD(HOST_RIP, host_rip),
  };
-static const int max_vmcs_field = ARRAY_SIZE(vmcs_field_to_offset_table);
  
  static inline short vmcs_field_to_offset(unsigned long field)
  {
-       if (field >= max_vmcs_field || vmcs_field_to_offset_table[field] == 0)
-               return -1;
+       BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
+
+       if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
+           vmcs_field_to_offset_table[field] == 0)
+               return -ENOENT;
+
         return vmcs_field_to_offset_table[field];
  }
  
@@ -758,6 +765,7 @@ static u64 construct_eptp(unsigned long root_hpa);
  static void kvm_cpu_vmxon(u64 addr);
  static void kvm_cpu_vmxoff(void);
  static bool vmx_mpx_supported(void);
+static bool vmx_xsaves_supported(void);
  static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
  static void vmx_set_segment(struct kvm_vcpu *vcpu,
                             struct kvm_segment *var, int seg);
@@ -1098,6 +1106,12 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
         return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
  }
  
+/*
+ * True only when vmcs12 has the XSAVES secondary execution control set
+ * and vmx_xsaves_supported() also reports support.
+ */
+static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
+{
+       if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES))
+               return false;
+
+       return vmx_xsaves_supported();
+}
+
  static inline bool is_exception(u32 intr_info)
  {
         return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -1659,12 +1673,20 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
         vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
  
         clear_atomic_switch_msr(vmx, MSR_EFER);
-       /* On ept, can't emulate nx, and must switch nx atomically */
-       if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) {
+
+       /*
+        * On EPT, we can't emulate NX, so we must switch EFER atomically.
+        * On CPUs that support "load IA32_EFER", always switch EFER
+        * atomically, since it's faster than switching it manually.
+        */
+       if (cpu_has_load_ia32_efer ||
+           (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
                 guest_efer = vmx->vcpu.arch.efer;
                 if (!(guest_efer & EFER_LMA))
                         guest_efer &= ~EFER_LME;
-               add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer);
+               if (guest_efer != host_efer)
+                       add_atomic_switch_msr(vmx, MSR_EFER,
+                                             guest_efer, host_efer);
                 return false;
         }
  
@@ -2377,12 +2399,13 @@ static __init void nested_vmx_setup_ctls_msrs(void)
         nested_vmx_secondary_ctls_low = 0;
         nested_vmx_secondary_ctls_high &=
                 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
-               SECONDARY_EXEC_UNRESTRICTED_GUEST |
-               SECONDARY_EXEC_WBINVD_EXITING;
+               SECONDARY_EXEC_WBINVD_EXITING |
+               SECONDARY_EXEC_XSAVES;
  
         if (enable_ept) {
                 /* nested EPT: emulate EPT also to L1 */
-               nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT;
+               nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT |
+                       SECONDARY_EXEC_UNRESTRICTED_GUEST;
                 nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
                          VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
                          VMX_EPT_INVEPT_BIT;
@@ -2558,6 +2581,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
                 if (!nested_vmx_allowed(vcpu))
                         return 1;
                 return vmx_get_vmx_msr(vcpu, msr_index, pdata);
+       case MSR_IA32_XSS:
+               if (!vmx_xsaves_supported())
+                       return 1;
+               data = vcpu->arch.ia32_xss;
+               break;
         case MSR_TSC_AUX:
                 if (!to_vmx(vcpu)->rdtscp_enabled)
                         return 1;
@@ -2649,6 +2677,22 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                 break;
         case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
                 return 1; /* they are read-only */
+       case MSR_IA32_XSS:
+               if (!vmx_xsaves_supported())
+                       return 1;
+               /*
+                * The only supported bit as of Skylake is bit 8, but
+                * it is not supported on KVM.
+                */
+               if (data != 0)
+                       return 1;
+               vcpu->arch.ia32_xss = data;
+               if (vcpu->arch.ia32_xss != host_xss)
+                       add_atomic_switch_msr(vmx, MSR_IA32_XSS,
+                               vcpu->arch.ia32_xss, host_xss);
+               else
+                       clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
+               break;
         case MSR_TSC_AUX:
                 if (!vmx->rdtscp_enabled)
                         return 1;
@@ -2884,7 +2928,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                         SECONDARY_EXEC_ENABLE_INVPCID |
                         SECONDARY_EXEC_APIC_REGISTER_VIRT |
                         SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-                       SECONDARY_EXEC_SHADOW_VMCS;
+                       SECONDARY_EXEC_SHADOW_VMCS |
+                       SECONDARY_EXEC_XSAVES;
                 if (adjust_vmx_controls(min2, opt2,
                                         MSR_IA32_VMX_PROCBASED_CTLS2,
                                         &_cpu_based_2nd_exec_control) < 0)
@@ -3007,6 +3052,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                 }
         }
  
+       if (cpu_has_xsaves)
+               rdmsrl(MSR_IA32_XSS, host_xss);
+
         return 0;
  }
  
@@ -3110,76 +3158,6 @@ static __init int alloc_kvm_area(void)
         return 0;
  }
  
-static __init int hardware_setup(void)
-{
-       if (setup_vmcs_config(&vmcs_config) < 0)
-               return -EIO;
-
-       if (boot_cpu_has(X86_FEATURE_NX))
-               kvm_enable_efer_bits(EFER_NX);
-
-       if (!cpu_has_vmx_vpid())
-               enable_vpid = 0;
-       if (!cpu_has_vmx_shadow_vmcs())
-               enable_shadow_vmcs = 0;
-       if (enable_shadow_vmcs)
-               init_vmcs_shadow_fields();
-
-       if (!cpu_has_vmx_ept() ||
-           !cpu_has_vmx_ept_4levels()) {
-               enable_ept = 0;
-               enable_unrestricted_guest = 0;
-               enable_ept_ad_bits = 0;
-       }
-
-       if (!cpu_has_vmx_ept_ad_bits())
-               enable_ept_ad_bits = 0;
-
-       if (!cpu_has_vmx_unrestricted_guest())
-               enable_unrestricted_guest = 0;
-
-       if (!cpu_has_vmx_flexpriority()) {
-               flexpriority_enabled = 0;
-
-               /*
-                * set_apic_access_page_addr() is used to reload apic access
-                * page upon invalidation.  No need to do anything if the
-                * processor does not have the APIC_ACCESS_ADDR VMCS field.
-                */
-               kvm_x86_ops->set_apic_access_page_addr = NULL;
-       }
-
-       if (!cpu_has_vmx_tpr_shadow())
-               kvm_x86_ops->update_cr8_intercept = NULL;
-
-       if (enable_ept && !cpu_has_vmx_ept_2m_page())
-               kvm_disable_largepages();
-
-       if (!cpu_has_vmx_ple())
-               ple_gap = 0;
-
-       if (!cpu_has_vmx_apicv())
-               enable_apicv = 0;
-
-       if (enable_apicv)
-               kvm_x86_ops->update_cr8_intercept = NULL;
-       else {
-               kvm_x86_ops->hwapic_irr_update = NULL;
-               kvm_x86_ops->deliver_posted_interrupt = NULL;
-               kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
-       }
-
-       if (nested)
-               nested_vmx_setup_ctls_msrs();
-
-       return alloc_kvm_area();
-}
-
-static __exit void hardware_unsetup(void)
-{
-       free_kvm_area();
-}
-
  static bool emulation_required(struct kvm_vcpu *vcpu)
  {
         return emulate_invalid_guest_state && !guest_state_valid(vcpu);
@@ -4396,6 +4374,7 @@ static void ept_set_mmio_spte_mask(void)
         kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
  }
  
+#define VMX_XSS_EXIT_BITMAP 0
  /*
   * Sets up the vmcs for emulated real mode.
   */
@@ -4505,6 +4484,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
         vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
         set_cr4_guest_host_mask(vmx);
  
+       if (vmx_xsaves_supported())
+               vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
+
         return 0;
  }
  
@@ -5163,13 +5145,20 @@ static int handle_cr(struct kvm_vcpu *vcpu)
  static int handle_dr(struct kvm_vcpu *vcpu)
  {
         unsigned long exit_qualification;
-       int dr, reg;
+       int dr, dr7, reg;
+
+       exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+       dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
+
+       /* First, if DR does not exist, trigger UD */
+       if (!kvm_require_dr(vcpu, dr))
+               return 1;
  
         /* Do not handle if the CPL > 0, will trigger GP on re-entry */
         if (!kvm_require_cpl(vcpu, 0))
                 return 1;
-       dr = vmcs_readl(GUEST_DR7);
-       if (dr & DR7_GD) {
+       dr7 = vmcs_readl(GUEST_DR7);
+       if (dr7 & DR7_GD) {
                 /*
                  * As the vm-exit takes precedence over the debug trap, we
                  * need to emulate the latter, either for the host or the
@@ -5177,17 +5166,14 @@ static int handle_dr(struct kvm_vcpu *vcpu)
                  */
                 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
                         vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
-                       vcpu->run->debug.arch.dr7 = dr;
-                       vcpu->run->debug.arch.pc =
-                               vmcs_readl(GUEST_CS_BASE) +
-                               vmcs_readl(GUEST_RIP);
+                       vcpu->run->debug.arch.dr7 = dr7;
+                       vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
                         vcpu->run->debug.arch.exception = DB_VECTOR;
                         vcpu->run->exit_reason = KVM_EXIT_DEBUG;
                         return 0;
                 } else {
-                       vcpu->arch.dr7 &= ~DR7_GD;
+                       vcpu->arch.dr6 &= ~15;
                         vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
-                       vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
                         kvm_queue_exception(vcpu, DB_VECTOR);
                         return 1;
                 }
@@ -5209,8 +5195,6 @@ static int handle_dr(struct kvm_vcpu *vcpu)
                 return 1;
         }
  
-       exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-       dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
         reg = DEBUG_REG_ACCESS_REG(exit_qualification);
         if (exit_qualification & TYPE_MOV_FROM_DR) {
                 unsigned long val;
@@ -5391,6 +5375,20 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
         return 1;
  }
  
+/*
+ * Exit handler registered for EXIT_REASON_XSAVES.  The exit is not expected
+ * to occur: the XSS exit bitmap is programmed with VMX_XSS_EXIT_BITMAP (0)
+ * and vmx_set_msr() rejects any non-zero IA32_XSS value.  If it does occur,
+ * step over the instruction and emit a warning.
+ */
+static int handle_xsaves(struct kvm_vcpu *vcpu)
+{
+       skip_emulated_instruction(vcpu);
+       WARN(1, "this should never happen\n");
+       return 1;
+}
+
+/*
+ * Exit handler registered for EXIT_REASON_XRSTORS.  The exit is not expected
+ * to occur: the XSS exit bitmap is programmed with VMX_XSS_EXIT_BITMAP (0)
+ * and vmx_set_msr() rejects any non-zero IA32_XSS value.  If it does occur,
+ * step over the instruction and emit a warning.
+ */
+static int handle_xrstors(struct kvm_vcpu *vcpu)
+{
+       skip_emulated_instruction(vcpu);
+       WARN(1, "this should never happen\n");
+       return 1;
+}
+
  static int handle_apic_access(struct kvm_vcpu *vcpu)
  {
         if (likely(fasteoi)) {
@@ -5492,7 +5490,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
         }
  
         /* clear all local breakpoint enable flags */
-       vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x55);
+       vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155);
  
         /*
          * TODO: What about debug traps on tss switch?
@@ -5539,11 +5537,11 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
         trace_kvm_page_fault(gpa, exit_qualification);
  
         /* It is a write fault? */
-       error_code = exit_qualification & (1U << 1);
+       error_code = exit_qualification & PFERR_WRITE_MASK;
         /* It is a fetch fault? */
-       error_code |= (exit_qualification & (1U << 2)) << 2;
+       error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;
         /* ept page table is present? */
-       error_code |= (exit_qualification >> 3) & 0x1;
+       error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK;
  
         vcpu->arch.exit_qualification = exit_qualification;
  
@@ -5785,6 +5783,204 @@ static void update_ple_window_actual_max(void)
                                             ple_window_grow, INT_MIN);
  }
  
+/*
+ * One-time VMX setup: allocate and initialise the I/O, MSR and
+ * VMREAD/VMWRITE bitmap pages, probe the VMCS capabilities, clear every
+ * enable_* knob the CPU cannot honour, and only then apply the configuration
+ * that depends on those knobs (MSR intercepts, APICv, EPT/TDP, PLE window).
+ *
+ * Returns 0 on success or a negative errno.  On any failure every bitmap
+ * page allocated here is released again before returning.
+ */
+static __init int hardware_setup(void)
+{
+       int r = -ENOMEM, i, msr;
+
+       rdmsrl_safe(MSR_EFER, &host_efer);
+
+       for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
+               kvm_define_shared_msr(i, vmx_msr_index[i]);
+
+       vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_io_bitmap_a)
+               return r;
+
+       vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_io_bitmap_b)
+               goto out;
+
+       vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_msr_bitmap_legacy)
+               goto out1;
+
+       vmx_msr_bitmap_legacy_x2apic =
+                               (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_msr_bitmap_legacy_x2apic)
+               goto out2;
+
+       vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_msr_bitmap_longmode)
+               goto out3;
+
+       vmx_msr_bitmap_longmode_x2apic =
+                               (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_msr_bitmap_longmode_x2apic)
+               goto out4;
+
+       vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_vmread_bitmap)
+               goto out5;
+
+       vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_vmwrite_bitmap)
+               goto out6;
+
+       memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
+       memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
+
+       /*
+        * Allow direct access to the PC debug port (it is often used for I/O
+        * delays, but the vmexits simply slow things down).
+        */
+       memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
+       clear_bit(0x80, vmx_io_bitmap_a);
+
+       memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
+
+       memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
+       memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
+
+       /*
+        * This reservation used to live in vmx_init() and must not be lost
+        * when the setup code moves here.
+        */
+       set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
+
+       /*
+        * Probe the VMCS capabilities and fix up the enable_* knobs before
+        * anything below consumes them.
+        */
+       if (setup_vmcs_config(&vmcs_config) < 0) {
+               r = -EIO;
+               goto out7;
+       }
+
+       if (boot_cpu_has(X86_FEATURE_NX))
+               kvm_enable_efer_bits(EFER_NX);
+
+       if (!cpu_has_vmx_vpid())
+               enable_vpid = 0;
+       if (!cpu_has_vmx_shadow_vmcs())
+               enable_shadow_vmcs = 0;
+       if (enable_shadow_vmcs)
+               init_vmcs_shadow_fields();
+
+       if (!cpu_has_vmx_ept() ||
+           !cpu_has_vmx_ept_4levels()) {
+               enable_ept = 0;
+               enable_unrestricted_guest = 0;
+               enable_ept_ad_bits = 0;
+       }
+
+       if (!cpu_has_vmx_ept_ad_bits())
+               enable_ept_ad_bits = 0;
+
+       if (!cpu_has_vmx_unrestricted_guest())
+               enable_unrestricted_guest = 0;
+
+       if (!cpu_has_vmx_flexpriority()) {
+               flexpriority_enabled = 0;
+
+               /*
+                * set_apic_access_page_addr() is used to reload apic access
+                * page upon invalidation.  No need to do anything if the
+                * processor does not have the APIC_ACCESS_ADDR VMCS field.
+                */
+               kvm_x86_ops->set_apic_access_page_addr = NULL;
+       }
+
+       if (!cpu_has_vmx_tpr_shadow())
+               kvm_x86_ops->update_cr8_intercept = NULL;
+
+       if (enable_ept && !cpu_has_vmx_ept_2m_page())
+               kvm_disable_largepages();
+
+       if (!cpu_has_vmx_ple())
+               ple_gap = 0;
+
+       if (!cpu_has_vmx_apicv())
+               enable_apicv = 0;
+
+       if (enable_apicv)
+               kvm_x86_ops->update_cr8_intercept = NULL;
+       else {
+               kvm_x86_ops->hwapic_irr_update = NULL;
+               kvm_x86_ops->deliver_posted_interrupt = NULL;
+               kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
+       }
+
+       if (nested)
+               nested_vmx_setup_ctls_msrs();
+
+       /*
+        * Everything from here on depends on the probed configuration
+        * (enable_apicv, enable_ept, enable_ept_ad_bits), so it must run
+        * after the capability checks above have had a chance to clear them.
+        */
+       vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
+       vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
+       vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
+       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
+       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
+       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+       vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
+
+       memcpy(vmx_msr_bitmap_legacy_x2apic,
+                       vmx_msr_bitmap_legacy, PAGE_SIZE);
+       memcpy(vmx_msr_bitmap_longmode_x2apic,
+                       vmx_msr_bitmap_longmode, PAGE_SIZE);
+
+       if (enable_apicv) {
+               for (msr = 0x800; msr <= 0x8ff; msr++)
+                       vmx_disable_intercept_msr_read_x2apic(msr);
+
+               /*
+                * According to the SDM, in x2apic mode the whole ID register
+                * is used, but KVM only uses the highest eight bits, so
+                * reads of it must be intercepted.
+                */
+               vmx_enable_intercept_msr_read_x2apic(0x802);
+               /* TMCCT */
+               vmx_enable_intercept_msr_read_x2apic(0x839);
+               /* TPR */
+               vmx_disable_intercept_msr_write_x2apic(0x808);
+               /* EOI */
+               vmx_disable_intercept_msr_write_x2apic(0x80b);
+               /* SELF-IPI */
+               vmx_disable_intercept_msr_write_x2apic(0x83f);
+       }
+
+       if (enable_ept) {
+               kvm_mmu_set_mask_ptes(0ull,
+                       (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
+                       (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
+                       0ull, VMX_EPT_EXECUTABLE_MASK);
+               ept_set_mmio_spte_mask();
+               kvm_enable_tdp();
+       } else
+               kvm_disable_tdp();
+
+       update_ple_window_actual_max();
+
+       /* Do not leak the bitmap pages if the per-cpu VMCS area fails. */
+       r = alloc_kvm_area();
+       if (r)
+               goto out7;
+
+       return 0;
+
+out7:
+       free_page((unsigned long)vmx_vmwrite_bitmap);
+out6:
+       free_page((unsigned long)vmx_vmread_bitmap);
+out5:
+       free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+out4:
+       free_page((unsigned long)vmx_msr_bitmap_longmode);
+out3:
+       free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+out2:
+       free_page((unsigned long)vmx_msr_bitmap_legacy);
+out1:
+       free_page((unsigned long)vmx_io_bitmap_b);
+out:
+       free_page((unsigned long)vmx_io_bitmap_a);
+
+       return r;
+}
+
+/*
+ * Release the bitmap pages obtained in hardware_setup(), then the area
+ * set up by alloc_kvm_area().
+ */
+static __exit void hardware_unsetup(void)
+{
+       /* Listed in the order the pages are released. */
+       unsigned long *const bitmap_pages[] = {
+               vmx_msr_bitmap_legacy_x2apic,
+               vmx_msr_bitmap_longmode_x2apic,
+               vmx_msr_bitmap_legacy,
+               vmx_msr_bitmap_longmode,
+               vmx_io_bitmap_b,
+               vmx_io_bitmap_a,
+               vmx_vmwrite_bitmap,
+               vmx_vmread_bitmap,
+       };
+       int idx;
+
+       for (idx = 0; idx < ARRAY_SIZE(bitmap_pages); idx++)
+               free_page((unsigned long)bitmap_pages[idx]);
+
+       free_kvm_area();
+}
+
+
  /*
   * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
   * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
@@ -6361,58 +6557,60 @@ static inline int vmcs_field_readonly(unsigned long field)
   * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
   * 64-bit fields are to be returned).
   */
-static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
-                                       unsigned long field, u64 *ret)
+static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
+                                 unsigned long field, u64 *ret)
  {
         short offset = vmcs_field_to_offset(field);
         char *p;
  
         if (offset < 0)
-               return 0;
+               return offset;
  
         p = ((char *)(get_vmcs12(vcpu))) + offset;
  
         switch (vmcs_field_type(field)) {
         case VMCS_FIELD_TYPE_NATURAL_WIDTH:
                 *ret = *((natural_width *)p);
-               return 1;
+               return 0;
         case VMCS_FIELD_TYPE_U16:
                 *ret = *((u16 *)p);
-               return 1;
+               return 0;
         case VMCS_FIELD_TYPE_U32:
                 *ret = *((u32 *)p);
-               return 1;
+               return 0;
         case VMCS_FIELD_TYPE_U64:
                 *ret = *((u64 *)p);
-               return 1;
+               return 0;
         default:
-               return 0; /* can never happen. */
+               WARN_ON(1);
+               return -ENOENT;
         }
  }
  
  
-static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu,
-                                   unsigned long field, u64 field_value){
+static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
+                                  unsigned long field, u64 field_value){
         short offset = vmcs_field_to_offset(field);
         char *p = ((char *) get_vmcs12(vcpu)) + offset;
         if (offset < 0)
-               return false;
+               return offset;
  
         switch (vmcs_field_type(field)) {
         case VMCS_FIELD_TYPE_U16:
                 *(u16 *)p = field_value;
-               return true;
+               return 0;
         case VMCS_FIELD_TYPE_U32:
                 *(u32 *)p = field_value;
-               return true;
+               return 0;
         case VMCS_FIELD_TYPE_U64:
                 *(u64 *)p = field_value;
-               return true;
+               return 0;
         case VMCS_FIELD_TYPE_NATURAL_WIDTH:
                 *(natural_width *)p = field_value;
-               return true;
+               return 0;
         default:
-               return false; /* can never happen. */
+               WARN_ON(1);
+               return -ENOENT;
         }
  
  }
@@ -6445,6 +6643,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
                 case VMCS_FIELD_TYPE_NATURAL_WIDTH:
                         field_value = vmcs_readl(field);
                         break;
+               default:
+                       WARN_ON(1);
+                       continue;
                 }
                 vmcs12_write_any(&vmx->vcpu, field, field_value);
         }
@@ -6490,6 +6691,9 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
                         case VMCS_FIELD_TYPE_NATURAL_WIDTH:
                                 vmcs_writel(field, (long)field_value);
                                 break;
+                       default:
+                               WARN_ON(1);
+                               break;
                         }
                 }
         }
@@ -6528,7 +6732,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
         /* Decode instruction info and find the field to read */
         field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
         /* Read the field, zero-extended to a u64 field_value */
-       if (!vmcs12_read_any(vcpu, field, &field_value)) {
+       if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
                 nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
                 skip_emulated_instruction(vcpu);
                 return 1;
@@ -6598,7 +6802,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
                 return 1;
         }
  
-       if (!vmcs12_write_any(vcpu, field, field_value)) {
+       if (vmcs12_write_any(vcpu, field, field_value) < 0) {
                 nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
                 skip_emulated_instruction(vcpu);
                 return 1;
@@ -6802,6 +7006,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
         [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
         [EXIT_REASON_INVEPT]                  = handle_invept,
         [EXIT_REASON_INVVPID]                 = handle_invvpid,
+       [EXIT_REASON_XSAVES]                  = handle_xsaves,
+       [EXIT_REASON_XRSTORS]                 = handle_xrstors,
  };
  
  static const int kvm_vmx_max_exit_handlers =
@@ -7089,6 +7295,14 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
         case EXIT_REASON_XSETBV:
                 return 1;
+       case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
+               /*
+                * This should never happen, since it is not possible to
+                * set XSS to a non-zero value---neither in L1 nor in L2.
+                * If it were, XSS would have to be checked against
+                * the XSS exit bitmap in vmcs12.
+                */
+               return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
         default:
                 return 1;
         }
@@ -7277,6 +7491,9 @@ static void vmx_set_rvi(int vector)
         u16 status;
         u8 old;
  
+       if (vector == -1)
+               vector = 0;
+
         status = vmcs_read16(GUEST_INTR_STATUS);
         old = (u8)status & 0xff;
         if ((u8)vector != old) {
@@ -7288,22 +7505,23 @@ static void vmx_set_rvi(int vector)
  
  static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
  {
+       if (!is_guest_mode(vcpu)) {
+               vmx_set_rvi(max_irr);
+               return;
+       }
+
         if (max_irr == -1)
                 return;
  
         /*
-        * If a vmexit is needed, vmx_check_nested_events handles it.
+        * In guest mode.  If a vmexit is needed, vmx_check_nested_events
+        * handles it.
          */
-       if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
+       if (nested_exit_on_intr(vcpu))
                 return;
  
-       if (!is_guest_mode(vcpu)) {
-               vmx_set_rvi(max_irr);
-               return;
-       }
-
         /*
-        * Fall back to pre-APICv interrupt injection since L2
+        * Else, fall back to pre-APICv interrupt injection since L2
          * is run without virtual interrupt delivery.
          */
         if (!kvm_event_needs_reinjection(vcpu) &&
@@ -7400,6 +7618,12 @@ static bool vmx_mpx_supported(void)
                 (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
  }
  
+/*
+ * Report whether SECONDARY_EXEC_XSAVES is set in the secondary execution
+ * controls recorded in vmcs_config.
+ */
+static bool vmx_xsaves_supported(void)
+{
+       return !!(vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_XSAVES);
+}
+
  static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
  {
         u32 exit_intr_info;
@@ -8135,6 +8359,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
         vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
         vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
  
+       if (nested_cpu_has_xsaves(vmcs12))
+               vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
         vmcs_write64(VMCS_LINK_POINTER, -1ull);
  
         exec_control = vmcs12->pin_based_vm_exec_control;
@@ -8775,6 +9001,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
         vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
         if (vmx_mpx_supported())
                 vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
+       if (nested_cpu_has_xsaves(vmcs12))
+               vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP);
  
         /* update exit information fields: */
  
@@ -9176,6 +9404,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
         .check_intercept = vmx_check_intercept,
         .handle_external_intr = vmx_handle_external_intr,
         .mpx_supported = vmx_mpx_supported,
+       .xsaves_supported = vmx_xsaves_supported,
  
         .check_nested_events = vmx_check_nested_events,
  
@@ -9184,150 +9413,21 @@ static struct kvm_x86_ops vmx_x86_ops = {
  
  static int __init vmx_init(void)
  {
-       int r, i, msr;
-
-       rdmsrl_safe(MSR_EFER, &host_efer);
-
-       for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
-               kvm_define_shared_msr(i, vmx_msr_index[i]);
-
-       vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_io_bitmap_a)
-               return -ENOMEM;
-
-       r = -ENOMEM;
-
-       vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_io_bitmap_b)
-               goto out;
-
-       vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_legacy)
-               goto out1;
-
-       vmx_msr_bitmap_legacy_x2apic =
-                               (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_legacy_x2apic)
-               goto out2;
-
-       vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_longmode)
-               goto out3;
-
-       vmx_msr_bitmap_longmode_x2apic =
-                               (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_longmode_x2apic)
-               goto out4;
-       vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_vmread_bitmap)
-               goto out5;
-
-       vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_vmwrite_bitmap)
-               goto out6;
-
-       memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
-       memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
-
-       /*
-        * Allow direct access to the PC debug port (it is often used for I/O
-        * delays, but the vmexits simply slow things down).
-        */
-       memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
-       clear_bit(0x80, vmx_io_bitmap_a);
-
-       memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
-
-       memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
-       memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-
-       set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
-
-       r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
-                    __alignof__(struct vcpu_vmx), THIS_MODULE);
+       int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
+                     __alignof__(struct vcpu_vmx), THIS_MODULE);
         if (r)
-               goto out7;
+               return r;
  
  #ifdef CONFIG_KEXEC
         rcu_assign_pointer(crash_vmclear_loaded_vmcss,
                            crash_vmclear_local_loaded_vmcss);
  #endif
  
-       vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
-       vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
-       vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
-       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
-       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
-       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
-       vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
-
-       memcpy(vmx_msr_bitmap_legacy_x2apic,
-                       vmx_msr_bitmap_legacy, PAGE_SIZE);
-       memcpy(vmx_msr_bitmap_longmode_x2apic,
-                       vmx_msr_bitmap_longmode, PAGE_SIZE);
-
-       if (enable_apicv) {
-               for (msr = 0x800; msr <= 0x8ff; msr++)
-                       vmx_disable_intercept_msr_read_x2apic(msr);
-
-               /* According SDM, in x2apic mode, the whole id reg is used.
-                * But in KVM, it only use the highest eight bits. Need to
-                * intercept it */
-               vmx_enable_intercept_msr_read_x2apic(0x802);
-               /* TMCCT */
-               vmx_enable_intercept_msr_read_x2apic(0x839);
-               /* TPR */
-               vmx_disable_intercept_msr_write_x2apic(0x808);
-               /* EOI */
-               vmx_disable_intercept_msr_write_x2apic(0x80b);
-               /* SELF-IPI */
-               vmx_disable_intercept_msr_write_x2apic(0x83f);
-       }
-
-       if (enable_ept) {
-               kvm_mmu_set_mask_ptes(0ull,
-                       (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
-                       (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
-                       0ull, VMX_EPT_EXECUTABLE_MASK);
-               ept_set_mmio_spte_mask();
-               kvm_enable_tdp();
-       } else
-               kvm_disable_tdp();
-
-       update_ple_window_actual_max();
-
         return 0;
-
-out7:
-       free_page((unsigned long)vmx_vmwrite_bitmap);
-out6:
-       free_page((unsigned long)vmx_vmread_bitmap);
-out5:
-       free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-out4:
-       free_page((unsigned long)vmx_msr_bitmap_longmode);
-out3:
-       free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-out2:
-       free_page((unsigned long)vmx_msr_bitmap_legacy);
-out1:
-       free_page((unsigned long)vmx_io_bitmap_b);
-out:
-       free_page((unsigned long)vmx_io_bitmap_a);
-       return r;
  }
  
  static void __exit vmx_exit(void)
  {
-       free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-       free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-       free_page((unsigned long)vmx_msr_bitmap_legacy);
-       free_page((unsigned long)vmx_msr_bitmap_longmode);
-       free_page((unsigned long)vmx_io_bitmap_b);
-       free_page((unsigned long)vmx_io_bitmap_a);
-       free_page((unsigned long)vmx_vmwrite_bitmap);
-       free_page((unsigned long)vmx_vmread_bitmap);
-
  #ifdef CONFIG_KEXEC
         RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
         synchronize_rcu();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index 0033df32a74585f69f5a8d83515f7182f1c7d7af..c259814200bd340b54014a1869d914ec3674caed 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -27,6 +27,7 @@
  #include "kvm_cache_regs.h"
  #include "x86.h"
  #include "cpuid.h"
+#include "assigned-dev.h"
  
  #include <linux/clocksource.h>
  #include <linux/interrupt.h>
@@ -353,6 +354,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
  
         if (!vcpu->arch.exception.pending) {
         queue:
+               if (has_error && !is_protmode(vcpu))
+                       has_error = false;
                 vcpu->arch.exception.pending = true;
                 vcpu->arch.exception.has_error_code = has_error;
                 vcpu->arch.exception.nr = nr;
@@ -455,6 +458,16 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
  }
  EXPORT_SYMBOL_GPL(kvm_require_cpl);
  
+bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
+{
+       if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+               return true;
+
+       kvm_queue_exception(vcpu, UD_VECTOR);
+       return false;
+}
+EXPORT_SYMBOL_GPL(kvm_require_dr);
+
  /*
   * This function will be used to read from the physical memory of the currently
   * running guest. The difference to kvm_read_guest_page is that this function
@@ -656,6 +669,12 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
         if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR)))
                 return 1;
  
+       if (xcr0 & XSTATE_AVX512) {
+               if (!(xcr0 & XSTATE_YMM))
+                       return 1;
+               if ((xcr0 & XSTATE_AVX512) != XSTATE_AVX512)
+                       return 1;
+       }
         kvm_put_guest_xcr0(vcpu);
         vcpu->arch.xcr0 = xcr0;
  
@@ -732,6 +751,10 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);
  
  int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
  {
+#ifdef CONFIG_X86_64
+       cr3 &= ~CR3_PCID_INVD;
+#endif
+
         if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
                 kvm_mmu_sync_roots(vcpu);
                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
@@ -811,8 +834,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
                         vcpu->arch.eff_db[dr] = val;
                 break;
         case 4:
-               if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-                       return 1; /* #UD */
                 /* fall through */
         case 6:
                 if (val & 0xffffffff00000000ULL)
@@ -821,8 +842,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
                 kvm_update_dr6(vcpu);
                 break;
         case 5:
-               if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-                       return 1; /* #UD */
                 /* fall through */
         default: /* 7 */
                 if (val & 0xffffffff00000000ULL)
@@ -837,27 +856,21 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
  
  int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
  {
-       int res;
-
-       res = __kvm_set_dr(vcpu, dr, val);
-       if (res > 0)
-               kvm_queue_exception(vcpu, UD_VECTOR);
-       else if (res < 0)
+       if (__kvm_set_dr(vcpu, dr, val)) {
                 kvm_inject_gp(vcpu, 0);
-
-       return res;
+               return 1;
+       }
+       return 0;
  }
  EXPORT_SYMBOL_GPL(kvm_set_dr);
  
-static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
+int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
  {
         switch (dr) {
         case 0 ... 3:
                 *val = vcpu->arch.db[dr];
                 break;
         case 4:
-               if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-                       return 1;
                 /* fall through */
         case 6:
                 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
@@ -866,23 +879,11 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
                         *val = kvm_x86_ops->get_dr6(vcpu);
                 break;
         case 5:
-               if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-                       return 1;
                 /* fall through */
         default: /* 7 */
                 *val = vcpu->arch.dr7;
                 break;
         }
-
-       return 0;
-}
-
-int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
-{
-       if (_kvm_get_dr(vcpu, dr, val)) {
-               kvm_queue_exception(vcpu, UD_VECTOR);
-               return 1;
-       }
         return 0;
  }
  EXPORT_SYMBOL_GPL(kvm_get_dr);
@@ -1237,21 +1238,22 @@ void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
  {
  #ifdef CONFIG_X86_64
         bool vcpus_matched;
-       bool do_request = false;
         struct kvm_arch *ka = &vcpu->kvm->arch;
         struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
  
         vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
                          atomic_read(&vcpu->kvm->online_vcpus));
  
-       if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC)
-               if (!ka->use_master_clock)
-                       do_request = 1;
-
-       if (!vcpus_matched && ka->use_master_clock)
-                       do_request = 1;
-
-       if (do_request)
+       /*
+        * Once the masterclock is enabled, always perform request in
+        * order to update it.
+        *
+        * In order to enable masterclock, the host clocksource must be TSC
+        * and the vcpus need to have matched TSCs.  When that happens,
+        * perform request to enable masterclock.
+        */
+       if (ka->use_master_clock ||
+           (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched))
                 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
  
         trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
@@ -1637,16 +1639,16 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
         vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
         vcpu->last_guest_tsc = tsc_timestamp;
  
+       if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
+               &guest_hv_clock, sizeof(guest_hv_clock))))
+               return 0;
+
         /*
          * The interface expects us to write an even number signaling that the
          * update is finished. Since the guest won't see the intermediate
          * state, we just increase by 2 at the end.
          */
-       vcpu->hv_clock.version += 2;
-
-       if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
-               &guest_hv_clock, sizeof(guest_hv_clock))))
-               return 0;
+       vcpu->hv_clock.version = guest_hv_clock.version + 2;
  
         /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
         pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
@@ -1662,6 +1664,8 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
  
         vcpu->hv_clock.flags = pvclock_flags;
  
+       trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
+
         kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
                                 &vcpu->hv_clock,
                                 sizeof(vcpu->hv_clock));
@@ -2140,7 +2144,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
         case MSR_IA32_TSC_ADJUST:
                 if (guest_cpuid_has_tsc_adjust(vcpu)) {
                         if (!msr_info->host_initiated) {
-                               u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
+                               s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
                                 kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
                         }
                         vcpu->arch.ia32_tsc_adjust_msr = data;
@@ -3106,7 +3110,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
         unsigned long val;
  
         memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
-       _kvm_get_dr(vcpu, 6, &val);
+       kvm_get_dr(vcpu, 6, &val);
         dbgregs->dr6 = val;
         dbgregs->dr7 = vcpu->arch.dr7;
         dbgregs->flags = 0;
@@ -3128,15 +3132,89 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
         return 0;
  }
  
+#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
+
+static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
+{
+       struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
+       u64 xstate_bv = xsave->xsave_hdr.xstate_bv;
+       u64 valid;
+
+       /*
+        * Copy legacy XSAVE area, to avoid complications with CPUID
+        * leaves 0 and 1 in the loop below.
+        */
+       memcpy(dest, xsave, XSAVE_HDR_OFFSET);
+
+       /* Set XSTATE_BV */
+       *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
+
+       /*
+        * Copy each region from the possibly compacted offset to the
+        * non-compacted offset.
+        */
+       valid = xstate_bv & ~XSTATE_FPSSE;
+       while (valid) {
+               u64 feature = valid & -valid;
+               int index = fls64(feature) - 1;
+               void *src = get_xsave_addr(xsave, feature);
+
+               if (src) {
+                       u32 size, offset, ecx, edx;
+                       cpuid_count(XSTATE_CPUID, index,
+                                   &size, &offset, &ecx, &edx);
+                       memcpy(dest + offset, src, size);
+               }
+
+               valid -= feature;
+       }
+}
+
+static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
+{
+       struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
+       u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
+       u64 valid;
+
+       /*
+        * Copy legacy XSAVE area, to avoid complications with CPUID
+        * leaves 0 and 1 in the loop below.
+        */
+       memcpy(xsave, src, XSAVE_HDR_OFFSET);
+
+       /* Set XSTATE_BV and possibly XCOMP_BV.  */
+       xsave->xsave_hdr.xstate_bv = xstate_bv;
+       if (cpu_has_xsaves)
+               xsave->xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
+
+       /*
+        * Copy each region from the non-compacted offset to the
+        * possibly compacted offset.
+        */
+       valid = xstate_bv & ~XSTATE_FPSSE;
+       while (valid) {
+               u64 feature = valid & -valid;
+               int index = fls64(feature) - 1;
+               void *dest = get_xsave_addr(xsave, feature);
+
+               if (dest) {
+                       u32 size, offset, ecx, edx;
+                       cpuid_count(XSTATE_CPUID, index,
+                                   &size, &offset, &ecx, &edx);
+                       memcpy(dest, src + offset, size);
+               } else
+                       WARN_ON_ONCE(1);
+
+               valid -= feature;
+       }
+}
+
  static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
                                          struct kvm_xsave *guest_xsave)
  {
         if (cpu_has_xsave) {
-               memcpy(guest_xsave->region,
-                       &vcpu->arch.guest_fpu.state->xsave,
-                       vcpu->arch.guest_xstate_size);
-               *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &=
-                       vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE;
+               memset(guest_xsave, 0, sizeof(struct kvm_xsave));
+               fill_xsave((u8 *) guest_xsave->region, vcpu);
         } else {
                 memcpy(guest_xsave->region,
                         &vcpu->arch.guest_fpu.state->fxsave,
@@ -3160,8 +3238,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
                  */
                 if (xstate_bv & ~kvm_supported_xcr0())
                         return -EINVAL;
-               memcpy(&vcpu->arch.guest_fpu.state->xsave,
-                       guest_xsave->region, vcpu->arch.guest_xstate_size);
+               load_xsave(vcpu, (u8 *)guest_xsave->region);
         } else {
                 if (xstate_bv & ~XSTATE_FPSSE)
                         return -EINVAL;
@@ -4004,7 +4081,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
         }
  
         default:
-               ;
+               r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
         }
  out:
         return r;
@@ -4667,7 +4744,7 @@ static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
  
  int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
  {
-       return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
+       return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
  }
  
  int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
@@ -5211,21 +5288,17 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag
  
  static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
  {
-       struct kvm_run *kvm_run = vcpu->run;
-       unsigned long eip = vcpu->arch.emulate_ctxt.eip;
-       u32 dr6 = 0;
-
         if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
             (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
-               dr6 = kvm_vcpu_check_hw_bp(eip, 0,
+               struct kvm_run *kvm_run = vcpu->run;
+               unsigned long eip = kvm_get_linear_rip(vcpu);
+               u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
                                            vcpu->arch.guest_debug_dr7,
                                            vcpu->arch.eff_db);
  
                 if (dr6 != 0) {
                         kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
-                       kvm_run->debug.arch.pc = kvm_rip_read(vcpu) +
-                               get_segment_base(vcpu, VCPU_SREG_CS);
-
+                       kvm_run->debug.arch.pc = eip;
                         kvm_run->debug.arch.exception = DB_VECTOR;
                         kvm_run->exit_reason = KVM_EXIT_DEBUG;
                         *r = EMULATE_USER_EXIT;
@@ -5235,7 +5308,8 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
  
         if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
             !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
-               dr6 = kvm_vcpu_check_hw_bp(eip, 0,
+               unsigned long eip = kvm_get_linear_rip(vcpu);
+               u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
                                            vcpu->arch.dr7,
                                            vcpu->arch.db);
  
@@ -5365,7 +5439,9 @@ restart:
                 kvm_rip_write(vcpu, ctxt->eip);
                 if (r == EMULATE_DONE)
                         kvm_vcpu_check_singlestep(vcpu, rflags, &r);
-               __kvm_set_rflags(vcpu, ctxt->eflags);
+               if (!ctxt->have_exception ||
+                   exception_type(ctxt->exception.vector) == EXCPT_TRAP)
+                       __kvm_set_rflags(vcpu, ctxt->eflags);
  
                 /*
                  * For STI, interrupts are shadowed; so KVM_REQ_EVENT will
@@ -5965,6 +6041,12 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
                         __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
                                              X86_EFLAGS_RF);
  
+               if (vcpu->arch.exception.nr == DB_VECTOR &&
+                   (vcpu->arch.dr7 & DR7_GD)) {
+                       vcpu->arch.dr7 &= ~DR7_GD;
+                       kvm_update_dr7(vcpu);
+               }
+
                 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
                                           vcpu->arch.exception.has_error_code,
                                           vcpu->arch.exception.error_code,
@@ -6873,6 +6955,9 @@ int fx_init(struct kvm_vcpu *vcpu)
                 return err;
  
         fpu_finit(&vcpu->arch.guest_fpu);
+       if (cpu_has_xsaves)
+               vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv =
+                       host_xcr0 | XSTATE_COMPACTION_ENABLED;
  
         /*
          * Ensure guest xcr0 is valid for loading
@@ -7024,7 +7109,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
         kvm_x86_ops->vcpu_reset(vcpu);
  }
  
-void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
+void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
  {
         struct kvm_segment cs;
  
@@ -7256,6 +7341,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
         if (type)
                 return -EINVAL;
  
+       INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
         INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
         INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
         INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
@@ -7536,12 +7622,18 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
         return kvm_x86_ops->interrupt_allowed(vcpu);
  }
  
-bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
+unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
  {
-       unsigned long current_rip = kvm_rip_read(vcpu) +
-               get_segment_base(vcpu, VCPU_SREG_CS);
+       if (is_64_bit_mode(vcpu))
+               return kvm_rip_read(vcpu);
+       return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
+                    kvm_rip_read(vcpu));
+}
+EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
  
-       return current_rip == linear_rip;
+bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
+{
+       return kvm_get_linear_rip(vcpu) == linear_rip;
  }
  EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
  
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h

index 7cb9c45a5fe056b92b13c1f76454dbcea4333d42..cc1d61af61409cc176ec883bf4cedf3a94e1a228 100644 (file)
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -162,7 +162,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
  bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
  
  #define KVM_SUPPORTED_XCR0     (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
-                               | XSTATE_BNDREGS | XSTATE_BNDCSR)
+                               | XSTATE_BNDREGS | XSTATE_BNDCSR \
+                               | XSTATE_AVX512)
  extern u64 host_xcr0;
  
  extern u64 kvm_supported_xcr0(void);
diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c

index fce7588962807771a29ce1336ecc559f7e235216..1ee27ac18de052e660fcafc148b6dbfd76a2499f 100644 (file)
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -87,6 +87,7 @@ static const struct usb_device_id ath3k_table[] = {
         { USB_DEVICE(0x04CA, 0x3007) },
         { USB_DEVICE(0x04CA, 0x3008) },
         { USB_DEVICE(0x04CA, 0x300b) },
+       { USB_DEVICE(0x04CA, 0x3010) },
         { USB_DEVICE(0x0930, 0x0219) },
         { USB_DEVICE(0x0930, 0x0220) },
         { USB_DEVICE(0x0930, 0x0227) },
@@ -140,6 +141,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
         { USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 },
         { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 },
         { USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 },
         { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
         { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 },
         { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 },
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c

index 31dd24ac99269ae36b04e1b05d20a80f4af65f0b..19cf2cf22e879816cc89112f76c5592d47f24e38 100644 (file)
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -167,6 +167,7 @@ static const struct usb_device_id blacklist_table[] = {
         { USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 },
         { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 },
         { USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 },
         { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
         { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 },
         { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 },
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig

index 77089399359b70bbcdccabc264c5e4e8089a1bdb..b899531498eb0dc7924e4587f67dbc3f7c116313 100644 (file)
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -38,6 +38,17 @@ config INFINIBAND_USER_MEM
         depends on INFINIBAND_USER_ACCESS != n
         default y
  
+config INFINIBAND_ON_DEMAND_PAGING
+       bool "InfiniBand on-demand paging support"
+       depends on INFINIBAND_USER_MEM
+       select MMU_NOTIFIER
+       default y
+       ---help---
+         On demand paging support for the InfiniBand subsystem.
+         Together with driver support this allows registration of
+         memory regions without pinning their pages, fetching the
+         pages on demand instead.
+
  config INFINIBAND_ADDR_TRANS
         bool
         depends on INFINIBAND
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile

index ffd0af6734af63120b78aa8402f2f48a1ec6fc20..acf73676444593704267ac9176696f95caa52335 100644 (file)
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=       ib_uverbs.o ib_ucm.o \
  ib_core-y :=                   packer.o ud_header.o verbs.o sysfs.o \
                                 device.o fmr_pool.o cache.o netlink.o
  ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
+ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
  
  ib_mad-y :=                    mad.o smi.o agent.o mad_rmpp.o
  
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c

index 8172d37f9addae8485041ad0cabc1d4261097647..f80da50d84a5b6585a10656b369bcc33aedd0507 100644 (file)
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -176,8 +176,8 @@ static void set_timeout(unsigned long time)
         unsigned long delay;
  
         delay = time - jiffies;
-       if ((long)delay <= 0)
-               delay = 1;
+       if ((long)delay < 0)
+               delay = 0;
  
         mod_delayed_work(addr_wq, &work, delay);
  }
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c

index d2360a8ef0b29ea6d6c9a3bebc05cf07b087a02f..fa17b552ff78bc16b1547215682f26649f5af7ac 100644 (file)
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -525,17 +525,22 @@ static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
         if (status)
                 process_join_error(group, status);
         else {
+               int mgids_changed, is_mgid0;
                 ib_find_pkey(group->port->dev->device, group->port->port_num,
                              be16_to_cpu(rec->pkey), &pkey_index);
  
                 spin_lock_irq(&group->port->lock);
-               group->rec = *rec;
                 if (group->state == MCAST_BUSY &&
                     group->pkey_index == MCAST_INVALID_PKEY_INDEX)
                         group->pkey_index = pkey_index;
-               if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
+               mgids_changed = memcmp(&rec->mgid, &group->rec.mgid,
+                                      sizeof(group->rec.mgid));
+               group->rec = *rec;
+               if (mgids_changed) {
                         rb_erase(&group->node, &group->port->table);
-                       mcast_insert(group->port, group, 1);
+                       is_mgid0 = !memcmp(&mgid0, &group->rec.mgid,
+                                          sizeof(mgid0));
+                       mcast_insert(group->port, group, is_mgid0);
                 }
                 spin_unlock_irq(&group->port->lock);
         }
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c

index df0c4f605a219c1f3f5cd5faabeadc705622b65e..aec7a6aa2951db47bc6b5be969a29d1867688b23 100644 (file)
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -39,6 +39,7 @@
  #include <linux/hugetlb.h>
  #include <linux/dma-attrs.h>
  #include <linux/slab.h>
+#include <rdma/ib_umem_odp.h>
  
  #include "uverbs.h"
  
@@ -69,6 +70,10 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
  
  /**
   * ib_umem_get - Pin and DMA map userspace memory.
+ *
+ * If access flags indicate ODP memory, avoid pinning. Instead, stores
+ * the mm for future page fault handling in conjunction with MMU notifiers.
+ *
   * @context: userspace context to pin memory for
   * @addr: userspace virtual address to start at
   * @size: length of region to pin
@@ -103,17 +108,30 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
  
         umem->context   = context;
         umem->length    = size;
-       umem->offset    = addr & ~PAGE_MASK;
+       umem->address   = addr;
         umem->page_size = PAGE_SIZE;
         umem->pid       = get_task_pid(current, PIDTYPE_PID);
         /*
-        * We ask for writable memory if any access flags other than
-        * "remote read" are set.  "Local write" and "remote write"
+        * We ask for writable memory if any of the following
+        * access flags are set.  "Local write" and "remote write"
          * obviously require write access.  "Remote atomic" can do
          * things like fetch and add, which will modify memory, and
          * "MW bind" can change permissions by binding a window.
          */
-       umem->writable  = !!(access & ~IB_ACCESS_REMOTE_READ);
+       umem->writable  = !!(access &
+               (IB_ACCESS_LOCAL_WRITE   | IB_ACCESS_REMOTE_WRITE |
+                IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
+
+       if (access & IB_ACCESS_ON_DEMAND) {
+               ret = ib_umem_odp_get(context, umem);
+               if (ret) {
+                       kfree(umem);
+                       return ERR_PTR(ret);
+               }
+               return umem;
+       }
+
+       umem->odp_data = NULL;
  
         /* We assume the memory is from hugetlb until proved otherwise */
         umem->hugetlb   = 1;
@@ -132,7 +150,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
         if (!vma_list)
                 umem->hugetlb = 0;
  
-       npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
+       npages = ib_umem_num_pages(umem);
  
         down_write(&current->mm->mmap_sem);
  
@@ -235,6 +253,11 @@ void ib_umem_release(struct ib_umem *umem)
         struct task_struct *task;
         unsigned long diff;
  
+       if (umem->odp_data) {
+               ib_umem_odp_release(umem);
+               return;
+       }
+
         __ib_umem_release(umem->context->device, umem, 1);
  
         task = get_pid_task(umem->pid, PIDTYPE_PID);
@@ -246,7 +269,7 @@ void ib_umem_release(struct ib_umem *umem)
         if (!mm)
                 goto out;
  
-       diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
+       diff = ib_umem_num_pages(umem);
  
         /*
          * We may be called with the mm's mmap_sem already held.  This
@@ -283,6 +306,9 @@ int ib_umem_page_count(struct ib_umem *umem)
         int n;
         struct scatterlist *sg;
  
+       if (umem->odp_data)
+               return ib_umem_num_pages(umem);
+
         shift = ilog2(umem->page_size);
  
         n = 0;
@@ -292,3 +318,37 @@ int ib_umem_page_count(struct ib_umem *umem)
         return n;
  }
  EXPORT_SYMBOL(ib_umem_page_count);
+
+/*
+ * Copy from the given ib_umem's pages to the given buffer.
+ *
+ * umem - the umem to copy from
+ * offset - offset to start copying from
+ * dst - destination buffer
+ * length - buffer length
+ *
+ * Returns 0 on success, or an error code.
+ */
+int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+                     size_t length)
+{
+       size_t end = offset + length;
+       int ret;
+
+       if (offset > umem->length || length > umem->length - offset) {
+               pr_err("ib_umem_copy_from not in range. offset: %zd umem length: %zd end: %zd\n",
+                      offset, umem->length, end);
+               return -EINVAL;
+       }
+
+       ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->nmap, dst, length,
+                                offset + ib_umem_offset(umem));
+
+       if (ret < 0)
+               return ret;
+       else if (ret != length)
+               return -EINVAL;
+       else
+               return 0;
+}
+EXPORT_SYMBOL(ib_umem_copy_from);
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c

new file mode 100644 (file)

index 0000000..6095872
--- /dev/null
+++ b/drivers/infiniband/core/umem_odp.c
@@ -0,0 +1,668 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/pid.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/vmalloc.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
+
+static void ib_umem_notifier_start_account(struct ib_umem *item)
+{
+       mutex_lock(&item->odp_data->umem_mutex);
+
+       /* Only update private counters for this umem if it has them.
+        * Otherwise skip it. All page faults will be delayed for this umem. */
+       if (item->odp_data->mn_counters_active) {
+               int notifiers_count = item->odp_data->notifiers_count++;
+
+               if (notifiers_count == 0)
+                       /* First notifier on this umem: re-arm the completion
+                        * object. Since notifiers_count was zero, no one
+                        * should be waiting right now. */
+                       reinit_completion(&item->odp_data->notifier_completion);
+       }
+       mutex_unlock(&item->odp_data->umem_mutex);
+}
+
+static void ib_umem_notifier_end_account(struct ib_umem *item)
+{
+       mutex_lock(&item->odp_data->umem_mutex);
+
+       /* Only update private counters for this umem if it has them.
+        * Otherwise skip it. All page faults will be delayed for this umem. */
+       if (item->odp_data->mn_counters_active) {
+               /*
+                * Bump the sequence number so that a page fault racing with
+                * this invalidation can tell that the page it is about to
+                * map may already have been freed.
+                */
+               ++item->odp_data->notifiers_seq;
+               if (--item->odp_data->notifiers_count == 0)
+                       complete_all(&item->odp_data->notifier_completion);
+       }
+       mutex_unlock(&item->odp_data->umem_mutex);
+}
+
+/* Account for a new mmu notifier in an ib_ucontext. */
+static void ib_ucontext_notifier_start_account(struct ib_ucontext *context)
+{
+       atomic_inc(&context->notifier_count);
+}
+
+/* Account for a terminating mmu notifier in an ib_ucontext.
+ *
+ * Must be called with the ib_ucontext->umem_rwsem semaphore unlocked, since
+ * the function takes the semaphore itself. */
+static void ib_ucontext_notifier_end_account(struct ib_ucontext *context)
+{
+       int zero_notifiers = atomic_dec_and_test(&context->notifier_count);
+
+       if (zero_notifiers &&
+           !list_empty(&context->no_private_counters)) {
+               /* No currently running mmu notifiers. Now is the chance to
+                * add private accounting to all previously added umems. */
+               struct ib_umem_odp *odp_data, *next;
+
+               /* Prevent concurrent mmu notifiers from working on the
+                * no_private_counters list. */
+               down_write(&context->umem_rwsem);
+
+               /* Read the notifier_count again, with the umem_rwsem
+                * semaphore taken for write. */
+               if (!atomic_read(&context->notifier_count)) {
+                       list_for_each_entry_safe(odp_data, next,
+                                                &context->no_private_counters,
+                                                no_private_counters) {
+                               mutex_lock(&odp_data->umem_mutex);
+                               odp_data->mn_counters_active = true;
+                               list_del(&odp_data->no_private_counters);
+                               complete_all(&odp_data->notifier_completion);
+                               mutex_unlock(&odp_data->umem_mutex);
+                       }
+               }
+
+               up_write(&context->umem_rwsem);
+       }
+}
+
+static int ib_umem_notifier_release_trampoline(struct ib_umem *item, u64 start,
+                                              u64 end, void *cookie) {
+       /*
+        * Increase the number of notifiers running, to
+        * prevent any further fault handling on this MR.
+        */
+       ib_umem_notifier_start_account(item);
+       item->odp_data->dying = 1;
+       /* Make sure that the fact the umem is dying is out before we release
+        * all pending page faults. */
+       smp_wmb();
+       complete_all(&item->odp_data->notifier_completion);
+       item->context->invalidate_range(item, ib_umem_start(item),
+                                       ib_umem_end(item));
+       return 0;
+}
+
+static void ib_umem_notifier_release(struct mmu_notifier *mn,
+                                    struct mm_struct *mm)
+{
+       struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+
+       if (!context->invalidate_range)
+               return;
+
+       ib_ucontext_notifier_start_account(context);
+       down_read(&context->umem_rwsem);
+       rbt_ib_umem_for_each_in_range(&context->umem_tree, 0,
+                                     ULLONG_MAX,
+                                     ib_umem_notifier_release_trampoline,
+                                     NULL);
+       up_read(&context->umem_rwsem);
+}
+
+static int invalidate_page_trampoline(struct ib_umem *item, u64 start,
+                                     u64 end, void *cookie)
+{
+       ib_umem_notifier_start_account(item);
+       item->context->invalidate_range(item, start, start + PAGE_SIZE);
+       ib_umem_notifier_end_account(item);
+       return 0;
+}
+
+static void ib_umem_notifier_invalidate_page(struct mmu_notifier *mn,
+                                            struct mm_struct *mm,
+                                            unsigned long address)
+{
+       struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+
+       if (!context->invalidate_range)
+               return;
+
+       ib_ucontext_notifier_start_account(context);
+       down_read(&context->umem_rwsem);
+       rbt_ib_umem_for_each_in_range(&context->umem_tree, address,
+                                     address + PAGE_SIZE,
+                                     invalidate_page_trampoline, NULL);
+       up_read(&context->umem_rwsem);
+       ib_ucontext_notifier_end_account(context);
+}
+
+static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start,
+                                            u64 end, void *cookie)
+{
+       ib_umem_notifier_start_account(item);
+       item->context->invalidate_range(item, start, end);
+       return 0;
+}
+
+static void ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
+                                                   struct mm_struct *mm,
+                                                   unsigned long start,
+                                                   unsigned long end)
+{
+       struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+
+       if (!context->invalidate_range)
+               return;
+
+       ib_ucontext_notifier_start_account(context);
+       down_read(&context->umem_rwsem);
+       rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
+                                     end,
+                                     invalidate_range_start_trampoline, NULL);
+       up_read(&context->umem_rwsem);
+}
+
+static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start,
+                                          u64 end, void *cookie)
+{
+       ib_umem_notifier_end_account(item);
+       return 0;
+}
+
+static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
+                                                 struct mm_struct *mm,
+                                                 unsigned long start,
+                                                 unsigned long end)
+{
+       struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+
+       if (!context->invalidate_range)
+               return;
+
+       down_read(&context->umem_rwsem);
+       rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
+                                     end,
+                                     invalidate_range_end_trampoline, NULL);
+       up_read(&context->umem_rwsem);
+       ib_ucontext_notifier_end_account(context);
+}
+
+static struct mmu_notifier_ops ib_umem_notifiers = {
+       .release                    = ib_umem_notifier_release,
+       .invalidate_page            = ib_umem_notifier_invalidate_page,
+       .invalidate_range_start     = ib_umem_notifier_invalidate_range_start,
+       .invalidate_range_end       = ib_umem_notifier_invalidate_range_end,
+};
+
+int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
+{
+       int ret_val;
+       struct pid *our_pid;
+       struct mm_struct *mm = get_task_mm(current);
+
+       if (!mm)
+               return -EINVAL;
+
+       /* Prevent creating ODP MRs in child processes */
+       rcu_read_lock();
+       our_pid = get_task_pid(current->group_leader, PIDTYPE_PID);
+       rcu_read_unlock();
+       put_pid(our_pid);
+       if (context->tgid != our_pid) {
+               ret_val = -EINVAL;
+               goto out_mm;
+       }
+
+       umem->hugetlb = 0;
+       umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL);
+       if (!umem->odp_data) {
+               ret_val = -ENOMEM;
+               goto out_mm;
+       }
+       umem->odp_data->umem = umem;
+
+       mutex_init(&umem->odp_data->umem_mutex);
+
+       init_completion(&umem->odp_data->notifier_completion);
+
+       umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
+                                           sizeof(*umem->odp_data->page_list));
+       if (!umem->odp_data->page_list) {
+               ret_val = -ENOMEM;
+               goto out_odp_data;
+       }
+
+       umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
+                                         sizeof(*umem->odp_data->dma_list));
+       if (!umem->odp_data->dma_list) {
+               ret_val = -ENOMEM;
+               goto out_page_list;
+       }
+
+       /*
+        * When using MMU notifiers, we will get a
+        * notification before the "current" task (and MM) is
+        * destroyed. We use the umem_rwsem semaphore to synchronize.
+        */
+       down_write(&context->umem_rwsem);
+       context->odp_mrs_count++;
+       if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
+               rbt_ib_umem_insert(&umem->odp_data->interval_tree,
+                                  &context->umem_tree);
+       if (likely(!atomic_read(&context->notifier_count)))
+               umem->odp_data->mn_counters_active = true;
+       else
+               list_add(&umem->odp_data->no_private_counters,
+                        &context->no_private_counters);
+       downgrade_write(&context->umem_rwsem);
+
+       if (context->odp_mrs_count == 1) {
+               /*
+                * Note that at this point, no MMU notifier is running
+                * for this context!
+                */
+               atomic_set(&context->notifier_count, 0);
+               INIT_HLIST_NODE(&context->mn.hlist);
+               context->mn.ops = &ib_umem_notifiers;
+               /*
+                * Lock-dep detects a false positive for mmap_sem vs.
+                * umem_rwsem, due to not grasping downgrade_write correctly.
+                *
+                * NOTE(review): if the registration below fails, the
+                * out_mutex path frees odp_data but does not remove it from
+                * umem_tree or no_private_counters, nor decrement
+                * odp_mrs_count, all of which were updated above under the
+                * write lock. Confirm whether that leaves stale state.
+                */
+               lockdep_off();
+               ret_val = mmu_notifier_register(&context->mn, mm);
+               lockdep_on();
+               if (ret_val) {
+                       pr_err("Failed to register mmu_notifier %d\n", ret_val);
+                       ret_val = -EBUSY;
+                       goto out_mutex;
+               }
+       }
+
+       up_read(&context->umem_rwsem);
+
+       /*
+        * Note that doing an mmput can cause a notifier for the relevant mm.
+        * If the notifier is called while we hold the umem_rwsem, this will
+        * cause a deadlock. Therefore, we release the reference only after we
+        * released the semaphore.
+        */
+       mmput(mm);
+       return 0;
+
+out_mutex:
+       up_read(&context->umem_rwsem);
+       vfree(umem->odp_data->dma_list);
+out_page_list:
+       vfree(umem->odp_data->page_list);
+out_odp_data:
+       kfree(umem->odp_data);
+out_mm:
+       mmput(mm);
+       return ret_val;
+}
+
+void ib_umem_odp_release(struct ib_umem *umem)
+{
+       struct ib_ucontext *context = umem->context;
+
+       /*
+        * Ensure that no more pages are mapped in the umem.
+        *
+        * It is the driver's responsibility to ensure, before calling us,
+        * that the hardware will not attempt to access the MR any more.
+        */
+       ib_umem_odp_unmap_dma_pages(umem, ib_umem_start(umem),
+                                   ib_umem_end(umem));
+
+       down_write(&context->umem_rwsem);
+       if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
+               rbt_ib_umem_remove(&umem->odp_data->interval_tree,
+                                  &context->umem_tree);
+       context->odp_mrs_count--;
+       if (!umem->odp_data->mn_counters_active) {
+               list_del(&umem->odp_data->no_private_counters);
+               complete_all(&umem->odp_data->notifier_completion);
+       }
+
+       /*
+        * Downgrade the lock to a read lock. This ensures that the notifiers
+        * (who take umem_rwsem for reading) will be able to finish, and we
+        * will be able to eventually obtain the mmu notifiers SRCU. Note
+        * that since we are doing it atomically, no other user could register
+        * and unregister while we do the check.
+        */
+       downgrade_write(&context->umem_rwsem);
+       if (!context->odp_mrs_count) {
+               struct task_struct *owning_process = NULL;
+               struct mm_struct *owning_mm        = NULL;
+
+               owning_process = get_pid_task(context->tgid,
+                                             PIDTYPE_PID);
+               if (owning_process == NULL)
+                       /*
+                        * The process is already dead, the notifiers were
+                        * removed already.
+                        */
+                       goto out;
+
+               owning_mm = get_task_mm(owning_process);
+               if (owning_mm == NULL)
+                       /*
+                        * The process' mm is already dead, the notifiers
+                        * were removed already.
+                        */
+                       goto out_put_task;
+               mmu_notifier_unregister(&context->mn, owning_mm);
+
+               mmput(owning_mm);
+
+out_put_task:
+               put_task_struct(owning_process);
+       }
+out:
+       up_read(&context->umem_rwsem);
+
+       vfree(umem->odp_data->dma_list);
+       vfree(umem->odp_data->page_list);
+       kfree(umem->odp_data);
+       kfree(umem);
+}
+
+/*
+ * Map for DMA and insert a single page into the on-demand paging page tables.
+ *
+ * @umem: the umem to insert the page to.
+ * @page_index: index in the umem to add the page to.
+ * @base_virt_addr: virtual address that is combined with @page_index to
+ *                  compute the range to invalidate when a conflicting
+ *                  mapping is found.
+ *                  NOTE(review): the caller in this file passes the
+ *                  page-aligned start of the requested range, while
+ *                  @page_index is relative to the start of the umem -
+ *                  confirm the computed address is the intended one.
+ * @page: the page struct to map and add.
+ * @access_mask: access permissions needed for this page.
+ * @current_seq: sequence number for synchronization with invalidations.
+ *               the sequence number is taken from
+ *               umem->odp_data->notifiers_seq.
+ *
+ * The function returns -EFAULT if the DMA mapping operation fails. It returns
+ * -EAGAIN if a concurrent invalidation prevents us from updating the page, or
+ * after invalidating an existing mapping that points to a different page.
+ *
+ * The page is released via put_page even if the operation failed. For
+ * on-demand pinning, the page is released whenever it isn't stored in the
+ * umem.
+ */
+static int ib_umem_odp_map_dma_single_page(
+               struct ib_umem *umem,
+               int page_index,
+               u64 base_virt_addr,
+               struct page *page,
+               u64 access_mask,
+               unsigned long current_seq)
+{
+       struct ib_device *dev = umem->context->device;
+       dma_addr_t dma_addr;
+       int stored_page = 0;
+       int remove_existing_mapping = 0;
+       int ret = 0;
+
+       mutex_lock(&umem->odp_data->umem_mutex);
+       /*
+        * Note: we avoid writing if seq is different from the initial seq, to
+        * handle case of a racing notifier. This check also allows us to bail
+        * early if we have a notifier running in parallel with us.
+        */
+       if (ib_umem_mmu_notifier_retry(umem, current_seq)) {
+               ret = -EAGAIN;
+               goto out;
+       }
+       if (!(umem->odp_data->dma_list[page_index])) {
+               dma_addr = ib_dma_map_page(dev,
+                                          page,
+                                          0, PAGE_SIZE,
+                                          DMA_BIDIRECTIONAL);
+               if (ib_dma_mapping_error(dev, dma_addr)) {
+                       ret = -EFAULT;
+                       goto out;
+               }
+               umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
+               umem->odp_data->page_list[page_index] = page;
+               stored_page = 1;
+       } else if (umem->odp_data->page_list[page_index] == page) {
+               umem->odp_data->dma_list[page_index] |= access_mask;
+       } else {
+               pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
+                      umem->odp_data->page_list[page_index], page);
+               /* Better remove the mapping now, to prevent any further
+                * damage. */
+               remove_existing_mapping = 1;
+       }
+
+out:
+       mutex_unlock(&umem->odp_data->umem_mutex);
+
+       /* On Demand Paging - avoid pinning the page */
+       if (umem->context->invalidate_range || !stored_page)
+               put_page(page);
+
+       if (remove_existing_mapping && umem->context->invalidate_range) {
+               invalidate_page_trampoline(
+                       umem,
+                       base_virt_addr + (page_index * PAGE_SIZE),
+                       base_virt_addr + ((page_index+1)*PAGE_SIZE),
+                       NULL);
+               ret = -EAGAIN;
+       }
+
+       return ret;
+}
+
+/**
+ * ib_umem_odp_map_dma_pages - Pin and DMA map userspace memory in an ODP MR.
+ *
+ * Pins the range of pages passed in the argument, and maps them to
+ * DMA addresses. The DMA addresses of the mapped pages are updated in
+ * umem->odp_data->dma_list.
+ *
+ * Returns the number of pages mapped on success, negative error code
+ * for failure.
+ * An -EAGAIN error code is returned when a concurrent mmu notifier prevents
+ * the function from completing its task.
+ *
+ * @umem: the umem to map and pin
+ * @user_virt: the address from which we need to map.
+ * @bcnt: the minimal number of bytes to pin and map. The mapping might be
+ *        bigger due to alignment, and may also be smaller in case of an error
+ *        pinning or mapping a page. The actual pages mapped is returned in
+ *        the return value.
+ * @access_mask: bit mask of the requested access permissions for the given
+ *               range.
+ * @current_seq: the MMU notifiers sequence value for synchronization with
+ *               invalidations. the sequence number is read from
+ *               umem->odp_data->notifiers_seq before calling this function
+ */
+int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
+                             u64 access_mask, unsigned long current_seq)
+{
+       struct task_struct *owning_process  = NULL;
+       struct mm_struct   *owning_mm       = NULL;
+       struct page       **local_page_list = NULL;
+       u64 off;
+       int j, k, ret = 0, start_idx, npages = 0;
+       u64 base_virt_addr;
+
+       if (access_mask == 0)
+               return -EINVAL;
+
+       /*
+        * Compare against the remaining room rather than computing
+        * user_virt + bcnt, which could wrap around and slip past the
+        * check with a range that lies outside the umem.
+        */
+       if (user_virt < ib_umem_start(umem) ||
+           user_virt > ib_umem_end(umem) ||
+           bcnt > ib_umem_end(umem) - user_virt)
+               return -EFAULT;
+
+       local_page_list = (struct page **)__get_free_page(GFP_KERNEL);
+       if (!local_page_list)
+               return -ENOMEM;
+
+       off = user_virt & (~PAGE_MASK);
+       user_virt = user_virt & PAGE_MASK;
+       base_virt_addr = user_virt;
+       bcnt += off; /* Charge for the first page offset as well. */
+
+       owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID);
+       if (owning_process == NULL) {
+               ret = -EINVAL;
+               goto out_no_task;
+       }
+
+       owning_mm = get_task_mm(owning_process);
+       if (owning_mm == NULL) {
+               ret = -EINVAL;
+               goto out_put_task;
+       }
+
+       start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT;
+       k = start_idx;
+
+       while (bcnt > 0) {
+               const size_t gup_num_pages =
+                       min_t(size_t, ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
+                             PAGE_SIZE / sizeof(struct page *));
+
+               down_read(&owning_mm->mmap_sem);
+               /*
+                * Note: this might result in redundant page getting. We can
+                * avoid this by checking dma_list to be 0 before calling
+                * get_user_pages. However, this would make the code much
+                * more complex (and doesn't gain us much performance in most
+                * use cases).
+                */
+               npages = get_user_pages(owning_process, owning_mm, user_virt,
+                                       gup_num_pages,
+                                       access_mask & ODP_WRITE_ALLOWED_BIT, 0,
+                                       local_page_list, NULL);
+               up_read(&owning_mm->mmap_sem);
+
+               if (npages < 0)
+                       break;
+
+               bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
+               user_virt += npages << PAGE_SHIFT;
+               for (j = 0; j < npages; ++j) {
+                       ret = ib_umem_odp_map_dma_single_page(
+                               umem, k, base_virt_addr, local_page_list[j],
+                               access_mask, current_seq);
+                       if (ret < 0)
+                               break;
+                       k++;
+               }
+
+               if (ret < 0) {
+                       /*
+                        * Release left over pages when handling errors. The
+                        * page at index j was already released by the failed
+                        * ib_umem_odp_map_dma_single_page call, so start
+                        * after it.
+                        */
+                       for (++j; j < npages; ++j)
+                               put_page(local_page_list[j]);
+                       break;
+               }
+       }
+
+       if (ret >= 0) {
+               if (npages < 0 && k == start_idx)
+                       ret = npages;
+               else
+                       ret = k - start_idx;
+       }
+
+       mmput(owning_mm);
+out_put_task:
+       put_task_struct(owning_process);
+out_no_task:
+       free_page((unsigned long)local_page_list);
+       return ret;
+}
+EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
+
+void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
+                                u64 bound)
+{
+       int idx;
+       u64 addr;
+       struct ib_device *dev = umem->context->device;
+
+       virt  = max_t(u64, virt,  ib_umem_start(umem));
+       bound = min_t(u64, bound, ib_umem_end(umem));
+       /* Note that during the run of this function, the
+        * notifiers_count of the MR is > 0, preventing any racing
+        * faults from completion. We might be racing with other
+        * invalidations, so we must make sure we free each page only
+        * once.
+        *
+        * NOTE(review): the loop advances by umem->page_size while the
+        * index is computed with PAGE_SIZE; this presumably relies on
+        * the two being equal for ODP umems - confirm. */
+       for (addr = virt; addr < bound; addr += (u64)umem->page_size) {
+               idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
+               mutex_lock(&umem->odp_data->umem_mutex);
+               if (umem->odp_data->page_list[idx]) {
+                       struct page *page = umem->odp_data->page_list[idx];
+                       struct page *head_page = compound_head(page);
+                       dma_addr_t dma = umem->odp_data->dma_list[idx];
+                       dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
+
+                       WARN_ON(!dma_addr);
+
+                       ib_dma_unmap_page(dev, dma_addr, PAGE_SIZE,
+                                         DMA_BIDIRECTIONAL);
+                       if (dma & ODP_WRITE_ALLOWED_BIT)
+                               /*
+                                * set_page_dirty prefers being called with
+                                * the page lock. However, MMU notifiers are
+                                * called sometimes with and sometimes without
+                                * the lock. We rely on the umem_mutex instead
+                                * to prevent other mmu notifiers from
+                                * continuing and allowing the page mapping to
+                                * be removed.
+                                */
+                               set_page_dirty(head_page);
+                       /* on demand pinning support */
+                       if (!umem->context->invalidate_range)
+                               put_page(page);
+                       umem->odp_data->page_list[idx] = NULL;
+                       umem->odp_data->dma_list[idx] = 0;
+               }
+               mutex_unlock(&umem->odp_data->umem_mutex);
+       }
+}
+EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c

new file mode 100644 (file)

index 0000000..727d788
--- /dev/null
+++ b/drivers/infiniband/core/umem_rbtree.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interval_tree_generic.h>
+#include <linux/sched.h>
+#include <linux/gfp.h>
+#include <rdma/ib_umem_odp.h>
+
+/*
+ * The ib_umem interval tree keeps track of memory regions for which the
+ * HW device requests a notification when the related memory mapping is
+ * changed.
+ *
+ * Callers serialize access with the owning ib_ucontext's umem_rwsem.
+ */
+
+static inline u64 node_start(struct umem_odp_node *n)
+{
+       struct ib_umem_odp *umem_odp =
+                       container_of(n, struct ib_umem_odp, interval_tree);
+
+       return ib_umem_start(umem_odp->umem);
+}
+
+/* Note that the representation of the intervals in the interval tree
+ * considers the ending point as contained in the interval, while the
+ * function ib_umem_end returns the first address which is not contained
+ * in the umem.
+ */
+static inline u64 node_last(struct umem_odp_node *n)
+{
+       struct ib_umem_odp *umem_odp =
+                       container_of(n, struct ib_umem_odp, interval_tree);
+
+       return ib_umem_end(umem_odp->umem) - 1;
+}
+
+INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
+                    node_start, node_last, , rbt_ib_umem)
+
+/* Call @cb for every umem in @root that overlaps [@start, @last).
+ *
+ * @last is not a part of the interval. See comment for function
+ * node_last. An empty range (@start == @last) visits nothing.
+ *
+ * @cb receives the @start and @last of the query, not the bounds of the
+ * umem it is invoked on, together with @cookie. Every overlapping umem is
+ * visited even after a callback has returned non-zero; the return value is
+ * 0 if all callbacks returned 0 and 1 otherwise.
+ */
+int rbt_ib_umem_for_each_in_range(struct rb_root *root,
+                                 u64 start, u64 last,
+                                 umem_call_back cb,
+                                 void *cookie)
+{
+       int ret_val = 0;
+       struct umem_odp_node *node;
+       struct ib_umem_odp *umem;
+
+       if (unlikely(start == last))
+               return ret_val;
+
+       for (node = rbt_ib_umem_iter_first(root, start, last - 1); node;
+                       node = rbt_ib_umem_iter_next(node, start, last - 1)) {
+               umem = container_of(node, struct ib_umem_odp, interval_tree);
+               ret_val = cb(umem->umem, start, last, cookie) || ret_val;
+       }
+
+       return ret_val;
+}
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h

index 643c08a025a52d015431b8a27be1ddcacbd36845..b716b08156446e186c9ae608f3f4e6343c6f200f 100644 (file)
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -258,5 +258,6 @@ IB_UVERBS_DECLARE_CMD(close_xrcd);
  
  IB_UVERBS_DECLARE_EX_CMD(create_flow);
  IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
+IB_UVERBS_DECLARE_EX_CMD(query_device);
  
  #endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c

index 5ba2a86aab6a8ac4a5b6f5693b6627d0832a5f6c..532d8eba8b0203ab65a2a8ed1f096389253b1dca 100644 (file)
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -36,6 +36,7 @@
  #include <linux/file.h>
  #include <linux/fs.h>
  #include <linux/slab.h>
+#include <linux/sched.h>
  
  #include <asm/uaccess.h>
  
@@ -288,6 +289,9 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
         struct ib_uverbs_get_context_resp resp;
         struct ib_udata                   udata;
         struct ib_device                 *ibdev = file->device->ib_dev;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+       struct ib_device_attr             dev_attr;
+#endif
         struct ib_ucontext               *ucontext;
         struct file                      *filp;
         int ret;
@@ -325,8 +329,25 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
         INIT_LIST_HEAD(&ucontext->ah_list);
         INIT_LIST_HEAD(&ucontext->xrcd_list);
         INIT_LIST_HEAD(&ucontext->rule_list);
+       rcu_read_lock();
+       ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
+       rcu_read_unlock();
         ucontext->closing = 0;
  
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+       ucontext->umem_tree = RB_ROOT;
+       init_rwsem(&ucontext->umem_rwsem);
+       ucontext->odp_mrs_count = 0;
+       INIT_LIST_HEAD(&ucontext->no_private_counters);
+
+       ret = ib_query_device(ibdev, &dev_attr);
+       if (ret)
+               goto err_free;
+       if (!(dev_attr.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
+               ucontext->invalidate_range = NULL;
+
+#endif
+
         resp.num_comp_vectors = file->device->num_comp_vectors;
  
         ret = get_unused_fd_flags(O_CLOEXEC);
@@ -371,6 +392,7 @@ err_fd:
         put_unused_fd(resp.async_fd);
  
  err_free:
+       put_pid(ucontext->tgid);
         ibdev->dealloc_ucontext(ucontext);
  
  err:
@@ -378,6 +400,59 @@ err:
         return ret;
  }
  
+/*
+ * Fill a query-device response from @attr.  node_guid and phys_port_cnt
+ * come from the ib_device rather than from @attr.  Used by both the plain
+ * and the extended query-device handlers.
+ */
+static void copy_query_dev_fields(struct ib_uverbs_file *file,
+                                 struct ib_uverbs_query_device_resp *resp,
+                                 struct ib_device_attr *attr)
+{
+       struct ib_device *ibdev = file->device->ib_dev;
+
+       resp->fw_ver                    = attr->fw_ver;
+       resp->node_guid                 = ibdev->node_guid;
+       resp->sys_image_guid            = attr->sys_image_guid;
+       resp->max_mr_size               = attr->max_mr_size;
+       resp->page_size_cap             = attr->page_size_cap;
+       resp->vendor_id                 = attr->vendor_id;
+       resp->vendor_part_id            = attr->vendor_part_id;
+       resp->hw_ver                    = attr->hw_ver;
+       resp->max_qp                    = attr->max_qp;
+       resp->max_qp_wr                 = attr->max_qp_wr;
+       resp->device_cap_flags          = attr->device_cap_flags;
+       resp->max_sge                   = attr->max_sge;
+       resp->max_sge_rd                = attr->max_sge_rd;
+       resp->max_cq                    = attr->max_cq;
+       resp->max_cqe                   = attr->max_cqe;
+       resp->max_mr                    = attr->max_mr;
+       resp->max_pd                    = attr->max_pd;
+       resp->max_qp_rd_atom            = attr->max_qp_rd_atom;
+       resp->max_ee_rd_atom            = attr->max_ee_rd_atom;
+       resp->max_res_rd_atom           = attr->max_res_rd_atom;
+       resp->max_qp_init_rd_atom       = attr->max_qp_init_rd_atom;
+       resp->max_ee_init_rd_atom       = attr->max_ee_init_rd_atom;
+       resp->atomic_cap                = attr->atomic_cap;
+       resp->max_ee                    = attr->max_ee;
+       resp->max_rdd                   = attr->max_rdd;
+       resp->max_mw                    = attr->max_mw;
+       resp->max_raw_ipv6_qp           = attr->max_raw_ipv6_qp;
+       resp->max_raw_ethy_qp           = attr->max_raw_ethy_qp;
+       resp->max_mcast_grp             = attr->max_mcast_grp;
+       resp->max_mcast_qp_attach       = attr->max_mcast_qp_attach;
+       resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach;
+       resp->max_ah                    = attr->max_ah;
+       resp->max_fmr                   = attr->max_fmr;
+       resp->max_map_per_fmr           = attr->max_map_per_fmr;
+       resp->max_srq                   = attr->max_srq;
+       resp->max_srq_wr                = attr->max_srq_wr;
+       resp->max_srq_sge               = attr->max_srq_sge;
+       resp->max_pkeys                 = attr->max_pkeys;
+       resp->local_ca_ack_delay        = attr->local_ca_ack_delay;
+       resp->phys_port_cnt             = ibdev->phys_port_cnt;
+}
+
  ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
                                const char __user *buf,
                                int in_len, int out_len)
@@ -398,47 +466,7 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
                 return ret;
  
         memset(&resp, 0, sizeof resp);
-
-       resp.fw_ver                    = attr.fw_ver;
-       resp.node_guid                 = file->device->ib_dev->node_guid;
-       resp.sys_image_guid            = attr.sys_image_guid;
-       resp.max_mr_size               = attr.max_mr_size;
-       resp.page_size_cap             = attr.page_size_cap;
-       resp.vendor_id                 = attr.vendor_id;
-       resp.vendor_part_id            = attr.vendor_part_id;
-       resp.hw_ver                    = attr.hw_ver;
-       resp.max_qp                    = attr.max_qp;
-       resp.max_qp_wr                 = attr.max_qp_wr;
-       resp.device_cap_flags          = attr.device_cap_flags;
-       resp.max_sge                   = attr.max_sge;
-       resp.max_sge_rd                = attr.max_sge_rd;
-       resp.max_cq                    = attr.max_cq;
-       resp.max_cqe                   = attr.max_cqe;
-       resp.max_mr                    = attr.max_mr;
-       resp.max_pd                    = attr.max_pd;
-       resp.max_qp_rd_atom            = attr.max_qp_rd_atom;
-       resp.max_ee_rd_atom            = attr.max_ee_rd_atom;
-       resp.max_res_rd_atom           = attr.max_res_rd_atom;
-       resp.max_qp_init_rd_atom       = attr.max_qp_init_rd_atom;
-       resp.max_ee_init_rd_atom       = attr.max_ee_init_rd_atom;
-       resp.atomic_cap                = attr.atomic_cap;
-       resp.max_ee                    = attr.max_ee;
-       resp.max_rdd                   = attr.max_rdd;
-       resp.max_mw                    = attr.max_mw;
-       resp.max_raw_ipv6_qp           = attr.max_raw_ipv6_qp;
-       resp.max_raw_ethy_qp           = attr.max_raw_ethy_qp;
-       resp.max_mcast_grp             = attr.max_mcast_grp;
-       resp.max_mcast_qp_attach       = attr.max_mcast_qp_attach;
-       resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach;
-       resp.max_ah                    = attr.max_ah;
-       resp.max_fmr                   = attr.max_fmr;
-       resp.max_map_per_fmr           = attr.max_map_per_fmr;
-       resp.max_srq                   = attr.max_srq;
-       resp.max_srq_wr                = attr.max_srq_wr;
-       resp.max_srq_sge               = attr.max_srq_sge;
-       resp.max_pkeys                 = attr.max_pkeys;
-       resp.local_ca_ack_delay        = attr.local_ca_ack_delay;
-       resp.phys_port_cnt             = file->device->ib_dev->phys_port_cnt;
+       copy_query_dev_fields(file, &resp, &attr);
  
         if (copy_to_user((void __user *) (unsigned long) cmd.response,
                          &resp, sizeof resp))
@@ -947,6 +975,18 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
                 goto err_free;
         }
  
+       if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
+               struct ib_device_attr attr;
+
+               ret = ib_query_device(pd->device, &attr);
+               if (ret || !(attr.device_cap_flags &
+                               IB_DEVICE_ON_DEMAND_PAGING)) {
+                       pr_debug("ODP support not available\n");
+                       ret = -EINVAL;
+                       goto err_put;
+               }
+       }
+
         mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
                                      cmd.access_flags, &udata);
         if (IS_ERR(mr)) {
@@ -3253,3 +3293,52 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
  
         return ret ? ret : in_len;
  }
+
+/*
+ * Extended QUERY_DEVICE: reply with the base device attributes and, when
+ * requested through comp_mask, the ODP caps.  Returns 0 or an error code.
+ */
+int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
+                             struct ib_udata *ucore,
+                             struct ib_udata *uhw)
+{
+       struct ib_uverbs_ex_query_device_resp resp;
+       struct ib_uverbs_ex_query_device  cmd;
+       struct ib_device_attr attr;
+       struct ib_device *device = file->device->ib_dev;
+       int err;
+
+       if (ucore->inlen < sizeof(cmd))
+               return -EINVAL;
+       if (ucore->outlen < sizeof(resp))
+               return -ENOSPC; /* reply would overrun the caller's buffer */
+
+       err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+       if (err)
+               return err;
+
+       if (cmd.reserved)
+               return -EINVAL;
+
+       err = device->query_device(device, &attr);
+       if (err)
+               return err;
+
+       memset(&resp, 0, sizeof(resp)); /* also leaves resp.comp_mask at 0 */
+       copy_query_dev_fields(file, &resp.base, &attr);
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+       if (cmd.comp_mask & IB_USER_VERBS_EX_QUERY_DEVICE_ODP) {
+               resp.odp_caps.general_caps = attr.odp_caps.general_caps;
+               resp.odp_caps.per_transport_caps.rc_odp_caps =
+                       attr.odp_caps.per_transport_caps.rc_odp_caps;
+               resp.odp_caps.per_transport_caps.uc_odp_caps =
+                       attr.odp_caps.per_transport_caps.uc_odp_caps;
+               resp.odp_caps.per_transport_caps.ud_odp_caps =
+                       attr.odp_caps.per_transport_caps.ud_odp_caps;
+               resp.comp_mask |= IB_USER_VERBS_EX_QUERY_DEVICE_ODP;
+       }
+#endif
+
+       return ib_copy_to_udata(ucore, &resp, sizeof(resp));
+}
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c

index 71ab83fde47292e95315c2170b00fec6277b7861..e6c23b9eab336818fa785bae49f5c78c47221fbb 100644 (file)
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -122,7 +122,8 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
                                     struct ib_udata *ucore,
                                     struct ib_udata *uhw) = {
         [IB_USER_VERBS_EX_CMD_CREATE_FLOW]      = ib_uverbs_ex_create_flow,
-       [IB_USER_VERBS_EX_CMD_DESTROY_FLOW]     = ib_uverbs_ex_destroy_flow
+       [IB_USER_VERBS_EX_CMD_DESTROY_FLOW]     = ib_uverbs_ex_destroy_flow,
+       [IB_USER_VERBS_EX_CMD_QUERY_DEVICE]     = ib_uverbs_ex_query_device
  };
  
  static void ib_uverbs_add_one(struct ib_device *device);
@@ -296,6 +297,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
                 kfree(uobj);
         }
  
+       put_pid(context->tgid);
+
         return context->device->dealloc_ucontext(context);
  }
  
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c

index c2b89cc5dbcad5153ef3e3d072ea0c5e3d384caa..f93eb8da7b5ad443900c3b8b423da505d0531a95 100644 (file)
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -879,7 +879,8 @@ int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
                 if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
                         rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac);
                         rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac);
-                       qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
+                       if (!(*qp_attr_mask & IB_QP_VID))
+                               qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
                 } else {
                         ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid,
                                         qp_attr->ah_attr.dmac, &qp_attr->vlan_id);
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c

index 2d5cbf4363e4de0758a4d7260341a136b3ba2544..bdf3507810cb767767cd8496ea510f9a14497f7c 100644 (file)
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -476,7 +476,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                          c2mr->umem->page_size,
                                          i,
                                          length,
-                                        c2mr->umem->offset,
+                                        ib_umem_offset(c2mr->umem),
                                          &kva,
                                          c2_convert_access(acc),
                                          c2mr);
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c

index 4b8c6116c058247f4980fa6e06ec986700ef0922..9edc200b311d861a9faa3eb0c051c5c9c7fbf3cc 100644 (file)
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1640,7 +1640,8 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
                 __state_set(&ep->com, MPA_REQ_RCVD);
  
                 /* drive upcall */
-               mutex_lock(&ep->parent_ep->com.mutex);
+               mutex_lock_nested(&ep->parent_ep->com.mutex,
+                                 SINGLE_DEPTH_NESTING);
                 if (ep->parent_ep->com.state != DEAD) {
                         if (connect_request_upcall(ep))
                                 abort_connection(ep, skb, GFP_KERNEL);
@@ -3126,6 +3127,8 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
                 err = c4iw_wait_for_reply(&ep->com.dev->rdev,
                                           &ep->com.wr_wait,
                                           0, 0, __func__);
+       else if (err > 0)
+               err = net_xmit_errno(err);
         if (err)
                 pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
                        err, ep->stid,
@@ -3159,6 +3162,8 @@ static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
                         err = c4iw_wait_for_reply(&ep->com.dev->rdev,
                                                   &ep->com.wr_wait,
                                                   0, 0, __func__);
+               else if (err > 0)
+                       err = net_xmit_errno(err);
         }
         if (err)
                 pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n"
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c

index 72f1f052e88c3886596004b16243c196a789c172..eb5df4e62703d8adb254a9a4f6163a90d40ebd18 100644 (file)
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -670,7 +670,7 @@ static int ep_open(struct inode *inode, struct file *file)
         idr_for_each(&epd->devp->stid_idr, count_idrs, &count);
         spin_unlock_irq(&epd->devp->lock);
  
-       epd->bufsize = count * 160;
+       epd->bufsize = count * 240;
         epd->buf = vmalloc(epd->bufsize);
         if (!epd->buf) {
                 ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c

index 0744455cd88b75f67a948be3f787f915ff8d7795..cb43c2299ac00b94ec4252a6074013ecf2730955 100644 (file)
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -50,6 +50,13 @@ static int inline_threshold = C4IW_INLINE_THRESHOLD;
  module_param(inline_threshold, int, 0644);
  MODULE_PARM_DESC(inline_threshold, "inline vs dsgl threshold (default=128)");
  
+/* Nonzero when length is at least 8 GiB and the adapter is a T4 or T5. */
+static int mr_exceeds_hw_limits(struct c4iw_dev *dev, u64 length)
+{
+       return length >= (8ULL << 30) && (is_t4(dev->rdev.lldi.adapter_type) ||
+                                         is_t5(dev->rdev.lldi.adapter_type));
+}
+
  static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
                                        u32 len, dma_addr_t data, int wait)
  {
@@ -369,9 +376,11 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
         int ret;
  
         ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid,
-                             FW_RI_STAG_NSMR, mhp->attr.perms,
+                             FW_RI_STAG_NSMR, mhp->attr.len ?
+                             mhp->attr.perms : 0,
                               mhp->attr.mw_bind_enable, mhp->attr.zbva,
-                             mhp->attr.va_fbo, mhp->attr.len, shift - 12,
+                             mhp->attr.va_fbo, mhp->attr.len ?
+                             mhp->attr.len : -1, shift - 12,
                               mhp->attr.pbl_size, mhp->attr.pbl_addr);
         if (ret)
                 return ret;
@@ -536,6 +545,11 @@ int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask,
                         return ret;
         }
  
+       if (mr_exceeds_hw_limits(rhp, total_size)) {
+               kfree(page_list);
+               return -EINVAL;
+       }
+
         ret = reregister_mem(rhp, php, &mh, shift, npages);
         kfree(page_list);
         if (ret)
@@ -596,6 +610,12 @@ struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
         if (ret)
                 goto err;
  
+       if (mr_exceeds_hw_limits(rhp, total_size)) {
+               kfree(page_list);
+               ret = -EINVAL;
+               goto err;
+       }
+
         ret = alloc_pbl(mhp, npages);
         if (ret) {
                 kfree(page_list);
@@ -699,6 +719,10 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
  
         php = to_c4iw_pd(pd);
         rhp = php->rhp;
+
+       if (mr_exceeds_hw_limits(rhp, length))
+               return ERR_PTR(-EINVAL);
+
         mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
         if (!mhp)
                 return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c

index 2ed3ece2b2ee38de0bf38525914ffb85cb7ed80d..bb85d479e66eec254fa5c389e9dc4cc335248bb4 100644 (file)
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1538,9 +1538,9 @@ err:
         set_state(qhp, C4IW_QP_STATE_ERROR);
         free = 1;
         abort = 1;
-       wake_up(&qhp->wait);
         BUG_ON(!ep);
         flush_qp(qhp);
+       wake_up(&qhp->wait);
  out:
         mutex_unlock(&qhp->mutex);
  
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c

index 3488e8c9fcb44b89211e0650c47a87a2fd469e65..f914b30999f8d1519ee28edeb134762cf4796b52 100644 (file)
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -399,7 +399,7 @@ reg_user_mr_fallback:
         pginfo.num_kpages = num_kpages;
         pginfo.num_hwpages = num_hwpages;
         pginfo.u.usr.region = e_mr->umem;
-       pginfo.next_hwpage = e_mr->umem->offset / hwpage_size;
+       pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size;
         pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl;
         ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
                           e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c

index 5e61e9bff697b6c3d837e4281ff5f4231306076b..c7278f6a8217798c3572366828491aa1c57d5b49 100644 (file)
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -214,7 +214,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
         mr->mr.user_base = start;
         mr->mr.iova = virt_addr;
         mr->mr.length = length;
-       mr->mr.offset = umem->offset;
+       mr->mr.offset = ib_umem_offset(umem);
         mr->mr.access_flags = mr_access_flags;
         mr->mr.max_segs = n;
         mr->umem = umem;
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c

index 8f9325cfc85d3638dbb407573e30d3388cf3e934..c36ccbd9a644f16e85cdcb4a323cbf1803683292 100644 (file)
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -223,7 +223,6 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
  
         if (flags & IB_MR_REREG_TRANS) {
                 int shift;
-               int err;
                 int n;
  
                 mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile

index 4ea0135af48461878b5bc7d3da5cbd2065f1a4e6..27a70159e2ea8813ad7cbffc4913b3561897af34 100644 (file)
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,3 +1,4 @@
  obj-$(CONFIG_MLX5_INFINIBAND)  += mlx5_ib.o
  
  mlx5_ib-y :=   main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o
+mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c

index 1ba6c42e4df80cef8bddce8dfd0bc9525e84123a..8a87404e9c76e763709e478b17b00485f38e678f 100644 (file)
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -244,6 +244,12 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
                                            props->max_mcast_grp;
         props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
  
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+       if (dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG)
+               props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
+       props->odp_caps = dev->odp_caps;
+#endif
+
  out:
         kfree(in_mad);
         kfree(out_mad);
@@ -568,6 +574,10 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
                         goto out_count;
         }
  
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+       context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
+#endif
+
         INIT_LIST_HEAD(&context->db_page_list);
         mutex_init(&context->db_page_mutex);
  
@@ -858,7 +868,7 @@ static ssize_t show_reg_pages(struct device *device,
         struct mlx5_ib_dev *dev =
                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
  
-       return sprintf(buf, "%d\n", dev->mdev->priv.reg_pages);
+       return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
  }
  
  static ssize_t show_hca(struct device *device, struct device_attribute *attr,
@@ -1321,6 +1331,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
                 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
                 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)         |
                 (1ull << IB_USER_VERBS_CMD_OPEN_QP);
+       dev->ib_dev.uverbs_ex_cmd_mask =
+               (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
  
         dev->ib_dev.query_device        = mlx5_ib_query_device;
         dev->ib_dev.query_port          = mlx5_ib_query_port;
@@ -1366,6 +1378,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
         dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
         dev->ib_dev.check_mr_status     = mlx5_ib_check_mr_status;
  
+       mlx5_ib_internal_query_odp_caps(dev);
+
         if (mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_XRC) {
                 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
                 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
@@ -1379,16 +1393,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
                 goto err_eqs;
  
         mutex_init(&dev->cap_mask_mutex);
-       spin_lock_init(&dev->mr_lock);
  
         err = create_dev_resources(&dev->devr);
         if (err)
                 goto err_eqs;
  
-       err = ib_register_device(&dev->ib_dev, NULL);
+       err = mlx5_ib_odp_init_one(dev);
         if (err)
                 goto err_rsrc;
  
+       err = ib_register_device(&dev->ib_dev, NULL);
+       if (err)
+               goto err_odp;
+
         err = create_umr_res(dev);
         if (err)
                 goto err_dev;
@@ -1410,6 +1427,9 @@ err_umrc:
  err_dev:
         ib_unregister_device(&dev->ib_dev);
  
+err_odp:
+       mlx5_ib_odp_remove_one(dev);
+
  err_rsrc:
         destroy_dev_resources(&dev->devr);
  
@@ -1425,8 +1445,10 @@ err_dealloc:
  static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
  {
         struct mlx5_ib_dev *dev = context;
+
         ib_unregister_device(&dev->ib_dev);
         destroy_umrc_res(dev);
+       mlx5_ib_odp_remove_one(dev);
         destroy_dev_resources(&dev->devr);
         free_comp_eqs(dev);
         ib_dealloc_device(&dev->ib_dev);
@@ -1440,15 +1462,30 @@ static struct mlx5_interface mlx5_ib_interface = {
  
  static int __init mlx5_ib_init(void)
  {
+       int err;
+
         if (deprecated_prof_sel != 2)
                 pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
  
-       return mlx5_register_interface(&mlx5_ib_interface);
+       err = mlx5_ib_odp_init();
+       if (err)
+               return err;
+
+       err = mlx5_register_interface(&mlx5_ib_interface);
+       if (err)
+               goto clean_odp;
+
+       return err;
+
+clean_odp:
+       mlx5_ib_odp_cleanup();
+       return err;
  }
  
  static void __exit mlx5_ib_cleanup(void)
  {
         mlx5_unregister_interface(&mlx5_ib_interface);
+       mlx5_ib_odp_cleanup();
  }
  
  module_init(mlx5_ib_init);
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c

index dae07eae95073b8d08b42de972e62600560d17e3..b56e4c5593ee92f3fe69d542c77919ee93e3cfc7 100644 (file)
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -32,6 +32,7 @@
  
  #include <linux/module.h>
  #include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
  #include "mlx5_ib.h"
  
  /* @umem: umem object to scan
@@ -57,6 +58,17 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
         int entry;
         unsigned long page_shift = ilog2(umem->page_size);
  
+       /* With ODP we must always match OS page size. */
+       if (umem->odp_data) {
+               *count = ib_umem_page_count(umem);
+               *shift = PAGE_SHIFT;
+               *ncont = *count;
+               if (order)
+                       *order = ilog2(roundup_pow_of_two(*count));
+
+               return;
+       }
+
         addr = addr >> page_shift;
         tmp = (unsigned long)addr;
         m = find_first_bit(&tmp, sizeof(tmp));
@@ -108,8 +120,36 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
         *count = i;
  }
  
-void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
-                         int page_shift, __be64 *pas, int umr)
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+/* Translate an ODP dma_list entry into an MTT entry with access bits. */
+static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
+{
+       u64 access = 0;
+
+       if (umem_dma & ODP_READ_ALLOWED_BIT)
+               access |= MLX5_IB_MTT_READ;
+       if (umem_dma & ODP_WRITE_ALLOWED_BIT)
+               access |= MLX5_IB_MTT_WRITE;
+       return (umem_dma & ODP_DMA_ADDR_MASK) | access;
+}
+#endif
+
+/*
+ * Populate the given array with bus addresses from the umem.
+ *
+ * dev - mlx5_ib device
+ * umem - umem to use to fill the pages
+ * page_shift - determines the page size used in the resulting array
+ * offset - offset into the umem to start from,
+ *          only implemented for ODP umems
+ * num_pages - total number of pages to fill
+ * pas - bus addresses array to fill
+ * access_flags - access flags to set on all present pages.
+ *                use enum mlx5_ib_mtt_access_flags for this.
+ */
+void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+                           int page_shift, size_t offset, size_t num_pages,
+                           __be64 *pas, int access_flags)
  {
         unsigned long umem_page_shift = ilog2(umem->page_size);
         int shift = page_shift - umem_page_shift;
@@ -120,6 +160,21 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
         int len;
         struct scatterlist *sg;
         int entry;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+       const bool odp = umem->odp_data != NULL;
+
+       if (odp) {
+               WARN_ON(shift != 0);
+               WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
+
+               for (i = 0; i < num_pages; ++i) {
+                       dma_addr_t pa = umem->odp_data->dma_list[offset + i];
+
+                       pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
+               }
+               return;
+       }
+#endif
  
         i = 0;
         for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
@@ -128,8 +183,7 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
                 for (k = 0; k < len; k++) {
                         if (!(i & mask)) {
                                 cur = base + (k << umem_page_shift);
-                               if (umr)
-                                       cur |= 3;
+                               cur |= access_flags;
  
                                 pas[i >> shift] = cpu_to_be64(cur);
                                 mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
@@ -142,6 +196,13 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
         }
  }
  
+/* Fill pas[] with ib_umem_num_pages(umem) entries, starting at offset 0. */
+void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+                         int page_shift, __be64 *pas, int access_flags)
+{
+       __mlx5_ib_populate_pas(dev, umem, page_shift, 0,
+                              ib_umem_num_pages(umem), pas, access_flags);
+}
  int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
  {
         u64 page_size;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h

index 386780f0d1e18304927d8a3c42705c4be916b731..83f22fe297c8ac522efa80001819990cdc31508b 100644 (file)
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -111,6 +111,8 @@ struct mlx5_ib_pd {
   */
  
  #define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START
+#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1)
+#define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2)
  #define MLX5_IB_QPT_REG_UMR    IB_QPT_RESERVED1
  #define MLX5_IB_WR_UMR         IB_WR_RESERVED1
  
@@ -147,6 +149,29 @@ enum {
         MLX5_QP_EMPTY
  };
  
+/*
+ * Connect-IB can trigger up to four concurrent pagefaults
+ * per-QP.
+ */
+enum mlx5_ib_pagefault_context {
+       MLX5_IB_PAGEFAULT_RESPONDER_READ,
+       MLX5_IB_PAGEFAULT_REQUESTOR_READ,
+       MLX5_IB_PAGEFAULT_RESPONDER_WRITE,
+       MLX5_IB_PAGEFAULT_REQUESTOR_WRITE,
+       MLX5_IB_PAGEFAULT_CONTEXTS
+};
+
+static inline enum mlx5_ib_pagefault_context
+       mlx5_ib_get_pagefault_context(struct mlx5_pagefault *pagefault)
+{
+       return pagefault->flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE);
+}
+
+struct mlx5_ib_pfault {
+       struct work_struct      work;
+       struct mlx5_pagefault   mpfault;
+};
+
  struct mlx5_ib_qp {
         struct ib_qp            ibqp;
         struct mlx5_core_qp     mqp;
@@ -192,6 +217,21 @@ struct mlx5_ib_qp {
  
         /* Store signature errors */
         bool                    signature_en;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+       /*
+        * A flag that is true for QP's that are in a state that doesn't
+        * allow page faults, and shouldn't schedule any more faults.
+        */
+       int                     disable_page_faults;
+       /*
+        * The disable_page_faults_lock protects a QP's disable_page_faults
+        * field, allowing for a thread to atomically check whether the QP
+        * allows page faults, and if so schedule a page fault.
+        */
+       spinlock_t              disable_page_faults_lock;
+       struct mlx5_ib_pfault   pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
+#endif
  };
  
  struct mlx5_ib_cq_buf {
@@ -206,6 +246,19 @@ enum mlx5_ib_qp_flags {
         MLX5_IB_QP_SIGNATURE_HANDLING           = 1 << 1,
  };
  
+struct mlx5_umr_wr {
+       union {
+               u64                     virt_addr;
+               u64                     offset;
+       } target;
+       struct ib_pd                   *pd;
+       unsigned int                    page_shift;
+       unsigned int                    npages;
+       u32                             length;
+       int                             access_flags;
+       u32                             mkey;
+};
+
  struct mlx5_shared_mr_info {
         int mr_id;
         struct ib_umem          *umem;
@@ -253,6 +306,13 @@ struct mlx5_ib_xrcd {
         u32                     xrcdn;
  };
  
+/*
+ * Per-entry access bits, passed as the access_flags argument of
+ * mlx5_ib_populate_pas() and __mlx5_ib_populate_pas().
+ */
+enum mlx5_ib_mtt_access_flags {
+       MLX5_IB_MTT_READ  = (1 << 0),
+       MLX5_IB_MTT_WRITE = (1 << 1),
+};
+
+/* Both access bits set: the value callers pass for a fully mapped page. */
+#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
+
  struct mlx5_ib_mr {
         struct ib_mr            ibmr;
         struct mlx5_core_mr     mmr;
@@ -261,12 +321,11 @@ struct mlx5_ib_mr {
         struct list_head        list;
         int                     order;
         int                     umred;
-       __be64                  *pas;
-       dma_addr_t              dma;
         int                     npages;
         struct mlx5_ib_dev     *dev;
         struct mlx5_create_mkey_mbox_out out;
         struct mlx5_core_sig_ctx    *sig;
+       int                     live;
  };
  
  struct mlx5_ib_fast_reg_page_list {
@@ -372,11 +431,18 @@ struct mlx5_ib_dev {
         struct umr_common               umrc;
         /* sync used page count stats
          */
-       spinlock_t                      mr_lock;
         struct mlx5_ib_resources        devr;
         struct mlx5_mr_cache            cache;
         struct timer_list               delay_timer;
         int                             fill_delay;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+       struct ib_odp_caps      odp_caps;
+       /*
+        * Sleepable RCU that prevents destruction of MRs while they are still
+        * being used by a page fault handler.
+        */
+       struct srcu_struct      mr_srcu;
+#endif
  };
  
  static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -490,6 +556,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
  int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                       struct ib_recv_wr **bad_wr);
  void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
+int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
+                         void *buffer, u32 length);
  struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
                                 int vector, struct ib_ucontext *context,
                                 struct ib_udata *udata);
@@ -502,6 +570,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
  struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                   u64 virt_addr, int access_flags,
                                   struct ib_udata *udata);
+int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
+                      int npages, int zap);
  int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
  int mlx5_ib_destroy_mr(struct ib_mr *ibmr);
  struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
@@ -533,8 +603,11 @@ int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
  void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
  void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
                         int *ncont, int *order);
+void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+                           int page_shift, size_t offset, size_t num_pages,
+                           __be64 *pas, int access_flags);
  void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
-                         int page_shift, __be64 *pas, int umr);
+                         int page_shift, __be64 *pas, int access_flags);
  void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
  int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
  int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
@@ -544,6 +617,38 @@ void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
  int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
                             struct ib_mr_status *mr_status);
  
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+extern struct workqueue_struct *mlx5_ib_page_fault_wq;
+
+int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev);
+void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
+                              struct mlx5_ib_pfault *pfault);
+void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp);
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
+void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev);
+int __init mlx5_ib_odp_init(void);
+void mlx5_ib_odp_cleanup(void);
+void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp);
+void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
+void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
+                             unsigned long end);
+
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+/*
+ * Stubs used when CONFIG_INFINIBAND_ON_DEMAND_PAGING is not set: the
+ * void functions do nothing and the int-returning ones return 0, so
+ * callers can invoke them without their own #ifdef guards.
+ */
+static inline int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev)
+{
+       return 0;
+}
+
+static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)                {}
+static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
+static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)   {}
+static inline int mlx5_ib_odp_init(void) { return 0; }
+static inline void mlx5_ib_odp_cleanup(void)                           {}
+static inline void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) {}
+static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp)  {}
+
+#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
  static inline void init_query_mad(struct ib_smp *mad)
  {
         mad->base_version  = 1;
@@ -561,4 +666,7 @@ static inline u8 convert_access(int acc)
                MLX5_PERM_LOCAL_READ;
  }
  
+/*
+ * MLX5_MAX_UMR_SHIFT is the largest order for which use_umr() in mr.c
+ * returns true. MLX5_MAX_UMR_PAGES (1 << 16 = 65536) is the corresponding
+ * page count, used as the upper bound on the page range accepted by
+ * mlx5_ib_update_mtt().
+ */
+#define MLX5_MAX_UMR_SHIFT 16
+#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
+
  #endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c

index 5a80dd9937612f5804d64135838ff389570e6b6e..32a28bd50b20ae08c41a9a7086b72045f7934c3d 100644 (file)
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -37,21 +37,34 @@
  #include <linux/export.h>
  #include <linux/delay.h>
  #include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
+#include <rdma/ib_verbs.h>
  #include "mlx5_ib.h"
  
  enum {
         MAX_PENDING_REG_MR = 8,
  };
  
-enum {
-       MLX5_UMR_ALIGN  = 2048
-};
+/* Alignment, in bytes, of the page-address buffers handed to UMR. */
+#define MLX5_UMR_ALIGN 2048
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+/*
+ * Statically allocated fallback buffer of MLX5_UMR_MTT_MIN_CHUNK_SIZE
+ * bytes, used by mlx5_ib_update_mtt() when it cannot allocate a page.
+ * The mutex below is held for as long as the buffer is in use.
+ */
+static __be64 mlx5_ib_update_mtt_emergency_buffer[
+               MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
+       __aligned(MLX5_UMR_ALIGN);
+static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
+#endif
+
+static int clean_mr(struct mlx5_ib_mr *mr);
  
-static __be64 *mr_align(__be64 *ptr, int align)
+static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
  {
-       unsigned long mask = align - 1;
+       int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
  
-       return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+       /* Wait until all page fault handlers using the mr complete. */
+       synchronize_srcu(&dev->mr_srcu);
+#endif
+
+       return err;
  }
  
  static int order2idx(struct mlx5_ib_dev *dev, int order)
@@ -146,7 +159,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
                 mr->order = ent->order;
                 mr->umred = 1;
                 mr->dev = dev;
-               in->seg.status = 1 << 6;
+               in->seg.status = MLX5_MKEY_STATUS_FREE;
                 in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
                 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
                 in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
@@ -191,7 +204,7 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
                 ent->cur--;
                 ent->size--;
                 spin_unlock_irq(&ent->lock);
-               err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+               err = destroy_mkey(dev, mr);
                 if (err)
                         mlx5_ib_warn(dev, "failed destroy mkey\n");
                 else
@@ -482,7 +495,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
                 ent->cur--;
                 ent->size--;
                 spin_unlock_irq(&ent->lock);
-               err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+               err = destroy_mkey(dev, mr);
                 if (err)
                         mlx5_ib_warn(dev, "failed destroy mkey\n");
                 else
@@ -668,7 +681,7 @@ static int get_octo_len(u64 addr, u64 len, int page_size)
  
  static int use_umr(int order)
  {
-       return order <= 17;
+       return order <= MLX5_MAX_UMR_SHIFT;
  }
  
  static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
@@ -678,6 +691,7 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
  {
         struct mlx5_ib_dev *dev = to_mdev(pd->device);
         struct ib_mr *mr = dev->umrc.mr;
+       struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
  
         sg->addr = dma;
         sg->length = ALIGN(sizeof(u64) * n, 64);
@@ -692,21 +706,24 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
                 wr->num_sge = 0;
  
         wr->opcode = MLX5_IB_WR_UMR;
-       wr->wr.fast_reg.page_list_len = n;
-       wr->wr.fast_reg.page_shift = page_shift;
-       wr->wr.fast_reg.rkey = key;
-       wr->wr.fast_reg.iova_start = virt_addr;
-       wr->wr.fast_reg.length = len;
-       wr->wr.fast_reg.access_flags = access_flags;
-       wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
+
+       umrwr->npages = n;
+       umrwr->page_shift = page_shift;
+       umrwr->mkey = key;
+       umrwr->target.virt_addr = virt_addr;
+       umrwr->length = len;
+       umrwr->access_flags = access_flags;
+       umrwr->pd = pd;
  }
  
  static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
                                struct ib_send_wr *wr, u32 key)
  {
-       wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
+       struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+
+       wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
         wr->opcode = MLX5_IB_WR_UMR;
-       wr->wr.fast_reg.rkey = key;
+       umrwr->mkey = key;
  }
  
  void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
@@ -742,7 +759,10 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
         struct ib_send_wr wr, *bad;
         struct mlx5_ib_mr *mr;
         struct ib_sge sg;
-       int size = sizeof(u64) * npages;
+       int size;
+       __be64 *mr_pas;
+       __be64 *pas;
+       dma_addr_t dma;
         int err = 0;
         int i;
  
@@ -761,25 +781,31 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
         if (!mr)
                 return ERR_PTR(-EAGAIN);
  
-       mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
-       if (!mr->pas) {
+       /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
+        * To avoid copying garbage after the pas array, we allocate
+        * a little more. */
+       size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
+       mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
+       if (!mr_pas) {
                 err = -ENOMEM;
                 goto free_mr;
         }
  
-       mlx5_ib_populate_pas(dev, umem, page_shift,
-                            mr_align(mr->pas, MLX5_UMR_ALIGN), 1);
+       pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN);
+       mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
+       /* Clear padding after the actual pages. */
+       memset(pas + npages, 0, size - npages * sizeof(u64));
  
-       mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
-                                DMA_TO_DEVICE);
-       if (dma_mapping_error(ddev, mr->dma)) {
+       dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
+       if (dma_mapping_error(ddev, dma)) {
                 err = -ENOMEM;
                 goto free_pas;
         }
  
         memset(&wr, 0, sizeof(wr));
         wr.wr_id = (u64)(unsigned long)&umr_context;
-       prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);
+       prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift,
+                        virt_addr, len, access_flags);
  
         mlx5_ib_init_umr_context(&umr_context);
         down(&umrc->sem);
@@ -799,12 +825,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
         mr->mmr.size = len;
         mr->mmr.pd = to_mpd(pd)->pdn;
  
+       mr->live = 1;
+
  unmap_dma:
         up(&umrc->sem);
-       dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
+       dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
  
  free_pas:
-       kfree(mr->pas);
+       kfree(mr_pas);
  
  free_mr:
         if (err) {
@@ -815,6 +843,128 @@ free_mr:
         return mr;
  }
  
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+/*
+ * mlx5_ib_update_mtt - rewrite a range of an MR's MTT entries through UMR.
+ * @mr:               memory region whose mkey (mr->mmr.key) is updated
+ * @start_page_index: index of the first page entry to update; rounded down
+ *                    to a multiple of MLX5_UMR_MTT_ALIGNMENT / sizeof(u64)
+ * @npages:           number of entries to update; grown to cover the
+ *                    rounding of @start_page_index and then rounded up to
+ *                    the same alignment
+ * @zap:              if non-zero, the entries are not populated from the
+ *                    umem and the zero-filled buffer is sent as is
+ *                    (NOTE(review): presumably a zero entry marks the page
+ *                    as not present -- confirm)
+ *
+ * The range is sent in chunks of at most one buffer worth of entries, each
+ * chunk as a separate UMR work request posted on the device's UMR QP while
+ * holding umrc->sem, waiting for its completion before continuing.
+ *
+ * May sleep (semaphore, completion wait, and the emergency buffer mutex).
+ *
+ * Returns 0 on success, -EINVAL if the aligned range extends beyond
+ * MLX5_MAX_UMR_PAGES, -ENOMEM if the buffer cannot be DMA mapped, the
+ * error from ib_post_send() if posting fails, or -EFAULT if a UMR
+ * completes with a status other than IB_WC_SUCCESS.
+ */
+int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
+                      int zap)
+{
+       struct mlx5_ib_dev *dev = mr->dev;
+       struct device *ddev = dev->ib_dev.dma_device;
+       struct umr_common *umrc = &dev->umrc;
+       struct mlx5_ib_umr_context umr_context;
+       struct ib_umem *umem = mr->umem;
+       int size;
+       __be64 *pas;
+       dma_addr_t dma;
+       struct ib_send_wr wr, *bad;
+       struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg;
+       struct ib_sge sg;
+       int err = 0;
+       const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
+       const int page_index_mask = page_index_alignment - 1;
+       size_t pages_mapped = 0;
+       size_t pages_to_map = 0;
+       size_t pages_iter = 0;
+       int use_emergency_buf = 0;
+
+       /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
+        * so we need to align the offset and length accordingly */
+       if (start_page_index & page_index_mask) {
+               npages += start_page_index & page_index_mask;
+               start_page_index &= ~page_index_mask;
+       }
+
+       pages_to_map = ALIGN(npages, page_index_alignment);
+
+       if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
+               return -EINVAL;
+
+       /* The staging buffer is a single page, so cap the chunk size at
+        * PAGE_SIZE bytes. */
+       size = sizeof(u64) * pages_to_map;
+       size = min_t(int, PAGE_SIZE, size);
+       /* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
+        * code, when we are called from an invalidation. The pas buffer must
+        * be 2k-aligned for Connect-IB. */
+       pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
+       if (!pas) {
+               mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
+               pas = mlx5_ib_update_mtt_emergency_buffer;
+               size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
+               use_emergency_buf = 1;
+               mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+               memset(pas, 0, size);
+       }
+       /* Number of entries that fit in the buffer, i.e. per UMR. */
+       pages_iter = size / sizeof(u64);
+       dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
+       if (dma_mapping_error(ddev, dma)) {
+               mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
+               err = -ENOMEM;
+               goto free_pas;
+       }
+
+       /* Walk the range one buffer at a time, reusing the same DMA mapping
+        * for every chunk; stop at the first error. */
+       for (pages_mapped = 0;
+            pages_mapped < pages_to_map && !err;
+            pages_mapped += pages_iter, start_page_index += pages_iter) {
+               dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
+
+               npages = min_t(size_t,
+                              pages_iter,
+                              ib_umem_num_pages(umem) - start_page_index);
+
+               if (!zap) {
+                       __mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
+                                              start_page_index, npages, pas,
+                                              MLX5_IB_MTT_PRESENT);
+                       /* Clear padding after the pages brought from the
+                        * umem. */
+                       memset(pas + npages, 0, size - npages * sizeof(u64));
+               }
+
+               dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
+
+               memset(&wr, 0, sizeof(wr));
+               wr.wr_id = (u64)(unsigned long)&umr_context;
+
+               sg.addr = dma;
+               sg.length = ALIGN(npages * sizeof(u64),
+                               MLX5_UMR_MTT_ALIGNMENT);
+               sg.lkey = dev->umrc.mr->lkey;
+
+               wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
+                               MLX5_IB_SEND_UMR_UPDATE_MTT;
+               wr.sg_list = &sg;
+               wr.num_sge = 1;
+               wr.opcode = MLX5_IB_WR_UMR;
+               umrwr->npages = sg.length / sizeof(u64);
+               umrwr->page_shift = PAGE_SHIFT;
+               umrwr->mkey = mr->mmr.key;
+               umrwr->target.offset = start_page_index;
+
+               /* umrc->sem is held from posting until the completion has
+                * been observed (or posting has failed). */
+               mlx5_ib_init_umr_context(&umr_context);
+               down(&umrc->sem);
+               err = ib_post_send(umrc->qp, &wr, &bad);
+               if (err) {
+                       mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
+               } else {
+                       wait_for_completion(&umr_context.done);
+                       if (umr_context.status != IB_WC_SUCCESS) {
+                               mlx5_ib_err(dev, "UMR completion failed, code %d\n",
+                                           umr_context.status);
+                               err = -EFAULT;
+                       }
+               }
+               up(&umrc->sem);
+       }
+       dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
+
+free_pas:
+       /* Release whichever buffer was used: free the page, or drop the
+        * mutex guarding the static emergency buffer. */
+       if (!use_emergency_buf)
+               free_page((unsigned long)pas);
+       else
+               mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+
+       return err;
+}
+#endif
+
  static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
                                      u64 length, struct ib_umem *umem,
                                      int npages, int page_shift,
@@ -825,6 +975,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
         struct mlx5_ib_mr *mr;
         int inlen;
         int err;
+       bool pg_cap = !!(dev->mdev->caps.gen.flags &
+                        MLX5_DEV_CAP_FLAG_ON_DMND_PG);
  
         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
         if (!mr)
@@ -836,8 +988,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
                 err = -ENOMEM;
                 goto err_1;
         }
-       mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);
+       mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
+                            pg_cap ? MLX5_IB_MTT_PRESENT : 0);
  
+       /* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
+        * in the page list submitted with the command. */
+       in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
         in->seg.flags = convert_access(access_flags) |
                 MLX5_ACCESS_MODE_MTT;
         in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
@@ -856,6 +1012,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
                 goto err_2;
         }
         mr->umem = umem;
+       mr->live = 1;
         kvfree(in);
  
         mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
@@ -910,6 +1067,10 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                         mlx5_ib_dbg(dev, "cache empty for order %d", order);
                         mr = NULL;
                 }
+       } else if (access_flags & IB_ACCESS_ON_DEMAND) {
+               err = -EINVAL;
+               pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
+               goto error;
         }
  
         if (!mr)
@@ -925,16 +1086,51 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
  
         mr->umem = umem;
         mr->npages = npages;
-       spin_lock(&dev->mr_lock);
-       dev->mdev->priv.reg_pages += npages;
-       spin_unlock(&dev->mr_lock);
+       atomic_add(npages, &dev->mdev->priv.reg_pages);
         mr->ibmr.lkey = mr->mmr.key;
         mr->ibmr.rkey = mr->mmr.key;
  
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+       if (umem->odp_data) {
+               /*
+                * This barrier prevents the compiler from moving the
+                * setting of umem->odp_data->private to point to our
+                * MR, before reg_umr finished, to ensure that the MR
+                * initialization has finished before starting to
+                * handle invalidations.
+                */
+               smp_wmb();
+               mr->umem->odp_data->private = mr;
+               /*
+                * Make sure we will see the new
+                * umem->odp_data->private value in the invalidation
+                * routines, before we can get page faults on the
+                * MR. Page faults can happen once we put the MR in
+                * the tree, below this line. Without the barrier,
+                * there can be a fault handling and an invalidation
+                * before umem->odp_data->private == mr is visible to
+                * the invalidation handler.
+                */
+               smp_wmb();
+       }
+#endif
+
         return &mr->ibmr;
  
  error:
+       /*
+        * Destroy the umem *before* destroying the MR, to ensure we
+        * will not have any in-flight notifiers when destroying the
+        * MR.
+        *
+        * As the MR is completely invalid to begin with, and this
+        * error path is only taken if we can't push the mr entry into
+        * the pagefault tree, this is safe.
+        */
+
         ib_umem_release(umem);
+       /* Kill the MR, and return an error code. */
+       clean_mr(mr);
         return ERR_PTR(err);
  }
  
@@ -971,17 +1167,14 @@ error:
         return err;
  }
  
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+static int clean_mr(struct mlx5_ib_mr *mr)
  {
-       struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
-       struct mlx5_ib_mr *mr = to_mmr(ibmr);
-       struct ib_umem *umem = mr->umem;
-       int npages = mr->npages;
+       struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
         int umred = mr->umred;
         int err;
  
         if (!umred) {
-               err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+               err = destroy_mkey(dev, mr);
                 if (err) {
                         mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
                                      mr->mmr.key, err);
@@ -996,15 +1189,47 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
                 free_cached_mr(dev, mr);
         }
  
-       if (umem) {
+       if (!umred)
+               kfree(mr);
+
+       return 0;
+}
+
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+       struct mlx5_ib_mr *mr = to_mmr(ibmr);
+       int npages = mr->npages;
+       struct ib_umem *umem = mr->umem;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+       if (umem && umem->odp_data) {
+               /* Prevent new page faults from succeeding */
+               mr->live = 0;
+               /* Wait for all running page-fault handlers to finish. */
+               synchronize_srcu(&dev->mr_srcu);
+               /* Destroy all page mappings */
+               mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
+                                        ib_umem_end(umem));
+               /*
+                * We kill the umem before the MR for ODP,
+                * so that there will not be any invalidations in
+                * flight, looking at the *mr struct.
+                */
                 ib_umem_release(umem);
-               spin_lock(&dev->mr_lock);
-               dev->mdev->priv.reg_pages -= npages;
-               spin_unlock(&dev->mr_lock);
+               atomic_sub(npages, &dev->mdev->priv.reg_pages);
+
+               /* Avoid double-freeing the umem. */
+               umem = NULL;
         }
+#endif
  
-       if (!umred)
-               kfree(mr);
+       clean_mr(mr);
+
+       if (umem) {
+               ib_umem_release(umem);
+               atomic_sub(npages, &dev->mdev->priv.reg_pages);
+       }
  
         return 0;
  }
@@ -1028,7 +1253,7 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
                 goto err_free;
         }
  
-       in->seg.status = 1 << 6; /* free */
+       in->seg.status = MLX5_MKEY_STATUS_FREE;
         in->seg.xlt_oct_size = cpu_to_be32(ndescs);
         in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
         in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
@@ -1113,7 +1338,7 @@ int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
                 kfree(mr->sig);
         }
  
-       err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+       err = destroy_mkey(dev, mr);
         if (err) {
                 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
                              mr->mmr.key, err);
@@ -1143,7 +1368,7 @@ struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
                 goto err_free;
         }
  
-       in->seg.status = 1 << 6; /* free */
+       in->seg.status = MLX5_MKEY_STATUS_FREE;
         in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
         in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
         in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c

new file mode 100644 (file)

index 0000000..a2c541c
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -0,0 +1,798 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
+
+#include "mlx5_ib.h"
+
+#define MAX_PREFETCH_LEN (4*1024*1024U)
+
+/* Timeout in ms to wait for an active mmu notifier to complete when handling
+ * a pagefault. */
+#define MMU_NOTIFIER_TIMEOUT 1000
+
+struct workqueue_struct *mlx5_ib_page_fault_wq;
+
+void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
+                             unsigned long end)
+{
+       struct mlx5_ib_mr *mr;
+       const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(u64)) - 1;
+       u64 idx = 0, blk_start_idx = 0;
+       int in_block = 0;
+       u64 addr;
+
+       if (!umem || !umem->odp_data) {
+               pr_err("invalidation called on NULL umem or non-ODP umem\n");
+               return;
+       }
+
+       mr = umem->odp_data->private;
+
+       if (!mr || !mr->ibmr.pd)
+               return;
+
+       start = max_t(u64, ib_umem_start(umem), start);
+       end = min_t(u64, ib_umem_end(umem), end);
+
+       /*
+        * Iteration one - zap the HW's MTTs. The notifiers_count ensures that
+        * while we are doing the invalidation, no page fault will attempt to
+        * overwrite the same MTTs.  Concurrent invalidations might race us,
+        * but they will write 0s as well, so no difference in the end result.
+        */
+
+       for (addr = start; addr < end; addr += (u64)umem->page_size) {
+               idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
+               /*
+                * Strive to write the MTTs in chunks, but avoid overwriting
+                * non-existing MTTs. The heuristic here can be improved to
+                * estimate the cost of another UMR vs. the cost of bigger
+                * UMR.
+                */
+               if (umem->odp_data->dma_list[idx] &
+                   (ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) {
+                       if (!in_block) {
+                               blk_start_idx = idx;
+                               in_block = 1;
+                       }
+               } else {
+                       u64 umr_offset = idx & umr_block_mask;
+
+                       if (in_block && umr_offset == 0) {
+                               mlx5_ib_update_mtt(mr, blk_start_idx,
+                                                  idx - blk_start_idx, 1);
+                               in_block = 0;
+                       }
+               }
+       }
+       if (in_block)
+               mlx5_ib_update_mtt(mr, blk_start_idx, idx - blk_start_idx + 1,
+                                  1);
+
+       /*
+        * We are now sure that the device will not access the
+        * memory. We can safely unmap it, and mark it as dirty if
+        * needed.
+        */
+
+       ib_umem_odp_unmap_dma_pages(umem, start, end);
+}
+
+#define COPY_ODP_BIT_MLX_TO_IB(reg, ib_caps, field_name, bit_name) do {        \
+       if (be32_to_cpu(reg.field_name) & MLX5_ODP_SUPPORT_##bit_name)  \
+               ib_caps->field_name |= IB_ODP_SUPPORT_##bit_name;       \
+} while (0)
+
+int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev)
+{
+       int err;
+       struct mlx5_odp_caps hw_caps;
+       struct ib_odp_caps *caps = &dev->odp_caps;
+
+       memset(caps, 0, sizeof(*caps));
+
+       if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG))
+               return 0;
+
+       err = mlx5_query_odp_caps(dev->mdev, &hw_caps);
+       if (err)
+               goto out;
+
+       caps->general_caps = IB_ODP_SUPPORT;
+       COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.ud_odp_caps,
+                              SEND);
+       COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps,
+                              SEND);
+       COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps,
+                              RECV);
+       COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps,
+                              WRITE);
+       COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps,
+                              READ);
+
+out:
+       return err;
+}
+
+static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
+                                                  u32 key)
+{
+       u32 base_key = mlx5_base_mkey(key);
+       struct mlx5_core_mr *mmr = __mlx5_mr_lookup(dev->mdev, base_key);
+       struct mlx5_ib_mr *mr = container_of(mmr, struct mlx5_ib_mr, mmr);
+
+       if (!mmr || mmr->key != key || !mr->live)
+               return NULL;
+
+       return container_of(mmr, struct mlx5_ib_mr, mmr);
+}
+
+static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
+                                     struct mlx5_ib_pfault *pfault,
+                                     int error) {
+       struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
+       int ret = mlx5_core_page_fault_resume(dev->mdev, qp->mqp.qpn,
+                                             pfault->mpfault.flags,
+                                             error);
+       if (ret)
+               pr_err("Failed to resolve the page fault on QP 0x%x\n",
+                      qp->mqp.qpn);
+}
+
+/*
+ * Handle a single data segment in a page-fault WQE.
+ *
+ * Returns number of pages retrieved on success. The caller will continue to
+ * the next data segment.
+ * Can return the following error codes:
+ * -EAGAIN to designate a temporary error. The caller will abort handling the
+ *  page fault and resolve it.
+ * -EFAULT when there's an error mapping the requested pages. The caller will
+ *  abort the page fault handling and possibly move the QP to an error state.
+ * On other errors the QP should also be closed with an error.
+ */
+static int pagefault_single_data_segment(struct mlx5_ib_qp *qp,
+                                        struct mlx5_ib_pfault *pfault,
+                                        u32 key, u64 io_virt, size_t bcnt,
+                                        u32 *bytes_mapped)
+{
+       struct mlx5_ib_dev *mib_dev = to_mdev(qp->ibqp.pd->device);
+       int srcu_key;
+       unsigned int current_seq;
+       u64 start_idx;
+       int npages = 0, ret = 0;
+       struct mlx5_ib_mr *mr;
+       u64 access_mask = ODP_READ_ALLOWED_BIT;
+
+       srcu_key = srcu_read_lock(&mib_dev->mr_srcu);
+       mr = mlx5_ib_odp_find_mr_lkey(mib_dev, key);
+       /*
+        * If we didn't find the MR, it means the MR was closed while we were
+        * handling the ODP event. In this case we return -EFAULT so that the
+        * QP will be closed.
+        */
+       if (!mr || !mr->ibmr.pd) {
+               pr_err("Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
+                      key);
+               ret = -EFAULT;
+               goto srcu_unlock;
+       }
+       if (!mr->umem->odp_data) {
+               pr_debug("skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+                        key);
+               if (bytes_mapped)
+                       *bytes_mapped +=
+                               (bcnt - pfault->mpfault.bytes_committed);
+               goto srcu_unlock;
+       }
+       if (mr->ibmr.pd != qp->ibqp.pd) {
+               pr_err("Page-fault with different PDs for QP and MR.\n");
+               ret = -EFAULT;
+               goto srcu_unlock;
+       }
+
+       current_seq = ACCESS_ONCE(mr->umem->odp_data->notifiers_seq);
+       /*
+        * Ensure the sequence number is valid for some time before we call
+        * gup.
+        */
+       smp_rmb();
+
+       /*
+        * Avoid branches - this code will perform correctly
+        * in all iterations (in iteration 2 and above,
+        * bytes_committed == 0).
+        */
+       io_virt += pfault->mpfault.bytes_committed;
+       bcnt -= pfault->mpfault.bytes_committed;
+
+       start_idx = (io_virt - (mr->mmr.iova & PAGE_MASK)) >> PAGE_SHIFT;
+
+       if (mr->umem->writable)
+               access_mask |= ODP_WRITE_ALLOWED_BIT;
+       npages = ib_umem_odp_map_dma_pages(mr->umem, io_virt, bcnt,
+                                          access_mask, current_seq);
+       if (npages < 0) {
+               ret = npages;
+               goto srcu_unlock;
+       }
+
+       if (npages > 0) {
+               mutex_lock(&mr->umem->odp_data->umem_mutex);
+               if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
+                       /*
+                        * No need to check whether the MTTs really belong to
+                        * this MR, since ib_umem_odp_map_dma_pages already
+                        * checks this.
+                        */
+                       ret = mlx5_ib_update_mtt(mr, start_idx, npages, 0);
+               } else {
+                       ret = -EAGAIN;
+               }
+               mutex_unlock(&mr->umem->odp_data->umem_mutex);
+               if (ret < 0) {
+                       if (ret != -EAGAIN)
+                               pr_err("Failed to update mkey page tables\n");
+                       goto srcu_unlock;
+               }
+
+               if (bytes_mapped) {
+                       u32 new_mappings = npages * PAGE_SIZE -
+                               (io_virt - round_down(io_virt, PAGE_SIZE));
+                       *bytes_mapped += min_t(u32, new_mappings, bcnt);
+               }
+       }
+
+srcu_unlock:
+       if (ret == -EAGAIN) {
+               if (!mr->umem->odp_data->dying) {
+                       struct ib_umem_odp *odp_data = mr->umem->odp_data;
+                       unsigned long timeout =
+                               msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
+
+                       if (!wait_for_completion_timeout(
+                                       &odp_data->notifier_completion,
+                                       timeout)) {
+                               pr_warn("timeout waiting for mmu notifier completion\n");
+                       }
+               } else {
+                       /* The MR is being killed, kill the QP as well. */
+                       ret = -EFAULT;
+               }
+       }
+       srcu_read_unlock(&mib_dev->mr_srcu, srcu_key);
+       pfault->mpfault.bytes_committed = 0;
+       return ret ? ret : npages;
+}
+
+/**
+ * Parse a series of data segments for page fault handling.
+ *
+ * @qp: the QP on which the fault occurred.
+ * @pfault: contains page fault information.
+ * @wqe: points at the first data segment in the WQE.
+ * @wqe_end: points after the end of the WQE.
+ * @bytes_mapped: receives the number of bytes that the function was able to
+ *                map. This allows the caller to decide intelligently whether
+ *                enough memory was mapped to resolve the page fault
+ *                successfully (e.g. enough for the next MTU, or the entire
+ *                WQE).
+ * @total_wqe_bytes: receives the total data size of this WQE in bytes (minus
+ *                   the committed bytes).
+ *
+ * Returns the number of pages loaded if positive, zero for an empty WQE, or a
+ * negative error code.
+ */
+static int pagefault_data_segments(struct mlx5_ib_qp *qp,
+                                  struct mlx5_ib_pfault *pfault, void *wqe,
+                                  void *wqe_end, u32 *bytes_mapped,
+                                  u32 *total_wqe_bytes, int receive_queue)
+{
+       int ret = 0, npages = 0;
+       u64 io_virt;
+       u32 key;
+       u32 byte_count;
+       size_t bcnt;
+       int inline_segment;
+
+       /* Skip SRQ next-WQE segment. */
+       if (receive_queue && qp->ibqp.srq)
+               wqe += sizeof(struct mlx5_wqe_srq_next_seg);
+
+       if (bytes_mapped)
+               *bytes_mapped = 0;
+       if (total_wqe_bytes)
+               *total_wqe_bytes = 0;
+
+       while (wqe < wqe_end) {
+               struct mlx5_wqe_data_seg *dseg = wqe;
+
+               io_virt = be64_to_cpu(dseg->addr);
+               key = be32_to_cpu(dseg->lkey);
+               byte_count = be32_to_cpu(dseg->byte_count);
+               inline_segment = !!(byte_count &  MLX5_INLINE_SEG);
+               bcnt           = byte_count & ~MLX5_INLINE_SEG;
+
+               if (inline_segment) {
+                       bcnt = bcnt & MLX5_WQE_INLINE_SEG_BYTE_COUNT_MASK;
+                       wqe += ALIGN(sizeof(struct mlx5_wqe_inline_seg) + bcnt,
+                                    16);
+               } else {
+                       wqe += sizeof(*dseg);
+               }
+
+               /* receive WQE end of sg list. */
+               if (receive_queue && bcnt == 0 && key == MLX5_INVALID_LKEY &&
+                   io_virt == 0)
+                       break;
+
+               if (!inline_segment && total_wqe_bytes) {
+                       *total_wqe_bytes += bcnt - min_t(size_t, bcnt,
+                                       pfault->mpfault.bytes_committed);
+               }
+
+               /* A zero length data segment designates a length of 2GB. */
+               if (bcnt == 0)
+                       bcnt = 1U << 31;
+
+               if (inline_segment || bcnt <= pfault->mpfault.bytes_committed) {
+                       pfault->mpfault.bytes_committed -=
+                               min_t(size_t, bcnt,
+                                     pfault->mpfault.bytes_committed);
+                       continue;
+               }
+
+               ret = pagefault_single_data_segment(qp, pfault, key, io_virt,
+                                                   bcnt, bytes_mapped);
+               if (ret < 0)
+                       break;
+               npages += ret;
+       }
+
+       return ret < 0 ? ret : npages;
+}
+
+/*
+ * Parse initiator WQE. Advances the wqe pointer to point at the
+ * scatter-gather list, and set wqe_end to the end of the WQE.
+ */
+static int mlx5_ib_mr_initiator_pfault_handler(
+       struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault,
+       void **wqe, void **wqe_end, int wqe_length)
+{
+       struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
+       struct mlx5_wqe_ctrl_seg *ctrl = *wqe;
+       u16 wqe_index = pfault->mpfault.wqe.wqe_index;
+       unsigned ds, opcode;
+#if defined(DEBUG)
+       u32 ctrl_wqe_index, ctrl_qpn;
+#endif
+
+       ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
+       if (ds * MLX5_WQE_DS_UNITS > wqe_length) {
+               mlx5_ib_err(dev, "Unable to read the complete WQE. ds = 0x%x, ret = 0x%x\n",
+                           ds, wqe_length);
+               return -EFAULT;
+       }
+
+       if (ds == 0) {
+               mlx5_ib_err(dev, "Got WQE with zero DS. wqe_index=%x, qpn=%x\n",
+                           wqe_index, qp->mqp.qpn);
+               return -EFAULT;
+       }
+
+#if defined(DEBUG)
+       ctrl_wqe_index = (be32_to_cpu(ctrl->opmod_idx_opcode) &
+                       MLX5_WQE_CTRL_WQE_INDEX_MASK) >>
+                       MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
+       if (wqe_index != ctrl_wqe_index) {
+               mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
+                           wqe_index, qp->mqp.qpn,
+                           ctrl_wqe_index);
+               return -EFAULT;
+       }
+
+       ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
+               MLX5_WQE_CTRL_QPN_SHIFT;
+       if (qp->mqp.qpn != ctrl_qpn) {
+               mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
+                           wqe_index, qp->mqp.qpn,
+                           ctrl_qpn);
+               return -EFAULT;
+       }
+#endif /* DEBUG */
+
+       *wqe_end = *wqe + ds * MLX5_WQE_DS_UNITS;
+       *wqe += sizeof(*ctrl);
+
+       opcode = be32_to_cpu(ctrl->opmod_idx_opcode) &
+                MLX5_WQE_CTRL_OPCODE_MASK;
+       switch (qp->ibqp.qp_type) {
+       case IB_QPT_RC:
+               switch (opcode) {
+               case MLX5_OPCODE_SEND:
+               case MLX5_OPCODE_SEND_IMM:
+               case MLX5_OPCODE_SEND_INVAL:
+                       if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
+                             IB_ODP_SUPPORT_SEND))
+                               goto invalid_transport_or_opcode;
+                       break;
+               case MLX5_OPCODE_RDMA_WRITE:
+               case MLX5_OPCODE_RDMA_WRITE_IMM:
+                       if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
+                             IB_ODP_SUPPORT_WRITE))
+                               goto invalid_transport_or_opcode;
+                       *wqe += sizeof(struct mlx5_wqe_raddr_seg);
+                       break;
+               case MLX5_OPCODE_RDMA_READ:
+                       if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
+                             IB_ODP_SUPPORT_READ))
+                               goto invalid_transport_or_opcode;
+                       *wqe += sizeof(struct mlx5_wqe_raddr_seg);
+                       break;
+               default:
+                       goto invalid_transport_or_opcode;
+               }
+               break;
+       case IB_QPT_UD:
+               switch (opcode) {
+               case MLX5_OPCODE_SEND:
+               case MLX5_OPCODE_SEND_IMM:
+                       if (!(dev->odp_caps.per_transport_caps.ud_odp_caps &
+                             IB_ODP_SUPPORT_SEND))
+                               goto invalid_transport_or_opcode;
+                       *wqe += sizeof(struct mlx5_wqe_datagram_seg);
+                       break;
+               default:
+                       goto invalid_transport_or_opcode;
+               }
+               break;
+       default:
+invalid_transport_or_opcode:
+               mlx5_ib_err(dev, "ODP fault on QP of an unsupported opcode or transport. transport: 0x%x opcode: 0x%x.\n",
+                           qp->ibqp.qp_type, opcode);
+               return -EFAULT;
+       }
+
+       return 0;
+}
+
+/*
+ * Parse responder WQE. Advances the wqe pointer to point at the
+ * scatter-gather list, and set wqe_end to the end of the WQE.
+ */
+static int mlx5_ib_mr_responder_pfault_handler(
+       struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault,
+       void **wqe, void **wqe_end, int wqe_length)
+{
+       struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
+       struct mlx5_ib_wq *wq = &qp->rq;
+       int wqe_size = 1 << wq->wqe_shift;
+
+       if (qp->ibqp.srq) {
+               mlx5_ib_err(dev, "ODP fault on SRQ is not supported\n");
+               return -EFAULT;
+       }
+
+       if (qp->wq_sig) {
+               mlx5_ib_err(dev, "ODP fault with WQE signatures is not supported\n");
+               return -EFAULT;
+       }
+
+       if (wqe_size > wqe_length) {
+               mlx5_ib_err(dev, "Couldn't read all of the receive WQE's content\n");
+               return -EFAULT;
+       }
+
+       switch (qp->ibqp.qp_type) {
+       case IB_QPT_RC:
+               if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
+                     IB_ODP_SUPPORT_RECV))
+                       goto invalid_transport_or_opcode;
+               break;
+       default:
+invalid_transport_or_opcode:
+               mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport. transport: 0x%x\n",
+                           qp->ibqp.qp_type);
+               return -EFAULT;
+       }
+
+       *wqe_end = *wqe + wqe_size;
+
+       return 0;
+}
+
+static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
+                                         struct mlx5_ib_pfault *pfault)
+{
+       struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
+       int ret;
+       void *wqe, *wqe_end;
+       u32 bytes_mapped, total_wqe_bytes;
+       char *buffer = NULL;
+       int resume_with_error = 0;
+       u16 wqe_index = pfault->mpfault.wqe.wqe_index;
+       int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR;
+
+       buffer = (char *)__get_free_page(GFP_KERNEL);
+       if (!buffer) {
+               mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
+               resume_with_error = 1;
+               goto resolve_page_fault;
+       }
+
+       ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
+                                   PAGE_SIZE);
+       if (ret < 0) {
+               mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n",
+                           -ret, wqe_index, qp->mqp.qpn);
+               resume_with_error = 1;
+               goto resolve_page_fault;
+       }
+
+       wqe = buffer;
+       if (requestor)
+               ret = mlx5_ib_mr_initiator_pfault_handler(qp, pfault, &wqe,
+                                                         &wqe_end, ret);
+       else
+               ret = mlx5_ib_mr_responder_pfault_handler(qp, pfault, &wqe,
+                                                         &wqe_end, ret);
+       if (ret < 0) {
+               resume_with_error = 1;
+               goto resolve_page_fault;
+       }
+
+       if (wqe >= wqe_end) {
+               mlx5_ib_err(dev, "ODP fault on invalid WQE.\n");
+               resume_with_error = 1;
+               goto resolve_page_fault;
+       }
+
+       ret = pagefault_data_segments(qp, pfault, wqe, wqe_end, &bytes_mapped,
+                                     &total_wqe_bytes, !requestor);
+       if (ret == -EAGAIN) {
+               goto resolve_page_fault;
+       } else if (ret < 0 || total_wqe_bytes > bytes_mapped) {
+               mlx5_ib_err(dev, "Error getting user pages for page fault. Error: 0x%x\n",
+                           -ret);
+               resume_with_error = 1;
+               goto resolve_page_fault;
+       }
+
+resolve_page_fault:
+       mlx5_ib_page_fault_resume(qp, pfault, resume_with_error);
+       mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n",
+                   qp->mqp.qpn, resume_with_error, pfault->mpfault.flags);
+
+       free_page((unsigned long)buffer);
+}
+
+static int pages_in_range(u64 address, u32 length)
+{
+       return (ALIGN(address + length, PAGE_SIZE) -
+               (address & PAGE_MASK)) >> PAGE_SHIFT;
+}
+
+static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
+                                          struct mlx5_ib_pfault *pfault)
+{
+       struct mlx5_pagefault *mpfault = &pfault->mpfault;
+       u64 address;
+       u32 length;
+       u32 prefetch_len = mpfault->bytes_committed;
+       int prefetch_activated = 0;
+       u32 rkey = mpfault->rdma.r_key;
+       int ret;
+
+       /* The RDMA responder handler handles the page fault in two parts.
+        * First it brings the necessary pages for the current packet
+        * (and uses the pfault context), and then (after resuming the QP)
+        * prefetches more pages. The second operation cannot use the pfault
+        * context and therefore uses the dummy_pfault context allocated on
+        * the stack */
+       struct mlx5_ib_pfault dummy_pfault = {};
+
+       dummy_pfault.mpfault.bytes_committed = 0;
+
+       mpfault->rdma.rdma_va += mpfault->bytes_committed;
+       mpfault->rdma.rdma_op_len -= min(mpfault->bytes_committed,
+                                        mpfault->rdma.rdma_op_len);
+       mpfault->bytes_committed = 0;
+
+       address = mpfault->rdma.rdma_va;
+       length  = mpfault->rdma.rdma_op_len;
+
+       /* For some operations, the hardware cannot tell the exact message
+        * length, and in those cases it reports zero. Use prefetch
+        * logic. */
+       if (length == 0) {
+               prefetch_activated = 1;
+               length = mpfault->rdma.packet_size;
+               prefetch_len = min(MAX_PREFETCH_LEN, prefetch_len);
+       }
+
+       ret = pagefault_single_data_segment(qp, pfault, rkey, address, length,
+                                           NULL);
+       if (ret == -EAGAIN) {
+               /* We're racing with an invalidation, don't prefetch */
+               prefetch_activated = 0;
+       } else if (ret < 0 || pages_in_range(address, length) > ret) {
+               mlx5_ib_page_fault_resume(qp, pfault, 1);
+               return;
+       }
+
+       mlx5_ib_page_fault_resume(qp, pfault, 0);
+
+       /* At this point, there might be a new pagefault already arriving in
+        * the eq, switch to the dummy pagefault for the rest of the
+        * processing. We're still OK with the objects being alive as the
+        * work-queue is being fenced. */
+
+       if (prefetch_activated) {
+               ret = pagefault_single_data_segment(qp, &dummy_pfault, rkey,
+                                                   address,
+                                                   prefetch_len,
+                                                   NULL);
+               if (ret < 0) {
+                       pr_warn("Prefetch failed (ret = %d, prefetch_activated = %d) for QPN %d, address: 0x%.16llx, length = 0x%.16x\n",
+                               ret, prefetch_activated,
+                               qp->ibqp.qp_num, address, prefetch_len);
+               }
+       }
+}
+
+void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
+                              struct mlx5_ib_pfault *pfault)
+{
+       u8 event_subtype = pfault->mpfault.event_subtype;
+
+       switch (event_subtype) {
+       case MLX5_PFAULT_SUBTYPE_WQE:
+               mlx5_ib_mr_wqe_pfault_handler(qp, pfault);
+               break;
+       case MLX5_PFAULT_SUBTYPE_RDMA:
+               mlx5_ib_mr_rdma_pfault_handler(qp, pfault);
+               break;
+       default:
+               pr_warn("Invalid page fault event subtype: 0x%x\n",
+                       event_subtype);
+               mlx5_ib_page_fault_resume(qp, pfault, 1);
+               break;
+       }
+}
+
+static void mlx5_ib_qp_pfault_action(struct work_struct *work)
+{
+       struct mlx5_ib_pfault *pfault = container_of(work,
+                                                    struct mlx5_ib_pfault,
+                                                    work);
+       enum mlx5_ib_pagefault_context context =
+               mlx5_ib_get_pagefault_context(&pfault->mpfault);
+       struct mlx5_ib_qp *qp = container_of(pfault, struct mlx5_ib_qp,
+                                            pagefaults[context]);
+       mlx5_ib_mr_pfault_handler(qp, pfault);
+}
+
+void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
+       qp->disable_page_faults = 1;
+       spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
+
+       /*
+        * Note that at this point, we are guaranteed that no more
+        * work queue elements will be posted to the work queue with
+        * the QP we are closing.
+        */
+       flush_workqueue(mlx5_ib_page_fault_wq);
+}
+
+void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
+       qp->disable_page_faults = 0;
+       spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
+}
+
+static void mlx5_ib_pfault_handler(struct mlx5_core_qp *qp,
+                                  struct mlx5_pagefault *pfault)
+{
+       /*
+        * Note that we will only get one fault event per QP per context
+        * (responder/initiator, read/write), until we resolve the page fault
+        * with the mlx5_ib_page_fault_resume command. Since this function is
+        * called from within the work element, there is no risk of missing
+        * events.
+        */
+       struct mlx5_ib_qp *mibqp = to_mibqp(qp);
+       enum mlx5_ib_pagefault_context context =
+               mlx5_ib_get_pagefault_context(pfault);
+       struct mlx5_ib_pfault *qp_pfault = &mibqp->pagefaults[context];
+
+       qp_pfault->mpfault = *pfault;
+
+       /* No need to stop interrupts here since we are in an interrupt */
+       spin_lock(&mibqp->disable_page_faults_lock);
+       if (!mibqp->disable_page_faults)
+               queue_work(mlx5_ib_page_fault_wq, &qp_pfault->work);
+       spin_unlock(&mibqp->disable_page_faults_lock);
+}
+
+void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)
+{
+       int i;
+
+       qp->disable_page_faults = 1;
+       spin_lock_init(&qp->disable_page_faults_lock);
+
+       qp->mqp.pfault_handler  = mlx5_ib_pfault_handler;
+
+       for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i)
+               INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action);
+}
+
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev)
+{
+       int ret;
+
+       ret = init_srcu_struct(&ibdev->mr_srcu);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)
+{
+       cleanup_srcu_struct(&ibdev->mr_srcu);
+}
+
+int __init mlx5_ib_odp_init(void)
+{
+       mlx5_ib_page_fault_wq =
+               create_singlethread_workqueue("mlx5_ib_page_faults");
+       if (!mlx5_ib_page_fault_wq)
+               return -ENOMEM;
+
+       return 0;
+}
+
+void mlx5_ib_odp_cleanup(void)
+{
+       destroy_workqueue(mlx5_ib_page_fault_wq);
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c

index 1cae1c7132b4b6f84ed5055c1c53e426edf8a354..be0cd358b080977ed50fdc2dedcd2e0257a19a56 100644 (file)
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -70,15 +70,6 @@ static const u32 mlx5_ib_opcode[] = {
         [MLX5_IB_WR_UMR]                        = MLX5_OPCODE_UMR,
  };
  
-struct umr_wr {
-       u64                             virt_addr;
-       struct ib_pd                   *pd;
-       unsigned int                    page_shift;
-       unsigned int                    npages;
-       u32                             length;
-       int                             access_flags;
-       u32                             mkey;
-};
  
  static int is_qp0(enum ib_qp_type qp_type)
  {
@@ -110,6 +101,77 @@ void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
         return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
  }
  
+/**
+ * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space.
+ *
+ * @qp: QP to copy from.
+ * @send: copy from the send queue when non-zero, use the receive queue
+ *       otherwise.
+ * @wqe_index:  index to start copying from. For send work queues, the
+ *             wqe_index is in units of MLX5_SEND_WQE_BB.
+ *             For receive work queue, it is the number of work queue
+ *             element in the queue.
+ * @buffer: destination buffer.
+ * @length: maximum number of bytes to copy.
+ *
+ * Copies at least a single WQE, but may copy more data. A WQE that runs
+ * past the end of the work queue is continued from the start of the queue.
+ * No more than @length bytes are ever written to @buffer.
+ *
+ * Return: the number of bytes copied, or a negative error code: -EINVAL
+ * when the queue has no WQEs, the WQE starts outside the umem, or the WQE
+ * does not fit in @length bytes; otherwise whatever ib_umem_copy_from()
+ * reported.
+ */
+int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
+                         void *buffer, u32 length)
+{
+       struct ib_device *ibdev = qp->ibqp.device;
+       struct mlx5_ib_dev *dev = to_mdev(ibdev);
+       struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq;
+       size_t offset;
+       size_t wq_end;
+       struct ib_umem *umem = qp->umem;
+       u32 first_copy_length;
+       int wqe_length;
+       int ret;
+
+       if (wq->wqe_cnt == 0) {
+               mlx5_ib_dbg(dev, "mlx5_ib_read_user_wqe for a QP with wqe_cnt == 0. qp_type: 0x%x\n",
+                           qp->ibqp.qp_type);
+               return -EINVAL;
+       }
+
+       /* Byte offsets within the umem; the index wraps around the queue. */
+       offset = wq->offset + ((wqe_index % wq->wqe_cnt) << wq->wqe_shift);
+       wq_end = wq->offset + (wq->wqe_cnt << wq->wqe_shift);
+
+       /* The control segment is parsed below, so it must fit in @buffer. */
+       if (send && length < sizeof(struct mlx5_wqe_ctrl_seg))
+               return -EINVAL;
+
+       if (offset > umem->length ||
+           (send && offset + sizeof(struct mlx5_wqe_ctrl_seg) > umem->length))
+               return -EINVAL;
+
+       /* First chunk: up to @length bytes, stopping at the end of the queue. */
+       first_copy_length = min_t(u32, offset + length, wq_end) - offset;
+       ret = ib_umem_copy_from(buffer, umem, offset, first_copy_length);
+       if (ret)
+               return ret;
+
+       if (send) {
+               /* Send WQE size is the DS count from the control segment. */
+               struct mlx5_wqe_ctrl_seg *ctrl = buffer;
+               int ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
+
+               wqe_length = ds * MLX5_WQE_DS_UNITS;
+       } else {
+               /* Receive WQE size is fixed by the queue's wqe_shift. */
+               wqe_length = 1 << wq->wqe_shift;
+       }
+
+       if (wqe_length <= first_copy_length)
+               return first_copy_length;
+
+       /*
+        * The remainder of the WQE must fit in @buffer as well; without this
+        * check the copy below would write past @length bytes. Once it holds,
+        * first_copy_length < length, so the first copy can only have been
+        * cut short by the end of the queue and the remainder really does
+        * live at the start of the queue.
+        */
+       if (wqe_length > length)
+               return -EINVAL;
+
+       ret = ib_umem_copy_from(buffer + first_copy_length, umem, wq->offset,
+                               wqe_length - first_copy_length);
+       if (ret)
+               return ret;
+
+       return wqe_length;
+}
+
  static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
  {
         struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
@@ -814,6 +876,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
         int inlen = sizeof(*in);
         int err;
  
+       mlx5_ib_odp_create_qp(qp);
+
         gen = &dev->mdev->caps.gen;
         mutex_init(&qp->mutex);
         spin_lock_init(&qp->sq.lock);
@@ -1098,11 +1162,13 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
         in = kzalloc(sizeof(*in), GFP_KERNEL);
         if (!in)
                 return;
-       if (qp->state != IB_QPS_RESET)
+       if (qp->state != IB_QPS_RESET) {
+               mlx5_ib_qp_disable_pagefaults(qp);
                 if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state),
                                         MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp))
                         mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
                                      qp->mqp.qpn);
+       }
  
         get_cqs(qp, &send_cq, &recv_cq);
  
@@ -1650,6 +1716,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
         if (mlx5_st < 0)
                 goto out;
  
+       /* If moving to a reset or error state, we must disable page faults on
+        * this QP and flush all current page faults. Otherwise a stale page
+        * fault may attempt to work on this QP after it is reset and moved
+        * again to RTS, and may cause the driver and the device to get out of
+        * sync. */
+       if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
+           (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
+               mlx5_ib_qp_disable_pagefaults(qp);
+
         optpar = ib_mask_to_mlx5_opt(attr_mask);
         optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
         in->optparam = cpu_to_be32(optpar);
@@ -1659,6 +1734,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
         if (err)
                 goto out;
  
+       if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+               mlx5_ib_qp_enable_pagefaults(qp);
+
         qp->state = new_state;
  
         if (attr_mask & IB_QP_ACCESS_FLAGS)
@@ -1848,37 +1926,70 @@ static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
         umr->mkey_mask = frwr_mkey_mask();
  }
  
+/*
+ * Big-endian mkey_mask that set_reg_umr_segment() uses for a plain
+ * registration, i.e. a UMR that is neither an unregister nor an
+ * MTT update.
+ */
+static __be64 get_umr_reg_mr_mask(void)
+{
+       const u64 fields = MLX5_MKEY_MASK_LEN | MLX5_MKEY_MASK_PAGE_SIZE |
+                          MLX5_MKEY_MASK_START_ADDR | MLX5_MKEY_MASK_PD |
+                          MLX5_MKEY_MASK_LR | MLX5_MKEY_MASK_LW |
+                          MLX5_MKEY_MASK_KEY | MLX5_MKEY_MASK_RR |
+                          MLX5_MKEY_MASK_RW | MLX5_MKEY_MASK_A |
+                          MLX5_MKEY_MASK_FREE;
+
+       return cpu_to_be64(fields);
+}
+
+/*
+ * Big-endian mkey_mask that set_reg_umr_segment() uses for an unregister
+ * UMR: only MLX5_MKEY_MASK_FREE is set.
+ */
+static __be64 get_umr_unreg_mr_mask(void)
+{
+       const u64 fields = MLX5_MKEY_MASK_FREE;
+
+       return cpu_to_be64(fields);
+}
+
+/*
+ * Big-endian mkey_mask that set_reg_umr_segment() uses for an MTT-update
+ * UMR: only MLX5_MKEY_MASK_FREE is set.
+ */
+static __be64 get_umr_update_mtt_mask(void)
+{
+       const u64 fields = MLX5_MKEY_MASK_FREE;
+
+       return cpu_to_be64(fields);
+}
+
  static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
                                 struct ib_send_wr *wr)
  {
-       struct umr_wr *umrwr = (struct umr_wr *)&wr->wr.fast_reg;
-       u64 mask;
+       struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
  
         memset(umr, 0, sizeof(*umr));
  
+       if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
+               umr->flags = MLX5_UMR_CHECK_FREE; /* fail if free */
+       else
+               umr->flags = MLX5_UMR_CHECK_NOT_FREE; /* fail if not free */
+
         if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) {
-               umr->flags = 1 << 5; /* fail if not free */
                 umr->klm_octowords = get_klm_octo(umrwr->npages);
-               mask =  MLX5_MKEY_MASK_LEN              |
-                       MLX5_MKEY_MASK_PAGE_SIZE        |
-                       MLX5_MKEY_MASK_START_ADDR       |
-                       MLX5_MKEY_MASK_PD               |
-                       MLX5_MKEY_MASK_LR               |
-                       MLX5_MKEY_MASK_LW               |
-                       MLX5_MKEY_MASK_KEY              |
-                       MLX5_MKEY_MASK_RR               |
-                       MLX5_MKEY_MASK_RW               |
-                       MLX5_MKEY_MASK_A                |
-                       MLX5_MKEY_MASK_FREE;
-               umr->mkey_mask = cpu_to_be64(mask);
+               if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT) {
+                       umr->mkey_mask = get_umr_update_mtt_mask();
+                       umr->bsf_octowords = get_klm_octo(umrwr->target.offset);
+                       umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
+               } else {
+                       umr->mkey_mask = get_umr_reg_mr_mask();
+               }
         } else {
-               umr->flags = 2 << 5; /* fail if free */
-               mask = MLX5_MKEY_MASK_FREE;
-               umr->mkey_mask = cpu_to_be64(mask);
+               umr->mkey_mask = get_umr_unreg_mr_mask();
         }
  
         if (!wr->num_sge)
-               umr->flags |= (1 << 7); /* inline */
+               umr->flags |= MLX5_UMR_INLINE;
  }
  
  static u8 get_umr_flags(int acc)
@@ -1895,7 +2006,7 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
  {
         memset(seg, 0, sizeof(*seg));
         if (li) {
-               seg->status = 1 << 6;
+               seg->status = MLX5_MKEY_STATUS_FREE;
                 return;
         }
  
@@ -1912,19 +2023,23 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
  
  static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
  {
+       struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+
         memset(seg, 0, sizeof(*seg));
         if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
-               seg->status = 1 << 6;
+               seg->status = MLX5_MKEY_STATUS_FREE;
                 return;
         }
  
-       seg->flags = convert_access(wr->wr.fast_reg.access_flags);
-       seg->flags_pd = cpu_to_be32(to_mpd((struct ib_pd *)wr->wr.fast_reg.page_list)->pdn);
-       seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
-       seg->len = cpu_to_be64(wr->wr.fast_reg.length);
-       seg->log2_page_size = wr->wr.fast_reg.page_shift;
+       seg->flags = convert_access(umrwr->access_flags);
+       if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) {
+               seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
+               seg->start_addr = cpu_to_be64(umrwr->target.virt_addr);
+       }
+       seg->len = cpu_to_be64(umrwr->length);
+       seg->log2_page_size = umrwr->page_shift;
         seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
-                                      mlx5_mkey_variant(wr->wr.fast_reg.rkey));
+                                      mlx5_mkey_variant(umrwr->mkey));
  }
  
  static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
@@ -2927,6 +3042,14 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
         int mlx5_state;
         int err = 0;
  
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+       /*
+        * Wait for any outstanding page faults, in case the user frees memory
+        * based upon this query's result.
+        */
+       flush_workqueue(mlx5_ib_page_fault_wq);
+#endif
+
         mutex_lock(&qp->mutex);
         outb = kzalloc(sizeof(*outb), GFP_KERNEL);
         if (!outb) {
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c

index fef067c959fcf23c9455f96c17e8d734d6a894f6..c0d0296e7a003089dec6d8d180de2bb6dd05964d 100644 (file)
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -2341,9 +2341,9 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
         nes_debug(NES_DBG_MR, "User base = 0x%lX, Virt base = 0x%lX, length = %u,"
                         " offset = %u, page size = %u.\n",
                         (unsigned long int)start, (unsigned long int)virt, (u32)length,
-                       region->offset, region->page_size);
+                       ib_umem_offset(region), region->page_size);
  
-       skip_pages = ((u32)region->offset) >> 12;
+       skip_pages = ((u32)ib_umem_offset(region)) >> 12;
  
         if (ib_copy_from_udata(&req, udata, sizeof(req))) {
                 ib_umem_release(region);
@@ -2408,7 +2408,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                 region_length -= skip_pages << 12;
                                 for (page_index = skip_pages; page_index < chunk_pages; page_index++) {
                                         skip_pages = 0;
-                                       if ((page_count != 0) && (page_count<<12)-(region->offset&(4096-1)) >= region->length)
+                                       if ((page_count != 0) && (page_count << 12) - (ib_umem_offset(region) & (4096 - 1)) >= region->length)
                                                 goto enough_pages;
                                         if ((page_count&0x01FF) == 0) {
                                                 if (page_count >= 1024 * 512) {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c

index ac02ce4e804045afa74df9cb34b1f50d1b146361..f3cc8c9e65ae70f9e0b157632189e324970cbea5 100644 (file)
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -96,7 +96,6 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
         struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
         struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
         union ib_gid sgid;
-       u8 zmac[ETH_ALEN];
  
         if (!(attr->ah_flags & IB_AH_GRH))
                 return ERR_PTR(-EINVAL);
@@ -118,9 +117,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
                 goto av_conf_err;
         }
  
-       memset(&zmac, 0, ETH_ALEN);
-       if (pd->uctx &&
-           memcmp(attr->dmac, &zmac, ETH_ALEN)) {
+       if (pd->uctx) {
                 status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid,
                                          attr->dmac, &attr->vlan_id);
                 if (status) {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c

index 4c68305ee7814685f60d51627b0d69d28907c0f3..fb8d8c4dfbb97d2b36abdf69888793741aba182a 100644 (file)
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -805,7 +805,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
                 goto umem_err;
  
         mr->hwmr.pbe_size = mr->umem->page_size;
-       mr->hwmr.fbo = mr->umem->offset;
+       mr->hwmr.fbo = ib_umem_offset(mr->umem);
         mr->hwmr.va = usr_addr;
         mr->hwmr.len = len;
         mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
@@ -1410,6 +1410,8 @@ int ocrdma_query_qp(struct ib_qp *ibqp,
         mutex_unlock(&dev->dev_lock);
         if (status)
                 goto mbx_err;
+       if (qp->qp_type == IB_QPT_UD)
+               qp_attr->qkey = params.qkey;
         qp_attr->qp_state = get_ibqp_state(IB_QPS_INIT);
         qp_attr->cur_qp_state = get_ibqp_state(IB_QPS_INIT);
         qp_attr->path_mtu =
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c

index 9bbb55347cc1934bf9335da1b656a246d1c5cbe2..a77fb4fb14e43c255e23a41b7c86877835a78ea3 100644 (file)
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -258,7 +258,7 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
         mr->mr.user_base = start;
         mr->mr.iova = virt_addr;
         mr->mr.length = length;
-       mr->mr.offset = umem->offset;
+       mr->mr.offset = ib_umem_offset(umem);
         mr->mr.access_flags = mr_access_flags;
         mr->umem = umem;
  
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h

index d7562beb542367faf1b93d7ba66e8ef879c73bf4..8ba80a6d3a46d17daf233945e76d8760c107a752 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -98,9 +98,15 @@ enum {
  
         IPOIB_MCAST_FLAG_FOUND    = 0,  /* used in set_multicast_list */
         IPOIB_MCAST_FLAG_SENDONLY = 1,
-       IPOIB_MCAST_FLAG_BUSY     = 2,  /* joining or already joined */
+       /*
+        * For IPOIB_MCAST_FLAG_BUSY
+        * When set, in flight join and mcast->mc is unreliable
+        * When clear and mcast->mc IS_ERR_OR_NULL, need to restart or
+        *   haven't started yet
+        * When clear and mcast->mc is valid pointer, join was successful
+        */
+       IPOIB_MCAST_FLAG_BUSY     = 2,
         IPOIB_MCAST_FLAG_ATTACHED = 3,
-       IPOIB_MCAST_JOIN_STARTED  = 4,
  
         MAX_SEND_CQE              = 16,
         IPOIB_CM_COPYBREAK        = 256,
@@ -317,6 +323,7 @@ struct ipoib_dev_priv {
         struct list_head multicast_list;
         struct rb_root multicast_tree;
  
+       struct workqueue_struct *wq;
         struct delayed_work mcast_task;
         struct work_struct carrier_on_task;
         struct work_struct flush_light;
@@ -477,10 +484,10 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
  void ipoib_pkey_event(struct work_struct *work);
  void ipoib_ib_dev_cleanup(struct net_device *dev);
  
-int ipoib_ib_dev_open(struct net_device *dev, int flush);
+int ipoib_ib_dev_open(struct net_device *dev);
  int ipoib_ib_dev_up(struct net_device *dev);
-int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev, int flush);
+int ipoib_ib_dev_down(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev);
  void ipoib_pkey_dev_check_presence(struct net_device *dev);
  
  int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -492,7 +499,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
  
  void ipoib_mcast_restart_task(struct work_struct *work);
  int ipoib_mcast_start_thread(struct net_device *dev);
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
+int ipoib_mcast_stop_thread(struct net_device *dev);
  
  void ipoib_mcast_dev_down(struct net_device *dev);
  void ipoib_mcast_dev_flush(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c

index 933efcea0d03f11b4da3967b8eedc137da21e08a..56959adb6c7da51ccbb6d20307247b7cb69ad55a 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -474,7 +474,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
         }
  
         spin_lock_irq(&priv->lock);
-       queue_delayed_work(ipoib_workqueue,
+       queue_delayed_work(priv->wq,
                            &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
         /* Add this entry to passive ids list head, but do not re-add it
          * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
@@ -576,7 +576,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                         spin_lock_irqsave(&priv->lock, flags);
                         list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
                         ipoib_cm_start_rx_drain(priv);
-                       queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+                       queue_work(priv->wq, &priv->cm.rx_reap_task);
                         spin_unlock_irqrestore(&priv->lock, flags);
                 } else
                         ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
@@ -603,7 +603,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                                 spin_lock_irqsave(&priv->lock, flags);
                                 list_move(&p->list, &priv->cm.rx_reap_list);
                                 spin_unlock_irqrestore(&priv->lock, flags);
-                               queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+                               queue_work(priv->wq, &priv->cm.rx_reap_task);
                         }
                         return;
                 }
@@ -827,7 +827,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
  
                 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
                         list_move(&tx->list, &priv->cm.reap_list);
-                       queue_work(ipoib_workqueue, &priv->cm.reap_task);
+                       queue_work(priv->wq, &priv->cm.reap_task);
                 }
  
                 clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
@@ -1255,7 +1255,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
  
                 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
                         list_move(&tx->list, &priv->cm.reap_list);
-                       queue_work(ipoib_workqueue, &priv->cm.reap_task);
+                       queue_work(priv->wq, &priv->cm.reap_task);
                 }
  
                 spin_unlock_irqrestore(&priv->lock, flags);
@@ -1284,7 +1284,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
         tx->dev = dev;
         list_add(&tx->list, &priv->cm.start_list);
         set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
-       queue_work(ipoib_workqueue, &priv->cm.start_task);
+       queue_work(priv->wq, &priv->cm.start_task);
         return tx;
  }
  
@@ -1295,7 +1295,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
         if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
                 spin_lock_irqsave(&priv->lock, flags);
                 list_move(&tx->list, &priv->cm.reap_list);
-               queue_work(ipoib_workqueue, &priv->cm.reap_task);
+               queue_work(priv->wq, &priv->cm.reap_task);
                 ipoib_dbg(priv, "Reap connection for gid %pI6\n",
                           tx->neigh->daddr + 4);
                 tx->neigh = NULL;
@@ -1417,7 +1417,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
  
         skb_queue_tail(&priv->cm.skb_queue, skb);
         if (e)
-               queue_work(ipoib_workqueue, &priv->cm.skb_task);
+               queue_work(priv->wq, &priv->cm.skb_task);
  }
  
  static void ipoib_cm_rx_reap(struct work_struct *work)
@@ -1450,7 +1450,7 @@ static void ipoib_cm_stale_task(struct work_struct *work)
         }
  
         if (!list_empty(&priv->cm.passive_ids))
-               queue_delayed_work(ipoib_workqueue,
+               queue_delayed_work(priv->wq,
                                    &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
         spin_unlock_irq(&priv->lock);
  }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c

index 72626c3481749b962fe96b79722d7c8e9c99c585..fe65abb5150c76b2eb941b3b2331930bc5b2b81e 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -655,7 +655,7 @@ void ipoib_reap_ah(struct work_struct *work)
         __ipoib_reap_ah(dev);
  
         if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
-               queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+               queue_delayed_work(priv->wq, &priv->ah_reap_task,
                                    round_jiffies_relative(HZ));
  }
  
@@ -664,7 +664,7 @@ static void ipoib_ib_tx_timer_func(unsigned long ctx)
         drain_tx_cq((struct net_device *)ctx);
  }
  
-int ipoib_ib_dev_open(struct net_device *dev, int flush)
+int ipoib_ib_dev_open(struct net_device *dev)
  {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         int ret;
@@ -696,7 +696,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
         }
  
         clear_bit(IPOIB_STOP_REAPER, &priv->flags);
-       queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+       queue_delayed_work(priv->wq, &priv->ah_reap_task,
                            round_jiffies_relative(HZ));
  
         if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
@@ -706,7 +706,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
  dev_stop:
         if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
                 napi_enable(&priv->napi);
-       ipoib_ib_dev_stop(dev, flush);
+       ipoib_ib_dev_stop(dev);
         return -1;
  }
  
@@ -738,7 +738,7 @@ int ipoib_ib_dev_up(struct net_device *dev)
         return ipoib_mcast_start_thread(dev);
  }
  
-int ipoib_ib_dev_down(struct net_device *dev, int flush)
+int ipoib_ib_dev_down(struct net_device *dev)
  {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
  
@@ -747,7 +747,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
         clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
         netif_carrier_off(dev);
  
-       ipoib_mcast_stop_thread(dev, flush);
+       ipoib_mcast_stop_thread(dev);
         ipoib_mcast_dev_flush(dev);
  
         ipoib_flush_paths(dev);
@@ -807,7 +807,7 @@ void ipoib_drain_cq(struct net_device *dev)
         local_bh_enable();
  }
  
-int ipoib_ib_dev_stop(struct net_device *dev, int flush)
+int ipoib_ib_dev_stop(struct net_device *dev)
  {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         struct ib_qp_attr qp_attr;
@@ -880,8 +880,7 @@ timeout:
         /* Wait for all AHs to be reaped */
         set_bit(IPOIB_STOP_REAPER, &priv->flags);
         cancel_delayed_work(&priv->ah_reap_task);
-       if (flush)
-               flush_workqueue(ipoib_workqueue);
+       flush_workqueue(priv->wq);
  
         begin = jiffies;
  
@@ -918,7 +917,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
                     (unsigned long) dev);
  
         if (dev->flags & IFF_UP) {
-               if (ipoib_ib_dev_open(dev, 1)) {
+               if (ipoib_ib_dev_open(dev)) {
                         ipoib_transport_dev_cleanup(dev);
                         return -ENODEV;
                 }
@@ -1040,12 +1039,12 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
         }
  
         if (level >= IPOIB_FLUSH_NORMAL)
-               ipoib_ib_dev_down(dev, 0);
+               ipoib_ib_dev_down(dev);
  
         if (level == IPOIB_FLUSH_HEAVY) {
                 if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
-                       ipoib_ib_dev_stop(dev, 0);
-               if (ipoib_ib_dev_open(dev, 0) != 0)
+                       ipoib_ib_dev_stop(dev);
+               if (ipoib_ib_dev_open(dev) != 0)
                         return;
                 if (netif_queue_stopped(dev))
                         netif_start_queue(dev);
@@ -1097,7 +1096,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
          */
         ipoib_flush_paths(dev);
  
-       ipoib_mcast_stop_thread(dev, 1);
+       ipoib_mcast_stop_thread(dev);
         ipoib_mcast_dev_flush(dev);
  
         ipoib_transport_dev_cleanup(dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c

index 58b5aa3b6f2dded5d2e6d15aff080551aa9eddd9..6bad17d4d5880886f88ef48d8424abe4347cdc50 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,7 +108,7 @@ int ipoib_open(struct net_device *dev)
  
         set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
  
-       if (ipoib_ib_dev_open(dev, 1)) {
+       if (ipoib_ib_dev_open(dev)) {
                 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
                         return 0;
                 goto err_disable;
@@ -139,7 +139,7 @@ int ipoib_open(struct net_device *dev)
         return 0;
  
  err_stop:
-       ipoib_ib_dev_stop(dev, 1);
+       ipoib_ib_dev_stop(dev);
  
  err_disable:
         clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
@@ -157,8 +157,8 @@ static int ipoib_stop(struct net_device *dev)
  
         netif_stop_queue(dev);
  
-       ipoib_ib_dev_down(dev, 1);
-       ipoib_ib_dev_stop(dev, 0);
+       ipoib_ib_dev_down(dev);
+       ipoib_ib_dev_stop(dev);
  
         if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
                 struct ipoib_dev_priv *cpriv;
@@ -839,7 +839,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
                 return;
         }
  
-       queue_work(ipoib_workqueue, &priv->restart_task);
+       queue_work(priv->wq, &priv->restart_task);
  }
  
  static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
@@ -954,7 +954,7 @@ static void ipoib_reap_neigh(struct work_struct *work)
         __ipoib_reap_neigh(priv);
  
         if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
-               queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+               queue_delayed_work(priv->wq, &priv->neigh_reap_task,
                                    arp_tbl.gc_interval);
  }
  
@@ -1133,7 +1133,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
  
         /* start garbage collection */
         clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
-       queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+       queue_delayed_work(priv->wq, &priv->neigh_reap_task,
                            arp_tbl.gc_interval);
  
         return 0;
@@ -1262,15 +1262,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
  {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
  
-       if (ipoib_neigh_hash_init(priv) < 0)
-               goto out;
         /* Allocate RX/TX "rings" to hold queued skbs */
         priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
                                 GFP_KERNEL);
         if (!priv->rx_ring) {
                 printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
                        ca->name, ipoib_recvq_size);
-               goto out_neigh_hash_cleanup;
+               goto out;
         }
  
         priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@@ -1285,16 +1283,24 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
         if (ipoib_ib_dev_init(dev, ca, port))
                 goto out_tx_ring_cleanup;
  
+       /*
+        * Must be after ipoib_ib_dev_init so we can allocate a per
+        * device wq there and use it here
+        */
+       if (ipoib_neigh_hash_init(priv) < 0)
+               goto out_dev_uninit;
+
         return 0;
  
+out_dev_uninit:
+       ipoib_ib_dev_cleanup(dev);
+
  out_tx_ring_cleanup:
         vfree(priv->tx_ring);
  
  out_rx_ring_cleanup:
         kfree(priv->rx_ring);
  
-out_neigh_hash_cleanup:
-       ipoib_neigh_hash_uninit(dev);
  out:
         return -ENOMEM;
  }
@@ -1317,6 +1323,12 @@ void ipoib_dev_cleanup(struct net_device *dev)
         }
         unregister_netdevice_many(&head);
  
+       /*
+        * Must be before ipoib_ib_dev_cleanup or we delete an in use
+        * work queue
+        */
+       ipoib_neigh_hash_uninit(dev);
+
         ipoib_ib_dev_cleanup(dev);
  
         kfree(priv->rx_ring);
@@ -1324,8 +1336,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
  
         priv->rx_ring = NULL;
         priv->tx_ring = NULL;
-
-       ipoib_neigh_hash_uninit(dev);
  }
  
  static const struct header_ops ipoib_header_ops = {
@@ -1636,7 +1646,7 @@ register_failed:
         /* Stop GC if started before flush */
         set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
         cancel_delayed_work(&priv->neigh_reap_task);
-       flush_workqueue(ipoib_workqueue);
+       flush_workqueue(priv->wq);
  
  event_failed:
         ipoib_dev_cleanup(priv->dev);
@@ -1707,7 +1717,7 @@ static void ipoib_remove_one(struct ib_device *device)
                 /* Stop GC */
                 set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
                 cancel_delayed_work(&priv->neigh_reap_task);
-               flush_workqueue(ipoib_workqueue);
+               flush_workqueue(priv->wq);
  
                 unregister_netdev(priv->dev);
                 free_netdev(priv->dev);
@@ -1748,8 +1758,13 @@ static int __init ipoib_init_module(void)
          * unregister_netdev() and linkwatch_event take the rtnl lock,
          * so flush_scheduled_work() can deadlock during device
          * removal.
+        *
+        * In addition, bringing one device up and another down at the
+        * same time can deadlock a single workqueue, so we have this
+        * global fallback workqueue, but we also attempt to open a
+        * per device workqueue each time we bring an interface up
          */
-       ipoib_workqueue = create_singlethread_workqueue("ipoib");
+       ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");
         if (!ipoib_workqueue) {
                 ret = -ENOMEM;
                 goto err_fs;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c

index ffb83b5f7e805e411f1506d66a53f8465b90c439..bc50dd0d0e4dad7790725b0414d807d42fe82493 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -190,12 +190,6 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
                 spin_unlock_irq(&priv->lock);
                 priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
                 set_qkey = 1;
-
-               if (!ipoib_cm_admin_enabled(dev)) {
-                       rtnl_lock();
-                       dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
-                       rtnl_unlock();
-               }
         }
  
         if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -277,16 +271,27 @@ ipoib_mcast_sendonly_join_complete(int status,
         struct ipoib_mcast *mcast = multicast->context;
         struct net_device *dev = mcast->dev;
  
+       /*
+        * We have to take the mutex to force ipoib_mcast_sendonly_join()
+        * to return from ib_sa_join_multicast() and set mcast->mc to a
+        * valid value.  Otherwise we were racing with ourselves in
+        * that we might fail here, but get a valid return from
+        * ib_sa_join_multicast() after we had cleared mcast->mc here,
+        * resulting in mismatched joins and leaves and a deadlock.
+        */
+       mutex_lock(&mcast_mutex);
+
         /* We trap for port events ourselves. */
         if (status == -ENETRESET)
-               return 0;
+               goto out;
  
         if (!status)
                 status = ipoib_mcast_join_finish(mcast, &multicast->rec);
  
         if (status) {
                 if (mcast->logcount++ < 20)
-                       ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",
+                       ipoib_dbg_mcast(netdev_priv(dev), "sendonly multicast "
+                                       "join failed for %pI6, status %d\n",
                                         mcast->mcmember.mgid.raw, status);
  
                 /* Flush out any queued packets */
@@ -296,11 +301,15 @@ ipoib_mcast_sendonly_join_complete(int status,
                         dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
                 }
                 netif_tx_unlock_bh(dev);
-
-               /* Clear the busy flag so we try again */
-               status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
-                                           &mcast->flags);
         }
+out:
+       clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+       if (status)
+               mcast->mc = NULL;
+       complete(&mcast->done);
+       if (status == -ENETRESET)
+               status = 0;
+       mutex_unlock(&mcast_mutex);
         return status;
  }
  
@@ -318,12 +327,14 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
         int ret = 0;
  
         if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
-               ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
+               ipoib_dbg_mcast(priv, "device shutting down, no sendonly "
+                               "multicast joins\n");
                 return -ENODEV;
         }
  
-       if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
-               ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
+       if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
+               ipoib_dbg_mcast(priv, "multicast entry busy, skipping "
+                               "sendonly join\n");
                 return -EBUSY;
         }
  
@@ -331,6 +342,9 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
         rec.port_gid = priv->local_gid;
         rec.pkey     = cpu_to_be16(priv->pkey);
  
+       mutex_lock(&mcast_mutex);
+       init_completion(&mcast->done);
+       set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
         mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
                                          priv->port, &rec,
                                          IB_SA_MCMEMBER_REC_MGID        |
@@ -343,12 +357,14 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
         if (IS_ERR(mcast->mc)) {
                 ret = PTR_ERR(mcast->mc);
                 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-               ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
-                          ret);
+               complete(&mcast->done);
+               ipoib_warn(priv, "ib_sa_join_multicast for sendonly join "
+                          "failed (ret = %d)\n", ret);
         } else {
-               ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",
-                               mcast->mcmember.mgid.raw);
+               ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting "
+                               "sendonly join\n", mcast->mcmember.mgid.raw);
         }
+       mutex_unlock(&mcast_mutex);
  
         return ret;
  }
@@ -359,18 +375,29 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
                                                    carrier_on_task);
         struct ib_port_attr attr;
  
-       /*
-        * Take rtnl_lock to avoid racing with ipoib_stop() and
-        * turning the carrier back on while a device is being
-        * removed.
-        */
         if (ib_query_port(priv->ca, priv->port, &attr) ||
             attr.state != IB_PORT_ACTIVE) {
                 ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
                 return;
         }
  
-       rtnl_lock();
+       /*
+        * Take rtnl_lock to avoid racing with ipoib_stop() and
+        * turning the carrier back on while a device is being
+        * removed.  However, ipoib_stop() will attempt to flush
+        * the workqueue while holding the rtnl lock, so loop
+        * on trylock until either we get the lock or we see
+        * FLAG_ADMIN_UP go away as that signals that we are bailing
+        * and can safely ignore the carrier on work.
+        */
+       while (!rtnl_trylock()) {
+               if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+                       return;
+               else
+                       msleep(20);
+       }
+       if (!ipoib_cm_admin_enabled(priv->dev))
+               dev_set_mtu(priv->dev, min(priv->mcast_mtu, priv->admin_mtu));
         netif_carrier_on(priv->dev);
         rtnl_unlock();
  }
@@ -385,60 +412,63 @@ static int ipoib_mcast_join_complete(int status,
         ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
                         mcast->mcmember.mgid.raw, status);
  
+       /*
+        * We have to take the mutex to force ipoib_mcast_join() to
+        * return from ib_sa_join_multicast() and set mcast->mc to a
+        * valid value.  Otherwise we were racing with ourselves in
+        * that we might fail here, but get a valid return from
+        * ib_sa_join_multicast() after we had cleared mcast->mc here,
+        * resulting in mismatched joins and leaves and a deadlock.
+        */
+       mutex_lock(&mcast_mutex);
+
         /* We trap for port events ourselves. */
-       if (status == -ENETRESET) {
-               status = 0;
+       if (status == -ENETRESET)
                 goto out;
-       }
  
         if (!status)
                 status = ipoib_mcast_join_finish(mcast, &multicast->rec);
  
         if (!status) {
                 mcast->backoff = 1;
-               mutex_lock(&mcast_mutex);
                 if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-                       queue_delayed_work(ipoib_workqueue,
-                                          &priv->mcast_task, 0);
-               mutex_unlock(&mcast_mutex);
+                       queue_delayed_work(priv->wq, &priv->mcast_task, 0);
  
                 /*
-                * Defer carrier on work to ipoib_workqueue to avoid a
+                * Defer carrier on work to priv->wq to avoid a
                  * deadlock on rtnl_lock here.
                  */
                 if (mcast == priv->broadcast)
-                       queue_work(ipoib_workqueue, &priv->carrier_on_task);
-
-               status = 0;
-               goto out;
-       }
-
-       if (mcast->logcount++ < 20) {
-               if (status == -ETIMEDOUT || status == -EAGAIN) {
-                       ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
-                                       mcast->mcmember.mgid.raw, status);
-               } else {
-                       ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
-                                  mcast->mcmember.mgid.raw, status);
+                       queue_work(priv->wq, &priv->carrier_on_task);
+       } else {
+               if (mcast->logcount++ < 20) {
+                       if (status == -ETIMEDOUT || status == -EAGAIN) {
+                               ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
+                                               mcast->mcmember.mgid.raw, status);
+                       } else {
+                               ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
+                                          mcast->mcmember.mgid.raw, status);
+                       }
                 }
-       }
-
-       mcast->backoff *= 2;
-       if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
-               mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
  
-       /* Clear the busy flag so we try again */
-       status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-
-       mutex_lock(&mcast_mutex);
+               mcast->backoff *= 2;
+               if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
+                       mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
+       }
+out:
         spin_lock_irq(&priv->lock);
-       if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-               queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
+       clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+       if (status)
+               mcast->mc = NULL;
+       complete(&mcast->done);
+       if (status == -ENETRESET)
+               status = 0;
+       if (status && test_bit(IPOIB_MCAST_RUN, &priv->flags))
+               queue_delayed_work(priv->wq, &priv->mcast_task,
                                    mcast->backoff * HZ);
         spin_unlock_irq(&priv->lock);
         mutex_unlock(&mcast_mutex);
-out:
-       complete(&mcast->done);
+
         return status;
  }
  
@@ -487,10 +517,9 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
                 rec.hop_limit     = priv->broadcast->mcmember.hop_limit;
         }
  
-       set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+       mutex_lock(&mcast_mutex);
         init_completion(&mcast->done);
-       set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
-
+       set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
         mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
                                          &rec, comp_mask, GFP_KERNEL,
                                          ipoib_mcast_join_complete, mcast);
@@ -504,13 +533,11 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
                 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
                         mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
  
-               mutex_lock(&mcast_mutex);
                 if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-                       queue_delayed_work(ipoib_workqueue,
-                                          &priv->mcast_task,
+                       queue_delayed_work(priv->wq, &priv->mcast_task,
                                            mcast->backoff * HZ);
-               mutex_unlock(&mcast_mutex);
         }
+       mutex_unlock(&mcast_mutex);
  }
  
  void ipoib_mcast_join_task(struct work_struct *work)
@@ -547,8 +574,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
                         ipoib_warn(priv, "failed to allocate broadcast group\n");
                         mutex_lock(&mcast_mutex);
                         if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-                               queue_delayed_work(ipoib_workqueue,
-                                                  &priv->mcast_task, HZ);
+                               queue_delayed_work(priv->wq, &priv->mcast_task,
+                                                  HZ);
                         mutex_unlock(&mcast_mutex);
                         return;
                 }
@@ -563,7 +590,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
         }
  
         if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
-               if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
+               if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
+                   !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
                         ipoib_mcast_join(dev, priv->broadcast, 0);
                 return;
         }
@@ -571,23 +599,33 @@ void ipoib_mcast_join_task(struct work_struct *work)
         while (1) {
                 struct ipoib_mcast *mcast = NULL;
  
+               /*
+                * Need the mutex so our flags are consistent, need the
+                * priv->lock so we don't race with list removals in either
+                * mcast_dev_flush or mcast_restart_task
+                */
+               mutex_lock(&mcast_mutex);
                 spin_lock_irq(&priv->lock);
                 list_for_each_entry(mcast, &priv->multicast_list, list) {
-                       if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
-                           && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
-                           && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
+                       if (IS_ERR_OR_NULL(mcast->mc) &&
+                           !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
+                           !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
                                 /* Found the next unjoined group */
                                 break;
                         }
                 }
                 spin_unlock_irq(&priv->lock);
+               mutex_unlock(&mcast_mutex);
  
                 if (&mcast->list == &priv->multicast_list) {
                         /* All done */
                         break;
                 }
  
-               ipoib_mcast_join(dev, mcast, 1);
+               if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+                       ipoib_mcast_sendonly_join(mcast);
+               else
+                       ipoib_mcast_join(dev, mcast, 1);
                 return;
         }
  
@@ -604,13 +642,13 @@ int ipoib_mcast_start_thread(struct net_device *dev)
  
         mutex_lock(&mcast_mutex);
         if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
-               queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
+               queue_delayed_work(priv->wq, &priv->mcast_task, 0);
         mutex_unlock(&mcast_mutex);
  
         return 0;
  }
  
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
+int ipoib_mcast_stop_thread(struct net_device *dev)
  {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
  
@@ -621,8 +659,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
         cancel_delayed_work(&priv->mcast_task);
         mutex_unlock(&mcast_mutex);
  
-       if (flush)
-               flush_workqueue(ipoib_workqueue);
+       flush_workqueue(priv->wq);
  
         return 0;
  }
@@ -633,6 +670,9 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
         int ret = 0;
  
         if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+               ipoib_warn(priv, "ipoib_mcast_leave on an in-flight join\n");
+
+       if (!IS_ERR_OR_NULL(mcast->mc))
                 ib_sa_free_multicast(mcast->mc);
  
         if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
@@ -685,6 +725,8 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
                 memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
                 __ipoib_mcast_add(dev, mcast);
                 list_add_tail(&mcast->list, &priv->multicast_list);
+               if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
+                       queue_delayed_work(priv->wq, &priv->mcast_task, 0);
         }
  
         if (!mcast->ah) {
@@ -698,8 +740,6 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
                 if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
                         ipoib_dbg_mcast(priv, "no address vector, "
                                         "but multicast join already started\n");
-               else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
-                       ipoib_mcast_sendonly_join(mcast);
  
                 /*
                  * If lookup completes between here and out:, don't
@@ -759,9 +799,12 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
  
         spin_unlock_irqrestore(&priv->lock, flags);
  
-       /* seperate between the wait to the leave*/
+       /*
+        * make sure the in-flight joins have finished before we attempt
+        * to leave
+        */
         list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
-               if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
+               if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
                         wait_for_completion(&mcast->done);
  
         list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
@@ -794,8 +837,6 @@ void ipoib_mcast_restart_task(struct work_struct *work)
  
         ipoib_dbg_mcast(priv, "restarting multicast task\n");
  
-       ipoib_mcast_stop_thread(dev, 0);
-
         local_irq_save(flags);
         netif_addr_lock(dev);
         spin_lock(&priv->lock);
@@ -880,14 +921,38 @@ void ipoib_mcast_restart_task(struct work_struct *work)
         netif_addr_unlock(dev);
         local_irq_restore(flags);
  
-       /* We have to cancel outside of the spinlock */
+       /*
+        * make sure the in-flight joins have finished before we attempt
+        * to leave
+        */
+       list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
+               if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+                       wait_for_completion(&mcast->done);
+
+       /*
+        * We have to cancel outside of the spinlock, but we have to
+        * take the rtnl lock or else we race with the removal of
+        * entries from the remove list in mcast_dev_flush as part
+        * of ipoib_stop().  We detect the drop of the ADMIN_UP flag
+        * to signal that we have hit this particular race, and we
+        * return since we know we don't need to do anything else
+        * anyway.
+        */
+       while (!rtnl_trylock()) {
+               if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+                       return;
+               else
+                       msleep(20);
+       }
         list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
                 ipoib_mcast_leave(mcast->dev, mcast);
                 ipoib_mcast_free(mcast);
         }
-
-       if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
-               ipoib_mcast_start_thread(dev);
+       /*
+        * Restart our join task if needed
+        */
+       ipoib_mcast_start_thread(dev);
+       rtnl_unlock();
  }
  
  #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c

index c56d5d44c53b3f11725b6d6da220ea2c440fe496..b72a753eb41dc3031608269c56434ed507b96f5f 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -145,10 +145,20 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
         int ret, size;
         int i;
  
+       /*
+        * the various IPoIB tasks assume they will never race against
+        * themselves, so always use a single thread workqueue
+        */
+       priv->wq = create_singlethread_workqueue("ipoib_wq");
+       if (!priv->wq) {
+               printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
+               return -ENODEV;
+       }
+
         priv->pd = ib_alloc_pd(priv->ca);
         if (IS_ERR(priv->pd)) {
                 printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
-               return -ENODEV;
+               goto out_free_wq;
         }
  
         priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
@@ -242,6 +252,10 @@ out_free_mr:
  
  out_free_pd:
         ib_dealloc_pd(priv->pd);
+
+out_free_wq:
+       destroy_workqueue(priv->wq);
+       priv->wq = NULL;
         return -ENODEV;
  }
  
@@ -270,6 +284,12 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
  
         if (ib_dealloc_pd(priv->pd))
                 ipoib_warn(priv, "ib_dealloc_pd failed\n");
+
+       if (priv->wq) {
+               flush_workqueue(priv->wq);
+               destroy_workqueue(priv->wq);
+               priv->wq = NULL;
+       }
  }
  
  void ipoib_event(struct ib_event_handler *handler,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c

index 20ca6a619476f065af6dbef57905897f717faf54..6a594aac229008418f388433107a959186f96867 100644 (file)
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -97,7 +97,7 @@ module_param_named(pi_enable, iser_pi_enable, bool, 0644);
  MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
  
  module_param_named(pi_guard, iser_pi_guard, int, 0644);
-MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:IP_CSUM)");
+MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]");
  
  static struct workqueue_struct *release_wq;
  struct iser_global ig;
@@ -164,18 +164,42 @@ iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
         return 0;
  }
  
-int iser_initialize_task_headers(struct iscsi_task *task,
-                                               struct iser_tx_desc *tx_desc)
+/**
+ * iser_initialize_task_headers() - Initialize task headers
+ * @task:       iscsi task
+ * @tx_desc:    iser tx descriptor
+ *
+ * Notes:
+ * This routine may race with iser teardown flow for scsi
+ * error handling TMFs. So for TMF we should acquire the
+ * state mutex to avoid dereferencing the IB device which
+ * may have already been terminated.
+ */
+int
+iser_initialize_task_headers(struct iscsi_task *task,
+                            struct iser_tx_desc *tx_desc)
  {
-       struct iser_conn       *iser_conn   = task->conn->dd_data;
+       struct iser_conn *iser_conn = task->conn->dd_data;
         struct iser_device *device = iser_conn->ib_conn.device;
         struct iscsi_iser_task *iser_task = task->dd_data;
         u64 dma_addr;
+       const bool mgmt_task = !task->sc && !in_interrupt();
+       int ret = 0;
+
+       if (unlikely(mgmt_task))
+               mutex_lock(&iser_conn->state_mutex);
+
+       if (unlikely(iser_conn->state != ISER_CONN_UP)) {
+               ret = -ENODEV;
+               goto out;
+       }
  
         dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
                                 ISER_HEADERS_LEN, DMA_TO_DEVICE);
-       if (ib_dma_mapping_error(device->ib_device, dma_addr))
-               return -ENOMEM;
+       if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
+               ret = -ENOMEM;
+               goto out;
+       }
  
         tx_desc->dma_addr = dma_addr;
         tx_desc->tx_sg[0].addr   = tx_desc->dma_addr;
@@ -183,7 +207,11 @@ int iser_initialize_task_headers(struct iscsi_task *task,
         tx_desc->tx_sg[0].lkey   = device->mr->lkey;
  
         iser_task->iser_conn = iser_conn;
-       return 0;
+out:
+       if (unlikely(mgmt_task))
+               mutex_unlock(&iser_conn->state_mutex);
+
+       return ret;
  }
  
  /**
@@ -199,9 +227,14 @@ static int
  iscsi_iser_task_init(struct iscsi_task *task)
  {
         struct iscsi_iser_task *iser_task = task->dd_data;
+       int ret;
  
-       if (iser_initialize_task_headers(task, &iser_task->desc))
-                       return -ENOMEM;
+       ret = iser_initialize_task_headers(task, &iser_task->desc);
+       if (ret) {
+               iser_err("Failed to init task %p, err = %d\n",
+                        iser_task, ret);
+               return ret;
+       }
  
         /* mgmt task */
         if (!task->sc)
@@ -508,8 +541,8 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
          */
         if (iser_conn) {
                 mutex_lock(&iser_conn->state_mutex);
-               iscsi_conn_stop(cls_conn, flag);
                 iser_conn_terminate(iser_conn);
+               iscsi_conn_stop(cls_conn, flag);
  
                 /* unbind */
                 iser_conn->iscsi_conn = NULL;
@@ -541,12 +574,13 @@ iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
  static inline unsigned int
  iser_dif_prot_caps(int prot_caps)
  {
-       return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ? SHOST_DIF_TYPE1_PROTECTION |
-                                                     SHOST_DIX_TYPE1_PROTECTION : 0) |
-              ((prot_caps & IB_PROT_T10DIF_TYPE_2) ? SHOST_DIF_TYPE2_PROTECTION |
-                                                     SHOST_DIX_TYPE2_PROTECTION : 0) |
-              ((prot_caps & IB_PROT_T10DIF_TYPE_3) ? SHOST_DIF_TYPE3_PROTECTION |
-                                                     SHOST_DIX_TYPE3_PROTECTION : 0);
+       return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ?
+               SHOST_DIF_TYPE1_PROTECTION | SHOST_DIX_TYPE0_PROTECTION |
+               SHOST_DIX_TYPE1_PROTECTION : 0) |
+              ((prot_caps & IB_PROT_T10DIF_TYPE_2) ?
+               SHOST_DIF_TYPE2_PROTECTION | SHOST_DIX_TYPE2_PROTECTION : 0) |
+              ((prot_caps & IB_PROT_T10DIF_TYPE_3) ?
+               SHOST_DIF_TYPE3_PROTECTION | SHOST_DIX_TYPE3_PROTECTION : 0);
  }
  
  /**
@@ -569,6 +603,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
         struct Scsi_Host *shost;
         struct iser_conn *iser_conn = NULL;
         struct ib_conn *ib_conn;
+       u16 max_cmds;
  
         shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
         if (!shost)
@@ -586,26 +621,41 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
          */
         if (ep) {
                 iser_conn = ep->dd_data;
+               max_cmds = iser_conn->max_cmds;
+
+               mutex_lock(&iser_conn->state_mutex);
+               if (iser_conn->state != ISER_CONN_UP) {
+                       iser_err("iser conn %p already started teardown\n",
+                                iser_conn);
+                       mutex_unlock(&iser_conn->state_mutex);
+                       goto free_host;
+               }
+
                 ib_conn = &iser_conn->ib_conn;
                 if (ib_conn->pi_support) {
                         u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;
  
                         scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
-                       if (iser_pi_guard)
-                               scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP);
-                       else
-                               scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC);
+                       scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
+                                                  SHOST_DIX_GUARD_CRC);
                 }
-       }
  
-       if (iscsi_host_add(shost, ep ?
-                          ib_conn->device->ib_device->dma_device : NULL))
-               goto free_host;
+               if (iscsi_host_add(shost,
+                                  ib_conn->device->ib_device->dma_device)) {
+                       mutex_unlock(&iser_conn->state_mutex);
+                       goto free_host;
+               }
+               mutex_unlock(&iser_conn->state_mutex);
+       } else {
+               max_cmds = ISER_DEF_XMIT_CMDS_MAX;
+               if (iscsi_host_add(shost, NULL))
+                       goto free_host;
+       }
  
-       if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) {
+       if (cmds_max > max_cmds) {
                 iser_info("cmds_max changed from %u to %u\n",
-                         cmds_max, ISER_DEF_XMIT_CMDS_MAX);
-               cmds_max = ISER_DEF_XMIT_CMDS_MAX;
+                         cmds_max, max_cmds);
+               cmds_max = max_cmds;
         }
  
         cls_session = iscsi_session_setup(&iscsi_iser_transport, shost,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h

index cd4174ca9a760dc70c0c369721407e1c2919a4c4..5ce26817e7e1d9b8d43126518188b3769e0c7f44 100644 (file)
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -69,34 +69,31 @@
  
  #define DRV_NAME       "iser"
  #define PFX            DRV_NAME ": "
-#define DRV_VER                "1.4.8"
+#define DRV_VER                "1.5"
  
  #define iser_dbg(fmt, arg...)                           \
         do {                                             \
-               if (iser_debug_level > 2)                \
+               if (unlikely(iser_debug_level > 2))      \
                         printk(KERN_DEBUG PFX "%s: " fmt,\
                                 __func__ , ## arg);      \
         } while (0)
  
  #define iser_warn(fmt, arg...)                         \
         do {                                            \
-               if (iser_debug_level > 0)               \
+               if (unlikely(iser_debug_level > 0))     \
                         pr_warn(PFX "%s: " fmt,         \
                                 __func__ , ## arg);     \
         } while (0)
  
  #define iser_info(fmt, arg...)                         \
         do {                                            \
-               if (iser_debug_level > 1)               \
+               if (unlikely(iser_debug_level > 1))     \
                         pr_info(PFX "%s: " fmt,         \
                                 __func__ , ## arg);     \
         } while (0)
  
-#define iser_err(fmt, arg...)                          \
-       do {                                            \
-               printk(KERN_ERR PFX "%s: " fmt,         \
-                      __func__ , ## arg);              \
-       } while (0)
+#define iser_err(fmt, arg...) \
+       pr_err(PFX "%s: " fmt, __func__ , ## arg)
  
  #define SHIFT_4K       12
  #define SIZE_4K        (1ULL << SHIFT_4K)
@@ -144,6 +141,11 @@
                                         ISER_MAX_TX_MISC_PDUS         + \
                                         ISER_MAX_RX_MISC_PDUS)
  
+#define ISER_GET_MAX_XMIT_CMDS(send_wr) ((send_wr                      \
+                                        - ISER_MAX_TX_MISC_PDUS        \
+                                        - ISER_MAX_RX_MISC_PDUS) /     \
+                                        (1 + ISER_INFLIGHT_DATAOUTS))
+
  #define ISER_WC_BATCH_COUNT   16
  #define ISER_SIGNAL_CMD_COUNT 32
  
@@ -247,7 +249,6 @@ struct iscsi_endpoint;
   * @va:           MR start address (buffer va)
   * @len:          MR length
   * @mem_h:        pointer to registration context (FMR/Fastreg)
- * @is_mr:        indicates weather we registered the buffer
   */
  struct iser_mem_reg {
         u32  lkey;
@@ -255,7 +256,6 @@ struct iser_mem_reg {
         u64  va;
         u64  len;
         void *mem_h;
-       int  is_mr;
  };
  
  /**
@@ -323,8 +323,6 @@ struct iser_rx_desc {
         char                         pad[ISER_RX_PAD_SIZE];
  } __attribute__((packed));
  
-#define ISER_MAX_CQ 4
-
  struct iser_conn;
  struct ib_conn;
  struct iscsi_iser_task;
@@ -375,7 +373,7 @@ struct iser_device {
         struct list_head             ig_list;
         int                          refcount;
         int                          comps_used;
-       struct iser_comp             comps[ISER_MAX_CQ];
+       struct iser_comp             *comps;
         int                          (*iser_alloc_rdma_reg_res)(struct ib_conn *ib_conn,
                                                                 unsigned cmds_max);
         void                         (*iser_free_rdma_reg_res)(struct ib_conn *ib_conn);
@@ -432,6 +430,7 @@ struct fast_reg_descriptor {
   * @cma_id:              rdma_cm connection maneger handle
   * @qp:                  Connection Queue-pair
   * @post_recv_buf_count: post receive counter
+ * @sig_count:           send work request signal count
   * @rx_wr:               receive work request for batch posts
   * @device:              reference to iser device
   * @comp:                iser completion context
@@ -452,6 +451,7 @@ struct ib_conn {
         struct rdma_cm_id           *cma_id;
         struct ib_qp                *qp;
         int                          post_recv_buf_count;
+       u8                           sig_count;
         struct ib_recv_wr            rx_wr[ISER_MIN_POSTED_RX];
         struct iser_device          *device;
         struct iser_comp            *comp;
@@ -482,6 +482,7 @@ struct ib_conn {
   *                    to max number of post recvs
   * @qp_max_recv_dtos_mask: (qp_max_recv_dtos - 1)
   * @min_posted_rx:    (qp_max_recv_dtos >> 2)
+ * @max_cmds:         maximum cmds allowed for this connection
   * @name:             connection peer portal
   * @release_work:     deffered work for release job
   * @state_mutex:      protects iser onnection state
@@ -507,6 +508,7 @@ struct iser_conn {
         unsigned                     qp_max_recv_dtos;
         unsigned                     qp_max_recv_dtos_mask;
         unsigned                     min_posted_rx;
+       u16                          max_cmds;
         char                         name[ISER_OBJECT_NAME_SIZE];
         struct work_struct           release_work;
         struct mutex                 state_mutex;
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c

index 5a489ea63732c0166b695ce21fe16c6bb3206413..3821633f1065b15a9fcc1b1c36a74eda619aeae3 100644 (file)
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -369,7 +369,7 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
         return 0;
  }
  
-static inline bool iser_signal_comp(int sig_count)
+static inline bool iser_signal_comp(u8 sig_count)
  {
         return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
  }
@@ -388,7 +388,7 @@ int iser_send_command(struct iscsi_conn *conn,
         struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
         struct scsi_cmnd *sc  =  task->sc;
         struct iser_tx_desc *tx_desc = &iser_task->desc;
-       static unsigned sig_count;
+       u8 sig_count = ++iser_conn->ib_conn.sig_count;
  
         edtl = ntohl(hdr->data_length);
  
@@ -435,7 +435,7 @@ int iser_send_command(struct iscsi_conn *conn,
         iser_task->status = ISER_TASK_STATUS_STARTED;
  
         err = iser_post_send(&iser_conn->ib_conn, tx_desc,
-                            iser_signal_comp(++sig_count));
+                            iser_signal_comp(sig_count));
         if (!err)
                 return 0;
  
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c

index 6c5ce357fba6baa19fc62d39bd4c9b79be8c48a2..abce9339333f0a8551a2e52600d2409edd39461c 100644 (file)
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -73,7 +73,6 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
  
         if (cmd_dir == ISER_DIR_OUT) {
                 /* copy the unaligned sg the buffer which is used for RDMA */
-               int i;
                 char *p, *from;
  
                 sgl = (struct scatterlist *)data->buf;
@@ -409,7 +408,6 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
                 regd_buf->reg.rkey = device->mr->rkey;
                 regd_buf->reg.len  = ib_sg_dma_len(ibdev, &sg[0]);
                 regd_buf->reg.va   = ib_sg_dma_address(ibdev, &sg[0]);
-               regd_buf->reg.is_mr = 0;
  
                 iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X  "
                          "va: 0x%08lX sz: %ld]\n",
@@ -440,13 +438,13 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
         return 0;
  }
  
-static inline void
+static void
  iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
                     struct ib_sig_domain *domain)
  {
         domain->sig_type = IB_SIG_TYPE_T10_DIF;
-       domain->sig.dif.pi_interval = sc->device->sector_size;
-       domain->sig.dif.ref_tag = scsi_get_lba(sc) & 0xffffffff;
+       domain->sig.dif.pi_interval = scsi_prot_interval(sc);
+       domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
         /*
          * At the moment we hard code those, but in the future
          * we will take them from sc.
@@ -454,8 +452,7 @@ iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
         domain->sig.dif.apptag_check_mask = 0xffff;
         domain->sig.dif.app_escape = true;
         domain->sig.dif.ref_escape = true;
-       if (scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE1 ||
-           scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE2)
+       if (sc->prot_flags & SCSI_PROT_REF_INCREMENT)
                 domain->sig.dif.ref_remap = true;
  };
  
@@ -473,26 +470,16 @@ iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
         case SCSI_PROT_WRITE_STRIP:
                 sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
                 iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
-               /*
-                * At the moment we use this modparam to tell what is
-                * the memory bg_type, in the future we will take it
-                * from sc.
-                */
-               sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM :
-                                                IB_T10DIF_CRC;
+               sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
+                                               IB_T10DIF_CSUM : IB_T10DIF_CRC;
                 break;
         case SCSI_PROT_READ_PASS:
         case SCSI_PROT_WRITE_PASS:
                 iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
                 sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
                 iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
-               /*
-                * At the moment we use this modparam to tell what is
-                * the memory bg_type, in the future we will take it
-                * from sc.
-                */
-               sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM :
-                                                IB_T10DIF_CRC;
+               sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
+                                               IB_T10DIF_CSUM : IB_T10DIF_CRC;
                 break;
         default:
                 iser_err("Unsupported PI operation %d\n",
@@ -503,26 +490,28 @@ iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
         return 0;
  }
  
-static int
+static inline void
  iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
  {
-       switch (scsi_get_prot_type(sc)) {
-       case SCSI_PROT_DIF_TYPE0:
-               break;
-       case SCSI_PROT_DIF_TYPE1:
-       case SCSI_PROT_DIF_TYPE2:
-               *mask = ISER_CHECK_GUARD | ISER_CHECK_REFTAG;
-               break;
-       case SCSI_PROT_DIF_TYPE3:
-               *mask = ISER_CHECK_GUARD;
-               break;
-       default:
-               iser_err("Unsupported protection type %d\n",
-                        scsi_get_prot_type(sc));
-               return -EINVAL;
-       }
+       *mask = 0;
+       if (sc->prot_flags & SCSI_PROT_REF_CHECK)
+               *mask |= ISER_CHECK_REFTAG;
+       if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
+               *mask |= ISER_CHECK_GUARD;
+}
  
-       return 0;
+static void
+iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
+{
+       u32 rkey;
+
+       memset(inv_wr, 0, sizeof(*inv_wr));
+       inv_wr->opcode = IB_WR_LOCAL_INV;
+       inv_wr->wr_id = ISER_FASTREG_LI_WRID;
+       inv_wr->ex.invalidate_rkey = mr->rkey;
+
+       rkey = ib_inc_rkey(mr->rkey);
+       ib_update_fast_reg_key(mr, rkey);
  }
  
  static int
@@ -536,26 +525,17 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
         struct ib_send_wr *bad_wr, *wr = NULL;
         struct ib_sig_attrs sig_attrs;
         int ret;
-       u32 key;
  
         memset(&sig_attrs, 0, sizeof(sig_attrs));
         ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
         if (ret)
                 goto err;
  
-       ret = iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);
-       if (ret)
-               goto err;
+       iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);
  
         if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) {
-               memset(&inv_wr, 0, sizeof(inv_wr));
-               inv_wr.opcode = IB_WR_LOCAL_INV;
-               inv_wr.wr_id = ISER_FASTREG_LI_WRID;
-               inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey;
+               iser_inv_rkey(&inv_wr, pi_ctx->sig_mr);
                 wr = &inv_wr;
-               /* Bump the key */
-               key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF);
-               ib_update_fast_reg_key(pi_ctx->sig_mr, ++key);
         }
  
         memset(&sig_wr, 0, sizeof(sig_wr));
@@ -585,12 +565,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
  
         sig_sge->lkey = pi_ctx->sig_mr->lkey;
         sig_sge->addr = 0;
-       sig_sge->length = data_sge->length + prot_sge->length;
-       if (scsi_get_prot_op(iser_task->sc) == SCSI_PROT_WRITE_INSERT ||
-           scsi_get_prot_op(iser_task->sc) == SCSI_PROT_READ_STRIP) {
-               sig_sge->length += (data_sge->length /
-                                  iser_task->sc->device->sector_size) * 8;
-       }
+       sig_sge->length = scsi_transfer_length(iser_task->sc);
  
         iser_dbg("sig_sge: addr: 0x%llx  length: %u lkey: 0x%x\n",
                  sig_sge->addr, sig_sge->length,
@@ -613,7 +588,6 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
         struct ib_fast_reg_page_list *frpl;
         struct ib_send_wr fastreg_wr, inv_wr;
         struct ib_send_wr *bad_wr, *wr = NULL;
-       u8 key;
         int ret, offset, size, plen;
  
         /* if there a single dma entry, dma mr suffices */
@@ -645,14 +619,8 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
         }
  
         if (!(desc->reg_indicators & ind)) {
-               memset(&inv_wr, 0, sizeof(inv_wr));
-               inv_wr.wr_id = ISER_FASTREG_LI_WRID;
-               inv_wr.opcode = IB_WR_LOCAL_INV;
-               inv_wr.ex.invalidate_rkey = mr->rkey;
+               iser_inv_rkey(&inv_wr, mr);
                 wr = &inv_wr;
-               /* Bump the key */
-               key = (u8)(mr->rkey & 0x000000FF);
-               ib_update_fast_reg_key(mr, ++key);
         }
  
         /* Prepare FASTREG WR */
@@ -770,15 +738,11 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
                 regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey;
                 regd_buf->reg.va = sig_sge.addr;
                 regd_buf->reg.len = sig_sge.length;
-               regd_buf->reg.is_mr = 1;
         } else {
-               if (desc) {
+               if (desc)
                         regd_buf->reg.rkey = desc->data_mr->rkey;
-                       regd_buf->reg.is_mr = 1;
-               } else {
+               else
                         regd_buf->reg.rkey = device->mr->rkey;
-                       regd_buf->reg.is_mr = 0;
-               }
  
                 regd_buf->reg.lkey = data_sge.lkey;
                 regd_buf->reg.va = data_sge.addr;
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c

index 67225bb82bb50bc4b35ac8f350a750e5e3d96797..695a2704bd4380acafbc04c4ae7f0ecd96bc95a0 100644 (file)
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -76,7 +76,7 @@ static void iser_event_handler(struct ib_event_handler *handler,
  static int iser_create_device_ib_res(struct iser_device *device)
  {
         struct ib_device_attr *dev_attr = &device->dev_attr;
-       int ret, i;
+       int ret, i, max_cqe;
  
         ret = ib_query_device(device->ib_device, dev_attr);
         if (ret) {
@@ -104,11 +104,19 @@ static int iser_create_device_ib_res(struct iser_device *device)
                 return -1;
         }
  
-       device->comps_used = min(ISER_MAX_CQ,
+       device->comps_used = min_t(int, num_online_cpus(),
                                  device->ib_device->num_comp_vectors);
-       iser_info("using %d CQs, device %s supports %d vectors\n",
+
+       device->comps = kcalloc(device->comps_used, sizeof(*device->comps),
+                               GFP_KERNEL);
+       if (!device->comps)
+               goto comps_err;
+
+       max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
+
+       iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
                   device->comps_used, device->ib_device->name,
-                 device->ib_device->num_comp_vectors);
+                 device->ib_device->num_comp_vectors, max_cqe);
  
         device->pd = ib_alloc_pd(device->ib_device);
         if (IS_ERR(device->pd))
@@ -122,7 +130,7 @@ static int iser_create_device_ib_res(struct iser_device *device)
                                         iser_cq_callback,
                                         iser_cq_event_callback,
                                         (void *)comp,
-                                       ISER_MAX_CQ_LEN, i);
+                                       max_cqe, i);
                 if (IS_ERR(comp->cq)) {
                         comp->cq = NULL;
                         goto cq_err;
@@ -162,6 +170,8 @@ cq_err:
         }
         ib_dealloc_pd(device->pd);
  pd_err:
+       kfree(device->comps);
+comps_err:
         iser_err("failed to allocate an IB resource\n");
         return -1;
  }
@@ -187,6 +197,9 @@ static void iser_free_device_ib_res(struct iser_device *device)
         (void)ib_dereg_mr(device->mr);
         (void)ib_dealloc_pd(device->pd);
  
+       kfree(device->comps);
+       device->comps = NULL;
+
         device->mr = NULL;
         device->pd = NULL;
  }
@@ -425,7 +438,10 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
   */
  static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
  {
+       struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
+                                                  ib_conn);
         struct iser_device      *device;
+       struct ib_device_attr *dev_attr;
         struct ib_qp_init_attr  init_attr;
         int                     ret = -ENOMEM;
         int index, min_index = 0;
@@ -433,6 +449,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
         BUG_ON(ib_conn->device == NULL);
  
         device = ib_conn->device;
+       dev_attr = &device->dev_attr;
  
         memset(&init_attr, 0, sizeof init_attr);
  
@@ -460,8 +477,20 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
         if (ib_conn->pi_support) {
                 init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
                 init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
+               iser_conn->max_cmds =
+                       ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
         } else {
-               init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS + 1;
+               if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
+                       init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS + 1;
+                       iser_conn->max_cmds =
+                               ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
+               } else {
+                       init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
+                       iser_conn->max_cmds =
+                               ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
+                       iser_dbg("device %s supports max_send_wr %d\n",
+                                device->ib_device->name, dev_attr->max_qp_wr);
+               }
         }
  
         ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
@@ -475,7 +504,11 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
         return ret;
  
  out_err:
+       mutex_lock(&ig.connlist_mutex);
+       ib_conn->comp->active_qps--;
+       mutex_unlock(&ig.connlist_mutex);
         iser_err("unable to alloc mem or create resource, err %d\n", ret);
+
         return ret;
  }
  
@@ -610,9 +643,11 @@ void iser_conn_release(struct iser_conn *iser_conn)
         mutex_unlock(&ig.connlist_mutex);
  
         mutex_lock(&iser_conn->state_mutex);
-       if (iser_conn->state != ISER_CONN_DOWN)
+       if (iser_conn->state != ISER_CONN_DOWN) {
                 iser_warn("iser conn %p state %d, expected state down.\n",
                           iser_conn, iser_conn->state);
+               iser_conn->state = ISER_CONN_DOWN;
+       }
         /*
          * In case we never got to bind stage, we still need to
          * release IB resources (which is safe to call more than once).
@@ -662,8 +697,10 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
  
                 /* post an indication that all flush errors were consumed */
                 err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
-               if (err)
+               if (err) {
                         iser_err("conn %p failed to post beacon", ib_conn);
+                       return 1;
+               }
  
                 wait_for_completion(&ib_conn->flush_comp);
         }
@@ -846,20 +883,21 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
                 break;
         case RDMA_CM_EVENT_DISCONNECTED:
         case RDMA_CM_EVENT_ADDR_CHANGE:
-               iser_disconnected_handler(cma_id);
+       case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+               iser_cleanup_handler(cma_id, false);
                 break;
         case RDMA_CM_EVENT_DEVICE_REMOVAL:
                 /*
                  * we *must* destroy the device as we cannot rely
                  * on iscsid to be around to initiate error handling.
-                * also implicitly destroy the cma_id.
+                * also if we are not in state DOWN implicitly destroy
+                * the cma_id.
                  */
                 iser_cleanup_handler(cma_id, true);
-               iser_conn->ib_conn.cma_id = NULL;
-               ret = 1;
-               break;
-       case RDMA_CM_EVENT_TIMEWAIT_EXIT:
-               iser_cleanup_handler(cma_id, false);
+               if (iser_conn->state != ISER_CONN_DOWN) {
+                       iser_conn->ib_conn.cma_id = NULL;
+                       ret = 1;
+               }
                 break;
         default:
                 iser_err("Unexpected RDMA CM event (%d)\n", event->event);
@@ -981,7 +1019,6 @@ int iser_reg_page_vec(struct ib_conn *ib_conn,
         mem_reg->rkey  = mem->fmr->rkey;
         mem_reg->len   = page_vec->length * SIZE_4K;
         mem_reg->va    = io_addr;
-       mem_reg->is_mr = 1;
         mem_reg->mem_h = (void *)mem;
  
         mem_reg->va   += page_vec->offset;
@@ -1008,7 +1045,7 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
         struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
         int ret;
  
-       if (!reg->is_mr)
+       if (!reg->mem_h)
                 return;
  
         iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
@@ -1028,11 +1065,10 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
         struct ib_conn *ib_conn = &iser_conn->ib_conn;
         struct fast_reg_descriptor *desc = reg->mem_h;
  
-       if (!reg->is_mr)
+       if (!desc)
                 return;
  
         reg->mem_h = NULL;
-       reg->is_mr = 0;
         spin_lock_bh(&ib_conn->lock);
         list_add_tail(&desc->list, &ib_conn->fastreg.pool);
         spin_unlock_bh(&ib_conn->lock);
@@ -1049,7 +1085,7 @@ int iser_post_recvl(struct iser_conn *iser_conn)
         sge.length = ISER_RX_LOGIN_SIZE;
         sge.lkey   = ib_conn->device->mr->lkey;
  
-       rx_wr.wr_id   = (unsigned long)iser_conn->login_resp_buf;
+       rx_wr.wr_id   = (uintptr_t)iser_conn->login_resp_buf;
         rx_wr.sg_list = &sge;
         rx_wr.num_sge = 1;
         rx_wr.next    = NULL;
@@ -1073,7 +1109,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count)
  
         for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
                 rx_desc         = &iser_conn->rx_descs[my_rx_head];
-               rx_wr->wr_id    = (unsigned long)rx_desc;
+               rx_wr->wr_id    = (uintptr_t)rx_desc;
                 rx_wr->sg_list  = &rx_desc->rx_sg;
                 rx_wr->num_sge  = 1;
                 rx_wr->next     = rx_wr + 1;
@@ -1110,7 +1146,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
                                       DMA_TO_DEVICE);
  
         send_wr.next       = NULL;
-       send_wr.wr_id      = (unsigned long)tx_desc;
+       send_wr.wr_id      = (uintptr_t)tx_desc;
         send_wr.sg_list    = tx_desc->tx_sg;
         send_wr.num_sge    = tx_desc->num_sge;
         send_wr.opcode     = IB_WR_SEND;
@@ -1160,6 +1196,7 @@ static void
  iser_handle_comp_error(struct ib_conn *ib_conn,
                        struct ib_wc *wc)
  {
+       void *wr_id = (void *)(uintptr_t)wc->wr_id;
         struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
                                                    ib_conn);
  
@@ -1168,8 +1205,8 @@ iser_handle_comp_error(struct ib_conn *ib_conn,
                         iscsi_conn_failure(iser_conn->iscsi_conn,
                                            ISCSI_ERR_CONN_FAILED);
  
-       if (is_iser_tx_desc(iser_conn, (void *)wc->wr_id)) {
-               struct iser_tx_desc *desc = (struct iser_tx_desc *)wc->wr_id;
+       if (is_iser_tx_desc(iser_conn, wr_id)) {
+               struct iser_tx_desc *desc = wr_id;
  
                 if (desc->type == ISCSI_TX_DATAOUT)
                         kmem_cache_free(ig.desc_cache, desc);
@@ -1193,14 +1230,14 @@ static void iser_handle_wc(struct ib_wc *wc)
         struct iser_rx_desc *rx_desc;
  
         ib_conn = wc->qp->qp_context;
-       if (wc->status == IB_WC_SUCCESS) {
+       if (likely(wc->status == IB_WC_SUCCESS)) {
                 if (wc->opcode == IB_WC_RECV) {
-                       rx_desc = (struct iser_rx_desc *)wc->wr_id;
+                       rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
                         iser_rcv_completion(rx_desc, wc->byte_len,
                                             ib_conn);
                 } else
                 if (wc->opcode == IB_WC_SEND) {
-                       tx_desc = (struct iser_tx_desc *)wc->wr_id;
+                       tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
                         iser_snd_completion(tx_desc, ib_conn);
                 } else {
                         iser_err("Unknown wc opcode %d\n", wc->opcode);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c

index 5461924c9f10ef9311a8f11a5091170c4597281a..db3c8c851af16cd22524f080d2db91581621b020 100644 (file)
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -2929,7 +2929,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
                 return -ENOMEM;
  
         sep_opt = options;
-       while ((p = strsep(&sep_opt, ",")) != NULL) {
+       while ((p = strsep(&sep_opt, ",\n")) != NULL) {
                 if (!*p)
                         continue;
  
diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig

index 3c89fcbc621efe8ab78c865cc00fd84af81dd018..49cd30870e0d6ff6fb293fc33fd0af850cd6590c 100644 (file)
--- a/drivers/media/Kconfig
+++ b/drivers/media/Kconfig
@@ -160,7 +160,6 @@ source "drivers/media/usb/Kconfig"
  source "drivers/media/pci/Kconfig"
  source "drivers/media/platform/Kconfig"
  source "drivers/media/mmc/Kconfig"
-source "drivers/media/parport/Kconfig"
  source "drivers/media/radio/Kconfig"
  
  comment "Supported FireWire (IEEE 1394) Adapters"
diff --git a/drivers/media/Makefile b/drivers/media/Makefile

index 620f275a45c9903069a1ebd7bb24370de71f3f4e..e608bbce0c3546c244a2570fc2c19d339f433e2a 100644 (file)
--- a/drivers/media/Makefile
+++ b/drivers/media/Makefile
@@ -28,6 +28,6 @@ obj-y += rc/
  # Finally, merge the drivers that require the core
  #
  
-obj-y += common/ platform/ pci/ usb/ mmc/ firewire/ parport/
+obj-y += common/ platform/ pci/ usb/ mmc/ firewire/
  obj-$(CONFIG_VIDEO_DEV) += radio/
  
diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig

index f40b4cf6107a4ec42d80ade00729c20303ad32bf..205d71364343dea06e1f1833ffa121923ad1a6b8 100644 (file)
--- a/drivers/media/i2c/Kconfig
+++ b/drivers/media/i2c/Kconfig
@@ -284,15 +284,6 @@ config VIDEO_SAA711X
           To compile this driver as a module, choose M here: the
           module will be called saa7115.
  
-config VIDEO_SAA7191
-       tristate "Philips SAA7191 video decoder"
-       depends on VIDEO_V4L2 && I2C
-       ---help---
-         Support for the Philips SAA7191 video decoder.
-
-         To compile this driver as a module, choose M here: the
-         module will be called saa7191.
-
  config VIDEO_TVP514X
         tristate "Texas Instruments TVP514x video decoder"
         depends on VIDEO_V4L2 && I2C
diff --git a/drivers/media/i2c/Makefile b/drivers/media/i2c/Makefile

index 01ae9328e5821e6ec2b70b46b915865efc8c8cf3..9858900168bfe967dc4d1957361b1c9622f83689 100644 (file)
--- a/drivers/media/i2c/Makefile
+++ b/drivers/media/i2c/Makefile
@@ -18,7 +18,6 @@ obj-$(CONFIG_VIDEO_SAA711X) += saa7115.o
  obj-$(CONFIG_VIDEO_SAA717X) += saa717x.o
  obj-$(CONFIG_VIDEO_SAA7127) += saa7127.o
  obj-$(CONFIG_VIDEO_SAA7185) += saa7185.o
-obj-$(CONFIG_VIDEO_SAA7191) += saa7191.o
  obj-$(CONFIG_VIDEO_SAA6752HS) += saa6752hs.o
  obj-$(CONFIG_VIDEO_ADV7170) += adv7170.o
  obj-$(CONFIG_VIDEO_ADV7175) += adv7175.o
diff --git a/drivers/media/pci/cx88/cx88-blackbird.c b/drivers/media/pci/cx88/cx88-blackbird.c

index 4160ca4e541344f9b4f8f49f2eeab24dc57db4f0..d3c79d964f2cda152691302eb462939aed59e870 100644 (file)
--- a/drivers/media/pci/cx88/cx88-blackbird.c
+++ b/drivers/media/pci/cx88/cx88-blackbird.c
@@ -647,6 +647,7 @@ static int queue_setup(struct vb2_queue *q, const struct v4l2_format *fmt,
         dev->ts_packet_size  = 188 * 4;
         dev->ts_packet_count  = 32;
         sizes[0] = dev->ts_packet_size * dev->ts_packet_count;
+       alloc_ctxs[0] = dev->alloc_ctx;
         return 0;
  }
  
@@ -662,14 +663,11 @@ static void buffer_finish(struct vb2_buffer *vb)
  {
         struct cx8802_dev *dev = vb->vb2_queue->drv_priv;
         struct cx88_buffer *buf = container_of(vb, struct cx88_buffer, vb);
-       struct sg_table *sgt = vb2_dma_sg_plane_desc(vb, 0);
         struct cx88_riscmem *risc = &buf->risc;
  
         if (risc->cpu)
                 pci_free_consistent(dev->pci, risc->size, risc->cpu, risc->dma);
         memset(risc, 0, sizeof(*risc));
-
-       dma_unmap_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
  }
  
  static void buffer_queue(struct vb2_buffer *vb)
diff --git a/drivers/media/pci/cx88/cx88-dvb.c b/drivers/media/pci/cx88/cx88-dvb.c

index c344bfd0b8961909ff4586e8c5f87dcd3d7af6a3..5780e2f013b4972e6677d66b221952485ddf7262 100644 (file)
--- a/drivers/media/pci/cx88/cx88-dvb.c
+++ b/drivers/media/pci/cx88/cx88-dvb.c
@@ -92,6 +92,7 @@ static int queue_setup(struct vb2_queue *q, const struct v4l2_format *fmt,
         dev->ts_packet_size  = 188 * 4;
         dev->ts_packet_count = dvb_buf_tscnt;
         sizes[0] = dev->ts_packet_size * dev->ts_packet_count;
+       alloc_ctxs[0] = dev->alloc_ctx;
         *num_buffers = dvb_buf_tscnt;
         return 0;
  }
@@ -108,14 +109,11 @@ static void buffer_finish(struct vb2_buffer *vb)
  {
         struct cx8802_dev *dev = vb->vb2_queue->drv_priv;
         struct cx88_buffer *buf = container_of(vb, struct cx88_buffer, vb);
-       struct sg_table *sgt = vb2_dma_sg_plane_desc(vb, 0);
         struct cx88_riscmem *risc = &buf->risc;
  
         if (risc->cpu)
                 pci_free_consistent(dev->pci, risc->size, risc->cpu, risc->dma);
         memset(risc, 0, sizeof(*risc));
-
-       dma_unmap_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
  }
  
  static void buffer_queue(struct vb2_buffer *vb)
diff --git a/drivers/media/pci/cx88/cx88-mpeg.c b/drivers/media/pci/cx88/cx88-mpeg.c

index f181a3a10389c9bb0021c7d2bb7284959b3190aa..1c1f69e6b0b9177b7bed4f13f640d7466c742dfd 100644 (file)
--- a/drivers/media/pci/cx88/cx88-mpeg.c
+++ b/drivers/media/pci/cx88/cx88-mpeg.c
@@ -235,10 +235,6 @@ int cx8802_buf_prepare(struct vb2_queue *q, struct cx8802_dev *dev,
                 return -EINVAL;
         vb2_set_plane_payload(&buf->vb, 0, size);
  
-       rc = dma_map_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
-       if (!rc)
-               return -EIO;
-
         rc = cx88_risc_databuffer(dev->pci, risc, sgt->sgl,
                              dev->ts_packet_size, dev->ts_packet_count, 0);
         if (rc) {
@@ -733,6 +729,11 @@ static int cx8802_probe(struct pci_dev *pci_dev,
         if (NULL == dev)
                 goto fail_core;
         dev->pci = pci_dev;
+       dev->alloc_ctx = vb2_dma_sg_init_ctx(&pci_dev->dev);
+       if (IS_ERR(dev->alloc_ctx)) {
+               err = PTR_ERR(dev->alloc_ctx);
+               goto fail_core;
+       }
         dev->core = core;
  
         /* Maintain a reference so cx88-video can query the 8802 device. */
@@ -752,6 +753,7 @@ static int cx8802_probe(struct pci_dev *pci_dev,
         return 0;
  
   fail_free:
+       vb2_dma_sg_cleanup_ctx(dev->alloc_ctx);
         kfree(dev);
   fail_core:
         core->dvbdev = NULL;
@@ -798,6 +800,7 @@ static void cx8802_remove(struct pci_dev *pci_dev)
         /* common */
         cx8802_fini_common(dev);
         cx88_core_put(dev->core,dev->pci);
+       vb2_dma_sg_cleanup_ctx(dev->alloc_ctx);
         kfree(dev);
  }
  
diff --git a/drivers/media/pci/cx88/cx88-vbi.c b/drivers/media/pci/cx88/cx88-vbi.c

index 6ab6e27648f638fcf49ca48a603b5b324125aeef..32eb7fdb875e294268bedf94f265d9ac48a41ab3 100644 (file)
--- a/drivers/media/pci/cx88/cx88-vbi.c
+++ b/drivers/media/pci/cx88/cx88-vbi.c
@@ -120,6 +120,7 @@ static int queue_setup(struct vb2_queue *q, const struct v4l2_format *fmt,
                 sizes[0] = VBI_LINE_NTSC_COUNT * VBI_LINE_LENGTH * 2;
         else
                 sizes[0] = VBI_LINE_PAL_COUNT * VBI_LINE_LENGTH * 2;
+       alloc_ctxs[0] = dev->alloc_ctx;
         return 0;
  }
  
@@ -131,7 +132,6 @@ static int buffer_prepare(struct vb2_buffer *vb)
         struct sg_table *sgt = vb2_dma_sg_plane_desc(vb, 0);
         unsigned int lines;
         unsigned int size;
-       int rc;
  
         if (dev->core->tvnorm & V4L2_STD_525_60)
                 lines = VBI_LINE_NTSC_COUNT;
@@ -142,10 +142,6 @@ static int buffer_prepare(struct vb2_buffer *vb)
                 return -EINVAL;
         vb2_set_plane_payload(vb, 0, size);
  
-       rc = dma_map_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
-       if (!rc)
-               return -EIO;
-
         cx88_risc_buffer(dev->pci, &buf->risc, sgt->sgl,
                          0, VBI_LINE_LENGTH * lines,
                          VBI_LINE_LENGTH, 0,
@@ -157,14 +153,11 @@ static void buffer_finish(struct vb2_buffer *vb)
  {
         struct cx8800_dev *dev = vb->vb2_queue->drv_priv;
         struct cx88_buffer *buf = container_of(vb, struct cx88_buffer, vb);
-       struct sg_table *sgt = vb2_dma_sg_plane_desc(vb, 0);
         struct cx88_riscmem *risc = &buf->risc;
  
         if (risc->cpu)
                 pci_free_consistent(dev->pci, risc->size, risc->cpu, risc->dma);
         memset(risc, 0, sizeof(*risc));
-
-       dma_unmap_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
  }
  
  static void buffer_queue(struct vb2_buffer *vb)
diff --git a/drivers/media/pci/cx88/cx88-video.c b/drivers/media/pci/cx88/cx88-video.c

index a64ae31ae142b2a3e9542b65a45f393c29d77b82..860c98fc72c7f95f12763fc2c0edfaacb8a76890 100644 (file)
--- a/drivers/media/pci/cx88/cx88-video.c
+++ b/drivers/media/pci/cx88/cx88-video.c
@@ -440,6 +440,7 @@ static int queue_setup(struct vb2_queue *q, const struct v4l2_format *fmt,
  
         *num_planes = 1;
         sizes[0] = (dev->fmt->depth * core->width * core->height) >> 3;
+       alloc_ctxs[0] = dev->alloc_ctx;
         return 0;
  }
  
@@ -449,7 +450,6 @@ static int buffer_prepare(struct vb2_buffer *vb)
         struct cx88_core *core = dev->core;
         struct cx88_buffer *buf = container_of(vb, struct cx88_buffer, vb);
         struct sg_table *sgt = vb2_dma_sg_plane_desc(vb, 0);
-       int rc;
  
         buf->bpl = core->width * dev->fmt->depth >> 3;
  
@@ -457,10 +457,6 @@ static int buffer_prepare(struct vb2_buffer *vb)
                 return -EINVAL;
         vb2_set_plane_payload(vb, 0, core->height * buf->bpl);
  
-       rc = dma_map_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
-       if (!rc)
-               return -EIO;
-
         switch (core->field) {
         case V4L2_FIELD_TOP:
                 cx88_risc_buffer(dev->pci, &buf->risc,
@@ -505,14 +501,11 @@ static void buffer_finish(struct vb2_buffer *vb)
  {
         struct cx8800_dev *dev = vb->vb2_queue->drv_priv;
         struct cx88_buffer *buf = container_of(vb, struct cx88_buffer, vb);
-       struct sg_table *sgt = vb2_dma_sg_plane_desc(vb, 0);
         struct cx88_riscmem *risc = &buf->risc;
  
         if (risc->cpu)
                 pci_free_consistent(dev->pci, risc->size, risc->cpu, risc->dma);
         memset(risc, 0, sizeof(*risc));
-
-       dma_unmap_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
  }
  
  static void buffer_queue(struct vb2_buffer *vb)
@@ -530,7 +523,6 @@ static void buffer_queue(struct vb2_buffer *vb)
  
         if (list_empty(&q->active)) {
                 list_add_tail(&buf->list, &q->active);
-               start_video_dma(dev, q, buf);
                 buf->count    = q->count++;
                 dprintk(2,"[%p/%d] buffer_queue - first active\n",
                         buf, buf->vb.v4l2_buf.index);
@@ -1345,6 +1337,12 @@ static int cx8800_initdev(struct pci_dev *pci_dev,
                 err = -EIO;
                 goto fail_core;
         }
+       dev->alloc_ctx = vb2_dma_sg_init_ctx(&pci_dev->dev);
+       if (IS_ERR(dev->alloc_ctx)) {
+               err = PTR_ERR(dev->alloc_ctx);
+               goto fail_core;
+       }
+
  
         /* initialize driver struct */
         spin_lock_init(&dev->slock);
@@ -1549,6 +1547,7 @@ fail_unreg:
         free_irq(pci_dev->irq, dev);
         mutex_unlock(&core->lock);
  fail_core:
+       vb2_dma_sg_cleanup_ctx(dev->alloc_ctx);
         core->v4ldev = NULL;
         cx88_core_put(core,dev->pci);
  fail_free:
@@ -1582,6 +1581,7 @@ static void cx8800_finidev(struct pci_dev *pci_dev)
  
         /* free memory */
         cx88_core_put(core,dev->pci);
+       vb2_dma_sg_cleanup_ctx(dev->alloc_ctx);
         kfree(dev);
  }
  
diff --git a/drivers/media/pci/cx88/cx88.h b/drivers/media/pci/cx88/cx88.h

index 3b0ae754f16588b86ab8695aabeedbd9aa3795fd..7748ca9abb09c31a462d9450aea1f6a06e1af19c 100644 (file)
--- a/drivers/media/pci/cx88/cx88.h
+++ b/drivers/media/pci/cx88/cx88.h
@@ -485,6 +485,7 @@ struct cx8800_dev {
         /* pci i/o */
         struct pci_dev             *pci;
         unsigned char              pci_rev,pci_lat;
+       void                       *alloc_ctx;
  
         const struct cx8800_fmt    *fmt;
  
@@ -548,6 +549,7 @@ struct cx8802_dev {
         /* pci i/o */
         struct pci_dev             *pci;
         unsigned char              pci_rev,pci_lat;
+       void                       *alloc_ctx;
  
         /* dma queues */
         struct cx88_dmaqueue       mpegq;
diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig

index 0c61155699f77f3992dcb83ce44712469d3aee80..dba29b80184b822e5908a0d54373f6cc1278b459 100644 (file)
--- a/drivers/media/platform/Kconfig
+++ b/drivers/media/platform/Kconfig
@@ -65,14 +65,6 @@ config VIDEO_TIMBERDALE
         ---help---
           Add support for the Video In peripherial of the timberdale FPGA.
  
-config VIDEO_VINO
-       tristate "SGI Vino Video For Linux"
-       depends on I2C && SGI_IP22 && VIDEO_V4L2
-       select VIDEO_SAA7191 if MEDIA_SUBDRV_AUTOSELECT
-       help
-         Say Y here to build in support for the Vino video input system found
-         on SGI Indy machines.
-
  config VIDEO_M32R_AR
         tristate "AR devices"
         depends on VIDEO_V4L2
diff --git a/drivers/media/platform/Makefile b/drivers/media/platform/Makefile

index b818afb4d336b4d007da72efcdf5c511063bf9d8..a49936b8ce8a5e175e18fb3f5df5142c8590f8b0 100644 (file)
--- a/drivers/media/platform/Makefile
+++ b/drivers/media/platform/Makefile
@@ -2,9 +2,6 @@
  # Makefile for the video capture/playback device drivers.
  #
  
-obj-$(CONFIG_VIDEO_VINO) += indycam.o
-obj-$(CONFIG_VIDEO_VINO) += vino.o
-
  obj-$(CONFIG_VIDEO_TIMBERDALE) += timblogiw.o
  obj-$(CONFIG_VIDEO_M32R_AR_M64278) += arv.o
  
diff --git a/drivers/media/platform/soc_camera/rcar_vin.c b/drivers/media/platform/soc_camera/rcar_vin.c

index 126ac7c5b6fd4044bff8dc69059d8e7d0b245715..0c1f5564810627ec88242ac373600f03a854387d 100644 (file)
--- a/drivers/media/platform/soc_camera/rcar_vin.c
+++ b/drivers/media/platform/soc_camera/rcar_vin.c
@@ -64,6 +64,30 @@
  #define VNDMR_REG      0x58    /* Video n Data Mode Register */
  #define VNDMR2_REG     0x5C    /* Video n Data Mode Register 2 */
  #define VNUVAOF_REG    0x60    /* Video n UV Address Offset Register */
+#define VNC1A_REG      0x80    /* Video n Coefficient Set C1A Register */
+#define VNC1B_REG      0x84    /* Video n Coefficient Set C1B Register */
+#define VNC1C_REG      0x88    /* Video n Coefficient Set C1C Register */
+#define VNC2A_REG      0x90    /* Video n Coefficient Set C2A Register */
+#define VNC2B_REG      0x94    /* Video n Coefficient Set C2B Register */
+#define VNC2C_REG      0x98    /* Video n Coefficient Set C2C Register */
+#define VNC3A_REG      0xA0    /* Video n Coefficient Set C3A Register */
+#define VNC3B_REG      0xA4    /* Video n Coefficient Set C3B Register */
+#define VNC3C_REG      0xA8    /* Video n Coefficient Set C3C Register */
+#define VNC4A_REG      0xB0    /* Video n Coefficient Set C4A Register */
+#define VNC4B_REG      0xB4    /* Video n Coefficient Set C4B Register */
+#define VNC4C_REG      0xB8    /* Video n Coefficient Set C4C Register */
+#define VNC5A_REG      0xC0    /* Video n Coefficient Set C5A Register */
+#define VNC5B_REG      0xC4    /* Video n Coefficient Set C5B Register */
+#define VNC5C_REG      0xC8    /* Video n Coefficient Set C5C Register */
+#define VNC6A_REG      0xD0    /* Video n Coefficient Set C6A Register */
+#define VNC6B_REG      0xD4    /* Video n Coefficient Set C6B Register */
+#define VNC6C_REG      0xD8    /* Video n Coefficient Set C6C Register */
+#define VNC7A_REG      0xE0    /* Video n Coefficient Set C7A Register */
+#define VNC7B_REG      0xE4    /* Video n Coefficient Set C7B Register */
+#define VNC7C_REG      0xE8    /* Video n Coefficient Set C7C Register */
+#define VNC8A_REG      0xF0    /* Video n Coefficient Set C8A Register */
+#define VNC8B_REG      0xF4    /* Video n Coefficient Set C8B Register */
+#define VNC8C_REG      0xF8    /* Video n Coefficient Set C8C Register */
  
  /* Register bit fields for R-Car VIN */
  /* Video n Main Control Register bits */
@@ -106,6 +130,7 @@
  #define VNDMR2_VPS             (1 << 30)
  #define VNDMR2_HPS             (1 << 29)
  #define VNDMR2_FTEV            (1 << 17)
+#define VNDMR2_VLV(n)          (((n) & 0xf) << 12)     /* low 4 bits of n placed at bits 15:12; n parenthesized so expression arguments mask correctly */
  
  #define VIN_MAX_WIDTH          2048
  #define VIN_MAX_HEIGHT         2048
@@ -117,6 +142,324 @@ enum chip_id {
         RCAR_E1,
  };
  
+struct vin_coeff {
+       unsigned short xs_value;        /* lookup key: set_coeff() compares its xs argument against this */
+       u32 coeff_set[24];              /* written in index order to VNC1A..VNC8C by set_coeff() */
+};
+
+static const struct vin_coeff vin_coeff_set[] = {
+       { 0x0000, {
+               0x00000000,             0x00000000,             0x00000000,
+               0x00000000,             0x00000000,             0x00000000,
+               0x00000000,             0x00000000,             0x00000000,
+               0x00000000,             0x00000000,             0x00000000,
+               0x00000000,             0x00000000,             0x00000000,
+               0x00000000,             0x00000000,             0x00000000,
+               0x00000000,             0x00000000,             0x00000000,
+               0x00000000,             0x00000000,             0x00000000 },
+       },
+       { 0x1000, {
+               0x000fa400,             0x000fa400,             0x09625902,
+               0x000003f8,             0x00000403,             0x3de0d9f0,
+               0x001fffed,             0x00000804,             0x3cc1f9c3,
+               0x001003de,             0x00000c01,             0x3cb34d7f,
+               0x002003d2,             0x00000c00,             0x3d24a92d,
+               0x00200bca,             0x00000bff,             0x3df600d2,
+               0x002013cc,             0x000007ff,             0x3ed70c7e,
+               0x00100fde,             0x00000000,             0x3f87c036 },
+       },
+       { 0x1200, {
+               0x002ffff1,             0x002ffff1,             0x02a0a9c8,
+               0x002003e7,             0x001ffffa,             0x000185bc,
+               0x002007dc,             0x000003ff,             0x3e52859c,
+               0x00200bd4,             0x00000002,             0x3d53996b,
+               0x00100fd0,             0x00000403,             0x3d04ad2d,
+               0x00000bd5,             0x00000403,             0x3d35ace7,
+               0x3ff003e4,             0x00000801,             0x3dc674a1,
+               0x3fffe800,             0x00000800,             0x3e76f461 },
+       },
+       { 0x1400, {
+               0x00100be3,             0x00100be3,             0x04d1359a,
+               0x00000fdb,             0x002003ed,             0x0211fd93,
+               0x00000fd6,             0x002003f4,             0x0002d97b,
+               0x000007d6,             0x002ffffb,             0x3e93b956,
+               0x3ff003da,             0x001003ff,             0x3db49926,
+               0x3fffefe9,             0x00100001,             0x3d655cee,
+               0x3fffd400,             0x00000003,             0x3d65f4b6,
+               0x000fb421,             0x00000402,             0x3dc6547e },
+       },
+       { 0x1600, {
+               0x00000bdd,             0x00000bdd,             0x06519578,
+               0x3ff007da,             0x00000be3,             0x03c24973,
+               0x3ff003d9,             0x00000be9,             0x01b30d5f,
+               0x3ffff7df,             0x001003f1,             0x0003c542,
+               0x000fdfec,             0x001003f7,             0x3ec4711d,
+               0x000fc400,             0x002ffffd,             0x3df504f1,
+               0x001fa81a,             0x002ffc00,             0x3d957cc2,
+               0x002f8c3c,             0x00100000,             0x3db5c891 },
+       },
+       { 0x1800, {
+               0x3ff003dc,             0x3ff003dc,             0x0791e558,
+               0x000ff7dd,             0x3ff007de,             0x05328554,
+               0x000fe7e3,             0x3ff00be2,             0x03232546,
+               0x000fd7ee,             0x000007e9,             0x0143bd30,
+               0x001fb800,             0x000007ee,             0x00044511,
+               0x002fa015,             0x000007f4,             0x3ef4bcee,
+               0x002f8832,             0x001003f9,             0x3e4514c7,
+               0x001f7853,             0x001003fd,             0x3de54c9f },
+       },
+       { 0x1a00, {
+               0x000fefe0,             0x000fefe0,             0x08721d3c,
+               0x001fdbe7,             0x000ffbde,             0x0652a139,
+               0x001fcbf0,             0x000003df,             0x0463292e,
+               0x002fb3ff,             0x3ff007e3,             0x0293a91d,
+               0x002f9c12,             0x3ff00be7,             0x01241905,
+               0x001f8c29,             0x000007ed,             0x3fe470eb,
+               0x000f7c46,             0x000007f2,             0x3f04b8ca,
+               0x3fef7865,             0x000007f6,             0x3e74e4a8 },
+       },
+       { 0x1c00, {
+               0x001fd3e9,             0x001fd3e9,             0x08f23d26,
+               0x002fbff3,             0x001fe3e4,             0x0712ad23,
+               0x002fa800,             0x000ff3e0,             0x05631d1b,
+               0x001f9810,             0x000ffbe1,             0x03b3890d,
+               0x000f8c23,             0x000003e3,             0x0233e8fa,
+               0x3fef843b,             0x000003e7,             0x00f430e4,
+               0x3fbf8456,             0x3ff00bea,             0x00046cc8,
+               0x3f8f8c72,             0x3ff00bef,             0x3f3490ac },
+       },
+       { 0x1e00, {
+               0x001fbbf4,             0x001fbbf4,             0x09425112,
+               0x001fa800,             0x002fc7ed,             0x0792b110,
+               0x000f980e,             0x001fdbe6,             0x0613110a,
+               0x3fff8c20,             0x001fe7e3,             0x04a368fd,
+               0x3fcf8c33,             0x000ff7e2,             0x0343b8ed,
+               0x3f9f8c4a,             0x000fffe3,             0x0203f8da,
+               0x3f5f9c61,             0x000003e6,             0x00e428c5,
+               0x3f1fb07b,             0x000003eb,             0x3fe440af },
+       },
+       { 0x2000, {
+               0x000fa400,             0x000fa400,             0x09625902,
+               0x3fff980c,             0x001fb7f5,             0x0812b0ff,
+               0x3fdf901c,             0x001fc7ed,             0x06b2fcfa,
+               0x3faf902d,             0x001fd3e8,             0x055348f1,
+               0x3f7f983f,             0x001fe3e5,             0x04038ce3,
+               0x3f3fa454,             0x001fefe3,             0x02e3c8d1,
+               0x3f0fb86a,             0x001ff7e4,             0x01c3e8c0,
+               0x3ecfd880,             0x000fffe6,             0x00c404ac },
+       },
+       { 0x2200, {
+               0x3fdf9c0b,             0x3fdf9c0b,             0x09725cf4,
+               0x3fbf9818,             0x3fffa400,             0x0842a8f1,
+               0x3f8f9827,             0x000fb3f7,             0x0702f0ec,
+               0x3f5fa037,             0x000fc3ef,             0x05d330e4,
+               0x3f2fac49,             0x001fcfea,             0x04a364d9,
+               0x3effc05c,             0x001fdbe7,             0x038394ca,
+               0x3ecfdc6f,             0x001fe7e6,             0x0273b0bb,
+               0x3ea00083,             0x001fefe6,             0x0183c0a9 },
+       },
+       { 0x2400, {
+               0x3f9fa014,             0x3f9fa014,             0x098260e6,
+               0x3f7f9c23,             0x3fcf9c0a,             0x08629ce5,
+               0x3f4fa431,             0x3fefa400,             0x0742d8e1,
+               0x3f1fb440,             0x3fffb3f8,             0x062310d9,
+               0x3eefc850,             0x000fbbf2,             0x050340d0,
+               0x3ecfe062,             0x000fcbec,             0x041364c2,
+               0x3ea00073,             0x001fd3ea,             0x03037cb5,
+               0x3e902086,             0x001fdfe8,             0x022388a5 },
+       },
+       { 0x2600, {
+               0x3f5fa81e,             0x3f5fa81e,             0x096258da,
+               0x3f3fac2b,             0x3f8fa412,             0x088290d8,
+               0x3f0fbc38,             0x3fafa408,             0x0772c8d5,
+               0x3eefcc47,             0x3fcfa800,             0x0672f4ce,
+               0x3ecfe456,             0x3fefaffa,             0x05531cc6,
+               0x3eb00066,             0x3fffbbf3,             0x047334bb,
+               0x3ea01c77,             0x000fc7ee,             0x039348ae,
+               0x3ea04486,             0x000fd3eb,             0x02b350a1 },
+       },
+       { 0x2800, {
+               0x3f2fb426,             0x3f2fb426,             0x094250ce,
+               0x3f0fc032,             0x3f4fac1b,             0x086284cd,
+               0x3eefd040,             0x3f7fa811,             0x0782acc9,
+               0x3ecfe84c,             0x3f9fa807,             0x06a2d8c4,
+               0x3eb0005b,             0x3fbfac00,             0x05b2f4bc,
+               0x3eb0186a,             0x3fdfb3fa,             0x04c308b4,
+               0x3eb04077,             0x3fefbbf4,             0x03f31ca8,
+               0x3ec06884,             0x000fbff2,             0x03031c9e },
+       },
+       { 0x2a00, {
+               0x3f0fc42d,             0x3f0fc42d,             0x090240c4,
+               0x3eefd439,             0x3f2fb822,             0x08526cc2,
+               0x3edfe845,             0x3f4fb018,             0x078294bf,
+               0x3ec00051,             0x3f6fac0f,             0x06b2b4bb,
+               0x3ec0185f,             0x3f8fac07,             0x05e2ccb4,
+               0x3ec0386b,             0x3fafac00,             0x0502e8ac,
+               0x3ed05c77,             0x3fcfb3fb,             0x0432f0a3,
+               0x3ef08482,             0x3fdfbbf6,             0x0372f898 },
+       },
+       { 0x2c00, {
+               0x3eefdc31,             0x3eefdc31,             0x08e238b8,
+               0x3edfec3d,             0x3f0fc828,             0x082258b9,
+               0x3ed00049,             0x3f1fc01e,             0x077278b6,
+               0x3ed01455,             0x3f3fb815,             0x06c294b2,
+               0x3ed03460,             0x3f5fb40d,             0x0602acac,
+               0x3ef0506c,             0x3f7fb006,             0x0542c0a4,
+               0x3f107476,             0x3f9fb400,             0x0472c89d,
+               0x3f309c80,             0x3fbfb7fc,             0x03b2cc94 },
+       },
+       { 0x2e00, {
+               0x3eefec37,             0x3eefec37,             0x088220b0,
+               0x3ee00041,             0x3effdc2d,             0x07f244ae,
+               0x3ee0144c,             0x3f0fd023,             0x07625cad,
+               0x3ef02c57,             0x3f1fc81a,             0x06c274a9,
+               0x3f004861,             0x3f3fbc13,             0x060288a6,
+               0x3f20686b,             0x3f5fb80c,             0x05529c9e,
+               0x3f408c74,             0x3f6fb805,             0x04b2ac96,
+               0x3f80ac7e,             0x3f8fb800,             0x0402ac8e },
+       },
+       { 0x3000, {
+               0x3ef0003a,             0x3ef0003a,             0x084210a6,
+               0x3ef01045,             0x3effec32,             0x07b228a7,
+               0x3f00284e,             0x3f0fdc29,             0x073244a4,
+               0x3f104058,             0x3f0fd420,             0x06a258a2,
+               0x3f305c62,             0x3f2fc818,             0x0612689d,
+               0x3f508069,             0x3f3fc011,             0x05728496,
+               0x3f80a072,             0x3f4fc00a,             0x04d28c90,
+               0x3fc0c07b,             0x3f6fbc04,             0x04429088 },
+       },
+       { 0x3200, {
+               0x3f00103e,             0x3f00103e,             0x07f1fc9e,
+               0x3f102447,             0x3f000035,             0x0782149d,
+               0x3f203c4f,             0x3f0ff02c,             0x07122c9c,
+               0x3f405458,             0x3f0fe424,             0x06924099,
+               0x3f607061,             0x3f1fd41d,             0x06024c97,
+               0x3f909068,             0x3f2fcc16,             0x05726490,
+               0x3fc0b070,             0x3f3fc80f,             0x04f26c8a,
+               0x0000d077,             0x3f4fc409,             0x04627484 },
+       },
+       { 0x3400, {
+               0x3f202040,             0x3f202040,             0x07a1e898,
+               0x3f303449,             0x3f100c38,             0x0741fc98,
+               0x3f504c50,             0x3f10002f,             0x06e21495,
+               0x3f706459,             0x3f1ff028,             0x06722492,
+               0x3fa08060,             0x3f1fe421,             0x05f2348f,
+               0x3fd09c67,             0x3f1fdc19,             0x05824c89,
+               0x0000bc6e,             0x3f2fd014,             0x04f25086,
+               0x0040dc74,             0x3f3fcc0d,             0x04825c7f },
+       },
+       { 0x3600, {
+               0x3f403042,             0x3f403042,             0x0761d890,
+               0x3f504848,             0x3f301c3b,             0x0701f090,
+               0x3f805c50,             0x3f200c33,             0x06a2008f,
+               0x3fa07458,             0x3f10002b,             0x06520c8d,
+               0x3fd0905e,             0x3f1ff424,             0x05e22089,
+               0x0000ac65,             0x3f1fe81d,             0x05823483,
+               0x0030cc6a,             0x3f2fdc18,             0x04f23c81,
+               0x0080e871,             0x3f2fd412,             0x0482407c },
+       },
+       { 0x3800, {
+               0x3f604043,             0x3f604043,             0x0721c88a,
+               0x3f80544a,             0x3f502c3c,             0x06d1d88a,
+               0x3fb06851,             0x3f301c35,             0x0681e889,
+               0x3fd08456,             0x3f30082f,             0x0611fc88,
+               0x00009c5d,             0x3f200027,             0x05d20884,
+               0x0030b863,             0x3f2ff421,             0x05621880,
+               0x0070d468,             0x3f2fe81b,             0x0502247c,
+               0x00c0ec6f,             0x3f2fe015,             0x04a22877 },
+       },
+       { 0x3a00, {
+               0x3f904c44,             0x3f904c44,             0x06e1b884,
+               0x3fb0604a,             0x3f70383e,             0x0691c885,
+               0x3fe07451,             0x3f502c36,             0x0661d483,
+               0x00009055,             0x3f401831,             0x0601ec81,
+               0x0030a85b,             0x3f300c2a,             0x05b1f480,
+               0x0070c061,             0x3f300024,             0x0562047a,
+               0x00b0d867,             0x3f3ff41e,             0x05020c77,
+               0x00f0f46b,             0x3f2fec19,             0x04a21474 },
+       },
+       { 0x3c00, {
+               0x3fb05c43,             0x3fb05c43,             0x06c1b07e,
+               0x3fe06c4b,             0x3f902c3f,             0x0681c081,
+               0x0000844f,             0x3f703838,             0x0631cc7d,
+               0x00309855,             0x3f602433,             0x05d1d47e,
+               0x0060b459,             0x3f50142e,             0x0581e47b,
+               0x00a0c85f,             0x3f400828,             0x0531f078,
+               0x00e0e064,             0x3f300021,             0x0501fc73,
+               0x00b0fc6a,             0x3f3ff41d,             0x04a20873 },
+       },
+       { 0x3e00, {
+               0x3fe06444,             0x3fe06444,             0x0681a07a,
+               0x00007849,             0x3fc0503f,             0x0641b07a,
+               0x0020904d,             0x3fa0403a,             0x05f1c07a,
+               0x0060a453,             0x3f803034,             0x05c1c878,
+               0x0090b858,             0x3f70202f,             0x0571d477,
+               0x00d0d05d,             0x3f501829,             0x0531e073,
+               0x0110e462,             0x3f500825,             0x04e1e471,
+               0x01510065,             0x3f40001f,             0x04a1f06d },
+       },
+       { 0x4000, {
+               0x00007044,             0x00007044,             0x06519476,
+               0x00208448,             0x3fe05c3f,             0x0621a476,
+               0x0050984d,             0x3fc04c3a,             0x05e1b075,
+               0x0080ac52,             0x3fa03c35,             0x05a1b875,
+               0x00c0c056,             0x3f803030,             0x0561c473,
+               0x0100d45b,             0x3f70202b,             0x0521d46f,
+               0x0140e860,             0x3f601427,             0x04d1d46e,
+               0x01810064,             0x3f500822,             0x0491dc6b },
+       },
+       { 0x5000, {
+               0x0110a442,             0x0110a442,             0x0551545e,
+               0x0140b045,             0x00e0983f,             0x0531585f,
+               0x0160c047,             0x00c08c3c,             0x0511645e,
+               0x0190cc4a,             0x00908039,             0x04f1685f,
+               0x01c0dc4c,             0x00707436,             0x04d1705e,
+               0x0200e850,             0x00506833,             0x04b1785b,
+               0x0230f453,             0x00305c30,             0x0491805a,
+               0x02710056,             0x0010542d,             0x04718059 },
+       },
+       { 0x6000, {
+               0x01c0bc40,             0x01c0bc40,             0x04c13052,
+               0x01e0c841,             0x01a0b43d,             0x04c13851,
+               0x0210cc44,             0x0180a83c,             0x04a13453,
+               0x0230d845,             0x0160a03a,             0x04913c52,
+               0x0260e047,             0x01409838,             0x04714052,
+               0x0280ec49,             0x01208c37,             0x04514c50,
+               0x02b0f44b,             0x01008435,             0x04414c50,
+               0x02d1004c,             0x00e07c33,             0x0431544f },
+       },
+       { 0x7000, {
+               0x0230c83e,             0x0230c83e,             0x04711c4c,
+               0x0250d03f,             0x0210c43c,             0x0471204b,
+               0x0270d840,             0x0200b83c,             0x0451244b,
+               0x0290dc42,             0x01e0b43a,             0x0441244c,
+               0x02b0e443,             0x01c0b038,             0x0441284b,
+               0x02d0ec44,             0x01b0a438,             0x0421304a,
+               0x02f0f445,             0x0190a036,             0x04213449,
+               0x0310f847,             0x01709c34,             0x04213848 },
+       },
+       { 0x8000, {
+               0x0280d03d,             0x0280d03d,             0x04310c48,
+               0x02a0d43e,             0x0270c83c,             0x04311047,
+               0x02b0dc3e,             0x0250c83a,             0x04311447,
+               0x02d0e040,             0x0240c03a,             0x04211446,
+               0x02e0e840,             0x0220bc39,             0x04111847,
+               0x0300e842,             0x0210b438,             0x04012445,
+               0x0310f043,             0x0200b037,             0x04012045,
+               0x0330f444,             0x01e0ac36,             0x03f12445 },
+       },
+       { 0xefff, {
+               0x0340dc3a,             0x0340dc3a,             0x03b0ec40,
+               0x0340e03a,             0x0330e039,             0x03c0f03e,
+               0x0350e03b,             0x0330dc39,             0x03c0ec3e,
+               0x0350e43a,             0x0320dc38,             0x03c0f43e,
+               0x0360e43b,             0x0320d839,             0x03b0f03e,
+               0x0360e83b,             0x0310d838,             0x03c0fc3b,
+               0x0370e83b,             0x0310d439,             0x03a0f83d,
+               0x0370e83c,             0x0300d438,             0x03b0fc3c },
+       }
+};
+
  enum rcar_vin_state {
         STOPPED = 0,
         RUNNING,
@@ -161,6 +504,9 @@ struct rcar_vin_cam {
         /* Client output, as seen by the VIN */
         unsigned int                    width;
         unsigned int                    height;
+       /* User window from S_FMT */
+       unsigned int out_width;
+       unsigned int out_height;
         /*
          * User window from S_CROP / G_CROP, produced by client cropping and
          * scaling, VIN scaling and VIN cropping, mapped back onto the client
@@ -332,7 +678,7 @@ static int rcar_vin_setup(struct rcar_vin_priv *priv)
                 vnmc |= VNMC_BPS;
  
         /* progressive or interlaced mode */
-       interrupts = progressive ? VNIE_FIE | VNIE_EFE : VNIE_EFE;
+       interrupts = progressive ? VNIE_FIE : VNIE_EFE;
  
         /* ack interrupts */
         iowrite32(interrupts, priv->base + VNINTS_REG);
@@ -667,6 +1013,60 @@ static void rcar_vin_clock_stop(struct soc_camera_host *ici)
         /* VIN does not have "mclk" */
  }
  
+static void set_coeff(struct rcar_vin_priv *priv, unsigned short xs)
+{      /* Select the vin_coeff_set[] entry whose xs_value is nearest to xs and write it to VNC1A..VNC8C */
+       int i;
+       const struct vin_coeff *p_prev_set = NULL;      /* entry visited just before p_set */
+       const struct vin_coeff *p_set = NULL;           /* candidate entry; the one finally written out */
+
+       /* Look for suitable coefficient values: stop at the first entry whose xs_value exceeds xs */
+       for (i = 0; i < ARRAY_SIZE(vin_coeff_set); i++) {
+               p_prev_set = p_set;
+               p_set = &vin_coeff_set[i];
+
+               if (xs < p_set->xs_value)
+                       break;
+       }       /* if no break was taken, p_set is the last table entry */
+
+       /* Use previous value if its XS value is closer */
+       if (p_prev_set && p_set &&      /* p_prev_set stays NULL if the loop ended on its first iteration */
+           xs - p_prev_set->xs_value < p_set->xs_value - xs)   /* unsigned short operands promote to int; right side is <= 0 when no entry exceeded xs */
+               p_set = p_prev_set;
+
+       /* Set coefficient registers: coeff_set[0..23] go to VNC1A..VNC8C, three (A, B, C) per set */
+       iowrite32(p_set->coeff_set[0], priv->base + VNC1A_REG); /* p_set is non-NULL here because vin_coeff_set[] is non-empty */
+       iowrite32(p_set->coeff_set[1], priv->base + VNC1B_REG);
+       iowrite32(p_set->coeff_set[2], priv->base + VNC1C_REG);
+
+       iowrite32(p_set->coeff_set[3], priv->base + VNC2A_REG);
+       iowrite32(p_set->coeff_set[4], priv->base + VNC2B_REG);
+       iowrite32(p_set->coeff_set[5], priv->base + VNC2C_REG);
+
+       iowrite32(p_set->coeff_set[6], priv->base + VNC3A_REG);
+       iowrite32(p_set->coeff_set[7], priv->base + VNC3B_REG);
+       iowrite32(p_set->coeff_set[8], priv->base + VNC3C_REG);
+
+       iowrite32(p_set->coeff_set[9], priv->base + VNC4A_REG);
+       iowrite32(p_set->coeff_set[10], priv->base + VNC4B_REG);
+       iowrite32(p_set->coeff_set[11], priv->base + VNC4C_REG);
+
+       iowrite32(p_set->coeff_set[12], priv->base + VNC5A_REG);
+       iowrite32(p_set->coeff_set[13], priv->base + VNC5B_REG);
+       iowrite32(p_set->coeff_set[14], priv->base + VNC5C_REG);
+
+       iowrite32(p_set->coeff_set[15], priv->base + VNC6A_REG);
+       iowrite32(p_set->coeff_set[16], priv->base + VNC6B_REG);
+       iowrite32(p_set->coeff_set[17], priv->base + VNC6C_REG);
+
+       iowrite32(p_set->coeff_set[18], priv->base + VNC7A_REG);
+       iowrite32(p_set->coeff_set[19], priv->base + VNC7B_REG);
+       iowrite32(p_set->coeff_set[20], priv->base + VNC7C_REG);
+
+       iowrite32(p_set->coeff_set[21], priv->base + VNC8A_REG);
+       iowrite32(p_set->coeff_set[22], priv->base + VNC8B_REG);
+       iowrite32(p_set->coeff_set[23], priv->base + VNC8C_REG);
+}
+
  /* rect is guaranteed to not exceed the scaled camera rectangle */
  static int rcar_vin_set_rect(struct soc_camera_device *icd)
  {
@@ -676,6 +1076,7 @@ static int rcar_vin_set_rect(struct soc_camera_device *icd)
         unsigned int left_offset, top_offset;
         unsigned char dsize = 0;
         struct v4l2_rect *cam_subrect = &cam->subrect;
+       u32 value;
  
         dev_dbg(icd->parent, "Crop %ux%u@%u:%u\n",
                 icd->user_width, icd->user_height, cam->vin_left, cam->vin_top);
@@ -695,40 +1096,64 @@ static int rcar_vin_set_rect(struct soc_camera_device *icd)
  
         /* Set Start/End Pixel/Line Pre-Clip */
         iowrite32(left_offset << dsize, priv->base + VNSPPRC_REG);
-       iowrite32((left_offset + cam->width - 1) << dsize,
+       iowrite32((left_offset + cam_subrect->width - 1) << dsize,
                   priv->base + VNEPPRC_REG);
         switch (priv->field) {
         case V4L2_FIELD_INTERLACED:
         case V4L2_FIELD_INTERLACED_TB:
         case V4L2_FIELD_INTERLACED_BT:
                 iowrite32(top_offset / 2, priv->base + VNSLPRC_REG);
-               iowrite32((top_offset + cam->height) / 2 - 1,
+               iowrite32((top_offset + cam_subrect->height) / 2 - 1,
                           priv->base + VNELPRC_REG);
                 break;
         default:
                 iowrite32(top_offset, priv->base + VNSLPRC_REG);
-               iowrite32(top_offset + cam->height - 1,
+               iowrite32(top_offset + cam_subrect->height - 1,
                           priv->base + VNELPRC_REG);
                 break;
         }
  
+       /* Set scaling coefficient */
+       value = 0;
+       if (cam_subrect->height != cam->out_height)
+               value = (4096 * cam_subrect->height) / cam->out_height;
+       dev_dbg(icd->parent, "YS Value: %x\n", value);
+       iowrite32(value, priv->base + VNYS_REG);
+
+       value = 0;
+       if (cam_subrect->width != cam->out_width)
+               value = (4096 * cam_subrect->width) / cam->out_width;
+
+       /* Horizontal upscaling is up to double size */
+       if (0 < value && value < 2048)
+               value = 2048;
+
+       dev_dbg(icd->parent, "XS Value: %x\n", value);
+       iowrite32(value, priv->base + VNXS_REG);
+
+       /* Horizontal upscaling is carried out by scaling down from double size */
+       if (value < 4096)
+               value *= 2;
+
+       set_coeff(priv, value);
+
         /* Set Start/End Pixel/Line Post-Clip */
         iowrite32(0, priv->base + VNSPPOC_REG);
         iowrite32(0, priv->base + VNSLPOC_REG);
-       iowrite32((cam_subrect->width - 1) << dsize, priv->base + VNEPPOC_REG);
+       iowrite32((cam->out_width - 1) << dsize, priv->base + VNEPPOC_REG);
         switch (priv->field) {
         case V4L2_FIELD_INTERLACED:
         case V4L2_FIELD_INTERLACED_TB:
         case V4L2_FIELD_INTERLACED_BT:
-               iowrite32(cam_subrect->height / 2 - 1,
+               iowrite32(cam->out_height / 2 - 1,
                           priv->base + VNELPOC_REG);
                 break;
         default:
-               iowrite32(cam_subrect->height - 1, priv->base + VNELPOC_REG);
+               iowrite32(cam->out_height - 1, priv->base + VNELPOC_REG);
                 break;
         }
  
-       iowrite32(ALIGN(cam->width, 0x10), priv->base + VNIS_REG);
+       iowrite32(ALIGN(cam->out_width, 0x10), priv->base + VNIS_REG);
  
         return 0;
  }
@@ -819,7 +1244,7 @@ static int rcar_vin_set_bus_param(struct soc_camera_device *icd)
         if (ret < 0 && ret != -ENOIOCTLCMD)
                 return ret;
  
-       val = priv->field == V4L2_FIELD_NONE ? VNDMR2_FTEV : 0;
+       val = VNDMR2_FTEV | VNDMR2_VLV(1);
         if (!(common_flags & V4L2_MBUS_VSYNC_ACTIVE_LOW))
                 val |= VNDMR2_VPS;
         if (!(common_flags & V4L2_MBUS_HSYNC_ACTIVE_LOW))
@@ -879,6 +1304,14 @@ static const struct soc_mbus_pixelfmt rcar_vin_formats[] = {
                 .order                  = SOC_MBUS_ORDER_LE,
                 .layout                 = SOC_MBUS_LAYOUT_PLANAR_Y_C,
         },
+       {
+               .fourcc                 = V4L2_PIX_FMT_YUYV,
+               .name                   = "YUYV",
+               .bits_per_sample        = 16,
+               .packing                = SOC_MBUS_PACKING_NONE,
+               .order                  = SOC_MBUS_ORDER_LE,
+               .layout                 = SOC_MBUS_LAYOUT_PACKED,
+       },
         {
                 .fourcc                 = V4L2_PIX_FMT_UYVY,
                 .name                   = "UYVY",
@@ -999,6 +1432,8 @@ static int rcar_vin_get_formats(struct soc_camera_device *icd, unsigned int idx,
                 cam->subrect = rect;
                 cam->width = mf.width;
                 cam->height = mf.height;
+               cam->out_width  = mf.width;
+               cam->out_height = mf.height;
  
                 icd->host_priv = cam;
         } else {
@@ -1259,6 +1694,9 @@ static int rcar_vin_set_fmt(struct soc_camera_device *icd,
         dev_dbg(dev, "W: %u : %u, H: %u : %u\n",
                 vin_sub_width, pix->width, vin_sub_height, pix->height);
  
+       cam->out_width = pix->width;
+       cam->out_height = pix->height;
+
         icd->current_fmt = xlate;
  
         priv->field = field;
@@ -1310,8 +1748,12 @@ static int rcar_vin_try_fmt(struct soc_camera_device *icd,
         if (ret < 0)
                 return ret;
  
-       pix->width = mf.width;
-       pix->height = mf.height;
+       /* Adjust only if VIN cannot scale */
+       if (pix->width > mf.width * 2)
+               pix->width = mf.width * 2;
+       if (pix->height > mf.height * 3)
+               pix->height = mf.height * 3;
+
         pix->field = mf.field;
         pix->colorspace = mf.colorspace;
  
@@ -1395,6 +1837,8 @@ static struct soc_camera_host_ops rcar_vin_host_ops = {
  
  #ifdef CONFIG_OF
  static struct of_device_id rcar_vin_of_table[] = {
+       { .compatible = "renesas,vin-r8a7794", .data = (void *)RCAR_GEN2 },
+       { .compatible = "renesas,vin-r8a7793", .data = (void *)RCAR_GEN2 },
         { .compatible = "renesas,vin-r8a7791", .data = (void *)RCAR_GEN2 },
         { .compatible = "renesas,vin-r8a7790", .data = (void *)RCAR_GEN2 },
         { .compatible = "renesas,vin-r8a7779", .data = (void *)RCAR_H1 },
diff --git a/drivers/media/platform/vivid/vivid-vid-out.c b/drivers/media/platform/vivid/vivid-vid-out.c

index ee5c3992b276b9e7e7b2ed5bd3144e4ba9f33346..39ff79f6aa67da8453cba76321364ca857f16509 100644 (file)
--- a/drivers/media/platform/vivid/vivid-vid-out.c
+++ b/drivers/media/platform/vivid/vivid-vid-out.c
@@ -625,7 +625,7 @@ int vivid_vid_out_g_selection(struct file *file, void *priv,
                 sel->r = dev->fmt_out_rect;
                 break;
         case V4L2_SEL_TGT_CROP_BOUNDS:
-               if (!dev->has_compose_out)
+               if (!dev->has_crop_out)
                         return -EINVAL;
                 sel->r = vivid_max_rect;
                 break;
diff --git a/drivers/media/usb/Kconfig b/drivers/media/usb/Kconfig

index 056181f2f5694864be2740c05f95069e8aa923d1..7496f332f3f5706f9d131673d5dfb5a86b74959e 100644 (file)
--- a/drivers/media/usb/Kconfig
+++ b/drivers/media/usb/Kconfig
@@ -24,7 +24,6 @@ if MEDIA_ANALOG_TV_SUPPORT
         comment "Analog TV USB devices"
  source "drivers/media/usb/pvrusb2/Kconfig"
  source "drivers/media/usb/hdpvr/Kconfig"
-source "drivers/media/usb/tlg2300/Kconfig"
  source "drivers/media/usb/usbvision/Kconfig"
  source "drivers/media/usb/stk1160/Kconfig"
  source "drivers/media/usb/go7007/Kconfig"
diff --git a/drivers/media/usb/Makefile b/drivers/media/usb/Makefile

index 6f2eb7c8416c47844eeb2977122f554b0513668c..8874ba774a348029d6b865b4e41225e219da1a65 100644 (file)
--- a/drivers/media/usb/Makefile
+++ b/drivers/media/usb/Makefile
@@ -16,7 +16,6 @@ obj-$(CONFIG_VIDEO_CPIA2) += cpia2/
  obj-$(CONFIG_VIDEO_AU0828) += au0828/
  obj-$(CONFIG_VIDEO_HDPVR)      += hdpvr/
  obj-$(CONFIG_VIDEO_PVRUSB2) += pvrusb2/
-obj-$(CONFIG_VIDEO_TLG2300) += tlg2300/
  obj-$(CONFIG_VIDEO_USBVISION) += usbvision/
  obj-$(CONFIG_VIDEO_STK1160) += stk1160/
  obj-$(CONFIG_VIDEO_CX231XX) += cx231xx/
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c

index 75658717961f4ac6603890ab785d444cfe864c3d..faac2f4e0f3ad358699a55123ebe18b38e8f65f5 100644 (file)
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -1017,6 +1017,12 @@ static int v4l_querycap(const struct v4l2_ioctl_ops *ops,
         ret = ops->vidioc_querycap(file, fh, cap);
  
         cap->capabilities |= V4L2_CAP_EXT_PIX_FORMAT;
+       /*
+        * Drivers MUST fill in device_caps, so check for this and
+        * warn if it was forgotten.
+        */
+       WARN_ON(!(cap->capabilities & V4L2_CAP_DEVICE_CAPS) ||
+               !cap->device_caps);
         cap->device_caps |= V4L2_CAP_EXT_PIX_FORMAT;
  
         return ret;
diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig

index 7cf8f4ac281f50518d0baefd9574fa392893e33e..48e62a34f7f27e6a8d4e1669f24c7bfef9709b09 100644 (file)
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -59,7 +59,7 @@ config NET_DSA_BCM_SF2
         depends on HAS_IOMEM
         select NET_DSA
         select NET_DSA_TAG_BRCM
-       select FIXED_PHY if NET_DSA_BCM_SF2=y
+       select FIXED_PHY
         select BCM7XXX_PHY
         select MDIO_BCM_UNIMAC
         ---help---
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig

index 888247ad90680891db3591019db926f0c93c3eb4..41a3c9804427b0931569c914d3cfc4f1418ee332 100644 (file)
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -64,7 +64,7 @@ config BCMGENET
         tristate "Broadcom GENET internal MAC support"
         select MII
         select PHYLIB
-       select FIXED_PHY if BCMGENET=y
+       select FIXED_PHY
         select BCM7XXX_PHY
         help
           This driver supports the built-in Ethernet MACs found in the
@@ -155,7 +155,7 @@ config SYSTEMPORT
         depends on OF
         select MII
         select PHYLIB
-       select FIXED_PHY if SYSTEMPORT=y
+       select FIXED_PHY
         help
           This driver supports the built-in Ethernet MACs found in the
           Broadcom BCM7xxx Set Top Box family chipset using an internal
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c

index 691f0bf09ee15ca972e95f84c257d0dba3d977fb..9f5e38769a294a3d66d9562e12e2e0116bac925e 100644 (file)
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -13256,7 +13256,7 @@ static int bnx2x_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
                 return -EFAULT;
         }
  
-       DP(BNX2X_MSG_PTP, "Configrued val = %d, period = %d\n", best_val,
+       DP(BNX2X_MSG_PTP, "Configured val = %d, period = %d\n", best_val,
            best_period);
  
         return 0;
@@ -14784,7 +14784,7 @@ static int bnx2x_hwtstamp_ioctl(struct bnx2x *bp, struct ifreq *ifr)
                 -EFAULT : 0;
  }
  
-/* Configrues HW for PTP */
+/* Configures HW for PTP */
  static int bnx2x_configure_ptp(struct bnx2x *bp)
  {
         int rc, port = BP_PORT(bp);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h

index b0779d773343cbc82aa825eed3024205581ad33f..6fe547c93e74b0e36f8a920737f25c79c2470143 100644 (file)
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
@@ -7549,7 +7549,7 @@ Theotherbitsarereservedandshouldbezero*/
  #define IGU_REG_SISR_MDPC_WOMASK_UPPER         0x05a6
  
  #define IGU_REG_RESERVED_UPPER                         0x05ff
-/* Fields of IGU PF CONFIGRATION REGISTER */
+/* Fields of IGU PF CONFIGURATION REGISTER */
  #define IGU_PF_CONF_FUNC_EN      (0x1<<0)  /* function enable        */
  #define IGU_PF_CONF_MSI_MSIX_EN   (0x1<<1)  /* MSI/MSIX enable       */
  #define IGU_PF_CONF_INT_LINE_EN   (0x1<<2)  /* INT enable            */
@@ -7557,7 +7557,7 @@ Theotherbitsarereservedandshouldbezero*/
  #define IGU_PF_CONF_SINGLE_ISR_EN (0x1<<4)  /* single ISR mode enable */
  #define IGU_PF_CONF_SIMD_MODE    (0x1<<5)  /* simd all ones mode     */
  
-/* Fields of IGU VF CONFIGRATION REGISTER */
+/* Fields of IGU VF CONFIGURATION REGISTER */
  #define IGU_VF_CONF_FUNC_EN       (0x1<<0)  /* function enable        */
  #define IGU_VF_CONF_MSI_MSIX_EN    (0x1<<1)  /* MSI/MSIX enable        */
  #define IGU_VF_CONF_PARENT_MASK    (0x3<<2)  /* Parent PF             */
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c

index 06dea3dd463c2c27936bd5c02cb5a3c76d7bd229..3767271c7667f69ef212fb2cb31105910121b516 100644 (file)
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -2160,7 +2160,7 @@ static int __init macb_probe(struct platform_device *pdev)
         int err = -ENXIO;
         const char *mac;
         void __iomem *mem;
-       unsigned int hw_q, queue_mask, q, num_queues, q_irq = 0;
+       unsigned int hw_q, queue_mask, q, num_queues;
         struct clk *pclk, *hclk, *tx_clk;
  
         regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -2235,11 +2235,11 @@ static int __init macb_probe(struct platform_device *pdev)
          * register mapping but we don't want to test the queue index then
          * compute the corresponding register offset at run time.
          */
-       for (hw_q = 0; hw_q < MACB_MAX_QUEUES; ++hw_q) {
+       for (hw_q = 0, q = 0; hw_q < MACB_MAX_QUEUES; ++hw_q) {
                 if (!(queue_mask & (1 << hw_q)))
                         continue;
  
-               queue = &bp->queues[q_irq];
+               queue = &bp->queues[q];
                 queue->bp = bp;
                 if (hw_q) {
                         queue->ISR  = GEM_ISR(hw_q - 1);
@@ -2261,18 +2261,18 @@ static int __init macb_probe(struct platform_device *pdev)
                  * must remove the optional gaps that could exist in the
                  * hardware queue mask.
                  */
-               queue->irq = platform_get_irq(pdev, q_irq);
+               queue->irq = platform_get_irq(pdev, q);
                 err = devm_request_irq(&pdev->dev, queue->irq, macb_interrupt,
                                        0, dev->name, queue);
                 if (err) {
                         dev_err(&pdev->dev,
                                 "Unable to request IRQ %d (error %d)\n",
                                 queue->irq, err);
-                       goto err_out_free_irq;
+                       goto err_out_free_netdev;
                 }
  
                 INIT_WORK(&queue->tx_error_task, macb_tx_error_task);
-               q_irq++;
+               q++;
         }
         dev->irq = bp->queues[0].irq;
  
@@ -2350,7 +2350,7 @@ static int __init macb_probe(struct platform_device *pdev)
         err = register_netdev(dev);
         if (err) {
                 dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
-               goto err_out_free_irq;
+               goto err_out_free_netdev;
         }
  
         err = macb_mii_init(bp);
@@ -2373,9 +2373,7 @@ static int __init macb_probe(struct platform_device *pdev)
  
  err_out_unregister_netdev:
         unregister_netdev(dev);
-err_out_free_irq:
-       for (q = 0, queue = bp->queues; q < q_irq; ++q, ++queue)
-               devm_free_irq(&pdev->dev, queue->irq, queue);
+err_out_free_netdev:
         free_netdev(dev);
  err_out_disable_clocks:
         if (!IS_ERR(tx_clk))
@@ -2392,8 +2390,6 @@ static int __exit macb_remove(struct platform_device *pdev)
  {
         struct net_device *dev;
         struct macb *bp;
-       struct macb_queue *queue;
-       unsigned int q;
  
         dev = platform_get_drvdata(pdev);
  
@@ -2405,14 +2401,11 @@ static int __exit macb_remove(struct platform_device *pdev)
                 kfree(bp->mii_bus->irq);
                 mdiobus_free(bp->mii_bus);
                 unregister_netdev(dev);
-               queue = bp->queues;
-               for (q = 0; q < bp->num_queues; ++q, ++queue)
-                       devm_free_irq(&pdev->dev, queue->irq, queue);
-               free_netdev(dev);
                 if (!IS_ERR(bp->tx_clk))
                         clk_disable_unprepare(bp->tx_clk);
                 clk_disable_unprepare(bp->hclk);
                 clk_disable_unprepare(bp->pclk);
+               free_netdev(dev);
         }
  
         return 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c

index 28d04153f9990797068218bc96afd484bb536c0c..c132d9030729d9e20f5f1051611fd35d46098128 100644 (file)
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -2376,7 +2376,7 @@ const char *t4_get_port_type_description(enum fw_port_type port_type)
                 "KR/KX",
                 "KR/KX/KX4",
                 "R QSFP_10G",
-               "",
+               "R QSA",
                 "R QSFP",
                 "R BP40_BA",
         };
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h

index 291b6f219708c946c5ad01c9bc0485d06dc7f19c..7c0aec85137a2acee4a892afc5ec14c16731a26e 100644 (file)
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -2470,8 +2470,8 @@ enum fw_port_type {
         FW_PORT_TYPE_BP_AP,
         FW_PORT_TYPE_BP4_AP,
         FW_PORT_TYPE_QSFP_10G,
-       FW_PORT_TYPE_QSFP,
         FW_PORT_TYPE_QSA,
+       FW_PORT_TYPE_QSFP,
         FW_PORT_TYPE_BP40_BA,
  
         FW_PORT_TYPE_NONE = FW_PORT_CMD_PTYPE_M
diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c

index b2427928eb11878724704ed8d041f6286ceb525c..d1c025fd972607eaedbe51c86f157aef54456410 100644 (file)
--- a/drivers/net/ethernet/cirrus/cs89x0.c
+++ b/drivers/net/ethernet/cirrus/cs89x0.c
@@ -60,6 +60,7 @@
  #include <linux/interrupt.h>
  #include <linux/ioport.h>
  #include <linux/in.h>
+#include <linux/jiffies.h>
  #include <linux/skbuff.h>
  #include <linux/spinlock.h>
  #include <linux/string.h>
@@ -238,13 +239,13 @@ writereg(struct net_device *dev, u16 regno, u16 value)
  static int __init
  wait_eeprom_ready(struct net_device *dev)
  {
-       int timeout = jiffies;
+       unsigned long timeout = jiffies;
         /* check to see if the EEPROM is ready,
          * a timeout is used just in case EEPROM is ready when
          * SI_BUSY in the PP_SelfST is clear
          */
         while (readreg(dev, PP_SelfST) & SI_BUSY)
-               if (jiffies - timeout >= 40)
+               if (time_after_eq(jiffies, timeout + 40))
                         return -1;
         return 0;
  }
@@ -485,7 +486,7 @@ control_dc_dc(struct net_device *dev, int on_not_off)
  {
         struct net_local *lp = netdev_priv(dev);
         unsigned int selfcontrol;
-       int timenow = jiffies;
+       unsigned long timenow = jiffies;
         /* control the DC to DC convertor in the SelfControl register.
          * Note: This is hooked up to a general purpose pin, might not
          * always be a DC to DC convertor.
@@ -499,7 +500,7 @@ control_dc_dc(struct net_device *dev, int on_not_off)
         writereg(dev, PP_SelfCTL, selfcontrol);
  
         /* Wait for the DC/DC converter to power up - 500ms */
-       while (jiffies - timenow < HZ)
+       while (time_before(jiffies, timenow + HZ))
                 ;
  }
  
@@ -514,7 +515,7 @@ send_test_pkt(struct net_device *dev)
                 0, 0,           /* DSAP=0 & SSAP=0 fields */
                 0xf3, 0         /* Control (Test Req + P bit set) */
         };
-       long timenow = jiffies;
+       unsigned long timenow = jiffies;
  
         writereg(dev, PP_LineCTL, readreg(dev, PP_LineCTL) | SERIAL_TX_ON);
  
@@ -525,10 +526,10 @@ send_test_pkt(struct net_device *dev)
         iowrite16(ETH_ZLEN, lp->virt_addr + TX_LEN_PORT);
  
         /* Test to see if the chip has allocated memory for the packet */
-       while (jiffies - timenow < 5)
+       while (time_before(jiffies, timenow + 5))
                 if (readreg(dev, PP_BusST) & READY_FOR_TX_NOW)
                         break;
-       if (jiffies - timenow >= 5)
+       if (time_after_eq(jiffies, timenow + 5))
                 return 0;       /* this shouldn't happen */
  
         /* Write the contents of the packet */
@@ -536,7 +537,7 @@ send_test_pkt(struct net_device *dev)
  
         cs89_dbg(1, debug, "Sending test packet ");
         /* wait a couple of jiffies for packet to be received */
-       for (timenow = jiffies; jiffies - timenow < 3;)
+       for (timenow = jiffies; time_before(jiffies, timenow + 3);)
                 ;
         if ((readreg(dev, PP_TxEvent) & TX_SEND_OK_BITS) == TX_OK) {
                 cs89_dbg(1, cont, "succeeded\n");
@@ -556,7 +557,7 @@ static int
  detect_tp(struct net_device *dev)
  {
         struct net_local *lp = netdev_priv(dev);
-       int timenow = jiffies;
+       unsigned long timenow = jiffies;
         int fdx;
  
         cs89_dbg(1, debug, "%s: Attempting TP\n", dev->name);
@@ -574,7 +575,7 @@ detect_tp(struct net_device *dev)
         /* Delay for the hardware to work out if the TP cable is present
          * - 150ms
          */
-       for (timenow = jiffies; jiffies - timenow < 15;)
+       for (timenow = jiffies; time_before(jiffies, timenow + 15);)
                 ;
         if ((readreg(dev, PP_LineST) & LINK_OK) == 0)
                 return DETECTED_NONE;
@@ -618,7 +619,7 @@ detect_tp(struct net_device *dev)
                 if ((lp->auto_neg_cnf & AUTO_NEG_BITS) == AUTO_NEG_ENABLE) {
                         pr_info("%s: negotiating duplex...\n", dev->name);
                         while (readreg(dev, PP_AutoNegST) & AUTO_NEG_BUSY) {
-                               if (jiffies - timenow > 4000) {
+                               if (time_after(jiffies, timenow + 4000)) {
                                         pr_err("**** Full / half duplex auto-negotiation timed out ****\n");
                                         break;
                                 }
@@ -1271,7 +1272,7 @@ static void __init reset_chip(struct net_device *dev)
  {
  #if !defined(CONFIG_MACH_MX31ADS)
         struct net_local *lp = netdev_priv(dev);
-       int reset_start_time;
+       unsigned long reset_start_time;
  
         writereg(dev, PP_SelfCTL, readreg(dev, PP_SelfCTL) | POWER_ON_RESET);
  
@@ -1294,7 +1295,7 @@ static void __init reset_chip(struct net_device *dev)
         /* Wait until the chip is reset */
         reset_start_time = jiffies;
         while ((readreg(dev, PP_SelfST) & INIT_DONE) == 0 &&
-              jiffies - reset_start_time < 2)
+              time_before(jiffies, reset_start_time + 2))
                 ;
  #endif /* !CONFIG_MACH_MX31ADS */
  }
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c

index 2aacd47310511275be8ee4ef317963e588b39922..196073110e320b1bb05e8b38bf9a5e1c66b5e929 100644 (file)
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -3138,6 +3138,7 @@ static void be_disable_vxlan_offloads(struct be_adapter *adapter)
  
         netdev->hw_enc_features = 0;
         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
+       netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
  }
  #endif
  
@@ -4429,6 +4430,7 @@ static void be_add_vxlan_port(struct net_device *netdev, sa_family_t sa_family,
                                    NETIF_F_TSO | NETIF_F_TSO6 |
                                    NETIF_F_GSO_UDP_TUNNEL;
         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+       netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
  
         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
                  be16_to_cpu(port));
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c

index ebf76c496e7aab472c1c0ca740213b1624146a20..5ebdf8dc8a31300f526fd98912080dc4850937f2 100644 (file)
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1558,20 +1558,21 @@ fec_enet_interrupt(int irq, void *dev_id)
  {
         struct net_device *ndev = dev_id;
         struct fec_enet_private *fep = netdev_priv(ndev);
-       const unsigned napi_mask = FEC_ENET_RXF | FEC_ENET_TXF;
         uint int_events;
         irqreturn_t ret = IRQ_NONE;
  
         int_events = readl(fep->hwp + FEC_IEVENT);
-       writel(int_events & ~napi_mask, fep->hwp + FEC_IEVENT);
+       writel(int_events, fep->hwp + FEC_IEVENT);
         fec_enet_collect_events(fep, int_events);
  
-       if (int_events & napi_mask) {
+       if (fep->work_tx || fep->work_rx) {
                 ret = IRQ_HANDLED;
  
-               /* Disable the NAPI interrupts */
-               writel(FEC_ENET_MII, fep->hwp + FEC_IMASK);
-               napi_schedule(&fep->napi);
+               if (napi_schedule_prep(&fep->napi)) {
+                       /* Disable the NAPI interrupts */
+                       writel(FEC_ENET_MII, fep->hwp + FEC_IMASK);
+                       __napi_schedule(&fep->napi);
+               }
         }
  
         if (int_events & FEC_ENET_MII) {
@@ -1591,12 +1592,6 @@ static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
         struct fec_enet_private *fep = netdev_priv(ndev);
         int pkts;
  
-       /*
-        * Clear any pending transmit or receive interrupts before
-        * processing the rings to avoid racing with the hardware.
-        */
-       writel(FEC_ENET_RXF | FEC_ENET_TXF, fep->hwp + FEC_IEVENT);
-
         pkts = fec_enet_rx(ndev, budget);
  
         fec_enet_tx(ndev);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c

index 0a7ea4c5f9d3f7c79f585ddfe8696f6051556bee..a5f2660d552d67b6f8c0123f1b63d7fc7ee4ad52 100644 (file)
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -7549,6 +7549,11 @@ static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
         if (!(pf->flags & I40E_FLAG_SRIOV_ENABLED))
                 return -EOPNOTSUPP;
  
+       if (vid) {
+               pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name);
+               return -EINVAL;
+       }
+
         /* Hardware does not support aging addresses so if a
          * ndm_state is given only allow permanent addresses
          */
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c

index 6ff214de1111b8f7127740314dc2b23b6f12a5ec..190cbd931f6bc8527654d14d042dc85799883a81 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1569,8 +1569,15 @@ int mlx4_en_start_port(struct net_device *dev)
                         mlx4_en_free_affinity_hint(priv, i);
                         goto cq_err;
                 }
-               for (j = 0; j < cq->size; j++)
-                       cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK;
+
+               for (j = 0; j < cq->size; j++) {
+                       struct mlx4_cqe *cqe = NULL;
+
+                       cqe = mlx4_en_get_cqe(cq->buf, j, priv->cqe_size) +
+                             priv->cqe_factor;
+                       cqe->owner_sr_opcode = MLX4_CQE_OWNER_MASK;
+               }
+
                 err = mlx4_en_set_cq_moder(priv, cq);
                 if (err) {
                         en_err(priv, "Failed setting cq moderation parameters\n");
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c

index ef3b95bac2adc46a7bf976bda2a3f51988aa2ab5..982861d1df44d706cba4884bfb1caa19fe9f8f11 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -787,11 +787,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
                 if ((1 << (field & 0x3f)) > (PAGE_SIZE / dev_cap->bf_reg_size))
                         field = 3;
                 dev_cap->bf_regs_per_page = 1 << (field & 0x3f);
-               mlx4_dbg(dev, "BlueFlame available (reg size %d, regs/page %d)\n",
-                        dev_cap->bf_reg_size, dev_cap->bf_regs_per_page);
         } else {
                 dev_cap->bf_reg_size = 0;
-               mlx4_dbg(dev, "BlueFlame not available\n");
         }
  
         MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_SQ_OFFSET);
@@ -902,9 +899,6 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
                         goto out;
         }
  
-       mlx4_dbg(dev, "Base MM extensions: flags %08x, rsvd L_Key %08x\n",
-                dev_cap->bmme_flags, dev_cap->reserved_lkey);
-
         /*
          * Each UAR has 4 EQ doorbells; so if a UAR is reserved, then
          * we can't use any EQs whose doorbell falls on that page,
@@ -916,6 +910,21 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
         else
                 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SYS_EQS;
  
+out:
+       mlx4_free_cmd_mailbox(dev, mailbox);
+       return err;
+}
+
+void mlx4_dev_cap_dump(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
+{
+       if (dev_cap->bf_reg_size > 0)
+               mlx4_dbg(dev, "BlueFlame available (reg size %d, regs/page %d)\n",
+                        dev_cap->bf_reg_size, dev_cap->bf_regs_per_page);
+       else
+               mlx4_dbg(dev, "BlueFlame not available\n");
+
+       mlx4_dbg(dev, "Base MM extensions: flags %08x, rsvd L_Key %08x\n",
+                dev_cap->bmme_flags, dev_cap->reserved_lkey);
         mlx4_dbg(dev, "Max ICM size %lld MB\n",
                  (unsigned long long) dev_cap->max_icm_sz >> 20);
         mlx4_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n",
@@ -949,13 +958,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
                  dev_cap->dmfs_high_rate_qpn_base);
         mlx4_dbg(dev, "DMFS high rate steer QPn range: %d\n",
                  dev_cap->dmfs_high_rate_qpn_range);
-
         dump_dev_cap_flags(dev, dev_cap->flags);
         dump_dev_cap_flags2(dev, dev_cap->flags2);
-
-out:
-       mlx4_free_cmd_mailbox(dev, mailbox);
-       return err;
  }
  
  int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_cap)
@@ -1848,8 +1852,8 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
         /* CX3 is capable of extending CQEs\EQEs to strides larger than 64B */
         MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_STRIDE_OFFSET);
         if (byte_field) {
-               param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
-               param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+               param->dev_cap_enabled |= MLX4_DEV_CAP_EQE_STRIDE_ENABLED;
+               param->dev_cap_enabled |= MLX4_DEV_CAP_CQE_STRIDE_ENABLED;
                 param->cqe_size = 1 << ((byte_field &
                                          MLX4_CQE_SIZE_MASK_STRIDE) + 5);
                 param->eqe_size = 1 << (((byte_field &
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h

index 794e2826609a32af4eb3205dc8dfb137dbaabc37..62562b60fa876e5577e8940d2bb5d986eacbf565 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -224,6 +224,7 @@ struct mlx4_set_ib_param {
         u32 cap_mask;
  };
  
+void mlx4_dev_cap_dump(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap);
  int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap);
  int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_cap);
  int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port,
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c

index e25436b24ce7a50b4de4f7c425099bb6dc7bd2a8..943cbd47d832bb98719e355a727e8451c68bfbbe 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -171,9 +171,9 @@ int mlx4_check_port_params(struct mlx4_dev *dev,
  {
         int i;
  
-       for (i = 0; i < dev->caps.num_ports - 1; i++) {
-               if (port_type[i] != port_type[i + 1]) {
-                       if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
+       if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
+               for (i = 0; i < dev->caps.num_ports - 1; i++) {
+                       if (port_type[i] != port_type[i + 1]) {
                                 mlx4_err(dev, "Only same port types supported on this HCA, aborting\n");
                                 return -EINVAL;
                         }
@@ -305,6 +305,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
                 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
                 return err;
         }
+       mlx4_dev_cap_dump(dev, dev_cap);
  
         if (dev_cap->min_page_sz > PAGE_SIZE) {
                 mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
@@ -2488,41 +2489,42 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
                              u8 total_vfs, int existing_vfs)
  {
         u64 dev_flags = dev->flags;
+       int err = 0;
  
-       dev->dev_vfs = kzalloc(
-                       total_vfs * sizeof(*dev->dev_vfs),
-                       GFP_KERNEL);
+       atomic_inc(&pf_loading);
+       if (dev->flags &  MLX4_FLAG_SRIOV) {
+               if (existing_vfs != total_vfs) {
+                       mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
+                                existing_vfs, total_vfs);
+                       total_vfs = existing_vfs;
+               }
+       }
+
+       dev->dev_vfs = kzalloc(total_vfs * sizeof(*dev->dev_vfs), GFP_KERNEL);
         if (NULL == dev->dev_vfs) {
                 mlx4_err(dev, "Failed to allocate memory for VFs\n");
                 goto disable_sriov;
-       } else if (!(dev->flags &  MLX4_FLAG_SRIOV)) {
-               int err = 0;
-
-               atomic_inc(&pf_loading);
-               if (existing_vfs) {
-                       if (existing_vfs != total_vfs)
-                               mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
-                                        existing_vfs, total_vfs);
-               } else {
-                       mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
-                       err = pci_enable_sriov(pdev, total_vfs);
-               }
-               if (err) {
-                       mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
-                                err);
-                       atomic_dec(&pf_loading);
-                       goto disable_sriov;
-               } else {
-                       mlx4_warn(dev, "Running in master mode\n");
-                       dev_flags |= MLX4_FLAG_SRIOV |
-                               MLX4_FLAG_MASTER;
-                       dev_flags &= ~MLX4_FLAG_SLAVE;
-                       dev->num_vfs = total_vfs;
-               }
+       }
+
+       if (!(dev->flags &  MLX4_FLAG_SRIOV)) {
+               mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
+               err = pci_enable_sriov(pdev, total_vfs);
+       }
+       if (err) {
+               mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
+                        err);
+               goto disable_sriov;
+       } else {
+               mlx4_warn(dev, "Running in master mode\n");
+               dev_flags |= MLX4_FLAG_SRIOV |
+                       MLX4_FLAG_MASTER;
+               dev_flags &= ~MLX4_FLAG_SLAVE;
+               dev->num_vfs = total_vfs;
         }
         return dev_flags;
  
  disable_sriov:
+       atomic_dec(&pf_loading);
         dev->num_vfs = 0;
         kfree(dev->dev_vfs);
         return dev_flags & ~MLX4_FLAG_MASTER;
@@ -2606,8 +2608,10 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
                 }
  
                 if (total_vfs) {
-                       existing_vfs = pci_num_vf(pdev);
                         dev->flags = MLX4_FLAG_MASTER;
+                       existing_vfs = pci_num_vf(pdev);
+                       if (existing_vfs)
+                               dev->flags |= MLX4_FLAG_SRIOV;
                         dev->num_vfs = total_vfs;
                 }
         }
@@ -2643,6 +2647,7 @@ slave_start:
         }
  
         if (mlx4_is_master(dev)) {
+               /* when we hit the goto slave_start below, dev_cap already initialized */
                 if (!dev_cap) {
                         dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
  
@@ -2849,6 +2854,7 @@ slave_start:
         if (mlx4_is_master(dev) && dev->num_vfs)
                 atomic_dec(&pf_loading);
  
+       kfree(dev_cap);
         return 0;
  
  err_port:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c

index ab684463780b4fbd193b79eea5299a743e7a36cd..da82991239a8cb0df3f74e1a14d36037ed2d9a18 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -157,6 +157,8 @@ static const char *eqe_type_str(u8 type)
                 return "MLX5_EVENT_TYPE_CMD";
         case MLX5_EVENT_TYPE_PAGE_REQUEST:
                 return "MLX5_EVENT_TYPE_PAGE_REQUEST";
+       case MLX5_EVENT_TYPE_PAGE_FAULT:
+               return "MLX5_EVENT_TYPE_PAGE_FAULT";
         default:
                 return "Unrecognized event";
         }
@@ -279,6 +281,11 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
                         }
                         break;
  
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+               case MLX5_EVENT_TYPE_PAGE_FAULT:
+                       mlx5_eq_pagefault(dev, eqe);
+                       break;
+#endif
  
                 default:
                         mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
@@ -446,8 +453,12 @@ void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
  int mlx5_start_eqs(struct mlx5_core_dev *dev)
  {
         struct mlx5_eq_table *table = &dev->priv.eq_table;
+       u32 async_event_mask = MLX5_ASYNC_EVENT_MASK;
         int err;
  
+       if (dev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG)
+               async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT);
+
         err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
                                  MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
                                  "mlx5_cmd_eq", &dev->priv.uuari.uars[0]);
@@ -459,7 +470,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
         mlx5_cmd_use_events(dev);
  
         err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
-                                MLX5_NUM_ASYNC_EQE, MLX5_ASYNC_EVENT_MASK,
+                                MLX5_NUM_ASYNC_EQE, async_event_mask,
                                  "mlx5_async_eq", &dev->priv.uuari.uars[0]);
         if (err) {
                 mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c

index 087c4c797debdc90a7cec13aacf3b78f5cf04190..06f9036acd836196424b294ea47ce4dc624cbe09 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -69,6 +69,46 @@ int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev, struct mlx5_caps *caps)
         return mlx5_core_get_caps(dev, caps, HCA_CAP_OPMOD_GET_CUR);
  }
  
+/* Query the device's current on-demand-paging (ODP) capabilities.
+ *
+ * Issues QUERY_HCA_CAP with the HCA_CAP_OPMOD_GET_ODP_CUR op_mod and copies
+ * sizeof(*caps) bytes of the returned capability structure into @caps.
+ *
+ * Returns 0 on success, -ENOTSUPP when the device does not advertise
+ * MLX5_DEV_CAP_FLAG_ON_DMND_PG, -ENOMEM when the output buffer cannot be
+ * allocated, or the error reported by the command / its status word.
+ */
+int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *caps)
+{
+       u8 inbox[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
+       int outlen = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+       void *outbox;
+       int err;
+
+       if (!(dev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG))
+               return -ENOTSUPP;
+
+       outbox = kzalloc(outlen, GFP_KERNEL);
+       if (!outbox)
+               return -ENOMEM;
+
+       /* Build the command: everything but opcode and op_mod stays zero. */
+       memset(inbox, 0, sizeof(inbox));
+       MLX5_SET(query_hca_cap_in, inbox, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+       MLX5_SET(query_hca_cap_in, inbox, op_mod, HCA_CAP_OPMOD_GET_ODP_CUR);
+
+       err = mlx5_cmd_exec(dev, inbox, sizeof(inbox), outbox, outlen);
+       if (!err) {
+               /* The command was delivered; now check its status word. */
+               err = mlx5_cmd_status_to_err_v2(outbox);
+               if (err)
+                       mlx5_core_warn(dev, "query cur hca ODP caps failed, %d\n", err);
+       }
+
+       if (!err) {
+               memcpy(caps,
+                      MLX5_ADDR_OF(query_hca_cap_out, outbox, capability_struct),
+                      sizeof(*caps));
+
+               mlx5_core_dbg(dev, "on-demand paging capabilities:\nrc: %08x\nuc: %08x\nud: %08x\n",
+                       be32_to_cpu(caps->per_transport_caps.rc_odp_caps),
+                       be32_to_cpu(caps->per_transport_caps.uc_odp_caps),
+                       be32_to_cpu(caps->per_transport_caps.ud_odp_caps));
+       }
+
+       /* Single exit: the output mailbox is released on every path. */
+       kfree(outbox);
+       return err;
+}
+EXPORT_SYMBOL(mlx5_query_odp_caps);
+
  int mlx5_cmd_init_hca(struct mlx5_core_dev *dev)
  {
         struct mlx5_cmd_init_hca_mbox_in in;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c

index 5261a2b0da436b3a4359787c11dbc3686ee70689..575d853dbe05d1e9e8e3255761a673ff72dc5c30 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -88,6 +88,95 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
         mlx5_core_put_rsc(common);
  }
  
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+/* Decode a page-fault event queue entry and pass it to the owning QP.
+ *
+ * The QP number is taken from the low bits of the EQE's flags_qpn word and
+ * used to look up the resource with mlx5_get_rsc().  The big-endian event
+ * fields are converted into a struct mlx5_pagefault, which is handed to the
+ * QP's pfault_handler when one is set.  The looked-up resource is returned
+ * with mlx5_core_put_rsc() before leaving.
+ */
+void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+{
+       struct mlx5_eqe_page_fault *pf_eqe = &eqe->data.page_fault;
+       int qpn = be32_to_cpu(pf_eqe->flags_qpn) & MLX5_QPN_MASK;
+       struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, qpn);
+       struct mlx5_core_qp *qp;
+       struct mlx5_pagefault pfault;
+
+       /* Test the lookup result itself.  container_of() applied to a NULL
+        * pointer only yields NULL when the member is at offset zero, so
+        * checking the derived qp pointer would silently depend on the
+        * layout of struct mlx5_core_qp.
+        */
+       if (!common) {
+               mlx5_core_warn(dev, "ODP event for non-existent QP %06x\n",
+                              qpn);
+               return;
+       }
+       qp = container_of(common, struct mlx5_core_qp, common);
+
+       /* Start from a zeroed descriptor: the unsupported sub-type path below
+        * fills in no sub-type specific fields but still calls the handler,
+        * which must not see uninitialised stack contents.
+        */
+       memset(&pfault, 0, sizeof(pfault));
+
+       pfault.event_subtype = eqe->sub_type;
+       pfault.flags = (be32_to_cpu(pf_eqe->flags_qpn) >> MLX5_QPN_BITS) &
+               (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE | MLX5_PFAULT_RDMA);
+       pfault.bytes_committed = be32_to_cpu(
+               pf_eqe->bytes_committed);
+
+       mlx5_core_dbg(dev,
+                     "PAGE_FAULT: subtype: 0x%02x, flags: 0x%02x,\n",
+                     eqe->sub_type, pfault.flags);
+
+       switch (eqe->sub_type) {
+       case MLX5_PFAULT_SUBTYPE_RDMA:
+               /* RDMA based event */
+               pfault.rdma.r_key =
+                       be32_to_cpu(pf_eqe->rdma.r_key);
+               pfault.rdma.packet_size =
+                       be16_to_cpu(pf_eqe->rdma.packet_length);
+               pfault.rdma.rdma_op_len =
+                       be32_to_cpu(pf_eqe->rdma.rdma_op_len);
+               pfault.rdma.rdma_va =
+                       be64_to_cpu(pf_eqe->rdma.rdma_va);
+               mlx5_core_dbg(dev,
+                             "PAGE_FAULT: qpn: 0x%06x, r_key: 0x%08x,\n",
+                             qpn, pfault.rdma.r_key);
+               mlx5_core_dbg(dev,
+                             "PAGE_FAULT: rdma_op_len: 0x%08x,\n",
+                             pfault.rdma.rdma_op_len);
+               mlx5_core_dbg(dev,
+                             "PAGE_FAULT: rdma_va: 0x%016llx,\n",
+                             pfault.rdma.rdma_va);
+               mlx5_core_dbg(dev,
+                             "PAGE_FAULT: bytes_committed: 0x%06x\n",
+                             pfault.bytes_committed);
+               break;
+
+       case MLX5_PFAULT_SUBTYPE_WQE:
+               /* WQE based event */
+               pfault.wqe.wqe_index =
+                       be16_to_cpu(pf_eqe->wqe.wqe_index);
+               pfault.wqe.packet_size =
+                       be16_to_cpu(pf_eqe->wqe.packet_length);
+               mlx5_core_dbg(dev,
+                             "PAGE_FAULT: qpn: 0x%06x, wqe_index: 0x%04x,\n",
+                             qpn, pfault.wqe.wqe_index);
+               mlx5_core_dbg(dev,
+                             "PAGE_FAULT: bytes_committed: 0x%06x\n",
+                             pfault.bytes_committed);
+               break;
+
+       default:
+               mlx5_core_warn(dev,
+                              "Unsupported page fault event sub-type: 0x%02hhx, QP %06x\n",
+                              eqe->sub_type, qpn);
+               /* Unsupported page faults should still be resolved by the
+                * page fault handler
+                */
+       }
+
+       if (qp->pfault_handler) {
+               qp->pfault_handler(qp, &pfault);
+       } else {
+               mlx5_core_err(dev,
+                             "ODP event for QP %08x, without a fault handler in QP\n",
+                             qpn);
+               /* Page fault will remain unresolved. QP will hang until it is
+                * destroyed
+                */
+       }
+
+       /* Pairs with the mlx5_get_rsc() lookup at the top of the function. */
+       mlx5_core_put_rsc(common);
+}
+#endif
+
  int mlx5_core_create_qp(struct mlx5_core_dev *dev,
                         struct mlx5_core_qp *qp,
                         struct mlx5_create_qp_mbox_in *in,
@@ -322,3 +411,33 @@ int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn)
         return err;
  }
  EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+/* Issue the PAGE_FAULT_RESUME command for a QP.
+ *
+ * @dev:   core device to send the command on
+ * @qpn:   QP number; only the bits covered by MLX5_QPN_MASK are used
+ * @flags: resume flags; anything other than the REQUESTOR, WRITE and RDMA
+ *         bits is dropped before the command is built
+ * @error: non-zero adds MLX5_PAGE_FAULT_RESUME_ERROR to the flags
+ *
+ * Returns 0 on success, the error from mlx5_cmd_exec(), or the translation
+ * of a non-zero command status.
+ */
+int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
+                               u8 flags, int error)
+{
+       struct mlx5_page_fault_resume_mbox_in in;
+       struct mlx5_page_fault_resume_mbox_out out;
+       int err;
+
+       memset(&in, 0, sizeof(in));
+       memset(&out, 0, sizeof(out));
+       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_PAGE_FAULT_RESUME);
+       in.hdr.opmod = 0;
+       flags &= (MLX5_PAGE_FAULT_RESUME_REQUESTOR |
+                 MLX5_PAGE_FAULT_RESUME_WRITE     |
+                 MLX5_PAGE_FAULT_RESUME_RDMA);
+       flags |= (error ? MLX5_PAGE_FAULT_RESUME_ERROR : 0);
+       /* A u8 is promoted to signed int before shifting; widen it to u32
+        * first so that a flag landing in the top bit of the word is not a
+        * signed-overflow shift.
+        * NOTE(review): assumes MLX5_QPN_BITS places the flags in the upper
+        * byte of the 32-bit word - confirm against the header.
+        */
+       in.flags_qpn = cpu_to_be32((qpn & MLX5_QPN_MASK) |
+                                  ((u32)flags << MLX5_QPN_BITS));
+       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+       if (err)
+               return err;
+
+       /* The command was delivered; report a firmware-side failure if any. */
+       if (out.hdr.status)
+               err = mlx5_cmd_status_to_err(&out.hdr);
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
+#endif
diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig

index 627926800ff38fe16467c099cb58517cf37456a1..9468e64e6007bd2ee200e650c0355398ea750d5c 100644 (file)
--- a/drivers/net/ethernet/smsc/Kconfig
+++ b/drivers/net/ethernet/smsc/Kconfig
@@ -39,7 +39,7 @@ config SMC91X
         select CRC32
         select MII
         depends on (ARM || M32R || SUPERH || MIPS || BLACKFIN || \
-                   MN10300 || COLDFIRE || ARM64 || XTENSA || NIOS2)
+                   MN10300 || COLDFIRE || ARM64 || XTENSA || NIOS2) && (!OF || GPIOLIB)
         ---help---
           This is a driver for SMC's 91x series of Ethernet chipsets,
           including the SMC91C94 and the SMC91C111. Say Y if you want it
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c

index 0e137751e76e45b4e904a9156c6a4a07aad58e09..056b358b4a72441d424a9b3cf0094919e383c600 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
@@ -309,16 +309,16 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac,
  
         if (IS_PHY_IF_MODE_GBIT(dwmac->interface)) {
                 const char *rs;
-               dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN;
  
                 err = of_property_read_string(np, "st,tx-retime-src", &rs);
-               if (err < 0)
+               if (err < 0) {
                         dev_warn(dev, "Use internal clock source\n");
-
-               if (!strcasecmp(rs, "clk_125"))
+                       dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN;
+               } else if (!strcasecmp(rs, "clk_125")) {
                         dwmac->tx_retime_src = TX_RETIME_SRC_CLK_125;
-               else if (!strcasecmp(rs, "txclk"))
+               } else if (!strcasecmp(rs, "txclk")) {
                         dwmac->tx_retime_src = TX_RETIME_SRC_TXCLK;
+               }
  
                 dwmac->speed = SPEED_1000;
         }
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c

index 60f7ee5fafbe004a766fdd262d2f074e04070bcb..7df221788cd4dc7ae4d8c3ed2f53789296d00deb 100644 (file)
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -46,16 +46,18 @@ struct macvtap_queue {
         struct list_head next;
  };
  
-#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_VNET_LE | IFF_MULTI_QUEUE)
+#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)
+
+#define MACVTAP_VNET_LE 0x80000000
  
  static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val)
  {
-       return __virtio16_to_cpu(q->flags & IFF_VNET_LE, val);
+       return __virtio16_to_cpu(q->flags & MACVTAP_VNET_LE, val);
  }
  
  static inline __virtio16 cpu_to_macvtap16(struct macvtap_queue *q, u16 val)
  {
-       return __cpu_to_virtio16(q->flags & IFF_VNET_LE, val);
+       return __cpu_to_virtio16(q->flags & MACVTAP_VNET_LE, val);
  }
  
  static struct proto macvtap_proto = {
@@ -999,7 +1001,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
         void __user *argp = (void __user *)arg;
         struct ifreq __user *ifr = argp;
         unsigned int __user *up = argp;
-       unsigned int u;
+       unsigned short u;
         int __user *sp = argp;
         int s;
         int ret;
@@ -1014,7 +1016,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
                 if ((u & ~MACVTAP_FEATURES) != (IFF_NO_PI | IFF_TAP))
                         ret = -EINVAL;
                 else
-                       q->flags = u;
+                       q->flags = (q->flags & ~MACVTAP_FEATURES) | u;
  
                 return ret;
  
@@ -1027,8 +1029,9 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
                 }
  
                 ret = 0;
+               u = q->flags;
                 if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) ||
-                   put_user(q->flags, &ifr->ifr_flags))
+                   put_user(u, &ifr->ifr_flags))
                         ret = -EFAULT;
                 macvtap_put_vlan(vlan);
                 rtnl_unlock();
@@ -1069,6 +1072,21 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
                 q->vnet_hdr_sz = s;
                 return 0;
  
+       case TUNGETVNETLE:
+               s = !!(q->flags & MACVTAP_VNET_LE);
+               if (put_user(s, sp))
+                       return -EFAULT;
+               return 0;
+
+       case TUNSETVNETLE:
+               if (get_user(s, sp))
+                       return -EFAULT;
+               if (s)
+                       q->flags |= MACVTAP_VNET_LE;
+               else
+                       q->flags &= ~MACVTAP_VNET_LE;
+               return 0;
+
         case TUNSETOFFLOAD:
                 /* let the user check for future flags */
                 if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig

index b4b0f804e84c937df436e68332b9f289ff16a75d..a3c251b79f38b5dcfa8d48d4c3fbceb1e94ef25f 100644 (file)
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -119,8 +119,8 @@ config MICREL_PHY
           Supports the KSZ9021, VSC8201, KS8001 PHYs.
  
  config FIXED_PHY
-       bool "Driver for MDIO Bus/PHY emulation with fixed speed/link PHYs"
-       depends on PHYLIB=y
+       tristate "Driver for MDIO Bus/PHY emulation with fixed speed/link PHYs"
+       depends on PHYLIB
         ---help---
           Adds the platform "fixed" MDIO Bus to cover the boards that use
           PHYs that are not connected to the real MDIO bus.
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile

index eb3b18b5978b33ec7441a2ca2e0f4aa33906cdd5..501ea7699a2df399c6e1998e570eab6c7ae9a645 100644 (file)
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -17,7 +17,7 @@ obj-$(CONFIG_BCM87XX_PHY)     += bcm87xx.o
  obj-$(CONFIG_ICPLUS_PHY)       += icplus.o
  obj-$(CONFIG_REALTEK_PHY)      += realtek.o
  obj-$(CONFIG_LSI_ET1011C_PHY)  += et1011c.o
-obj-$(CONFIG_FIXED_PHY)                += fixed.o
+obj-$(CONFIG_FIXED_PHY)                += fixed_phy.o
  obj-$(CONFIG_MDIO_BITBANG)     += mdio-bitbang.o
  obj-$(CONFIG_MDIO_GPIO)                += mdio-gpio.o
  obj-$(CONFIG_NATIONAL_PHY)     += national.o
diff --git a/drivers/net/phy/fixed.c b/drivers/net/phy/fixed_phy.c

similarity index 100%

rename from drivers/net/phy/fixed.c

rename to drivers/net/phy/fixed_phy.c
diff --git a/drivers/net/tun.c b/drivers/net/tun.c

index a5cbf67517f09e08e728a44dd9917e2792d37d9d..8c8dc16839a79473e976d9de2132cc6df911e418 100644 (file)
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -110,9 +110,11 @@ do {                                                               \
   * overload it to mean fasync when stored there.
   */
  #define TUN_FASYNC     IFF_ATTACH_QUEUE
+/* High bits in flags field are unused. */
+#define TUN_VNET_LE     0x80000000
  
  #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
-                     IFF_VNET_LE | IFF_MULTI_QUEUE)
+                     IFF_MULTI_QUEUE)
  #define GOODCOPY_LEN 128
  
  #define FLT_EXACT_COUNT 8
@@ -208,12 +210,12 @@ struct tun_struct {
  
  static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val)
  {
-       return __virtio16_to_cpu(tun->flags & IFF_VNET_LE, val);
+       return __virtio16_to_cpu(tun->flags & TUN_VNET_LE, val);
  }
  
  static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val)
  {
-       return __cpu_to_virtio16(tun->flags & IFF_VNET_LE, val);
+       return __cpu_to_virtio16(tun->flags & TUN_VNET_LE, val);
  }
  
  static inline u32 tun_hashfn(u32 rxhash)
@@ -1843,6 +1845,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
         int sndbuf;
         int vnet_hdr_sz;
         unsigned int ifindex;
+       int le;
         int ret;
  
         if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 0x89) {
@@ -2042,6 +2045,23 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                 tun->vnet_hdr_sz = vnet_hdr_sz;
                 break;
  
+       case TUNGETVNETLE:
+               le = !!(tun->flags & TUN_VNET_LE);
+               if (put_user(le, (int __user *)argp))
+                       ret = -EFAULT;
+               break;
+
+       case TUNSETVNETLE:
+               if (get_user(le, (int __user *)argp)) {
+                       ret = -EFAULT;
+                       break;
+               }
+               if (le)
+                       tun->flags |= TUN_VNET_LE;
+               else
+                       tun->flags &= ~TUN_VNET_LE;
+               break;
+
         case TUNATTACHFILTER:
                 /* Can be set only for TAPs */
                 ret = -EINVAL;
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c

index a104d7ac3796491a62274f1bcf7399d15d046a22..eb8584a9c49a84d28a5ec592ce943a6243e3029f 100644 (file)
--- a/drivers/net/wireless/brcm80211/brcmsmac/main.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c
@@ -316,7 +316,7 @@ static const u16 xmtfifo_sz[][NFIFO] = {
  static const char * const fifo_names[] = {
         "AC_BK", "AC_BE", "AC_VI", "AC_VO", "BCMC", "ATIM" };
  #else
-static const char fifo_names[6][0];
+static const char fifo_names[6][1];
  #endif
  
  #ifdef DEBUG
diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c

index b6ec51923b203afb589e49738a13d87a6c37a836..50033aa7c7d5ab39761dbb0184cc2d1fdfcaa5fd 100644 (file)
--- a/drivers/net/wireless/hostap/hostap_cs.c
+++ b/drivers/net/wireless/hostap/hostap_cs.c
@@ -381,18 +381,15 @@ static void prism2_pccard_genesis_reset(local_info_t *local, int hcr)
  
         res = pcmcia_read_config_byte(hw_priv->link, CISREG_COR, &old_cor);
         if (res != 0) {
-               printk(KERN_DEBUG "prism2_pccard_genesis_sreset failed 1 "
-                      "(%d)\n", res);
+               printk(KERN_DEBUG "%s failed 1 (%d)\n", __func__, res);
                 return;
         }
-       printk(KERN_DEBUG "prism2_pccard_genesis_sreset: original COR %02x\n",
-               old_cor);
+       printk(KERN_DEBUG "%s: original COR %02x\n", __func__, old_cor);
  
         res = pcmcia_write_config_byte(hw_priv->link, CISREG_COR,
                                 old_cor | COR_SOFT_RESET);
         if (res != 0) {
-               printk(KERN_DEBUG "prism2_pccard_genesis_sreset failed 2 "
-                      "(%d)\n", res);
+               printk(KERN_DEBUG "%s failed 2 (%d)\n", __func__, res);
                 return;
         }
  
@@ -401,8 +398,7 @@ static void prism2_pccard_genesis_reset(local_info_t *local, int hcr)
         /* Setup Genesis mode */
         res = pcmcia_write_config_byte(hw_priv->link, CISREG_CCSR, hcr);
         if (res != 0) {
-               printk(KERN_DEBUG "prism2_pccard_genesis_sreset failed 3 "
-                      "(%d)\n", res);
+               printk(KERN_DEBUG "%s failed 3 (%d)\n", __func__, res);
                 return;
         }
         mdelay(10);
@@ -410,8 +406,7 @@ static void prism2_pccard_genesis_reset(local_info_t *local, int hcr)
         res = pcmcia_write_config_byte(hw_priv->link, CISREG_COR,
                                 old_cor & ~COR_SOFT_RESET);
         if (res != 0) {
-               printk(KERN_DEBUG "prism2_pccard_genesis_sreset failed 4 "
-                      "(%d)\n", res);
+               printk(KERN_DEBUG "%s failed 4 (%d)\n", __func__, res);
                 return;
         }
  
diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c

index d2ec5160bbf05dd5be42d70a4780292dc8768d58..5c646d5f7bb83e9b3492c33401ae5de810ced7d5 100644 (file)
--- a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
@@ -955,6 +955,7 @@ int rtl92ce_hw_init(struct ieee80211_hw *hw)
         local_save_flags(flags);
         local_irq_enable();
  
+       rtlhal->fw_ready = false;
         rtlpriv->intf_ops->disable_aspm(hw);
         rtstatus = _rtl92ce_init_mac(hw);
         if (!rtstatus) {
@@ -971,6 +972,7 @@ int rtl92ce_hw_init(struct ieee80211_hw *hw)
                 goto exit;
         }
  
+       rtlhal->fw_ready = true;
         rtlhal->last_hmeboxnum = 0;
         rtl92c_phy_mac_config(hw);
         /* because last function modify RCR, so we update
diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c

index 873363acbacfac12df0d0c724dc962c74b2a2fc7..551321728ae06b7f3781795717a27e453f6a8f65 100644 (file)
--- a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c
@@ -1592,7 +1592,7 @@ void rtl92cu_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val)
         }
  }
  
-bool usb_cmd_send_packet(struct ieee80211_hw *hw, struct sk_buff *skb)
+static bool usb_cmd_send_packet(struct ieee80211_hw *hw, struct sk_buff *skb)
  {
    /* Currently nothing happens here.
     * Traffic stops after some seconds in WPA2 802.11n mode.
diff --git a/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c b/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c

index 9be106109921b5ba94cbc44dc88b83e35f6e2759..ba30b0d250fd7bb75ff7f39edf16b6b225a07395 100644 (file)
--- a/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c
+++ b/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c
@@ -2078,8 +2078,7 @@ void rtl8821ae_dm_txpwr_track_set_pwr(struct ieee80211_hw *hw,
         if (rtldm->tx_rate != 0xFF)
                 tx_rate = rtl8821ae_hw_rate_to_mrate(hw, rtldm->tx_rate);
  
-       RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-                "===>rtl8812ae_dm_txpwr_track_set_pwr\n");
+       RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD, "===>%s\n", __func__);
  
         if (tx_rate != 0xFF) { /* Mimic Modify High Rate BBSwing Limit.*/
                 /*CCK*/
@@ -2128,7 +2127,7 @@ void rtl8821ae_dm_txpwr_track_set_pwr(struct ieee80211_hw *hw,
  
         if (method == BBSWING) {
                 RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-                        "===>rtl8812ae_dm_txpwr_track_set_pwr\n");
+                        "===>%s\n", __func__);
                 if (rf_path == RF90_PATH_A) {
                         final_swing_idx[RF90_PATH_A] =
                                 (rtldm->ofdm_index[RF90_PATH_A] >
@@ -2260,7 +2259,8 @@ void rtl8821ae_dm_txpower_tracking_callback_thermalmeter(
         rtldm->txpower_trackinginit = true;
  
         RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-                "===>rtl8812ae_dm_txpower_tracking_callback_thermalmeter,\n pDM_Odm->BbSwingIdxCckBase: %d,pDM_Odm->BbSwingIdxOfdmBase[A]:%d, pDM_Odm->DefaultOfdmIndex: %d\n",
+                "===>%s,\n pDM_Odm->BbSwingIdxCckBase: %d,pDM_Odm->BbSwingIdxOfdmBase[A]:%d, pDM_Odm->DefaultOfdmIndex: %d\n",
+                __func__,
                  rtldm->swing_idx_cck_base,
                  rtldm->swing_idx_ofdm_base[RF90_PATH_A],
                  rtldm->default_ofdm_index);
@@ -2539,8 +2539,7 @@ void rtl8821ae_dm_txpower_tracking_callback_thermalmeter(
                 }
         }
  
-       RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-                "<===rtl8812ae_dm_txpower_tracking_callback_thermalmeter\n");
+       RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD, "<===%s\n", __func__);
  }
  
  void rtl8821ae_dm_check_txpower_tracking_thermalmeter(struct ieee80211_hw *hw)
diff --git a/drivers/net/wireless/zd1211rw/zd_chip.c b/drivers/net/wireless/zd1211rw/zd_chip.c

index 73a49b868035ee26b18924ab7d48c06bd967add7..07b94eda96041070ba01063683dcdcf7295e8de5 100644 (file)
--- a/drivers/net/wireless/zd1211rw/zd_chip.c
+++ b/drivers/net/wireless/zd1211rw/zd_chip.c
@@ -129,7 +129,7 @@ int zd_ioread32v_locked(struct zd_chip *chip, u32 *values, const zd_addr_t *addr
         r = zd_ioread16v_locked(chip, v16, a16, count16);
         if (r) {
                 dev_dbg_f(zd_chip_dev(chip),
-                         "error: zd_ioread16v_locked. Error number %d\n", r);
+                         "error: %s. Error number %d\n", __func__, r);
                 return r;
         }
  
@@ -256,8 +256,8 @@ int zd_iowrite32a_locked(struct zd_chip *chip,
                 if (r) {
                         zd_usb_iowrite16v_async_end(&chip->usb, 0);
                         dev_dbg_f(zd_chip_dev(chip),
-                               "error _zd_iowrite32v_locked."
-                               " Error number %d\n", r);
+                               "error _%s. Error number %d\n", __func__,
+                               r);
                         return r;
                 }
         }
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h

index 083ecc93fe5e7b0941aedba3ccb9fd27158cc03b..5f1fda44882b764d2adcdc4d02a782740db6cbc5 100644 (file)
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -230,6 +230,8 @@ struct xenvif {
          */
         bool disabled;
         unsigned long status;
+       unsigned long drain_timeout;
+       unsigned long stall_timeout;
  
         /* Queues */
         struct xenvif_queue *queues;
@@ -328,7 +330,7 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id);
  extern bool separate_tx_rx_irq;
  
  extern unsigned int rx_drain_timeout_msecs;
-extern unsigned int rx_drain_timeout_jiffies;
+extern unsigned int rx_stall_timeout_msecs;
  extern unsigned int xenvif_max_queues;
  
  #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c

index a6a32d337bbbcf0440dae386117c4dfb3b6b901c..9259a732e8a4a6d20740a1ad9170962040b8d1f8 100644 (file)
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -166,7 +166,7 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
                 goto drop;
  
         cb = XENVIF_RX_CB(skb);
-       cb->expires = jiffies + rx_drain_timeout_jiffies;
+       cb->expires = jiffies + vif->drain_timeout;
  
         xenvif_rx_queue_tail(queue, skb);
         xenvif_kick_thread(queue);
@@ -414,6 +414,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
         vif->ip_csum = 1;
         vif->dev = dev;
         vif->disabled = false;
+       vif->drain_timeout = msecs_to_jiffies(rx_drain_timeout_msecs);
+       vif->stall_timeout = msecs_to_jiffies(rx_stall_timeout_msecs);
  
         /* Start out with no queues. */
         vif->queues = NULL;
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c

index 4a509f715fe8be2f8ff70f7dbf87dce93631caa9..908e65e9b8219783ae4b99e5d06e4701478c830d 100644 (file)
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -60,14 +60,12 @@ module_param(separate_tx_rx_irq, bool, 0644);
   */
  unsigned int rx_drain_timeout_msecs = 10000;
  module_param(rx_drain_timeout_msecs, uint, 0444);
-unsigned int rx_drain_timeout_jiffies;
  
  /* The length of time before the frontend is considered unresponsive
   * because it isn't providing Rx slots.
   */
-static unsigned int rx_stall_timeout_msecs = 60000;
+unsigned int rx_stall_timeout_msecs = 60000;
  module_param(rx_stall_timeout_msecs, uint, 0444);
-static unsigned int rx_stall_timeout_jiffies;
  
  unsigned int xenvif_max_queues;
  module_param_named(max_queues, xenvif_max_queues, uint, 0644);
@@ -2020,7 +2018,7 @@ static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
         return !queue->stalled
                 && prod - cons < XEN_NETBK_RX_SLOTS_MAX
                 && time_after(jiffies,
-                             queue->last_rx_time + rx_stall_timeout_jiffies);
+                             queue->last_rx_time + queue->vif->stall_timeout);
  }
  
  static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
@@ -2038,8 +2036,9 @@ static bool xenvif_have_rx_work(struct xenvif_queue *queue)
  {
         return (!skb_queue_empty(&queue->rx_queue)
                 && xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX))
-               || xenvif_rx_queue_stalled(queue)
-               || xenvif_rx_queue_ready(queue)
+               || (queue->vif->stall_timeout &&
+                   (xenvif_rx_queue_stalled(queue)
+                    || xenvif_rx_queue_ready(queue)))
                 || kthread_should_stop()
                 || queue->vif->disabled;
  }
@@ -2092,6 +2091,9 @@ int xenvif_kthread_guest_rx(void *data)
         struct xenvif_queue *queue = data;
         struct xenvif *vif = queue->vif;
  
+       if (!vif->stall_timeout)
+               xenvif_queue_carrier_on(queue);
+
         for (;;) {
                 xenvif_wait_for_rx_work(queue);
  
@@ -2118,10 +2120,12 @@ int xenvif_kthread_guest_rx(void *data)
                  * while it's probably not responsive, drop the
                  * carrier so packets are dropped earlier.
                  */
-               if (xenvif_rx_queue_stalled(queue))
-                       xenvif_queue_carrier_off(queue);
-               else if (xenvif_rx_queue_ready(queue))
-                       xenvif_queue_carrier_on(queue);
+               if (vif->stall_timeout) {
+                       if (xenvif_rx_queue_stalled(queue))
+                               xenvif_queue_carrier_off(queue);
+                       else if (xenvif_rx_queue_ready(queue))
+                               xenvif_queue_carrier_on(queue);
+               }
  
                 /* Queued packets may have foreign pages from other
                  * domains.  These cannot be queued indefinitely as
@@ -2192,9 +2196,6 @@ static int __init netback_init(void)
         if (rc)
                 goto failed_init;
  
-       rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);
-       rx_stall_timeout_jiffies = msecs_to_jiffies(rx_stall_timeout_msecs);
-
  #ifdef CONFIG_DEBUG_FS
         xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
         if (IS_ERR_OR_NULL(xen_netback_dbg_root))
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c

index d44cd19169bdd64a6044d1fc4fcf6a9a4a7ab357..efbaf2ae1999a97982a8e57274e3cf09f95b42ec 100644 (file)
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -887,9 +887,15 @@ static int read_xenbus_vif_flags(struct backend_info *be)
                 return -EOPNOTSUPP;
  
         if (xenbus_scanf(XBT_NIL, dev->otherend,
-                        "feature-rx-notify", "%d", &val) < 0 || val == 0) {
-               xenbus_dev_fatal(dev, -EINVAL, "feature-rx-notify is mandatory");
-               return -EINVAL;
+                        "feature-rx-notify", "%d", &val) < 0)
+               val = 0;
+       if (!val) {
+               /* - Reduce drain timeout to poll more frequently for
+                *   Rx requests.
+                * - Disable Rx stall detection.
+                */
+               be->vif->drain_timeout = msecs_to_jiffies(30);
+               be->vif->stall_timeout = 0;
         }
  
         if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c

index 2f0a9ce9ff737656798fc146ba995d46fe669231..22bcb4e12e2a1318fc1802fb3c5ff6b2cb4acf92 100644 (file)
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -977,7 +977,6 @@ static int xennet_poll(struct napi_struct *napi, int budget)
         struct sk_buff_head rxq;
         struct sk_buff_head errq;
         struct sk_buff_head tmpq;
-       unsigned long flags;
         int err;
  
         spin_lock(&queue->rx_lock);
@@ -1050,15 +1049,11 @@ err:
         if (work_done < budget) {
                 int more_to_do = 0;
  
-               napi_gro_flush(napi, false);
-
-               local_irq_save(flags);
+               napi_complete(napi);
  
                 RING_FINAL_CHECK_FOR_RESPONSES(&queue->rx, more_to_do);
-               if (!more_to_do)
-                       __napi_complete(napi);
-
-               local_irq_restore(flags);
+               if (more_to_do)
+                       napi_schedule(napi);
         }
  
         spin_unlock(&queue->rx_lock);
diff --git a/drivers/staging/media/Kconfig b/drivers/staging/media/Kconfig

index 96498b7fc20e2ad78fedde1cc5867befa3dac4b6..2a054a99d433b0f1d16bc6aa701931e9860d1152 100644 (file)
--- a/drivers/staging/media/Kconfig
+++ b/drivers/staging/media/Kconfig
@@ -27,12 +27,18 @@ source "drivers/staging/media/davinci_vpfe/Kconfig"
  
  source "drivers/staging/media/dt3155v4l/Kconfig"
  
+source "drivers/staging/media/tlg2300/Kconfig"
+
  source "drivers/staging/media/mn88472/Kconfig"
  
  source "drivers/staging/media/mn88473/Kconfig"
  
  source "drivers/staging/media/omap4iss/Kconfig"
  
+source "drivers/staging/media/parport/Kconfig"
+
+source "drivers/staging/media/vino/Kconfig"
+
  # Keep LIRC at the end, as it has sub-menus
  source "drivers/staging/media/lirc/Kconfig"
  
diff --git a/drivers/staging/media/Makefile b/drivers/staging/media/Makefile

index 30fb352fc4a93f3eff64d5e3ba3eb4c94e397a65..412b284083985b590582e85e178d7f570104152c 100644 (file)
--- a/drivers/staging/media/Makefile
+++ b/drivers/staging/media/Makefile
@@ -6,4 +6,7 @@ obj-$(CONFIG_VIDEO_DM365_VPFE)  += davinci_vpfe/
  obj-$(CONFIG_VIDEO_OMAP4)      += omap4iss/
  obj-$(CONFIG_DVB_MN88472)       += mn88472/
  obj-$(CONFIG_DVB_MN88473)       += mn88473/
+obj-y                          += parport/
+obj-$(CONFIG_VIDEO_TLG2300)    += tlg2300/
+obj-y                           += vino/
  
diff --git a/drivers/media/parport/Kconfig b/drivers/staging/media/parport/Kconfig

similarity index 65%

rename from drivers/media/parport/Kconfig

rename to drivers/staging/media/parport/Kconfig

index 948c981d9f05ae00cd2ec347d601acd5469e54ca..15974efdba1d30b695ef941b42692804dd7ec939 100644 (file)
--- a/drivers/media/parport/Kconfig
+++ b/drivers/staging/media/parport/Kconfig
@@ -7,18 +7,22 @@ menuconfig MEDIA_PARPORT_SUPPORT
  
  if MEDIA_PARPORT_SUPPORT
  config VIDEO_BWQCAM
-       tristate "Quickcam BW Video For Linux"
+       tristate "Quickcam BW Video For Linux (Deprecated)"
         depends on PARPORT && VIDEO_V4L2
         select VIDEOBUF2_VMALLOC
         help
           Say Y have if you the black and white version of the QuickCam
           camera. See the next option for the color version.
  
+         This driver is deprecated and will be removed soon. If you have
+         hardware for this and you want to work on this driver, then contact
+         the linux-media mailing list.
+
           To compile this driver as a module, choose M here: the
           module will be called bw-qcam.
  
  config VIDEO_CQCAM
-       tristate "QuickCam Colour Video For Linux"
+       tristate "QuickCam Colour Video For Linux (Deprecated)"
         depends on PARPORT && VIDEO_V4L2
         help
           This is the video4linux driver for the colour version of the
@@ -28,18 +32,26 @@ config VIDEO_CQCAM
           as a module (c-qcam).
           Read <file:Documentation/video4linux/CQcam.txt> for more information.
  
+         This driver is deprecated and will be removed soon. If you have
+         hardware for this and you want to work on this driver, then contact
+         the linux-media mailing list.
+
  config VIDEO_PMS
-       tristate "Mediavision Pro Movie Studio Video For Linux"
+       tristate "Mediavision Pro Movie Studio Video For Linux (Deprecated)"
         depends on ISA && VIDEO_V4L2
         help
           Say Y if you have the ISA Mediavision Pro Movie Studio
           capture card.
  
+         This driver is deprecated and will be removed soon. If you have
+         hardware for this and you want to work on this driver, then contact
+         the linux-media mailing list.
+
           To compile this driver as a module, choose M here: the
           module will be called pms.
  
  config VIDEO_W9966
-       tristate "W9966CF Webcam (FlyCam Supra and others) Video For Linux"
+       tristate "W9966CF Webcam (FlyCam Supra and others) Video For Linux (Deprecated)"
         depends on PARPORT_1284 && PARPORT && VIDEO_V4L2
         help
           Video4linux driver for Winbond's w9966 based Webcams.
@@ -50,4 +62,8 @@ config VIDEO_W9966
  
           Check out <file:Documentation/video4linux/w9966.txt> for more
           information.
+
+         This driver is deprecated and will be removed soon. If you have
+         hardware for this and you want to work on this driver, then contact
+         the linux-media mailing list.
  endif
diff --git a/drivers/media/parport/Makefile b/drivers/staging/media/parport/Makefile

similarity index 100%

rename from drivers/media/parport/Makefile

rename to drivers/staging/media/parport/Makefile
diff --git a/drivers/media/parport/bw-qcam.c b/drivers/staging/media/parport/bw-qcam.c

similarity index 100%

rename from drivers/media/parport/bw-qcam.c

rename to drivers/staging/media/parport/bw-qcam.c
diff --git a/drivers/media/parport/c-qcam.c b/drivers/staging/media/parport/c-qcam.c

similarity index 100%

rename from drivers/media/parport/c-qcam.c

rename to drivers/staging/media/parport/c-qcam.c
diff --git a/drivers/media/parport/pms.c b/drivers/staging/media/parport/pms.c

similarity index 100%

rename from drivers/media/parport/pms.c

rename to drivers/staging/media/parport/pms.c
diff --git a/drivers/media/parport/w9966.c b/drivers/staging/media/parport/w9966.c

similarity index 100%

rename from drivers/media/parport/w9966.c

rename to drivers/staging/media/parport/w9966.c
diff --git a/drivers/media/usb/tlg2300/Kconfig b/drivers/staging/media/tlg2300/Kconfig

similarity index 63%

rename from drivers/media/usb/tlg2300/Kconfig

rename to drivers/staging/media/tlg2300/Kconfig

index 645d915267e6e4c4ff09a5375817c8ba15d493b3..81784c6f7b8872ed7a0b45aaea1ec0124633db17 100644 (file)
--- a/drivers/media/usb/tlg2300/Kconfig
+++ b/drivers/staging/media/tlg2300/Kconfig
@@ -1,5 +1,5 @@
  config VIDEO_TLG2300
-       tristate "Telegent TLG2300 USB video capture support"
+       tristate "Telegent TLG2300 USB video capture support (Deprecated)"
         depends on VIDEO_DEV && I2C && SND && DVB_CORE
         select VIDEO_TUNER
         select VIDEO_TVEEPROM
@@ -12,5 +12,9 @@ config VIDEO_TLG2300
           This is a video4linux driver for Telegent tlg2300 based TV cards.
           The driver supports V4L2, DVB-T and radio.
  
+         This driver is deprecated and will be removed soon. If you have
+         hardware for this and you want to work on this driver, then contact
+         the linux-media mailing list.
+
           To compile this driver as a module, choose M here: the
           module will be called poseidon
diff --git a/drivers/media/usb/tlg2300/Makefile b/drivers/staging/media/tlg2300/Makefile

similarity index 100%

rename from drivers/media/usb/tlg2300/Makefile

rename to drivers/staging/media/tlg2300/Makefile
diff --git a/drivers/media/usb/tlg2300/pd-alsa.c b/drivers/staging/media/tlg2300/pd-alsa.c

similarity index 100%

rename from drivers/media/usb/tlg2300/pd-alsa.c

rename to drivers/staging/media/tlg2300/pd-alsa.c
diff --git a/drivers/media/usb/tlg2300/pd-common.h b/drivers/staging/media/tlg2300/pd-common.h

similarity index 100%

rename from drivers/media/usb/tlg2300/pd-common.h

rename to drivers/staging/media/tlg2300/pd-common.h
diff --git a/drivers/media/usb/tlg2300/pd-dvb.c b/drivers/staging/media/tlg2300/pd-dvb.c

similarity index 100%

rename from drivers/media/usb/tlg2300/pd-dvb.c

rename to drivers/staging/media/tlg2300/pd-dvb.c
diff --git a/drivers/media/usb/tlg2300/pd-main.c b/drivers/staging/media/tlg2300/pd-main.c

similarity index 100%

rename from drivers/media/usb/tlg2300/pd-main.c

rename to drivers/staging/media/tlg2300/pd-main.c
diff --git a/drivers/media/usb/tlg2300/pd-radio.c b/drivers/staging/media/tlg2300/pd-radio.c

similarity index 100%

rename from drivers/media/usb/tlg2300/pd-radio.c

rename to drivers/staging/media/tlg2300/pd-radio.c
diff --git a/drivers/media/usb/tlg2300/pd-video.c b/drivers/staging/media/tlg2300/pd-video.c

similarity index 100%

rename from drivers/media/usb/tlg2300/pd-video.c

rename to drivers/staging/media/tlg2300/pd-video.c
diff --git a/drivers/media/usb/tlg2300/vendorcmds.h b/drivers/staging/media/tlg2300/vendorcmds.h

similarity index 100%

rename from drivers/media/usb/tlg2300/vendorcmds.h

rename to drivers/staging/media/tlg2300/vendorcmds.h
diff --git a/drivers/staging/media/vino/Kconfig b/drivers/staging/media/vino/Kconfig

new file mode 100644 (file)

index 0000000..03700da
--- /dev/null
+++ b/drivers/staging/media/vino/Kconfig
@@ -0,0 +1,24 @@
+config VIDEO_VINO
+       tristate "SGI Vino Video For Linux (Deprecated)"
+       depends on I2C && SGI_IP22 && VIDEO_V4L2
+       select VIDEO_SAA7191 if MEDIA_SUBDRV_AUTOSELECT
+       help
+         Say Y here to build in support for the Vino video input system found
+         on SGI Indy machines.
+
+         This driver is deprecated and will be removed soon. If you have
+         hardware for this and you want to work on this driver, then contact
+         the linux-media mailing list.
+
+config VIDEO_SAA7191
+       tristate "Philips SAA7191 video decoder (Deprecated)"
+       depends on VIDEO_V4L2 && I2C
+       ---help---
+         Support for the Philips SAA7191 video decoder.
+
+         This driver is deprecated and will be removed soon. If you have
+         hardware for this and you want to work on this driver, then contact
+         the linux-media mailing list.
+
+         To compile this driver as a module, choose M here: the
+         module will be called saa7191.
diff --git a/drivers/staging/media/vino/Makefile b/drivers/staging/media/vino/Makefile

new file mode 100644 (file)

index 0000000..914c251
--- /dev/null
+++ b/drivers/staging/media/vino/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_VIDEO_VINO) += indycam.o
+obj-$(CONFIG_VIDEO_VINO) += vino.o
+obj-$(CONFIG_VIDEO_SAA7191) += saa7191.o
diff --git a/drivers/media/platform/indycam.c b/drivers/staging/media/vino/indycam.c

similarity index 100%

rename from drivers/media/platform/indycam.c

rename to drivers/staging/media/vino/indycam.c
diff --git a/drivers/media/platform/indycam.h b/drivers/staging/media/vino/indycam.h

similarity index 100%

rename from drivers/media/platform/indycam.h

rename to drivers/staging/media/vino/indycam.h
diff --git a/drivers/media/i2c/saa7191.c b/drivers/staging/media/vino/saa7191.c

similarity index 100%

rename from drivers/media/i2c/saa7191.c

rename to drivers/staging/media/vino/saa7191.c
diff --git a/drivers/media/i2c/saa7191.h b/drivers/staging/media/vino/saa7191.h

similarity index 100%

rename from drivers/media/i2c/saa7191.h

rename to drivers/staging/media/vino/saa7191.h
diff --git a/drivers/media/platform/vino.c b/drivers/staging/media/vino/vino.c

similarity index 100%

rename from drivers/media/platform/vino.c

rename to drivers/staging/media/vino/vino.c
diff --git a/drivers/media/platform/vino.h b/drivers/staging/media/vino/vino.h

similarity index 100%

rename from drivers/media/platform/vino.h

rename to drivers/staging/media/vino/vino.h
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c

index 32602c667b4aaf697c97b7f7e709d808328a5468..7892e6fddb66d86f0653140e3dce725b2469285d 100644 (file)
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -38,21 +38,30 @@ int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
         return hfsplus_strcmp(&k1->cat.name, &k2->cat.name);
  }
  
-void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
-                          u32 parent, struct qstr *str)
+/* Generates key for catalog file/folders record. */
+int hfsplus_cat_build_key(struct super_block *sb,
+               hfsplus_btree_key *key, u32 parent, struct qstr *str)
  {
-       int len;
+       int len, err;
  
         key->cat.parent = cpu_to_be32(parent);
-       if (str) {
-               hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
-                                       str->name, str->len);
-               len = be16_to_cpu(key->cat.name.length);
-       } else {
-               key->cat.name.length = 0;
-               len = 0;
-       }
+       err = hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
+                       str->name, str->len);
+       if (unlikely(err < 0))
+               return err;
+
+       len = be16_to_cpu(key->cat.name.length);
         key->key_len = cpu_to_be16(6 + 2 * len);
+       return 0;
+}
+
+/* Generates key for catalog thread record. */
+void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
+                       hfsplus_btree_key *key, u32 parent)
+{
+       key->cat.parent = cpu_to_be32(parent);
+       key->cat.name.length = 0;
+       key->key_len = cpu_to_be16(6);
  }
  
  static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent,
@@ -167,11 +176,16 @@ static int hfsplus_fill_cat_thread(struct super_block *sb,
                                    hfsplus_cat_entry *entry, int type,
                                    u32 parentid, struct qstr *str)
  {
+       int err;
+
         entry->type = cpu_to_be16(type);
         entry->thread.reserved = 0;
         entry->thread.parentID = cpu_to_be32(parentid);
-       hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
+       err = hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
                                 str->name, str->len);
+       if (unlikely(err < 0))
+               return err;
+
         return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2;
  }
  
@@ -183,7 +197,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
         int err;
         u16 type;
  
-       hfsplus_cat_build_key(sb, fd->search_key, cnid, NULL);
+       hfsplus_cat_build_key_with_cnid(sb, fd->search_key, cnid);
         err = hfs_brec_read(fd, &tmp, sizeof(hfsplus_cat_entry));
         if (err)
                 return err;
@@ -250,11 +264,16 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
         if (err)
                 return err;
  
-       hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+       hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
         entry_size = hfsplus_fill_cat_thread(sb, &entry,
                 S_ISDIR(inode->i_mode) ?
                         HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD,
                 dir->i_ino, str);
+       if (unlikely(entry_size < 0)) {
+               err = entry_size;
+               goto err2;
+       }
+
         err = hfs_brec_find(&fd, hfs_find_rec_by_key);
         if (err != -ENOENT) {
                 if (!err)
@@ -265,7 +284,10 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
         if (err)
                 goto err2;
  
-       hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+       err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+       if (unlikely(err))
+               goto err1;
+
         entry_size = hfsplus_cat_build_record(&entry, cnid, inode);
         err = hfs_brec_find(&fd, hfs_find_rec_by_key);
         if (err != -ENOENT) {
@@ -288,7 +310,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
         return 0;
  
  err1:
-       hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+       hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
         if (!hfs_brec_find(&fd, hfs_find_rec_by_key))
                 hfs_brec_remove(&fd);
  err2:
@@ -313,7 +335,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
         if (!str) {
                 int len;
  
-               hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+               hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
                 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
                 if (err)
                         goto out;
@@ -329,7 +351,9 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
                         off + 2, len);
                 fd.search_key->key_len = cpu_to_be16(6 + len);
         } else
-               hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+               err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+               if (unlikely(err))
+                       goto out;
  
         err = hfs_brec_find(&fd, hfs_find_rec_by_key);
         if (err)
@@ -360,7 +384,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
         if (err)
                 goto out;
  
-       hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+       hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
         err = hfs_brec_find(&fd, hfs_find_rec_by_key);
         if (err)
                 goto out;
@@ -405,7 +429,11 @@ int hfsplus_rename_cat(u32 cnid,
         dst_fd = src_fd;
  
         /* find the old dir entry and read the data */
-       hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
+       err = hfsplus_cat_build_key(sb, src_fd.search_key,
+                       src_dir->i_ino, src_name);
+       if (unlikely(err))
+               goto out;
+
         err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
         if (err)
                 goto out;
@@ -419,7 +447,11 @@ int hfsplus_rename_cat(u32 cnid,
         type = be16_to_cpu(entry.type);
  
         /* create new dir entry with the data from the old entry */
-       hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name);
+       err = hfsplus_cat_build_key(sb, dst_fd.search_key,
+                       dst_dir->i_ino, dst_name);
+       if (unlikely(err))
+               goto out;
+
         err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
         if (err != -ENOENT) {
                 if (!err)
@@ -436,7 +468,11 @@ int hfsplus_rename_cat(u32 cnid,
         dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC;
  
         /* finally remove the old entry */
-       hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
+       err = hfsplus_cat_build_key(sb, src_fd.search_key,
+                       src_dir->i_ino, src_name);
+       if (unlikely(err))
+               goto out;
+
         err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
         if (err)
                 goto out;
@@ -449,7 +485,7 @@ int hfsplus_rename_cat(u32 cnid,
         src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC;
  
         /* remove old thread entry */
-       hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL);
+       hfsplus_cat_build_key_with_cnid(sb, src_fd.search_key, cnid);
         err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
         if (err)
                 goto out;
@@ -459,9 +495,14 @@ int hfsplus_rename_cat(u32 cnid,
                 goto out;
  
         /* create new thread entry */
-       hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL);
+       hfsplus_cat_build_key_with_cnid(sb, dst_fd.search_key, cnid);
         entry_size = hfsplus_fill_cat_thread(sb, &entry, type,
                 dst_dir->i_ino, dst_name);
+       if (unlikely(entry_size < 0)) {
+               err = entry_size;
+               goto out;
+       }
+
         err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
         if (err != -ENOENT) {
                 if (!err)
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c

index 610a3260bef1d96b868c9de8920b29b41d11eddf..435bea231cc6e83031976a3974bbf24d81a0b8ab 100644 (file)
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -44,7 +44,10 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
         err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
         if (err)
                 return ERR_PTR(err);
-       hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
+       err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino,
+                       &dentry->d_name);
+       if (unlikely(err < 0))
+               goto fail;
  again:
         err = hfs_brec_read(&fd, &entry, sizeof(entry));
         if (err) {
@@ -97,9 +100,11 @@ again:
                                         be32_to_cpu(entry.file.permissions.dev);
                                 str.len = sprintf(name, "iNode%d", linkid);
                                 str.name = name;
-                               hfsplus_cat_build_key(sb, fd.search_key,
+                               err = hfsplus_cat_build_key(sb, fd.search_key,
                                         HFSPLUS_SB(sb)->hidden_dir->i_ino,
                                         &str);
+                               if (unlikely(err < 0))
+                                       goto fail;
                                 goto again;
                         }
                 } else if (!dentry->d_fsdata)
@@ -145,7 +150,7 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx)
                 err = -ENOMEM;
                 goto out;
         }
-       hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
+       hfsplus_cat_build_key_with_cnid(sb, fd.search_key, inode->i_ino);
         err = hfs_brec_find(&fd, hfs_find_rec_by_key);
         if (err)
                 goto out;
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h

index eb5e059f481ae59edb3d7c68857fa546e85c0c88..b0441d65fa54ec405520e62207d724f71cc61411 100644 (file)
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -443,8 +443,10 @@ int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *k1,
                              const hfsplus_btree_key *k2);
  int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
                             const hfsplus_btree_key *k2);
-void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
+int hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
                            u32 parent, struct qstr *str);
+void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
+                                    hfsplus_btree_key *key, u32 parent);
  void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms);
  int hfsplus_find_cat(struct super_block *sb, u32 cnid,
                      struct hfs_find_data *fd);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c

index 4cf2024b87da3096ec557d71c5c0174d27753156..593af2fdcc2dafec8574caa59d970365aede5239 100644 (file)
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -515,7 +515,9 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
         err = hfs_find_init(sbi->cat_tree, &fd);
         if (err)
                 goto out_put_root;
-       hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
+       err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
+       if (unlikely(err < 0))
+               goto out_put_root;
         if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
                 hfs_find_exit(&fd);
                 if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))
diff --git a/fs/namespace.c b/fs/namespace.c

index 820af6a1dd6b15f9f1b479b17a2c03634c40f16c..cd1e9681a0cf5d0bdfd64d84ae90c34bd10f265b 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1370,6 +1370,8 @@ void umount_tree(struct mount *mnt, int how)
         }
         if (last) {
                 last->mnt_hash.next = unmounted.first;
+               if (unmounted.first)
+                       unmounted.first->pprev = &last->mnt_hash.next;
                 unmounted.first = tmp_list.first;
                 unmounted.first->pprev = &unmounted.first;
         }
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c

index a93bf98922565ab120d85f995f0cb565deb1e144..fcae9ef1a328750ca08cda37691bb51dad5293ef 100644 (file)
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5662,7 +5662,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
                              struct ocfs2_extent_tree *et,
                              u32 cpos, u32 phys_cpos, u32 len, int flags,
                              struct ocfs2_cached_dealloc_ctxt *dealloc,
-                            u64 refcount_loc)
+                            u64 refcount_loc, bool refcount_tree_locked)
  {
         int ret, credits = 0, extra_blocks = 0;
         u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
@@ -5676,11 +5676,13 @@ int ocfs2_remove_btree_range(struct inode *inode,
                 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
                          OCFS2_HAS_REFCOUNT_FL));
  
-               ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
-                                              &ref_tree, NULL);
-               if (ret) {
-                       mlog_errno(ret);
-                       goto bail;
+               if (!refcount_tree_locked) {
+                       ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
+                                                      &ref_tree, NULL);
+                       if (ret) {
+                               mlog_errno(ret);
+                               goto bail;
+                       }
                 }
  
                 ret = ocfs2_prepare_refcount_change_for_del(inode,
@@ -7021,6 +7023,7 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
         u64 refcount_loc = le64_to_cpu(di->i_refcount_loc);
         struct ocfs2_extent_tree et;
         struct ocfs2_cached_dealloc_ctxt dealloc;
+       struct ocfs2_refcount_tree *ref_tree = NULL;
  
         ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
         ocfs2_init_dealloc_ctxt(&dealloc);
@@ -7130,9 +7133,18 @@ start:
  
         phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
  
+       if ((flags & OCFS2_EXT_REFCOUNTED) && trunc_len && !ref_tree) {
+               status = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
+                               &ref_tree, NULL);
+               if (status) {
+                       mlog_errno(status);
+                       goto bail;
+               }
+       }
+
         status = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
                                           phys_cpos, trunc_len, flags, &dealloc,
-                                         refcount_loc);
+                                         refcount_loc, true);
         if (status < 0) {
                 mlog_errno(status);
                 goto bail;
@@ -7147,6 +7159,8 @@ start:
         goto start;
  
  bail:
+       if (ref_tree)
+               ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
  
         ocfs2_schedule_truncate_log_flush(osb, 1);
  
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h

index ca381c5841273433d279cd6aca8137ec01d03bbd..fb09b97db162d802e6dbedc495d595a1eee23315 100644 (file)
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -142,7 +142,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
                              struct ocfs2_extent_tree *et,
                              u32 cpos, u32 phys_cpos, u32 len, int flags,
                              struct ocfs2_cached_dealloc_ctxt *dealloc,
-                            u64 refcount_loc);
+                            u64 refcount_loc, bool refcount_tree_locked);
  
  int ocfs2_num_free_extents(struct ocfs2_super *osb,
                            struct ocfs2_extent_tree *et);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c

index d9f222987f24fe438f987b23ecba01c19d9b2c3c..46d93e941f3d832c60ee50315adfd380b993b53a 100644 (file)
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -894,7 +894,7 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
         }
  }
  
-static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc)
  {
         int i;
  
@@ -915,7 +915,11 @@ static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
                 page_cache_release(wc->w_target_page);
         }
         ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
+}
  
+static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+{
+       ocfs2_unlock_pages(wc);
         brelse(wc->w_di_bh);
         kfree(wc);
  }
@@ -2042,11 +2046,19 @@ out_write_size:
         ocfs2_update_inode_fsync_trans(handle, inode, 1);
         ocfs2_journal_dirty(handle, wc->w_di_bh);
  
+       /* unlock pages before dealloc since it needs acquiring j_trans_barrier
+        * lock, or it will cause a deadlock since journal commit threads holds
+        * this lock and will ask for the page lock when flushing the data.
+        * put it here to preserve the unlock order.
+        */
+       ocfs2_unlock_pages(wc);
+
         ocfs2_commit_trans(osb, handle);
  
         ocfs2_run_deallocs(osb, &wc->w_dealloc);
  
-       ocfs2_free_write_ctxt(wc);
+       brelse(wc->w_di_bh);
+       kfree(wc);
  
         return copied;
  }
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c

index 79d56dc981bc4b9fd6b927a4bce04d3f79ede736..319e786175afed345660719cfeb4c245741a2461 100644 (file)
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -4479,7 +4479,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
                 p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno);
  
                 ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0,
-                                              &dealloc, 0);
+                                              &dealloc, 0, false);
                 if (ret) {
                         mlog_errno(ret);
                         goto out;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c

index 3689b35920422304b47fa92f6c65ced6a9b316c4..a6944b25fd5b5ddba427ac66c8f50fb582cf1451 100644 (file)
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -695,14 +695,6 @@ void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
                         res->inflight_assert_workers);
  }
  
-static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
-               struct dlm_lock_resource *res)
-{
-       spin_lock(&res->spinlock);
-       __dlm_lockres_grab_inflight_worker(dlm, res);
-       spin_unlock(&res->spinlock);
-}
-
  static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
                 struct dlm_lock_resource *res)
  {
@@ -1646,6 +1638,7 @@ send_response:
                 }
                 mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
                              dlm->node_num, res->lockname.len, res->lockname.name);
+               spin_lock(&res->spinlock);
                 ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx,
                                                  DLM_ASSERT_MASTER_MLE_CLEANUP);
                 if (ret < 0) {
@@ -1653,7 +1646,8 @@ send_response:
                         response = DLM_MASTER_RESP_ERROR;
                         dlm_lockres_put(res);
                 } else
-                       dlm_lockres_grab_inflight_worker(dlm, res);
+                       __dlm_lockres_grab_inflight_worker(dlm, res);
+               spin_unlock(&res->spinlock);
         } else {
                 if (res)
                         dlm_lockres_put(res);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c

index 69fb9f75b0825f14527a8e2ec3f8e99c72f5372d..3950693dd0f6d9026587ac71e85037c555790f77 100644 (file)
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1803,7 +1803,7 @@ static int ocfs2_remove_inode_range(struct inode *inode,
  
                 ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
                                                phys_cpos, trunc_len, flags,
-                                              &dealloc, refcount_loc);
+                                              &dealloc, refcount_loc, false);
                 if (ret < 0) {
                         mlog_errno(ret);
                         goto out;
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c

index aa1eee06420f677b9f5a3594dd66b19567e46295..d3ebf2e618535bd2fa7daa0bfe948cb1a9bfc062 100644 (file)
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -12,6 +12,9 @@
  #include <linux/vmstat.h>
  #include <linux/atomic.h>
  #include <linux/vmalloc.h>
+#ifdef CONFIG_CMA
+#include <linux/cma.h>
+#endif
  #include <asm/page.h>
  #include <asm/pgtable.h>
  #include "internal.h"
@@ -137,6 +140,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
  #endif
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
                 "AnonHugePages:  %8lu kB\n"
+#endif
+#ifdef CONFIG_CMA
+               "CmaTotal:       %8lu kB\n"
+               "CmaFree:        %8lu kB\n"
  #endif
                 ,
                 K(i.totalram),
@@ -187,11 +194,15 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                 vmi.used >> 10,
                 vmi.largest_chunk >> 10
  #ifdef CONFIG_MEMORY_FAILURE
-               ,atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
+               , atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
  #endif
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-               ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
+               , K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
                    HPAGE_PMD_NR)
+#endif
+#ifdef CONFIG_CMA
+               , K(totalcma_pages)
+               , K(global_page_state(NR_FREE_CMA_PAGES))
  #endif
                 );
  
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h

index ad9db6045b2fcc71dffbdd316900fabe771b2804..b3f45a578344a90eee8d1c76f724636de24447b1 100644 (file)
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -60,7 +60,8 @@ struct arch_timer_cpu {
  
  #ifdef CONFIG_KVM_ARM_TIMER
  int kvm_timer_hyp_init(void);
-int kvm_timer_init(struct kvm *kvm);
+void kvm_timer_enable(struct kvm *kvm);
+void kvm_timer_init(struct kvm *kvm);
  void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
                           const struct kvm_irq_level *irq);
  void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
@@ -77,11 +78,8 @@ static inline int kvm_timer_hyp_init(void)
         return 0;
  };
  
-static inline int kvm_timer_init(struct kvm *kvm)
-{
-       return 0;
-}
-
+static inline void kvm_timer_enable(struct kvm *kvm) {}
+static inline void kvm_timer_init(struct kvm *kvm) {}
  static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
                                         const struct kvm_irq_level *irq) {}
  static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h

index 206dcc3b3f7aa60bace5ba9c6d039d8b4b2c6ace..ac4888dc86bcb51d802cb08d1795c60d9c4180d3 100644 (file)
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -274,7 +274,7 @@ struct kvm_exit_mmio;
  #ifdef CONFIG_KVM_ARM_VGIC
  int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
  int kvm_vgic_hyp_init(void);
-int kvm_vgic_init(struct kvm *kvm);
+int kvm_vgic_map_resources(struct kvm *kvm);
  int kvm_vgic_create(struct kvm *kvm);
  void kvm_vgic_destroy(struct kvm *kvm);
  void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
@@ -287,7 +287,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
                       struct kvm_exit_mmio *mmio);
  
  #define irqchip_in_kernel(k)   (!!((k)->arch.vgic.in_kernel))
-#define vgic_initialized(k)    ((k)->arch.vgic.ready)
+#define vgic_initialized(k)    (!!((k)->arch.vgic.nr_cpus))
+#define vgic_ready(k)          ((k)->arch.vgic.ready)
  
  int vgic_v2_probe(struct device_node *vgic_node,
                   const struct vgic_ops **ops,
@@ -321,7 +322,7 @@ static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr,
         return -ENXIO;
  }
  
-static inline int kvm_vgic_init(struct kvm *kvm)
+static inline int kvm_vgic_map_resources(struct kvm *kvm)
  {
         return 0;
  }
@@ -373,6 +374,11 @@ static inline bool vgic_initialized(struct kvm *kvm)
  {
         return true;
  }
+
+static inline bool vgic_ready(struct kvm *kvm)
+{
+       return true;
+}
  #endif
  
  #endif
diff --git a/include/linux/cma.h b/include/linux/cma.h

index a93438beb33cf12529c44a8cb905e35df125edae..9384ba66e975969fc84e828590293a0930e5c3bd 100644 (file)
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -15,6 +15,7 @@
  
  struct cma;
  
+extern unsigned long totalcma_pages;
  extern phys_addr_t cma_get_base(struct cma *cma);
  extern unsigned long cma_get_size(struct cma *cma);
  
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h

index a6059bdf7b03baa4955c069637f686b3d709d819..26f106022c8869dc5609f06f10579fb4ca5c4596 100644 (file)
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -43,6 +43,7 @@
   * include/linux/kvm_h.
   */
  #define KVM_MEMSLOT_INVALID    (1UL << 16)
+#define KVM_MEMSLOT_INCOHERENT (1UL << 17)
  
  /* Two fragments for cross MMIO pages. */
  #define KVM_MAX_MMIO_FRAGMENTS 2
@@ -353,6 +354,8 @@ struct kvm_memslots {
         struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM];
         /* The mapping table from slot id to the index in memslots[]. */
         short id_to_index[KVM_MEM_SLOTS_NUM];
+       atomic_t lru_slot;
+       int used_slots;
  };
  
  struct kvm {
@@ -395,7 +398,6 @@ struct kvm {
          * Update side is protected by irq_lock.
          */
         struct kvm_irq_routing_table __rcu *irq_routing;
-       struct hlist_head mask_notifier_list;
  #endif
  #ifdef CONFIG_HAVE_KVM_IRQFD
         struct hlist_head irq_ack_notifier_list;
@@ -447,6 +449,14 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
  int __must_check vcpu_load(struct kvm_vcpu *vcpu);
  void vcpu_put(struct kvm_vcpu *vcpu);
  
+#ifdef __KVM_HAVE_IOAPIC
+void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
+#else
+static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
+{
+}
+#endif
+
  #ifdef CONFIG_HAVE_KVM_IRQFD
  int kvm_irqfd_init(void);
  void kvm_irqfd_exit(void);
@@ -711,44 +721,6 @@ struct kvm_irq_ack_notifier {
         void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
  };
  
-struct kvm_assigned_dev_kernel {
-       struct kvm_irq_ack_notifier ack_notifier;
-       struct list_head list;
-       int assigned_dev_id;
-       int host_segnr;
-       int host_busnr;
-       int host_devfn;
-       unsigned int entries_nr;
-       int host_irq;
-       bool host_irq_disabled;
-       bool pci_2_3;
-       struct msix_entry *host_msix_entries;
-       int guest_irq;
-       struct msix_entry *guest_msix_entries;
-       unsigned long irq_requested_type;
-       int irq_source_id;
-       int flags;
-       struct pci_dev *dev;
-       struct kvm *kvm;
-       spinlock_t intx_lock;
-       spinlock_t intx_mask_lock;
-       char irq_name[32];
-       struct pci_saved_state *pci_saved_state;
-};
-
-struct kvm_irq_mask_notifier {
-       void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
-       int irq;
-       struct hlist_node link;
-};
-
-void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
-                                   struct kvm_irq_mask_notifier *kimn);
-void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
-                                     struct kvm_irq_mask_notifier *kimn);
-void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
-                            bool mask);
-
  int kvm_irq_map_gsi(struct kvm *kvm,
                     struct kvm_kernel_irq_routing_entry *entries, int gsi);
  int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin);
@@ -770,12 +742,6 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
  int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
  void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
-int kvm_iommu_map_guest(struct kvm *kvm);
-int kvm_iommu_unmap_guest(struct kvm *kvm);
-int kvm_assign_device(struct kvm *kvm,
-                     struct kvm_assigned_dev_kernel *assigned_dev);
-int kvm_deassign_device(struct kvm *kvm,
-                       struct kvm_assigned_dev_kernel *assigned_dev);
  #else
  static inline int kvm_iommu_map_pages(struct kvm *kvm,
                                       struct kvm_memory_slot *slot)
@@ -787,11 +753,6 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm,
                                          struct kvm_memory_slot *slot)
  {
  }
-
-static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
-{
-       return 0;
-}
  #endif
  
  static inline void kvm_guest_enter(void)
@@ -832,12 +793,28 @@ static inline void kvm_guest_exit(void)
  static inline struct kvm_memory_slot *
  search_memslots(struct kvm_memslots *slots, gfn_t gfn)
  {
-       struct kvm_memory_slot *memslot;
+       int start = 0, end = slots->used_slots;
+       int slot = atomic_read(&slots->lru_slot);
+       struct kvm_memory_slot *memslots = slots->memslots;
+
+       if (gfn >= memslots[slot].base_gfn &&
+           gfn < memslots[slot].base_gfn + memslots[slot].npages)
+               return &memslots[slot];
  
-       kvm_for_each_memslot(memslot, slots)
-               if (gfn >= memslot->base_gfn &&
-                     gfn < memslot->base_gfn + memslot->npages)
-                       return memslot;
+       while (start < end) {
+               slot = start + (end - start) / 2;
+
+               if (gfn >= memslots[slot].base_gfn)
+                       end = slot;
+               else
+                       start = slot + 1;
+       }
+
+       if (gfn >= memslots[start].base_gfn &&
+           gfn < memslots[start].base_gfn + memslots[start].npages) {
+               atomic_set(&slots->lru_slot, start);
+               return &memslots[start];
+       }
  
         return NULL;
  }
@@ -1011,25 +988,6 @@ static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; }
  
  #endif
  
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
-
-long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
-                                 unsigned long arg);
-
-void kvm_free_all_assigned_devices(struct kvm *kvm);
-
-#else
-
-static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
-                                               unsigned long arg)
-{
-       return -ENOTTY;
-}
-
-static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
-
-#endif
-
  static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
  {
         set_bit(req, &vcpu->requests);
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h

index b606bb689a3e0329d937f54e7a0e1e0a783f7e5f..931da7e917cf71deceae10a11507f89486fe9b6d 100644 (file)
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -54,33 +54,6 @@ typedef u64            hfn_t;
  
  typedef hfn_t pfn_t;
  
-union kvm_ioapic_redirect_entry {
-       u64 bits;
-       struct {
-               u8 vector;
-               u8 delivery_mode:3;
-               u8 dest_mode:1;
-               u8 delivery_status:1;
-               u8 polarity:1;
-               u8 remote_irr:1;
-               u8 trig_mode:1;
-               u8 mask:1;
-               u8 reserve:7;
-               u8 reserved[4];
-               u8 dest_id;
-       } fields;
-};
-
-struct kvm_lapic_irq {
-       u32 vector;
-       u32 delivery_mode;
-       u32 dest_mode;
-       u32 level;
-       u32 trig_mode;
-       u32 shorthand;
-       u32 dest_id;
-};
-
  struct gfn_to_hva_cache {
         u64 generation;
         gpa_t gpa;
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h

index ea4f1c46f761127bf01c87b5d8933bd09819d1b7..4e5bd813bb9a7d9142866857ff3c4be73f956e6e 100644 (file)
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -119,6 +119,15 @@ enum {
         MLX5_MAX_LOG_PKEY_TABLE  = 5,
  };
  
+enum {
+       MLX5_MKEY_INBOX_PG_ACCESS = 1 << 31
+};
+
+enum {
+       MLX5_PFAULT_SUBTYPE_WQE = 0,
+       MLX5_PFAULT_SUBTYPE_RDMA = 1,
+};
+
  enum {
         MLX5_PERM_LOCAL_READ    = 1 << 2,
         MLX5_PERM_LOCAL_WRITE   = 1 << 3,
@@ -180,6 +189,19 @@ enum {
         MLX5_MKEY_MASK_FREE             = 1ull << 29,
  };
  
+enum {
+       MLX5_UMR_TRANSLATION_OFFSET_EN  = (1 << 4),
+
+       MLX5_UMR_CHECK_NOT_FREE         = (1 << 5),
+       MLX5_UMR_CHECK_FREE             = (2 << 5),
+
+       MLX5_UMR_INLINE                 = (1 << 7),
+};
+
+#define MLX5_UMR_MTT_ALIGNMENT 0x40
+#define MLX5_UMR_MTT_MASK      (MLX5_UMR_MTT_ALIGNMENT - 1)
+#define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT
+
  enum mlx5_event {
         MLX5_EVENT_TYPE_COMP               = 0x0,
  
@@ -206,6 +228,8 @@ enum mlx5_event {
  
         MLX5_EVENT_TYPE_CMD                = 0x0a,
         MLX5_EVENT_TYPE_PAGE_REQUEST       = 0xb,
+
+       MLX5_EVENT_TYPE_PAGE_FAULT         = 0xc,
  };
  
  enum {
@@ -225,6 +249,7 @@ enum {
         MLX5_DEV_CAP_FLAG_APM           = 1LL << 17,
         MLX5_DEV_CAP_FLAG_ATOMIC        = 1LL << 18,
         MLX5_DEV_CAP_FLAG_BLOCK_MCAST   = 1LL << 23,
+       MLX5_DEV_CAP_FLAG_ON_DMND_PG    = 1LL << 24,
         MLX5_DEV_CAP_FLAG_CQ_MODER      = 1LL << 29,
         MLX5_DEV_CAP_FLAG_RESIZE_CQ     = 1LL << 30,
         MLX5_DEV_CAP_FLAG_DCT           = 1LL << 37,
@@ -290,6 +315,8 @@ enum {
  enum {
         HCA_CAP_OPMOD_GET_MAX   = 0,
         HCA_CAP_OPMOD_GET_CUR   = 1,
+       HCA_CAP_OPMOD_GET_ODP_MAX = 4,
+       HCA_CAP_OPMOD_GET_ODP_CUR = 5
  };
  
  struct mlx5_inbox_hdr {
@@ -319,6 +346,23 @@ struct mlx5_cmd_query_adapter_mbox_out {
         u8                      vsd_psid[16];
  };
  
+enum mlx5_odp_transport_cap_bits {
+       MLX5_ODP_SUPPORT_SEND    = 1 << 31,
+       MLX5_ODP_SUPPORT_RECV    = 1 << 30,
+       MLX5_ODP_SUPPORT_WRITE   = 1 << 29,
+       MLX5_ODP_SUPPORT_READ    = 1 << 28,
+};
+
+struct mlx5_odp_caps {
+       char reserved[0x10];
+       struct {
+               __be32                  rc_odp_caps;
+               __be32                  uc_odp_caps;
+               __be32                  ud_odp_caps;
+       } per_transport_caps;
+       char reserved2[0xe4];
+};
+
  struct mlx5_cmd_init_hca_mbox_in {
         struct mlx5_inbox_hdr   hdr;
         u8                      rsvd0[2];
@@ -439,6 +483,27 @@ struct mlx5_eqe_page_req {
         __be32          rsvd1[5];
  };
  
+struct mlx5_eqe_page_fault {
+       __be32 bytes_committed;
+       union {
+               struct {
+                       u16     reserved1;
+                       __be16  wqe_index;
+                       u16     reserved2;
+                       __be16  packet_length;
+                       u8      reserved3[12];
+               } __packed wqe;
+               struct {
+                       __be32  r_key;
+                       u16     reserved1;
+                       __be16  packet_length;
+                       __be32  rdma_op_len;
+                       __be64  rdma_va;
+               } __packed rdma;
+       } __packed;
+       __be32 flags_qpn;
+} __packed;
+
  union ev_data {
         __be32                          raw[7];
         struct mlx5_eqe_cmd             cmd;
@@ -450,6 +515,7 @@ union ev_data {
         struct mlx5_eqe_congestion      cong;
         struct mlx5_eqe_stall_vl        stall_vl;
         struct mlx5_eqe_page_req        req_pages;
+       struct mlx5_eqe_page_fault      page_fault;
  } __packed;
  
  struct mlx5_eqe {
@@ -776,6 +842,10 @@ struct mlx5_query_eq_mbox_out {
         struct mlx5_eq_context  ctx;
  };
  
+enum {
+       MLX5_MKEY_STATUS_FREE = 1 << 6,
+};
+
  struct mlx5_mkey_seg {
         /* This is a two bit field occupying bits 31-30.
          * bit 31 is always 0,
@@ -812,7 +882,7 @@ struct mlx5_query_special_ctxs_mbox_out {
  struct mlx5_create_mkey_mbox_in {
         struct mlx5_inbox_hdr   hdr;
         __be32                  input_mkey_index;
-       u8                      rsvd0[4];
+       __be32                  flags;
         struct mlx5_mkey_seg    seg;
         u8                      rsvd1[16];
         __be32                  xlat_oct_act_size;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h

index b1bf41556b3245fc9970174f5be872202a58eec5..166d9315fe4b565bdd8487da66559f6cbfe19462 100644 (file)
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -113,6 +113,13 @@ enum {
         MLX5_REG_HOST_ENDIANNESS = 0x7004,
  };
  
+enum mlx5_page_fault_resume_flags {
+       MLX5_PAGE_FAULT_RESUME_REQUESTOR = 1 << 0,
+       MLX5_PAGE_FAULT_RESUME_WRITE     = 1 << 1,
+       MLX5_PAGE_FAULT_RESUME_RDMA      = 1 << 2,
+       MLX5_PAGE_FAULT_RESUME_ERROR     = 1 << 7,
+};
+
  enum dbg_rsc_type {
         MLX5_DBG_RSC_QP,
         MLX5_DBG_RSC_EQ,
@@ -467,7 +474,7 @@ struct mlx5_priv {
         struct workqueue_struct *pg_wq;
         struct rb_root          page_root;
         int                     fw_pages;
-       int                     reg_pages;
+       atomic_t                reg_pages;
         struct list_head        free_list;
  
         struct mlx5_core_health health;
@@ -703,6 +710,9 @@ void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
  void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
  void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
  void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
+#endif
  void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
  struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
  void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector);
@@ -740,6 +750,8 @@ int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
                          int npsvs, u32 *sig_index);
  int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num);
  void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common);
+int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
+                       struct mlx5_odp_caps *odp_caps);
  
  static inline u32 mlx5_mkey_to_idx(u32 mkey)
  {
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h

index 3fa075daeb1d12bbf7eb06394232018f993b32e4..61f7a342d1bfd1cc4f102d1b75be3d8a977e28d0 100644 (file)
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -50,6 +50,9 @@
  #define MLX5_BSF_APPTAG_ESCAPE 0x1
  #define MLX5_BSF_APPREF_ESCAPE 0x2
  
+#define MLX5_QPN_BITS          24
+#define MLX5_QPN_MASK          ((1 << MLX5_QPN_BITS) - 1)
+
  enum mlx5_qp_optpar {
         MLX5_QP_OPTPAR_ALT_ADDR_PATH            = 1 << 0,
         MLX5_QP_OPTPAR_RRE                      = 1 << 1,
@@ -189,6 +192,14 @@ struct mlx5_wqe_ctrl_seg {
         __be32                  imm;
  };
  
+#define MLX5_WQE_CTRL_DS_MASK 0x3f
+#define MLX5_WQE_CTRL_QPN_MASK 0xffffff00
+#define MLX5_WQE_CTRL_QPN_SHIFT 8
+#define MLX5_WQE_DS_UNITS 16
+#define MLX5_WQE_CTRL_OPCODE_MASK 0xff
+#define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00
+#define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8
+
  struct mlx5_wqe_xrc_seg {
         __be32                  xrc_srqn;
         u8                      rsvd[12];
@@ -292,6 +303,8 @@ struct mlx5_wqe_signature_seg {
         u8      rsvd1[11];
  };
  
+#define MLX5_WQE_INLINE_SEG_BYTE_COUNT_MASK 0x3ff
+
  struct mlx5_wqe_inline_seg {
         __be32  byte_count;
  };
@@ -360,9 +373,46 @@ struct mlx5_stride_block_ctrl_seg {
         __be16          num_entries;
  };
  
+enum mlx5_pagefault_flags {
+       MLX5_PFAULT_REQUESTOR = 1 << 0,
+       MLX5_PFAULT_WRITE     = 1 << 1,
+       MLX5_PFAULT_RDMA      = 1 << 2,
+};
+
+/* Contains the details of a pagefault. */
+struct mlx5_pagefault {
+       u32                     bytes_committed;
+       u8                      event_subtype;
+       enum mlx5_pagefault_flags flags;
+       union {
+               /* Initiator or send message responder pagefault details. */
+               struct {
+                       /* Received packet size, only valid for responders. */
+                       u32     packet_size;
+                       /*
+                        * WQE index. Refers to either the send queue or
+                        * receive queue, according to event_subtype.
+                        */
+                       u16     wqe_index;
+               } wqe;
+               /* RDMA responder pagefault details */
+               struct {
+                       u32     r_key;
+                       /*
+                        * Received packet size, minimal size page fault
+                        * resolution required for forward progress.
+                        */
+                       u32     packet_size;
+                       u32     rdma_op_len;
+                       u64     rdma_va;
+               } rdma;
+       };
+};
+
  struct mlx5_core_qp {
         struct mlx5_core_rsc_common     common; /* must be first */
         void (*event)           (struct mlx5_core_qp *, int);
+       void (*pfault_handler)(struct mlx5_core_qp *, struct mlx5_pagefault *);
         int                     qpn;
         struct mlx5_rsc_debug   *dbg;
         int                     pid;
@@ -530,6 +580,17 @@ static inline struct mlx5_core_mr *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u
         return radix_tree_lookup(&dev->priv.mr_table.tree, key);
  }
  
+struct mlx5_page_fault_resume_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  flags_qpn;
+       u8                      reserved[4];
+};
+
+struct mlx5_page_fault_resume_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd[8];
+};
+
  int mlx5_core_create_qp(struct mlx5_core_dev *dev,
                         struct mlx5_core_qp *qp,
                         struct mlx5_create_qp_mbox_in *in,
@@ -549,6 +610,10 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev);
  void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
  int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
  void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
+                               u8 context, int error);
+#endif
  
  static inline const char *mlx5_qp_type_str(int type)
  {
diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h

index f2ca1b459377803094005029cc765ef1f3a9abd6..7e75bfe37cc7cd72d5db63cd094b5110984b066b 100644 (file)
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -11,7 +11,7 @@ struct fixed_phy_status {
  
  struct device_node;
  
-#ifdef CONFIG_FIXED_PHY
+#if IS_ENABLED(CONFIG_FIXED_PHY)
  extern int fixed_phy_add(unsigned int irq, int phy_id,
                          struct fixed_phy_status *status);
  extern struct phy_device *fixed_phy_register(unsigned int irq,
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h

index a2bf41e0bde969c0ea0090a6af6a11087f51fa2f..2d83cfd7e6ce20da3b1606cfe4335fada792fb51 100644 (file)
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -38,11 +38,12 @@
  #include <linux/workqueue.h>
  
  struct ib_ucontext;
+struct ib_umem_odp;
  
  struct ib_umem {
         struct ib_ucontext     *context;
         size_t                  length;
-       int                     offset;
+       unsigned long           address;
         int                     page_size;
         int                     writable;
         int                     hugetlb;
@@ -50,17 +51,43 @@ struct ib_umem {
         struct pid             *pid;
         struct mm_struct       *mm;
         unsigned long           diff;
+       struct ib_umem_odp     *odp_data;
         struct sg_table sg_head;
         int             nmap;
         int             npages;
  };
  
+/* Returns the offset of the umem start relative to the first page. */
+static inline int ib_umem_offset(struct ib_umem *umem)
+{
+       return umem->address & ((unsigned long)umem->page_size - 1);
+}
+
+/* Returns the first page of an ODP umem. */
+static inline unsigned long ib_umem_start(struct ib_umem *umem)
+{
+       return umem->address - ib_umem_offset(umem);
+}
+
+/* Returns the address of the page after the last one of an ODP umem. */
+static inline unsigned long ib_umem_end(struct ib_umem *umem)
+{
+       return PAGE_ALIGN(umem->address + umem->length);
+}
+
+static inline size_t ib_umem_num_pages(struct ib_umem *umem)
+{
+       return (ib_umem_end(umem) - ib_umem_start(umem)) >> PAGE_SHIFT;
+}
+
  #ifdef CONFIG_INFINIBAND_USER_MEM
  
  struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
                             size_t size, int access, int dmasync);
  void ib_umem_release(struct ib_umem *umem);
  int ib_umem_page_count(struct ib_umem *umem);
+int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+                     size_t length);
  
  #else /* CONFIG_INFINIBAND_USER_MEM */
  
@@ -73,7 +100,10 @@ static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
  }
  static inline void ib_umem_release(struct ib_umem *umem) { }
  static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
-
+static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+                                   size_t length) {
+       return -EINVAL;
+}
  #endif /* CONFIG_INFINIBAND_USER_MEM */
  
  #endif /* IB_UMEM_H */
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h

new file mode 100644 (file)

index 0000000..3da0b16
--- /dev/null
+++ b/include/rdma/ib_umem_odp.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_UMEM_ODP_H
+#define IB_UMEM_ODP_H
+
+#include <rdma/ib_umem.h>
+#include <rdma/ib_verbs.h>
+#include <linux/interval_tree.h>
+
+struct umem_odp_node {
+       u64 __subtree_last;
+       struct rb_node rb;
+};
+
+struct ib_umem_odp {
+       /*
+        * An array of the pages included in the on-demand paging umem.
+        * Indices of pages that are currently not mapped into the device will
+        * contain NULL.
+        */
+       struct page             **page_list;
+       /*
+        * An array of the same size as page_list, with DMA addresses mapped
+        * for the pages in page_list. The lower two bits designate
+        * access permissions. See ODP_READ_ALLOWED_BIT and
+        * ODP_WRITE_ALLOWED_BIT.
+        */
+       dma_addr_t              *dma_list;
+       /*
+        * The umem_mutex protects the page_list and dma_list fields of an ODP
+        * umem, allowing only a single thread to map/unmap pages. The mutex
+        * also protects access to the mmu notifier counters.
+        */
+       struct mutex            umem_mutex;
+       void                    *private; /* for the HW driver to use. */
+
+       /* When false, use the notifier counter in the ucontext struct. */
+       bool mn_counters_active;
+       int notifiers_seq;
+       int notifiers_count;
+
+       /* A linked list of umems that don't have private mmu notifier
+        * counters yet. */
+       struct list_head no_private_counters;
+       struct ib_umem          *umem;
+
+       /* Tree tracking */
+       struct umem_odp_node    interval_tree;
+
+       struct completion       notifier_completion;
+       int                     dying;
+};
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+
+int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem);
+
+void ib_umem_odp_release(struct ib_umem *umem);
+
+/*
+ * The lower 2 bits of the DMA address signal the R/W permissions for
+ * the entry. To upgrade the permissions, provide the appropriate
+ * bitmask to the map_dma_pages function.
+ *
+ * Be aware that upgrading a mapped address might result in change of
+ * the DMA address for the page.
+ */
+#define ODP_READ_ALLOWED_BIT  (1<<0ULL)
+#define ODP_WRITE_ALLOWED_BIT (1<<1ULL)
+
+#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
+
+int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt,
+                             u64 access_mask, unsigned long current_seq);
+
+void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset,
+                                u64 bound);
+
+void rbt_ib_umem_insert(struct umem_odp_node *node, struct rb_root *root);
+void rbt_ib_umem_remove(struct umem_odp_node *node, struct rb_root *root);
+typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
+                             void *cookie);
+/*
+ * Call the callback on each ib_umem in the range. Returns the logical or of
+ * the return values of the functions called.
+ */
+int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end,
+                                 umem_call_back cb, void *cookie);
+
+struct umem_odp_node *rbt_ib_umem_iter_first(struct rb_root *root,
+                                            u64 start, u64 last);
+struct umem_odp_node *rbt_ib_umem_iter_next(struct umem_odp_node *node,
+                                           u64 start, u64 last);
+
+static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
+                                            unsigned long mmu_seq)
+{
+       /*
+        * This code is strongly based on the KVM code from
+        * mmu_notifier_retry. Should be called with
+        * the relevant locks taken (item->odp_data->umem_mutex
+        * and the ucontext umem_rwsem semaphore locked for read).
+        */
+
+       /* Do not allow page faults while the new ib_umem hasn't seen a state
+        * with zero notifiers yet, and doesn't have its own valid set of
+        * private counters. */
+       if (!item->odp_data->mn_counters_active)
+               return 1;
+
+       if (unlikely(item->odp_data->notifiers_count))
+               return 1;
+       if (item->odp_data->notifiers_seq != mmu_seq)
+               return 1;
+       return 0;
+}
+
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
+static inline int ib_umem_odp_get(struct ib_ucontext *context,
+                                 struct ib_umem *umem)
+{
+       return -EINVAL;
+}
+
+static inline void ib_umem_odp_release(struct ib_umem *umem) {}
+
+#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
+#endif /* IB_UMEM_ODP_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h

index 470a011d6fa49d0e44c512fb927000b1d69bf8ba..0d74f1de99aa89dee233ed408815459e2ad66d5f 100644 (file)
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -51,6 +51,7 @@
  #include <uapi/linux/if_ether.h>
  
  #include <linux/atomic.h>
+#include <linux/mmu_notifier.h>
  #include <asm/uaccess.h>
  
  extern struct workqueue_struct *ib_wq;
@@ -123,7 +124,8 @@ enum ib_device_cap_flags {
         IB_DEVICE_MEM_WINDOW_TYPE_2A    = (1<<23),
         IB_DEVICE_MEM_WINDOW_TYPE_2B    = (1<<24),
         IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
-       IB_DEVICE_SIGNATURE_HANDOVER    = (1<<30)
+       IB_DEVICE_SIGNATURE_HANDOVER    = (1<<30),
+       IB_DEVICE_ON_DEMAND_PAGING      = (1<<31),
  };
  
  enum ib_signature_prot_cap {
@@ -143,6 +145,27 @@ enum ib_atomic_cap {
         IB_ATOMIC_GLOB
  };
  
+enum ib_odp_general_cap_bits {
+       IB_ODP_SUPPORT = 1 << 0,
+};
+
+enum ib_odp_transport_cap_bits {
+       IB_ODP_SUPPORT_SEND     = 1 << 0,
+       IB_ODP_SUPPORT_RECV     = 1 << 1,
+       IB_ODP_SUPPORT_WRITE    = 1 << 2,
+       IB_ODP_SUPPORT_READ     = 1 << 3,
+       IB_ODP_SUPPORT_ATOMIC   = 1 << 4,
+};
+
+struct ib_odp_caps {
+       uint64_t general_caps;
+       struct {
+               uint32_t  rc_odp_caps;
+               uint32_t  uc_odp_caps;
+               uint32_t  ud_odp_caps;
+       } per_transport_caps;
+};
+
  struct ib_device_attr {
         u64                     fw_ver;
         __be64                  sys_image_guid;
@@ -186,6 +209,7 @@ struct ib_device_attr {
         u8                      local_ca_ack_delay;
         int                     sig_prot_cap;
         int                     sig_guard_cap;
+       struct ib_odp_caps      odp_caps;
  };
  
  enum ib_mtu {
@@ -1073,7 +1097,8 @@ enum ib_access_flags {
         IB_ACCESS_REMOTE_READ   = (1<<2),
         IB_ACCESS_REMOTE_ATOMIC = (1<<3),
         IB_ACCESS_MW_BIND       = (1<<4),
-       IB_ZERO_BASED           = (1<<5)
+       IB_ZERO_BASED           = (1<<5),
+       IB_ACCESS_ON_DEMAND     = (1<<6),
  };
  
  struct ib_phys_buf {
@@ -1115,6 +1140,8 @@ struct ib_fmr_attr {
         u8      page_shift;
  };
  
+struct ib_umem;
+
  struct ib_ucontext {
         struct ib_device       *device;
         struct list_head        pd_list;
@@ -1127,6 +1154,24 @@ struct ib_ucontext {
         struct list_head        xrcd_list;
         struct list_head        rule_list;
         int                     closing;
+
+       struct pid             *tgid;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+       struct rb_root      umem_tree;
+       /*
+        * Protects the .umem_tree rb-tree, as well as odp_mrs_count and
+        * mmu notifiers registration.
+        */
+       struct rw_semaphore     umem_rwsem;
+       void (*invalidate_range)(struct ib_umem *umem,
+                                unsigned long start, unsigned long end);
+
+       struct mmu_notifier     mn;
+       atomic_t                notifier_count;
+       /* A list of umems that don't have private mmu notifier counters yet. */
+       struct list_head        no_private_counters;
+       int                     odp_mrs_count;
+#endif
  };
  
  struct ib_uobject {
@@ -1662,7 +1707,10 @@ static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t
  
  static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
  {
-       return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
+       size_t copy_sz;
+
+       copy_sz = min_t(size_t, len, udata->outlen);
+       return copy_to_user(udata->outbuf, src, copy_sz) ? -EFAULT : 0;
  }
  
  /**
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h

index 18b2403982f939d5cd2e91ad2822693c8b0a5f20..50ae24335444f3f0188bdbafb7e409ead40a0975 100644 (file)
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -48,6 +48,8 @@
  #define TUNSETQUEUE  _IOW('T', 217, int)
  #define TUNSETIFINDEX  _IOW('T', 218, unsigned int)
  #define TUNGETFILTER _IOR('T', 219, struct sock_fprog)
+#define TUNSETVNETLE _IOW('T', 220, int)
+#define TUNGETVNETLE _IOR('T', 221, int)
  
  /* TUNSETIFF ifr flags */
  #define IFF_TUN                0x0001
@@ -57,7 +59,6 @@
  #define IFF_ONE_QUEUE  0x2000
  #define IFF_VNET_HDR   0x4000
  #define IFF_TUN_EXCL   0x8000
-#define IFF_VNET_LE    0x10000
  #define IFF_MULTI_QUEUE 0x0100
  #define IFF_ATTACH_QUEUE 0x0200
  #define IFF_DETACH_QUEUE 0x0400
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h

index 60768822b14017c6f1f7322fa9b8e3ff9467679b..a37fd1224f363fe988ea4a7bdc5d65a8c72bfb8a 100644 (file)
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -647,11 +647,7 @@ struct kvm_ppc_smmu_info {
  #define KVM_CAP_MP_STATE 14
  #define KVM_CAP_COALESCED_MMIO 15
  #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
-#define KVM_CAP_DEVICE_ASSIGNMENT 17
  #define KVM_CAP_IOMMU 18
-#ifdef __KVM_HAVE_MSI
-#define KVM_CAP_DEVICE_MSI 20
-#endif
  /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
  #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
  #define KVM_CAP_USER_NMI 22
@@ -663,10 +659,6 @@ struct kvm_ppc_smmu_info {
  #endif
  #define KVM_CAP_IRQ_ROUTING 25
  #define KVM_CAP_IRQ_INJECT_STATUS 26
-#define KVM_CAP_DEVICE_DEASSIGNMENT 27
-#ifdef __KVM_HAVE_MSIX
-#define KVM_CAP_DEVICE_MSIX 28
-#endif
  #define KVM_CAP_ASSIGN_DEV_IRQ 29
  /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
  #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
@@ -1107,9 +1099,6 @@ struct kvm_s390_ucas_mapping {
  #define KVM_X86_SETUP_MCE         _IOW(KVMIO,  0x9c, __u64)
  #define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO,  0x9d, __u64)
  #define KVM_X86_SET_MCE           _IOW(KVMIO,  0x9e, struct kvm_x86_mce)
-/* IA64 stack access */
-#define KVM_IA64_VCPU_GET_STACK   _IOR(KVMIO,  0x9a, void *)
-#define KVM_IA64_VCPU_SET_STACK   _IOW(KVMIO,  0x9b, void *)
  /* Available with KVM_CAP_VCPU_EVENTS */
  #define KVM_GET_VCPU_EVENTS       _IOR(KVMIO,  0x9f, struct kvm_vcpu_events)
  #define KVM_SET_VCPU_EVENTS       _IOW(KVMIO,  0xa0, struct kvm_vcpu_events)
diff --git a/include/uapi/linux/v4l2-mediabus.h b/include/uapi/linux/v4l2-mediabus.h

index 5a86d8ede09c952711633eb5bf81d4a7a31a088d..26db20647e6f2df48340b01c64445a4e2fa91de8 100644 (file)
--- a/include/uapi/linux/v4l2-mediabus.h
+++ b/include/uapi/linux/v4l2-mediabus.h
@@ -31,9 +31,9 @@ struct v4l2_mbus_framefmt {
         __u32                   code;
         __u32                   field;
         __u32                   colorspace;
-       __u32                   ycbcr_enc;
-       __u32                   quantization;
-       __u32                   reserved[5];
+       __u16                   ycbcr_enc;
+       __u16                   quantization;
+       __u32                   reserved[6];
  };
  
  #ifndef __KERNEL__
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h

index 26daf55ff76ead65f47801620d4697b10e17151c..4275b961bf60f65ec9d93a2dbe680e66d184b6a2 100644 (file)
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -90,8 +90,9 @@ enum {
  };
  
  enum {
+       IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE,
         IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
-       IB_USER_VERBS_EX_CMD_DESTROY_FLOW
+       IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
  };
  
  /*
@@ -201,6 +202,32 @@ struct ib_uverbs_query_device_resp {
         __u8  reserved[4];
  };
  
+enum {
+       IB_USER_VERBS_EX_QUERY_DEVICE_ODP =             1ULL << 0,
+};
+
+struct ib_uverbs_ex_query_device {
+       __u32 comp_mask;
+       __u32 reserved;
+};
+
+struct ib_uverbs_odp_caps {
+       __u64 general_caps;
+       struct {
+               __u32 rc_odp_caps;
+               __u32 uc_odp_caps;
+               __u32 ud_odp_caps;
+       } per_transport_caps;
+       __u32 reserved;
+};
+
+struct ib_uverbs_ex_query_device_resp {
+       struct ib_uverbs_query_device_resp base;
+       __u32 comp_mask;
+       __u32 reserved;
+       struct ib_uverbs_odp_caps odp_caps;
+};
+
  struct ib_uverbs_query_port {
         __u64 response;
         __u8  port_num;
diff --git a/lib/show_mem.c b/lib/show_mem.c

index 5e256271b47b02b1a0265ad6a89b2bccf5c40002..7de89f4a36cfa54dd01980898a4646f7c41ec550 100644 (file)
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -8,6 +8,7 @@
  #include <linux/mm.h>
  #include <linux/nmi.h>
  #include <linux/quicklist.h>
+#include <linux/cma.h>
  
  void show_mem(unsigned int filter)
  {
@@ -38,7 +39,12 @@ void show_mem(unsigned int filter)
  
         printk("%lu pages RAM\n", total);
         printk("%lu pages HighMem/MovableOnly\n", highmem);
+#ifdef CONFIG_CMA
+       printk("%lu pages reserved\n", (reserved - totalcma_pages));
+       printk("%lu pages cma reserved\n", totalcma_pages);
+#else
         printk("%lu pages reserved\n", reserved);
+#endif
  #ifdef CONFIG_QUICKLIST
         printk("%lu pages in pagetable cache\n",
                 quicklist_total_size());
diff --git a/mm/cma.c b/mm/cma.c

index f8917629cbdd43da2bb4d0392f5f9198701e9cd7..a85ae28709a330b8caa9d0ee8a6af264efa8ed26 100644 (file)
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -337,6 +337,7 @@ int __init cma_declare_contiguous(phys_addr_t base,
         if (ret)
                 goto err;
  
+       totalcma_pages += (size / PAGE_SIZE);
         pr_info("Reserved %ld MiB at %pa\n", (unsigned long)size / SZ_1M,
                 &base);
         return 0;
diff --git a/mm/memory.c b/mm/memory.c

index 6efe36a998bae484789441e3ea47e3b38b2ea346..d8aebc52265f59e75e342051fa604c5f1003f59b 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2996,6 +2996,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
  
         if (set_page_dirty(fault_page))
                 dirtied = 1;
+       /*
+        * Take a local copy of the address_space - page.mapping may be zeroed
+        * by truncate after unlock_page().   The address_space itself remains
+        * pinned by vma->vm_file's reference.  We rely on unlock_page()'s
+        * release semantics to prevent the compiler from undoing this copying.
+        */
         mapping = fault_page->mapping;
         unlock_page(fault_page);
         if ((dirtied || vma->vm_ops->page_mkwrite) && mapping) {
diff --git a/mm/mempolicy.c b/mm/mempolicy.c

index e58725aff7e999aeecde0e332960c498aa786128..f22c55947181d87e997b0cadd76c57f36d5a83f8 100644 (file)
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -162,12 +162,6 @@ static const struct mempolicy_operations {
                         enum mpol_rebind_step step);
  } mpol_ops[MPOL_MAX];
  
-/* Check that the nodemask contains at least one populated zone */
-static int is_valid_nodemask(const nodemask_t *nodemask)
-{
-       return nodes_intersects(*nodemask, node_states[N_MEMORY]);
-}
-
  static inline int mpol_store_user_nodemask(const struct mempolicy *pol)
  {
         return pol->flags & MPOL_MODE_FLAGS;
@@ -202,7 +196,7 @@ static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes)
  
  static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes)
  {
-       if (!is_valid_nodemask(nodes))
+       if (nodes_empty(*nodes))
                 return -EINVAL;
         pol->v.nodes = *nodes;
         return 0;
@@ -234,7 +228,7 @@ static int mpol_set_nodemask(struct mempolicy *pol,
                 nodes = NULL;   /* explicit local allocation */
         else {
                 if (pol->flags & MPOL_F_RELATIVE_NODES)
-                       mpol_relative_nodemask(&nsc->mask2, nodes,&nsc->mask1);
+                       mpol_relative_nodemask(&nsc->mask2, nodes, &nsc->mask1);
                 else
                         nodes_and(nsc->mask2, *nodes, nsc->mask1);
  
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index fa974d87f60df7a614980148500da6dedb696cd8..7633c503a116c221e7447614c6d10ebaa38a0b1c 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -111,6 +111,7 @@ static DEFINE_SPINLOCK(managed_page_count_lock);
  
  unsigned long totalram_pages __read_mostly;
  unsigned long totalreserve_pages __read_mostly;
+unsigned long totalcma_pages __read_mostly;
  /*
   * When calculating the number of globally allowed dirty pages, there
   * is a certain number of per-zone reserves that should not be
@@ -5586,7 +5587,7 @@ void __init mem_init_print_info(const char *str)
  
         pr_info("Memory: %luK/%luK available "
                "(%luK kernel code, %luK rwdata, %luK rodata, "
-              "%luK init, %luK bss, %luK reserved"
+              "%luK init, %luK bss, %luK reserved, %luK cma-reserved"
  #ifdef CONFIG_HIGHMEM
                ", %luK highmem"
  #endif
@@ -5594,7 +5595,8 @@ void __init mem_init_print_info(const char *str)
                nr_free_pages() << (PAGE_SHIFT-10), physpages << (PAGE_SHIFT-10),
                codesize >> 10, datasize >> 10, rosize >> 10,
                (init_data_size + init_code_size) >> 10, bss_size >> 10,
-              (physpages - totalram_pages) << (PAGE_SHIFT-10),
+              (physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT-10),
+              totalcma_pages << (PAGE_SHIFT-10),
  #ifdef CONFIG_HIGHMEM
                totalhigh_pages << (PAGE_SHIFT-10),
  #endif
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c

index 4d0a063145ec45647a65654dd09388e8b7936fde..b72403927aa4b06610cb08e9506d677c98a1a5cd 100644 (file)
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -884,19 +884,6 @@ static struct notifier_block zs_cpu_nb = {
         .notifier_call = zs_cpu_notifier
  };
  
-static void zs_unregister_cpu_notifier(void)
-{
-       int cpu;
-
-       cpu_notifier_register_begin();
-
-       for_each_online_cpu(cpu)
-               zs_cpu_notifier(NULL, CPU_DEAD, (void *)(long)cpu);
-       __unregister_cpu_notifier(&zs_cpu_nb);
-
-       cpu_notifier_register_done();
-}
-
  static int zs_register_cpu_notifier(void)
  {
         int cpu, uninitialized_var(ret);
@@ -914,40 +901,28 @@ static int zs_register_cpu_notifier(void)
         return notifier_to_errno(ret);
  }
  
-static void init_zs_size_classes(void)
+static void zs_unregister_cpu_notifier(void)
  {
-       int nr;
+       int cpu;
  
-       nr = (ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / ZS_SIZE_CLASS_DELTA + 1;
-       if ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) % ZS_SIZE_CLASS_DELTA)
-               nr += 1;
+       cpu_notifier_register_begin();
  
-       zs_size_classes = nr;
-}
+       for_each_online_cpu(cpu)
+               zs_cpu_notifier(NULL, CPU_DEAD, (void *)(long)cpu);
+       __unregister_cpu_notifier(&zs_cpu_nb);
  
-static void __exit zs_exit(void)
-{
-#ifdef CONFIG_ZPOOL
-       zpool_unregister_driver(&zs_zpool_driver);
-#endif
-       zs_unregister_cpu_notifier();
+       cpu_notifier_register_done();
  }
  
-static int __init zs_init(void)
+static void init_zs_size_classes(void)
  {
-       int ret = zs_register_cpu_notifier();
-
-       if (ret) {
-               zs_unregister_cpu_notifier();
-               return ret;
-       }
+       int nr;
  
-       init_zs_size_classes();
+       nr = (ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / ZS_SIZE_CLASS_DELTA + 1;
+       if ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) % ZS_SIZE_CLASS_DELTA)
+               nr += 1;
  
-#ifdef CONFIG_ZPOOL
-       zpool_register_driver(&zs_zpool_driver);
-#endif
-       return 0;
+       zs_size_classes = nr;
  }
  
  static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)
@@ -967,113 +942,101 @@ static bool can_merge(struct size_class *prev, int size, int pages_per_zspage)
         return true;
  }
  
+unsigned long zs_get_total_pages(struct zs_pool *pool)
+{
+       return atomic_long_read(&pool->pages_allocated);
+}
+EXPORT_SYMBOL_GPL(zs_get_total_pages);
+
  /**
- * zs_create_pool - Creates an allocation pool to work from.
- * @flags: allocation flags used to allocate pool metadata
+ * zs_map_object - get address of allocated object from handle.
+ * @pool: pool from which the object was allocated
+ * @handle: handle returned from zs_malloc
   *
- * This function must be called before anything when using
- * the zsmalloc allocator.
+ * Before using an object allocated from zs_malloc, it must be mapped using
+ * this function. When done with the object, it must be unmapped using
+ * zs_unmap_object.
   *
- * On success, a pointer to the newly created pool is returned,
- * otherwise NULL.
+ * Only one object can be mapped per cpu at a time. There is no protection
+ * against nested mappings.
+ *
+ * This function returns with preemption and page faults disabled.
   */
-struct zs_pool *zs_create_pool(gfp_t flags)
+void *zs_map_object(struct zs_pool *pool, unsigned long handle,
+                       enum zs_mapmode mm)
  {
-       int i;
-       struct zs_pool *pool;
-       struct size_class *prev_class = NULL;
+       struct page *page;
+       unsigned long obj_idx, off;
  
-       pool = kzalloc(sizeof(*pool), GFP_KERNEL);
-       if (!pool)
-               return NULL;
+       unsigned int class_idx;
+       enum fullness_group fg;
+       struct size_class *class;
+       struct mapping_area *area;
+       struct page *pages[2];
  
-       pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *),
-                       GFP_KERNEL);
-       if (!pool->size_class) {
-               kfree(pool);
-               return NULL;
-       }
+       BUG_ON(!handle);
  
         /*
-        * Iterate reversly, because, size of size_class that we want to use
-        * for merging should be larger or equal to current size.
+        * Because we use per-cpu mapping areas shared among the
+        * pools/users, we can't allow mapping in interrupt context
+        * because it can corrupt another user's mappings.
          */
-       for (i = zs_size_classes - 1; i >= 0; i--) {
-               int size;
-               int pages_per_zspage;
-               struct size_class *class;
-
-               size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;
-               if (size > ZS_MAX_ALLOC_SIZE)
-                       size = ZS_MAX_ALLOC_SIZE;
-               pages_per_zspage = get_pages_per_zspage(size);
-
-               /*
-                * size_class is used for normal zsmalloc operation such
-                * as alloc/free for that size. Although it is natural that we
-                * have one size_class for each size, there is a chance that we
-                * can get more memory utilization if we use one size_class for
-                * many different sizes whose size_class have same
-                * characteristics. So, we makes size_class point to
-                * previous size_class if possible.
-                */
-               if (prev_class) {
-                       if (can_merge(prev_class, size, pages_per_zspage)) {
-                               pool->size_class[i] = prev_class;
-                               continue;
-                       }
-               }
-
-               class = kzalloc(sizeof(struct size_class), GFP_KERNEL);
-               if (!class)
-                       goto err;
+       BUG_ON(in_interrupt());
  
-               class->size = size;
-               class->index = i;
-               class->pages_per_zspage = pages_per_zspage;
-               spin_lock_init(&class->lock);
-               pool->size_class[i] = class;
+       obj_handle_to_location(handle, &page, &obj_idx);
+       get_zspage_mapping(get_first_page(page), &class_idx, &fg);
+       class = pool->size_class[class_idx];
+       off = obj_idx_to_offset(page, obj_idx, class->size);
  
-               prev_class = class;
+       area = &get_cpu_var(zs_map_area);
+       area->vm_mm = mm;
+       if (off + class->size <= PAGE_SIZE) {
+               /* this object is contained entirely within a page */
+               area->vm_addr = kmap_atomic(page);
+               return area->vm_addr + off;
         }
  
-       pool->flags = flags;
-
-       return pool;
+       /* this object spans two pages */
+       pages[0] = page;
+       pages[1] = get_next_page(page);
+       BUG_ON(!pages[1]);
  
-err:
-       zs_destroy_pool(pool);
-       return NULL;
+       return __zs_map_object(area, pages, off, class->size);
  }
-EXPORT_SYMBOL_GPL(zs_create_pool);
+EXPORT_SYMBOL_GPL(zs_map_object);
  
-void zs_destroy_pool(struct zs_pool *pool)
+void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
  {
-       int i;
+       struct page *page;
+       unsigned long obj_idx, off;
  
-       for (i = 0; i < zs_size_classes; i++) {
-               int fg;
-               struct size_class *class = pool->size_class[i];
+       unsigned int class_idx;
+       enum fullness_group fg;
+       struct size_class *class;
+       struct mapping_area *area;
  
-               if (!class)
-                       continue;
+       BUG_ON(!handle);
  
-               if (class->index != i)
-                       continue;
+       obj_handle_to_location(handle, &page, &obj_idx);
+       get_zspage_mapping(get_first_page(page), &class_idx, &fg);
+       class = pool->size_class[class_idx];
+       off = obj_idx_to_offset(page, obj_idx, class->size);
  
-               for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) {
-                       if (class->fullness_list[fg]) {
-                               pr_info("Freeing non-empty class with size %db, fullness group %d\n",
-                                       class->size, fg);
-                       }
-               }
-               kfree(class);
-       }
+       area = this_cpu_ptr(&zs_map_area);
+       if (off + class->size <= PAGE_SIZE)
+               kunmap_atomic(area->vm_addr);
+       else {
+               struct page *pages[2];
  
-       kfree(pool->size_class);
-       kfree(pool);
+               pages[0] = page;
+               pages[1] = get_next_page(page);
+               BUG_ON(!pages[1]);
+
+               __zs_unmap_object(area, pages, off, class->size);
+       }
+       put_cpu_var(zs_map_area);
  }
-EXPORT_SYMBOL_GPL(zs_destroy_pool);
+EXPORT_SYMBOL_GPL(zs_unmap_object);
  
  /**
   * zs_malloc - Allocate block of given size from pool.
@@ -1176,100 +1139,137 @@ void zs_free(struct zs_pool *pool, unsigned long obj)
  EXPORT_SYMBOL_GPL(zs_free);
  
  /**
- * zs_map_object - get address of allocated object from handle.
- * @pool: pool from which the object was allocated
- * @handle: handle returned from zs_malloc
- *
- * Before using an object allocated from zs_malloc, it must be mapped using
- * this function. When done with the object, it must be unmapped using
- * zs_unmap_object.
+ * zs_create_pool - Creates an allocation pool to work from.
+ * @flags: allocation flags used to allocate pool metadata
   *
- * Only one object can be mapped per cpu at a time. There is no protection
- * against nested mappings.
+ * This function must be called before anything when using
+ * the zsmalloc allocator.
   *
- * This function returns with preemption and page faults disabled.
+ * On success, a pointer to the newly created pool is returned,
+ * otherwise NULL.
   */
-void *zs_map_object(struct zs_pool *pool, unsigned long handle,
-                       enum zs_mapmode mm)
+struct zs_pool *zs_create_pool(gfp_t flags)
  {
-       struct page *page;
-       unsigned long obj_idx, off;
+       int i;
+       struct zs_pool *pool;
+       struct size_class *prev_class = NULL;
  
-       unsigned int class_idx;
-       enum fullness_group fg;
-       struct size_class *class;
-       struct mapping_area *area;
-       struct page *pages[2];
+       pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+       if (!pool)
+               return NULL;
  
-       BUG_ON(!handle);
+       pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *),
+                       GFP_KERNEL);
+       if (!pool->size_class) {
+               kfree(pool);
+               return NULL;
+       }
  
         /*
-        * Because we use per-cpu mapping areas shared among the
-        * pools/users, we can't allow mapping in interrupt context
-        * because it can corrupt another users mappings.
+        * Iterate in reverse, because the size_class that we want to use
+        * for merging must be larger than or equal to the current size.
          */
-       BUG_ON(in_interrupt());
+       for (i = zs_size_classes - 1; i >= 0; i--) {
+               int size;
+               int pages_per_zspage;
+               struct size_class *class;
  
-       obj_handle_to_location(handle, &page, &obj_idx);
-       get_zspage_mapping(get_first_page(page), &class_idx, &fg);
-       class = pool->size_class[class_idx];
-       off = obj_idx_to_offset(page, obj_idx, class->size);
+               size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;
+               if (size > ZS_MAX_ALLOC_SIZE)
+                       size = ZS_MAX_ALLOC_SIZE;
+               pages_per_zspage = get_pages_per_zspage(size);
  
-       area = &get_cpu_var(zs_map_area);
-       area->vm_mm = mm;
-       if (off + class->size <= PAGE_SIZE) {
-               /* this object is contained entirely within a page */
-               area->vm_addr = kmap_atomic(page);
-               return area->vm_addr + off;
+               /*
+                * size_class is used for normal zsmalloc operation such
+                * as alloc/free for that size. Although it is natural that we
+                * have one size_class for each size, there is a chance that we
+                * can get more memory utilization if we use one size_class for
+                * many different sizes whose size_class have same
+                * characteristics. So, we make size_class point to
+                * previous size_class if possible.
+                */
+               if (prev_class) {
+                       if (can_merge(prev_class, size, pages_per_zspage)) {
+                               pool->size_class[i] = prev_class;
+                               continue;
+                       }
+               }
+
+               class = kzalloc(sizeof(struct size_class), GFP_KERNEL);
+               if (!class)
+                       goto err;
+
+               class->size = size;
+               class->index = i;
+               class->pages_per_zspage = pages_per_zspage;
+               spin_lock_init(&class->lock);
+               pool->size_class[i] = class;
+
+               prev_class = class;
         }
  
-       /* this object spans two pages */
-       pages[0] = page;
-       pages[1] = get_next_page(page);
-       BUG_ON(!pages[1]);
+       pool->flags = flags;
  
-       return __zs_map_object(area, pages, off, class->size);
+       return pool;
+
+err:
+       zs_destroy_pool(pool);
+       return NULL;
  }
-EXPORT_SYMBOL_GPL(zs_map_object);
+EXPORT_SYMBOL_GPL(zs_create_pool);
  
-void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
+void zs_destroy_pool(struct zs_pool *pool)
  {
-       struct page *page;
-       unsigned long obj_idx, off;
+       int i;
  
-       unsigned int class_idx;
-       enum fullness_group fg;
-       struct size_class *class;
-       struct mapping_area *area;
+       for (i = 0; i < zs_size_classes; i++) {
+               int fg;
+               struct size_class *class = pool->size_class[i];
  
-       BUG_ON(!handle);
+               if (!class)
+                       continue;
  
-       obj_handle_to_location(handle, &page, &obj_idx);
-       get_zspage_mapping(get_first_page(page), &class_idx, &fg);
-       class = pool->size_class[class_idx];
-       off = obj_idx_to_offset(page, obj_idx, class->size);
+               if (class->index != i)
+                       continue;
  
-       area = this_cpu_ptr(&zs_map_area);
-       if (off + class->size <= PAGE_SIZE)
-               kunmap_atomic(area->vm_addr);
-       else {
-               struct page *pages[2];
+               for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) {
+                       if (class->fullness_list[fg]) {
+                               pr_info("Freeing non-empty class with size %db, fullness group %d\n",
+                                       class->size, fg);
+                       }
+               }
+               kfree(class);
+       }
  
-               pages[0] = page;
-               pages[1] = get_next_page(page);
-               BUG_ON(!pages[1]);
+       kfree(pool->size_class);
+       kfree(pool);
+}
+EXPORT_SYMBOL_GPL(zs_destroy_pool);
  
-               __zs_unmap_object(area, pages, off, class->size);
+static int __init zs_init(void)
+{
+       int ret = zs_register_cpu_notifier();
+
+       if (ret) {
+               zs_unregister_cpu_notifier();
+               return ret;
         }
-       put_cpu_var(zs_map_area);
+
+       init_zs_size_classes();
+
+#ifdef CONFIG_ZPOOL
+       zpool_register_driver(&zs_zpool_driver);
+#endif
+       return 0;
  }
-EXPORT_SYMBOL_GPL(zs_unmap_object);
  
-unsigned long zs_get_total_pages(struct zs_pool *pool)
+static void __exit zs_exit(void)
  {
-       return atomic_long_read(&pool->pages_allocated);
+#ifdef CONFIG_ZPOOL
+       zpool_unregister_driver(&zs_zpool_driver);
+#endif
+       zs_unregister_cpu_notifier();
  }
-EXPORT_SYMBOL_GPL(zs_get_total_pages);
  
  module_init(zs_init);
  module_exit(zs_exit);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c

index 79d84b88b8f0a766c9d8db9e93f4ef35221031be..fe18825cc8a47ffba031dd1239aa98a7cea8a7cf 100644 (file)
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -661,7 +661,7 @@ static void hci_req_add_le_create_conn(struct hci_request *req,
         memset(&cp, 0, sizeof(cp));
  
         /* Update random address, but set require_privacy to false so
-        * that we never connect with an unresolvable address.
+        * that we never connect with a non-resolvable address.
          */
         if (hci_update_random_address(req, false, &own_addr_type))
                 return;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c

index 93f92a08550694dbe3791109631ab4058afe13ca..5dcacf9607e445777607e8be7492e4c74ec44b2e 100644 (file)
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1373,8 +1373,6 @@ static void hci_init1_req(struct hci_request *req, unsigned long opt)
  
  static void bredr_setup(struct hci_request *req)
  {
-       struct hci_dev *hdev = req->hdev;
-
         __le16 param;
         __u8 flt_type;
  
@@ -1403,14 +1401,6 @@ static void bredr_setup(struct hci_request *req)
         /* Connection accept timeout ~20 secs */
         param = cpu_to_le16(0x7d00);
         hci_req_add(req, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
-
-       /* AVM Berlin (31), aka "BlueFRITZ!", reports version 1.2,
-        * but it does not support page scan related HCI commands.
-        */
-       if (hdev->manufacturer != 31 && hdev->hci_ver > BLUETOOTH_VER_1_1) {
-               hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL);
-               hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL);
-       }
  }
  
  static void le_setup(struct hci_request *req)
@@ -1718,6 +1708,16 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt)
         if (hdev->commands[5] & 0x10)
                 hci_setup_link_policy(req);
  
+       if (hdev->commands[8] & 0x01)
+               hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL);
+
+       /* Some older Broadcom based Bluetooth 1.2 controllers do not
+        * support the Read Page Scan Type command. Check support for
+        * this command in the bit mask of supported commands.
+        */
+       if (hdev->commands[13] & 0x01)
+               hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL);
+
         if (lmp_le_capable(hdev)) {
                 u8 events[8];
  
@@ -2634,6 +2634,12 @@ static int hci_dev_do_close(struct hci_dev *hdev)
         drain_workqueue(hdev->workqueue);
  
         hci_dev_lock(hdev);
+
+       if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
+               if (hdev->dev_type == HCI_BREDR)
+                       mgmt_powered(hdev, 0);
+       }
+
         hci_inquiry_cache_flush(hdev);
         hci_pend_le_actions_clear(hdev);
         hci_conn_hash_flush(hdev);
@@ -2681,14 +2687,6 @@ static int hci_dev_do_close(struct hci_dev *hdev)
         hdev->flags &= BIT(HCI_RAW);
         hdev->dev_flags &= ~HCI_PERSISTENT_MASK;
  
-       if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
-               if (hdev->dev_type == HCI_BREDR) {
-                       hci_dev_lock(hdev);
-                       mgmt_powered(hdev, 0);
-                       hci_dev_unlock(hdev);
-               }
-       }
-
         /* Controller radio is available but is currently powered down */
         hdev->amp_status = AMP_STATUS_POWERED_DOWN;
  
@@ -3083,7 +3081,9 @@ static void hci_power_on(struct work_struct *work)
  
         err = hci_dev_do_open(hdev);
         if (err < 0) {
+               hci_dev_lock(hdev);
                 mgmt_set_powered_failed(hdev, err);
+               hci_dev_unlock(hdev);
                 return;
         }
  
@@ -3959,17 +3959,29 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
         }
  
         /* In case of required privacy without resolvable private address,
-        * use an unresolvable private address. This is useful for active
+        * use a non-resolvable private address. This is useful for active
          * scanning and non-connectable advertising.
          */
         if (require_privacy) {
-               bdaddr_t urpa;
+               bdaddr_t nrpa;
+
+               while (true) {
+                       /* The non-resolvable private address is generated
+                        * from random six bytes with the two most significant
+                        * bits cleared.
+                        */
+                       get_random_bytes(&nrpa, 6);
+                       nrpa.b[5] &= 0x3f;
  
-               get_random_bytes(&urpa, 6);
-               urpa.b[5] &= 0x3f;      /* Clear two most significant bits */
+                       /* The non-resolvable private address shall not be
+                        * equal to the public address.
+                        */
+                       if (bacmp(&hdev->bdaddr, &nrpa))
+                               break;
+               }
  
                 *own_addr_type = ADDR_LE_DEV_RANDOM;
-               set_random_addr(req, &urpa);
+               set_random_addr(req, &nrpa);
                 return 0;
         }
  
@@ -5625,7 +5637,7 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
         u8 filter_policy;
  
         /* Set require_privacy to false since no SCAN_REQ are send
-        * during passive scanning. Not using an unresolvable address
+        * during passive scanning. Not using a non-resolvable address
          * here is important so that peer devices using direct
          * advertising with our address will be correctly reported
          * by the controller.
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c

index 322abbbbcef991a36adaaf0750c723ceebba3147..39a5c8a017263694f1dbdca7888abd700c12952b 100644 (file)
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -257,6 +257,8 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb)
         if (!sent)
                 return;
  
+       hci_dev_lock(hdev);
+
         if (!status) {
                 __u8 param = *((__u8 *) sent);
  
@@ -268,6 +270,8 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb)
  
         if (test_bit(HCI_MGMT, &hdev->dev_flags))
                 mgmt_auth_enable_complete(hdev, status);
+
+       hci_dev_unlock(hdev);
  }
  
  static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb)
@@ -443,6 +447,8 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
         if (!sent)
                 return;
  
+       hci_dev_lock(hdev);
+
         if (!status) {
                 if (sent->mode)
                         hdev->features[1][0] |= LMP_HOST_SSP;
@@ -458,6 +464,8 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
                 else
                         clear_bit(HCI_SSP_ENABLED, &hdev->dev_flags);
         }
+
+       hci_dev_unlock(hdev);
  }
  
  static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb)
@@ -471,6 +479,8 @@ static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb)
         if (!sent)
                 return;
  
+       hci_dev_lock(hdev);
+
         if (!status) {
                 if (sent->support)
                         hdev->features[1][0] |= LMP_HOST_SC;
@@ -486,6 +496,8 @@ static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb)
                 else
                         clear_bit(HCI_SC_ENABLED, &hdev->dev_flags);
         }
+
+       hci_dev_unlock(hdev);
  }
  
  static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1135,6 +1147,8 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
         if (!cp)
                 return;
  
+       hci_dev_lock(hdev);
+
         switch (cp->enable) {
         case LE_SCAN_ENABLE:
                 set_bit(HCI_LE_SCAN, &hdev->dev_flags);
@@ -1184,6 +1198,8 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
                 BT_ERR("Used reserved LE_Scan_Enable param %d", cp->enable);
                 break;
         }
+
+       hci_dev_unlock(hdev);
  }
  
  static void hci_cc_le_read_white_list_size(struct hci_dev *hdev,
@@ -1278,6 +1294,8 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
         if (!sent)
                 return;
  
+       hci_dev_lock(hdev);
+
         if (sent->le) {
                 hdev->features[1][0] |= LMP_HOST_LE;
                 set_bit(HCI_LE_ENABLED, &hdev->dev_flags);
@@ -1291,6 +1309,8 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
                 hdev->features[1][0] |= LMP_HOST_LE_BREDR;
         else
                 hdev->features[1][0] &= ~LMP_HOST_LE_BREDR;
+
+       hci_dev_unlock(hdev);
  }
  
  static void hci_cc_set_adv_param(struct hci_dev *hdev, struct sk_buff *skb)
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c

index a2b6dfa38a0cfd7f020c9d6f5a1584a92ae5c6e6..d04dc009573691cde515d1db82c13cbfd11a459f 100644 (file)
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -6966,8 +6966,9 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
             test_bit(HCI_HS_ENABLED, &hcon->hdev->dev_flags))
                 conn->local_fixed_chan |= L2CAP_FC_A2MP;
  
-       if (bredr_sc_enabled(hcon->hdev) &&
-           test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags))
+       if (test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags) &&
+           (bredr_sc_enabled(hcon->hdev) ||
+            test_bit(HCI_FORCE_LESC, &hcon->hdev->dbg_flags)))
                 conn->local_fixed_chan |= L2CAP_FC_SMP_BREDR;
  
         mutex_init(&conn->ident_lock);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c

index 7384f11613369b0997df0229ecc6d8ea2c80bb5b..693ce8bcd06e6eaeee9fbbdffa375c67a93487f4 100644 (file)
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2199,12 +2199,14 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status)
  {
         struct cmd_lookup match = { NULL, hdev };
  
+       hci_dev_lock(hdev);
+
         if (status) {
                 u8 mgmt_err = mgmt_status(status);
  
                 mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, cmd_status_rsp,
                                      &mgmt_err);
-               return;
+               goto unlock;
         }
  
         mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, settings_rsp, &match);
@@ -2222,17 +2224,16 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status)
         if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
                 struct hci_request req;
  
-               hci_dev_lock(hdev);
-
                 hci_req_init(&req, hdev);
                 update_adv_data(&req);
                 update_scan_rsp_data(&req);
                 hci_req_run(&req, NULL);
  
                 hci_update_background_scan(hdev);
-
-               hci_dev_unlock(hdev);
         }
+
+unlock:
+       hci_dev_unlock(hdev);
  }
  
  static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
@@ -3114,14 +3115,13 @@ static void pairing_complete(struct pending_cmd *cmd, u8 status)
         conn->disconn_cfm_cb = NULL;
  
         hci_conn_drop(conn);
-       hci_conn_put(conn);
-
-       mgmt_pending_remove(cmd);
  
         /* The device is paired so there is no need to remove
          * its connection parameters anymore.
          */
         clear_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags);
+
+       hci_conn_put(conn);
  }
  
  void mgmt_smp_complete(struct hci_conn *conn, bool complete)
@@ -3130,8 +3130,10 @@ void mgmt_smp_complete(struct hci_conn *conn, bool complete)
         struct pending_cmd *cmd;
  
         cmd = find_pairing(conn);
-       if (cmd)
+       if (cmd) {
                 cmd->cmd_complete(cmd, status);
+               mgmt_pending_remove(cmd);
+       }
  }
  
  static void pairing_complete_cb(struct hci_conn *conn, u8 status)
@@ -3141,10 +3143,13 @@ static void pairing_complete_cb(struct hci_conn *conn, u8 status)
         BT_DBG("status %u", status);
  
         cmd = find_pairing(conn);
-       if (!cmd)
+       if (!cmd) {
                 BT_DBG("Unable to find a pending command");
-       else
-               cmd->cmd_complete(cmd, mgmt_status(status));
+               return;
+       }
+
+       cmd->cmd_complete(cmd, mgmt_status(status));
+       mgmt_pending_remove(cmd);
  }
  
  static void le_pairing_complete_cb(struct hci_conn *conn, u8 status)
@@ -3157,10 +3162,13 @@ static void le_pairing_complete_cb(struct hci_conn *conn, u8 status)
                 return;
  
         cmd = find_pairing(conn);
-       if (!cmd)
+       if (!cmd) {
                 BT_DBG("Unable to find a pending command");
-       else
-               cmd->cmd_complete(cmd, mgmt_status(status));
+               return;
+       }
+
+       cmd->cmd_complete(cmd, mgmt_status(status));
+       mgmt_pending_remove(cmd);
  }
  
  static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
@@ -3274,8 +3282,10 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
         cmd->user_data = hci_conn_get(conn);
  
         if ((conn->state == BT_CONNECTED || conn->state == BT_CONFIG) &&
-           hci_conn_security(conn, sec_level, auth_type, true))
-               pairing_complete(cmd, 0);
+           hci_conn_security(conn, sec_level, auth_type, true)) {
+               cmd->cmd_complete(cmd, 0);
+               mgmt_pending_remove(cmd);
+       }
  
         err = 0;
  
@@ -3317,7 +3327,8 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
                 goto unlock;
         }
  
-       pairing_complete(cmd, MGMT_STATUS_CANCELLED);
+       cmd->cmd_complete(cmd, MGMT_STATUS_CANCELLED);
+       mgmt_pending_remove(cmd);
  
         err = cmd_complete(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, 0,
                            addr, sizeof(*addr));
@@ -3791,7 +3802,7 @@ static bool trigger_discovery(struct hci_request *req, u8 *status)
  
                 /* All active scans will be done with either a resolvable
                  * private address (when privacy feature has been enabled)
-                * or unresolvable private address.
+                * or non-resolvable private address.
                  */
                 err = hci_update_random_address(req, true, &own_addr_type);
                 if (err < 0) {
@@ -4279,12 +4290,14 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status)
  {
         struct cmd_lookup match = { NULL, hdev };
  
+       hci_dev_lock(hdev);
+
         if (status) {
                 u8 mgmt_err = mgmt_status(status);
  
                 mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev,
                                      cmd_status_rsp, &mgmt_err);
-               return;
+               goto unlock;
         }
  
         if (test_bit(HCI_LE_ADV, &hdev->dev_flags))
@@ -4299,6 +4312,9 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status)
  
         if (match.sk)
                 sock_put(match.sk);
+
+unlock:
+       hci_dev_unlock(hdev);
  }
  
  static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
@@ -6081,6 +6097,11 @@ static int powered_update_hci(struct hci_dev *hdev)
                 hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, 1, &ssp);
         }
  
+       if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) {
+               u8 sc = 0x01;
+               hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, sizeof(sc), &sc);
+       }
+
         if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags) &&
             lmp_bredr_capable(hdev)) {
                 struct hci_cp_write_le_host_supported cp;
@@ -6130,8 +6151,7 @@ static int powered_update_hci(struct hci_dev *hdev)
  int mgmt_powered(struct hci_dev *hdev, u8 powered)
  {
         struct cmd_lookup match = { NULL, hdev };
-       u8 status_not_powered = MGMT_STATUS_NOT_POWERED;
-       u8 zero_cod[] = { 0, 0, 0 };
+       u8 status, zero_cod[] = { 0, 0, 0 };
         int err;
  
         if (!test_bit(HCI_MGMT, &hdev->dev_flags))
@@ -6147,7 +6167,20 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered)
         }
  
         mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
-       mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status_not_powered);
+
+       /* If the power off is because of hdev unregistration let us
+        * use the appropriate INVALID_INDEX status. Otherwise use
+        * NOT_POWERED. We cover both scenarios here since later in
+        * mgmt_index_removed() any hci_conn callbacks will have already
+        * been triggered, potentially causing misleading DISCONNECTED
+        * status responses.
+        */
+       if (test_bit(HCI_UNREGISTER, &hdev->dev_flags))
+               status = MGMT_STATUS_INVALID_INDEX;
+       else
+               status = MGMT_STATUS_NOT_POWERED;
+
+       mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
  
         if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0)
                 mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
@@ -6681,8 +6714,10 @@ void mgmt_auth_failed(struct hci_conn *conn, u8 hci_status)
         mgmt_event(MGMT_EV_AUTH_FAILED, conn->hdev, &ev, sizeof(ev),
                     cmd ? cmd->sk : NULL);
  
-       if (cmd)
-               pairing_complete(cmd, status);
+       if (cmd) {
+               cmd->cmd_complete(cmd, status);
+               mgmt_pending_remove(cmd);
+       }
  }
  
  void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
@@ -7046,13 +7081,15 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
                  * kept and checking possible scan response data
                  * will be skipped.
                  */
-               if (hdev->discovery.uuid_count > 0) {
+               if (hdev->discovery.uuid_count > 0)
                         match = eir_has_uuids(eir, eir_len,
                                               hdev->discovery.uuid_count,
                                               hdev->discovery.uuids);
-                       if (!match)
-                               return;
-               }
+               else
+                       match = true;
+
+               if (!match && !scan_rsp_len)
+                       return;
  
                 /* Copy EIR or advertising data into event */
                 memcpy(ev->eir, eir, eir_len);
@@ -7061,8 +7098,10 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
                  * provided, results with empty EIR or advertising data
                  * should be dropped since they do not match any UUID.
                  */
-               if (hdev->discovery.uuid_count > 0)
+               if (hdev->discovery.uuid_count > 0 && !scan_rsp_len)
                         return;
+
+               match = false;
         }
  
         if (dev_class && !eir_has_data_type(ev->eir, eir_len, EIR_CLASS_OF_DEV))
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c

index 6a46252fe66f39fb1cc490987d6c14d2ec955dfa..b67749bb55bffa511fd180ef52b2e9c271fa35a1 100644 (file)
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -1673,7 +1673,8 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
         /* SMP over BR/EDR requires special treatment */
         if (conn->hcon->type == ACL_LINK) {
                 /* We must have a BR/EDR SC link */
-               if (!test_bit(HCI_CONN_AES_CCM, &conn->hcon->flags))
+               if (!test_bit(HCI_CONN_AES_CCM, &conn->hcon->flags) &&
+                   !test_bit(HCI_FORCE_LESC, &hdev->dbg_flags))
                         return SMP_CROSS_TRANSP_NOT_ALLOWED;
  
                 set_bit(SMP_FLAG_SC, &smp->flags);
@@ -2927,7 +2928,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
         tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, 0);
         if (IS_ERR(tfm_aes)) {
                 BT_ERR("Unable to create crypto context");
-               return ERR_PTR(PTR_ERR(tfm_aes));
+               return ERR_CAST(tfm_aes);
         }
  
  create_chan:
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c

index d06107d36ec892a2168e7638e43cf004d8f7f92a..9cf6fe9ddc0c99e189916dee672d16e6c4efe19a 100644 (file)
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2368,6 +2368,11 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm,
                 return err;
         }
  
+       if (vid) {
+               pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name);
+               return err;
+       }
+
         if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
                 err = dev_uc_add_excl(dev, addr);
         else if (is_multicast_ether_addr(addr))
diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c

index a457232f0131c49d1a9fd574c683df8604f48e99..95e47c97585e2e34635976d6a352a1484d5c091c 100644 (file)
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@ -159,6 +159,15 @@ static void geneve_notify_add_rx_port(struct geneve_sock *gs)
         }
  }
  
+static void geneve_notify_del_rx_port(struct geneve_sock *gs)
+{
+       struct sock *sk = gs->sock->sk;
+       sa_family_t sa_family = sk->sk_family;
+
+       if (sa_family == AF_INET)
+               udp_del_offload(&gs->udp_offloads);
+}
+
  /* Callback from net/ipv4/udp.c to receive packets */
  static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
  {
@@ -287,6 +296,7 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port,
                                     geneve_rcv_t *rcv, void *data,
                                     bool no_share, bool ipv6)
  {
+       struct geneve_net *gn = net_generic(net, geneve_net_id);
         struct geneve_sock *gs;
  
         gs = geneve_socket_create(net, port, rcv, data, ipv6);
@@ -296,15 +306,15 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port,
         if (no_share)   /* Return error if sharing is not allowed. */
                 return ERR_PTR(-EINVAL);
  
+       spin_lock(&gn->sock_lock);
         gs = geneve_find_sock(net, port);
-       if (gs) {
-               if (gs->rcv == rcv)
-                       atomic_inc(&gs->refcnt);
-               else
+       if (gs && ((gs->rcv != rcv) ||
+                  !atomic_add_unless(&gs->refcnt, 1, 0)))
                         gs = ERR_PTR(-EBUSY);
-       } else {
+       spin_unlock(&gn->sock_lock);
+
+       if (!gs)
                 gs = ERR_PTR(-EINVAL);
-       }
  
         return gs;
  }
@@ -312,9 +322,17 @@ EXPORT_SYMBOL_GPL(geneve_sock_add);
  
  void geneve_sock_release(struct geneve_sock *gs)
  {
+       struct net *net = sock_net(gs->sock->sk);
+       struct geneve_net *gn = net_generic(net, geneve_net_id);
+
         if (!atomic_dec_and_test(&gs->refcnt))
                 return;
  
+       spin_lock(&gn->sock_lock);
+       hlist_del_rcu(&gs->hlist);
+       geneve_notify_del_rx_port(gs);
+       spin_unlock(&gn->sock_lock);
+
         queue_work(geneve_wq, &gs->del_work);
  }
  EXPORT_SYMBOL_GPL(geneve_sock_release);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c

index ac8491245e5b2e99a50259bd7a6d95d92bcff4f4..4f4bf5b99686ee8f13cfd353b7dca307f9260dbc 100644 (file)
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -252,10 +252,6 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
         struct ip_tunnel *tunnel = netdev_priv(dev);
         const struct iphdr *tnl_params;
  
-       skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
-       if (IS_ERR(skb))
-               goto out;
-
         if (dev->header_ops) {
                 /* Need space for new headers */
                 if (skb_cow_head(skb, dev->needed_headroom -
@@ -268,6 +264,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
                  * to gre header.
                  */
                 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
+               skb_reset_mac_header(skb);
         } else {
                 if (skb_cow_head(skb, dev->needed_headroom))
                         goto free_skb;
@@ -275,6 +272,10 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
                 tnl_params = &tunnel->parms.iph;
         }
  
+       skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
+       if (IS_ERR(skb))
+               goto out;
+
         __gre_xmit(skb, dev, tnl_params, skb->protocol);
  
         return NETDEV_TX_OK;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c

index 63e745aadab6466b2e32e083c99ad31af39243c4..d3e4479367208cd16d4ea50b012e23bfafa76357 100644 (file)
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -514,6 +514,9 @@ const struct ip_tunnel_encap_ops __rcu *
  int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
                             unsigned int num)
  {
+       if (num >= MAX_IPTUN_ENCAP_OPS)
+               return -ERANGE;
+
         return !cmpxchg((const struct ip_tunnel_encap_ops **)
                         &iptun_encaps[num],
                         NULL, ops) ? 0 : -1;
@@ -525,6 +528,9 @@ int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
  {
         int ret;
  
+       if (num >= MAX_IPTUN_ENCAP_OPS)
+               return -ERANGE;
+
         ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
                        &iptun_encaps[num],
                        ops, NULL) == ops) ? 0 : -1;
@@ -567,6 +573,9 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
         if (t->encap.type == TUNNEL_ENCAP_NONE)
                 return 0;
  
+       if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
+               return -EINVAL;
+
         rcu_read_lock();
         ops = rcu_dereference(iptun_encaps[t->encap.type]);
         if (likely(ops && ops->build_header))
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c

index 5d6dae9e4aac099379e6ad8ed54d5ef40eac0f88..da1c12c34487e31b8d183ab70b856c5f3f4466bb 100644 (file)
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1011,6 +1011,10 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
  
         ieee80211_vif_update_chandef(sdata, &sdata->reserved_chandef);
  
+       ieee80211_recalc_smps_chanctx(local, new_ctx);
+       ieee80211_recalc_radar_chanctx(local, new_ctx);
+       ieee80211_recalc_chanctx_min_def(local, new_ctx);
+
         if (changed)
                 ieee80211_bss_info_change_notify(sdata, changed);
  
diff --git a/net/mac80211/key.c b/net/mac80211/key.c

index 434a91ad12c88dabcc2674413e86d08386c3b37d..0bb7038121ac5557ba90114b706f8cbc0e404af2 100644 (file)
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -656,7 +656,7 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local,
         int i;
  
         mutex_lock(&local->key_mtx);
-       for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
+       for (i = 0; i < ARRAY_SIZE(sta->gtk); i++) {
                 key = key_mtx_dereference(local, sta->gtk[i]);
                 if (!key)
                         continue;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c

index 75a9bf50207ecd6cdf5ac41cae7101d03ac58a11..2c36c4765f47f28c42d1b2112b59a764ba4ed82f 100644 (file)
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -174,6 +174,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
         if (!(ht_cap->cap_info &
               cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40))) {
                 ret = IEEE80211_STA_DISABLE_40MHZ;
+               vht_chandef = *chandef;
                 goto out;
         }
  
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c

index 49c23bdf08bb2f65786269ccb11ccf8b520a6d9a..683b10f4650577c7d0172733e935f7c169f20292 100644 (file)
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1761,14 +1761,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
         sc = le16_to_cpu(hdr->seq_ctrl);
         frag = sc & IEEE80211_SCTL_FRAG;
  
-       if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
-               goto out;
-
         if (is_multicast_ether_addr(hdr->addr1)) {
                 rx->local->dot11MulticastReceivedFrameCount++;
-               goto out;
+               goto out_no_led;
         }
  
+       if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
+               goto out;
+
         I802_DEBUG_INC(rx->local->rx_handlers_fragments);
  
         if (skb_linearize(rx->skb))
@@ -1859,9 +1859,10 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
         status->rx_flags |= IEEE80211_RX_FRAGMENTED;
  
   out:
+       ieee80211_led_rx(rx->local);
+ out_no_led:
         if (rx->sta)
                 rx->sta->rx_packets++;
-       ieee80211_led_rx(rx->local);
         return RX_CONTINUE;
  }
  
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c

index ef5f77b44ec72131e45b86456c83be7462da224e..074cf3e91c6f2d5cb27ff7599f42d5b6c0ee1fd4 100644 (file)
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -525,14 +525,14 @@ out:
         return err;
  }
  
-static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
+static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
  {
  #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
         struct page *p_start, *p_end;
  
         /* First page is flushed through netlink_{get,set}_status */
         p_start = pgvec_to_page(hdr + PAGE_SIZE);
-       p_end   = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1);
+       p_end   = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
         while (p_start <= p_end) {
                 flush_dcache_page(p_start);
                 p_start++;
@@ -550,9 +550,9 @@ static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
  static void netlink_set_status(struct nl_mmap_hdr *hdr,
                                enum nl_mmap_status status)
  {
+       smp_mb();
         hdr->nm_status = status;
         flush_dcache_page(pgvec_to_page(hdr));
-       smp_wmb();
  }
  
  static struct nl_mmap_hdr *
@@ -714,24 +714,16 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
         struct nl_mmap_hdr *hdr;
         struct sk_buff *skb;
         unsigned int maxlen;
-       bool excl = true;
         int err = 0, len = 0;
  
-       /* Netlink messages are validated by the receiver before processing.
-        * In order to avoid userspace changing the contents of the message
-        * after validation, the socket and the ring may only be used by a
-        * single process, otherwise we fall back to copying.
-        */
-       if (atomic_long_read(&sk->sk_socket->file->f_count) > 1 ||
-           atomic_read(&nlk->mapped) > 1)
-               excl = false;
-
         mutex_lock(&nlk->pg_vec_lock);
  
         ring   = &nlk->tx_ring;
         maxlen = ring->frame_size - NL_MMAP_HDRLEN;
  
         do {
+               unsigned int nm_len;
+
                 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
                 if (hdr == NULL) {
                         if (!(msg->msg_flags & MSG_DONTWAIT) &&
@@ -739,35 +731,23 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
                                 schedule();
                         continue;
                 }
-               if (hdr->nm_len > maxlen) {
+
+               nm_len = ACCESS_ONCE(hdr->nm_len);
+               if (nm_len > maxlen) {
                         err = -EINVAL;
                         goto out;
                 }
  
-               netlink_frame_flush_dcache(hdr);
+               netlink_frame_flush_dcache(hdr, nm_len);
  
-               if (likely(dst_portid == 0 && dst_group == 0 && excl)) {
-                       skb = alloc_skb_head(GFP_KERNEL);
-                       if (skb == NULL) {
-                               err = -ENOBUFS;
-                               goto out;
-                       }
-                       sock_hold(sk);
-                       netlink_ring_setup_skb(skb, sk, ring, hdr);
-                       NETLINK_CB(skb).flags |= NETLINK_SKB_TX;
-                       __skb_put(skb, hdr->nm_len);
-                       netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
-                       atomic_inc(&ring->pending);
-               } else {
-                       skb = alloc_skb(hdr->nm_len, GFP_KERNEL);
-                       if (skb == NULL) {
-                               err = -ENOBUFS;
-                               goto out;
-                       }
-                       __skb_put(skb, hdr->nm_len);
-                       memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
-                       netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+               skb = alloc_skb(nm_len, GFP_KERNEL);
+               if (skb == NULL) {
+                       err = -ENOBUFS;
+                       goto out;
                 }
+               __skb_put(skb, nm_len);
+               memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
+               netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
  
                 netlink_increment_head(ring);
  
@@ -813,7 +793,7 @@ static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
         hdr->nm_pid     = NETLINK_CB(skb).creds.pid;
         hdr->nm_uid     = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
         hdr->nm_gid     = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
-       netlink_frame_flush_dcache(hdr);
+       netlink_frame_flush_dcache(hdr, hdr->nm_len);
         netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
  
         NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
diff --git a/net/rds/message.c b/net/rds/message.c

index ff2202218187530378b270b0df4838286cb9fc14..5a21e6f5986f1b822e41077f54f5f6235fe47068 100644 (file)
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -325,7 +325,8 @@ int rds_message_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to)
         copied = 0;
  
         while (iov_iter_count(to) && copied < len) {
-               to_copy = min(iov_iter_count(to), sg->length - vec_off);
+               to_copy = min_t(unsigned long, iov_iter_count(to),
+                               sg->length - vec_off);
                 to_copy = min_t(unsigned long, to_copy, len - copied);
  
                 rds_stats_add(s_copy_to_user, to_copy);
diff --git a/net/wireless/chan.c b/net/wireless/chan.c

index 85506f1d078920117b1030abc1e50a0ec70fdc6b..7aaf7415dc4cfffda21a0597a3b57fec482d2842 100644 (file)
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -603,7 +603,7 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
  {
         struct ieee80211_sta_ht_cap *ht_cap;
         struct ieee80211_sta_vht_cap *vht_cap;
-       u32 width, control_freq;
+       u32 width, control_freq, cap;
  
         if (WARN_ON(!cfg80211_chandef_valid(chandef)))
                 return false;
@@ -643,7 +643,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
                         return false;
                 break;
         case NL80211_CHAN_WIDTH_80P80:
-               if (!(vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ))
+               cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
+               if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
                         return false;
         case NL80211_CHAN_WIDTH_80:
                 if (!vht_cap->vht_supported)
@@ -654,7 +655,9 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
         case NL80211_CHAN_WIDTH_160:
                 if (!vht_cap->vht_supported)
                         return false;
-               if (!(vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ))
+               cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
+               if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ &&
+                   cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
                         return false;
                 prohibited_flags |= IEEE80211_CHAN_NO_160MHZ;
                 width = 160;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c

index a17d6bc6b22ca7ccda9716af9a3367b66bc47632..7ca4b5133123f4464e289976d152ac1d2fc1769c 100644 (file)
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6002,7 +6002,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
                 }
  
                 /* there was no other matchset, so the RSSI one is alone */
-               if (i == 0)
+               if (i == 0 && n_match_sets)
                         request->match_sets[0].rssi_thold = default_match_rssi;
  
                 request->min_rssi_thold = INT_MAX;
diff --git a/net/wireless/reg.c b/net/wireless/reg.c

index 47be6163381caadf41afab40122d74f4a19448e6..7b8309840d4e1b499cc7efbd73bc07222c0d5def 100644 (file)
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1546,12 +1546,18 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev)
                 if (!wdev->beacon_interval)
                         goto out;
  
+               ret = cfg80211_reg_can_beacon(wiphy,
+                                             &wdev->chandef, wdev->iftype);
+               break;
+       case NL80211_IFTYPE_ADHOC:
+               if (!wdev->ssid_len)
+                       goto out;
+
                 ret = cfg80211_reg_can_beacon(wiphy,
                                               &wdev->chandef, wdev->iftype);
                 break;
         case NL80211_IFTYPE_STATION:
         case NL80211_IFTYPE_P2P_CLIENT:
-       case NL80211_IFTYPE_ADHOC:
                 if (!wdev->current_bss ||
                     !wdev->current_bss->pub.channel)
                         goto out;
@@ -1907,7 +1913,7 @@ static enum reg_request_treatment
  reg_process_hint_driver(struct wiphy *wiphy,
                         struct regulatory_request *driver_request)
  {
-       const struct ieee80211_regdomain *regd;
+       const struct ieee80211_regdomain *regd, *tmp;
         enum reg_request_treatment treatment;
  
         treatment = __reg_process_hint_driver(driver_request);
@@ -1927,7 +1933,10 @@ reg_process_hint_driver(struct wiphy *wiphy,
                         reg_free_request(driver_request);
                         return REG_REQ_IGNORE;
                 }
+
+               tmp = get_wiphy_regdom(wiphy);
                 rcu_assign_pointer(wiphy->regd, regd);
+               rcu_free_regdom(tmp);
         }
  
  
@@ -1986,11 +1995,8 @@ __reg_process_hint_country_ie(struct wiphy *wiphy,
                         return REG_REQ_IGNORE;
                 return REG_REQ_ALREADY_SET;
         }
-       /*
-        * Two consecutive Country IE hints on the same wiphy.
-        * This should be picked up early by the driver/stack
-        */
-       if (WARN_ON(regdom_changes(country_ie_request->alpha2)))
+
+       if (regdom_changes(country_ie_request->alpha2))
                 return REG_REQ_OK;
         return REG_REQ_ALREADY_SET;
  }
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile

index b3831f4ba8457c1814766191b3a25af289eefbf8..4e511221a0c11cd53588fc2ae507848ab94339cb 100644 (file)
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -1,22 +1,23 @@
  TARGETS = breakpoints
  TARGETS += cpu-hotplug
  TARGETS += efivarfs
+TARGETS += exec
+TARGETS += firmware
+TARGETS += ftrace
  TARGETS += kcmp
  TARGETS += memfd
  TARGETS += memory-hotplug
-TARGETS += mqueue
  TARGETS += mount
+TARGETS += mqueue
  TARGETS += net
+TARGETS += powerpc
  TARGETS += ptrace
+TARGETS += size
+TARGETS += sysctl
  TARGETS += timers
-TARGETS += vm
-TARGETS += powerpc
  TARGETS += user
-TARGETS += sysctl
-TARGETS += firmware
-TARGETS += ftrace
-TARGETS += exec
-TARGETS += size
+TARGETS += vm
+#Please keep the TARGETS list alphabetically sorted
  
  TARGETS_HOTPLUG = cpu-hotplug
  TARGETS_HOTPLUG += memory-hotplug
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c

index 22fa819a9b6a7ba06b9c788bfc7b84ba0b07a0ea..1c0772b340d84f69a64f6deb773a40026879c977 100644 (file)
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -61,12 +61,14 @@ static void timer_disarm(struct arch_timer_cpu *timer)
  
  static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
  {
+       int ret;
         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
  
         timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
-       kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
-                           timer->irq->irq,
-                           timer->irq->level);
+       ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+                                 timer->irq->irq,
+                                 timer->irq->level);
+       WARN_ON(ret);
  }
  
  static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
@@ -307,12 +309,24 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
         timer_disarm(timer);
  }
  
-int kvm_timer_init(struct kvm *kvm)
+void kvm_timer_enable(struct kvm *kvm)
  {
-       if (timecounter && wqueue) {
-               kvm->arch.timer.cntvoff = kvm_phys_timer_read();
+       if (kvm->arch.timer.enabled)
+               return;
+
+       /*
+        * There is a potential race here between VCPUs starting for the first
+        * time, which may be enabling the timer multiple times.  That doesn't
+        * hurt though, because we're just setting a variable to the same
+        * variable that it already was.  The important thing is that all
+        * VCPUs have the enabled variable set, before entering the guest, if
+        * the arch timers are enabled.
+        */
+       if (timecounter && wqueue)
                 kvm->arch.timer.enabled = 1;
-       }
+}
  
-       return 0;
+void kvm_timer_init(struct kvm *kvm)
+{
+       kvm->arch.timer.cntvoff = kvm_phys_timer_read();
  }
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c

index aacdb59f30dedcd780ee29e2903d928194a42a5c..03affc7bf453244300552bb5a448546e9730c08a 100644 (file)
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -91,6 +91,7 @@
  #define ACCESS_WRITE_VALUE     (3 << 1)
  #define ACCESS_WRITE_MASK(x)   ((x) & (3 << 1))
  
+static int vgic_init(struct kvm *kvm);
  static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
  static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
  static void vgic_update_state(struct kvm *kvm);
@@ -1607,7 +1608,7 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
         }
  }
  
-static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
+static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
                                   unsigned int irq_num, bool level)
  {
         struct vgic_dist *dist = &kvm->arch.vgic;
@@ -1643,9 +1644,10 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
                         vgic_dist_irq_clear_level(vcpu, irq_num);
                         if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
                                 vgic_dist_irq_clear_pending(vcpu, irq_num);
-               } else {
-                       vgic_dist_irq_clear_pending(vcpu, irq_num);
                 }
+
+               ret = false;
+               goto out;
         }
  
         enabled = vgic_irq_is_enabled(vcpu, irq_num);
@@ -1672,7 +1674,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
  out:
         spin_unlock(&dist->lock);
  
-       return ret;
+       return ret ? cpuid : -EINVAL;
  }
  
  /**
@@ -1692,11 +1694,26 @@ out:
  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
                         bool level)
  {
-       if (likely(vgic_initialized(kvm)) &&
-           vgic_update_irq_pending(kvm, cpuid, irq_num, level))
-               vgic_kick_vcpus(kvm);
+       int ret = 0;
+       int vcpu_id;
  
-       return 0;
+       if (unlikely(!vgic_initialized(kvm))) {
+               mutex_lock(&kvm->lock);
+               ret = vgic_init(kvm);
+               mutex_unlock(&kvm->lock);
+
+               if (ret)
+                       goto out;
+       }
+
+       vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
+       if (vcpu_id >= 0) {
+               /* kick the specified vcpu */
+               kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
+       }
+
+out:
+       return ret;
  }
  
  static irqreturn_t vgic_maintenance_handler(int irq, void *data)
@@ -1726,39 +1743,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
  
         int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
         vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
-       vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL);
+       vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
  
         if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
                 kvm_vgic_vcpu_destroy(vcpu);
                 return -ENOMEM;
         }
  
-       return 0;
-}
-
-/**
- * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
- * @vcpu: pointer to the vcpu struct
- *
- * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
- * this vcpu and enable the VGIC for this VCPU
- */
-static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
-{
-       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-       int i;
-
-       for (i = 0; i < dist->nr_irqs; i++) {
-               if (i < VGIC_NR_PPIS)
-                       vgic_bitmap_set_irq_val(&dist->irq_enabled,
-                                               vcpu->vcpu_id, i, 1);
-               if (i < VGIC_NR_PRIVATE_IRQS)
-                       vgic_bitmap_set_irq_val(&dist->irq_cfg,
-                                               vcpu->vcpu_id, i, VGIC_CFG_EDGE);
-
-               vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
-       }
+       memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);
  
         /*
          * Store the number of LRs per vcpu, so we don't have to go
@@ -1767,7 +1759,7 @@ static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
          */
         vgic_cpu->nr_lr = vgic->nr_lr;
  
-       vgic_enable(vcpu);
+       return 0;
  }
  
  void kvm_vgic_destroy(struct kvm *kvm)
@@ -1798,20 +1790,21 @@ void kvm_vgic_destroy(struct kvm *kvm)
         dist->irq_spi_cpu = NULL;
         dist->irq_spi_target = NULL;
         dist->irq_pending_on_cpu = NULL;
+       dist->nr_cpus = 0;
  }
  
  /*
   * Allocate and initialize the various data structures. Must be called
   * with kvm->lock held!
   */
-static int vgic_init_maps(struct kvm *kvm)
+static int vgic_init(struct kvm *kvm)
  {
         struct vgic_dist *dist = &kvm->arch.vgic;
         struct kvm_vcpu *vcpu;
         int nr_cpus, nr_irqs;
-       int ret, i;
+       int ret, i, vcpu_id;
  
-       if (dist->nr_cpus)      /* Already allocated */
+       if (vgic_initialized(kvm))
                 return 0;
  
         nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
@@ -1859,16 +1852,28 @@ static int vgic_init_maps(struct kvm *kvm)
         if (ret)
                 goto out;
  
-       kvm_for_each_vcpu(i, vcpu, kvm) {
+       for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
+               vgic_set_target_reg(kvm, 0, i);
+
+       kvm_for_each_vcpu(vcpu_id, vcpu, kvm) {
                 ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
                 if (ret) {
                         kvm_err("VGIC: Failed to allocate vcpu memory\n");
                         break;
                 }
-       }
  
-       for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
-               vgic_set_target_reg(kvm, 0, i);
+               for (i = 0; i < dist->nr_irqs; i++) {
+                       if (i < VGIC_NR_PPIS)
+                               vgic_bitmap_set_irq_val(&dist->irq_enabled,
+                                                       vcpu->vcpu_id, i, 1);
+                       if (i < VGIC_NR_PRIVATE_IRQS)
+                               vgic_bitmap_set_irq_val(&dist->irq_cfg,
+                                                       vcpu->vcpu_id, i,
+                                                       VGIC_CFG_EDGE);
+               }
+
+               vgic_enable(vcpu);
+       }
  
  out:
         if (ret)
@@ -1878,25 +1883,23 @@ out:
  }
  
  /**
- * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
+ * kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs
   * @kvm: pointer to the kvm struct
   *
   * Map the virtual CPU interface into the VM before running any VCPUs.  We
   * can't do this at creation time, because user space must first set the
- * virtual CPU interface address in the guest physical address space.  Also
- * initialize the ITARGETSRn regs to 0 on the emulated distributor.
+ * virtual CPU interface address in the guest physical address space.
   */
-int kvm_vgic_init(struct kvm *kvm)
+int kvm_vgic_map_resources(struct kvm *kvm)
  {
-       struct kvm_vcpu *vcpu;
-       int ret = 0, i;
+       int ret = 0;
  
         if (!irqchip_in_kernel(kvm))
                 return 0;
  
         mutex_lock(&kvm->lock);
  
-       if (vgic_initialized(kvm))
+       if (vgic_ready(kvm))
                 goto out;
  
         if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
@@ -1906,7 +1909,11 @@ int kvm_vgic_init(struct kvm *kvm)
                 goto out;
         }
  
-       ret = vgic_init_maps(kvm);
+       /*
+        * Initialize the vgic if this hasn't already been done on demand by
+        * accessing the vgic state from userspace.
+        */
+       ret = vgic_init(kvm);
         if (ret) {
                 kvm_err("Unable to allocate maps\n");
                 goto out;
@@ -1920,9 +1927,6 @@ int kvm_vgic_init(struct kvm *kvm)
                 goto out;
         }
  
-       kvm_for_each_vcpu(i, vcpu, kvm)
-               kvm_vgic_vcpu_init(vcpu);
-
         kvm->arch.vgic.ready = true;
  out:
         if (ret)
@@ -2167,7 +2171,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
  
         mutex_lock(&dev->kvm->lock);
  
-       ret = vgic_init_maps(dev->kvm);
+       ret = vgic_init(dev->kvm);
         if (ret)
                 goto out;
  
@@ -2289,7 +2293,7 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
  
                 mutex_lock(&dev->kvm->lock);
  
-               if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
+               if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
                         ret = -EBUSY;
                 else
                         dev->kvm->arch.vgic.nr_irqs = val;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c

index b0fb390943c6949e6a9635277191ee7177bce0c2..148b2392c762ba763a6ad09b314699c451b463d0 100644 (file)
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -36,9 +36,6 @@
  #include <linux/seqlock.h>
  #include <trace/events/kvm.h>
  
-#ifdef __KVM_HAVE_IOAPIC
-#include "ioapic.h"
-#endif
  #include "iodev.h"
  
  #ifdef CONFIG_HAVE_KVM_IRQFD
@@ -492,9 +489,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
         mutex_lock(&kvm->irq_lock);
         hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
         mutex_unlock(&kvm->irq_lock);
-#ifdef __KVM_HAVE_IOAPIC
         kvm_vcpu_request_scan_ioapic(kvm);
-#endif
  }
  
  void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
@@ -504,9 +499,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
         hlist_del_init_rcu(&kian->link);
         mutex_unlock(&kvm->irq_lock);
         synchronize_srcu(&kvm->irq_srcu);
-#ifdef __KVM_HAVE_IOAPIC
         kvm_vcpu_request_scan_ioapic(kvm);
-#endif
  }
  #endif
  
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c

index 3cee7b167052b58e07c147abb65985865e39e0f9..f5283438ee05e165b50b693c2d864f248d82a90d 100644 (file)
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -124,15 +124,6 @@ int vcpu_load(struct kvm_vcpu *vcpu)
  
         if (mutex_lock_killable(&vcpu->mutex))
                 return -EINTR;
-       if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
-               /* The thread running this VCPU changed. */
-               struct pid *oldpid = vcpu->pid;
-               struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
-               rcu_assign_pointer(vcpu->pid, newpid);
-               if (oldpid)
-                       synchronize_rcu();
-               put_pid(oldpid);
-       }
         cpu = get_cpu();
         preempt_notifier_register(&vcpu->preempt_notifier);
         kvm_arch_vcpu_load(vcpu, cpu);
@@ -468,9 +459,6 @@ static struct kvm *kvm_create_vm(unsigned long type)
         if (r)
                 goto out_err_no_disable;
  
-#ifdef CONFIG_HAVE_KVM_IRQCHIP
-       INIT_HLIST_HEAD(&kvm->mask_notifier_list);
-#endif
  #ifdef CONFIG_HAVE_KVM_IRQFD
         INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
  #endif
@@ -668,48 +656,46 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
         return 0;
  }
  
-static int cmp_memslot(const void *slot1, const void *slot2)
-{
-       struct kvm_memory_slot *s1, *s2;
-
-       s1 = (struct kvm_memory_slot *)slot1;
-       s2 = (struct kvm_memory_slot *)slot2;
-
-       if (s1->npages < s2->npages)
-               return 1;
-       if (s1->npages > s2->npages)
-               return -1;
-
-       return 0;
-}
-
  /*
- * Sort the memslots base on its size, so the larger slots
- * will get better fit.
+ * Insert memslot and re-sort memslots based on their GFN,
+ * so binary search could be used to lookup GFN.
+ * Sorting algorithm takes advantage of having initially
+ * sorted array and known changed memslot position.
   */
-static void sort_memslots(struct kvm_memslots *slots)
-{
-       int i;
-
-       sort(slots->memslots, KVM_MEM_SLOTS_NUM,
-             sizeof(struct kvm_memory_slot), cmp_memslot, NULL);
-
-       for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
-               slots->id_to_index[slots->memslots[i].id] = i;
-}
-
  static void update_memslots(struct kvm_memslots *slots,
                             struct kvm_memory_slot *new)
  {
-       if (new) {
-               int id = new->id;
-               struct kvm_memory_slot *old = id_to_memslot(slots, id);
-               unsigned long npages = old->npages;
+       int id = new->id;
+       int i = slots->id_to_index[id];
+       struct kvm_memory_slot *mslots = slots->memslots;
  
-               *old = *new;
-               if (new->npages != npages)
-                       sort_memslots(slots);
+       WARN_ON(mslots[i].id != id);
+       if (!new->npages) {
+               new->base_gfn = 0;
+               if (mslots[i].npages)
+                       slots->used_slots--;
+       } else {
+               if (!mslots[i].npages)
+                       slots->used_slots++;
         }
+
+       while (i < KVM_MEM_SLOTS_NUM - 1 &&
+              new->base_gfn <= mslots[i + 1].base_gfn) {
+               if (!mslots[i + 1].npages)
+                       break;
+               mslots[i] = mslots[i + 1];
+               slots->id_to_index[mslots[i].id] = i;
+               i++;
+       }
+       while (i > 0 &&
+              new->base_gfn > mslots[i - 1].base_gfn) {
+               mslots[i] = mslots[i - 1];
+               slots->id_to_index[mslots[i].id] = i;
+               i--;
+       }
+
+       mslots[i] = *new;
+       slots->id_to_index[mslots[i].id] = i;
  }
  
  static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
@@ -727,7 +713,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
  }
  
  static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
-               struct kvm_memslots *slots, struct kvm_memory_slot *new)
+               struct kvm_memslots *slots)
  {
         struct kvm_memslots *old_memslots = kvm->memslots;
  
@@ -738,7 +724,6 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
         WARN_ON(old_memslots->generation & 1);
         slots->generation = old_memslots->generation + 1;
  
-       update_memslots(slots, new);
         rcu_assign_pointer(kvm->memslots, slots);
         synchronize_srcu_expedited(&kvm->srcu);
  
@@ -760,7 +745,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
   *
   * Discontiguous memory is allowed, mostly for framebuffers.
   *
- * Must be called holding mmap_sem for write.
+ * Must be called holding kvm->slots_lock for write.
   */
  int __kvm_set_memory_region(struct kvm *kvm,
                             struct kvm_userspace_memory_region *mem)
@@ -866,15 +851,16 @@ int __kvm_set_memory_region(struct kvm *kvm,
                         goto out_free;
         }
  
+       slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
+                       GFP_KERNEL);
+       if (!slots)
+               goto out_free;
+
         if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
-               slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
-                               GFP_KERNEL);
-               if (!slots)
-                       goto out_free;
                 slot = id_to_memslot(slots, mem->slot);
                 slot->flags |= KVM_MEMSLOT_INVALID;
  
-               old_memslots = install_new_memslots(kvm, slots, NULL);
+               old_memslots = install_new_memslots(kvm, slots);
  
                 /* slot was deleted or moved, clear iommu mapping */
                 kvm_iommu_unmap_pages(kvm, &old);
@@ -886,6 +872,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
                  *      - kvm_is_visible_gfn (mmu_check_roots)
                  */
                 kvm_arch_flush_shadow_memslot(kvm, slot);
+
+               /*
+                * We can re-use the old_memslots from above, the only difference
+                * from the currently installed memslots is the invalid flag.  This
+                * will get overwritten by update_memslots anyway.
+                */
                 slots = old_memslots;
         }
  
@@ -893,26 +885,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
         if (r)
                 goto out_slots;
  
-       r = -ENOMEM;
-       /*
-        * We can re-use the old_memslots from above, the only difference
-        * from the currently installed memslots is the invalid flag.  This
-        * will get overwritten by update_memslots anyway.
-        */
-       if (!slots) {
-               slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
-                               GFP_KERNEL);
-               if (!slots)
-                       goto out_free;
-       }
-
         /* actual memory is freed via old in kvm_free_physmem_slot below */
         if (change == KVM_MR_DELETE) {
                 new.dirty_bitmap = NULL;
                 memset(&new.arch, 0, sizeof(new.arch));
         }
  
-       old_memslots = install_new_memslots(kvm, slots, &new);
+       update_memslots(slots, &new);
+       old_memslots = install_new_memslots(kvm, slots);
  
         kvm_arch_commit_memory_region(kvm, mem, &old, change);
  
@@ -1799,10 +1779,6 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target)
         rcu_read_unlock();
         if (!task)
                 return ret;
-       if (task->flags & PF_VCPU) {
-               put_task_struct(task);
-               return ret;
-       }
         ret = yield_to(task, 1);
         put_task_struct(task);
  
@@ -2065,6 +2041,15 @@ static long kvm_vcpu_ioctl(struct file *filp,
                 r = -EINVAL;
                 if (arg)
                         goto out;
+               if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
+                       /* The thread running this VCPU changed. */
+                       struct pid *oldpid = vcpu->pid;
+                       struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
+                       rcu_assign_pointer(vcpu->pid, newpid);
+                       if (oldpid)
+                               synchronize_rcu();
+                       put_pid(oldpid);
+               }
                 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
                 trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
                 break;
@@ -2599,8 +2584,6 @@ static long kvm_vm_ioctl(struct file *filp,
                 break;
         default:
                 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
-               if (r == -ENOTTY)
-                       r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
         }
  out:
         return r;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 19 Dec 2014 04:21:32 +0000 (20:21 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 19 Dec 2014 04:21:32 +0000 (20:21 -0800)
.mailmap		patch \| blob \| history
Documentation/DocBook/media/v4l/compat.xml		patch \| blob \| history
Documentation/DocBook/media/v4l/pixfmt.xml		patch \| blob \| history
Documentation/DocBook/media/v4l/subdev-formats.xml		patch \| blob \| history
Documentation/DocBook/media/v4l/v4l2.xml		patch \| blob \| history
Documentation/devicetree/bindings/media/rcar_vin.txt		patch \| blob \| history
Documentation/ia64/kvm.txt	[deleted file]	patch \| blob \| history
Documentation/networking/fib_trie.txt		patch \| blob \| history
Documentation/video4linux/vivid.txt		patch \| blob \| history
Documentation/virtual/kvm/api.txt		patch \| blob \| history
Documentation/virtual/kvm/devices/vm.txt		patch \| blob \| history
Documentation/virtual/kvm/msr.txt		patch \| blob \| history
MAINTAINERS		patch \| blob \| history
arch/arc/Kconfig		patch \| blob \| history
arch/arc/Makefile		patch \| blob \| history
arch/arc/boot/dts/nsimosci.dts		patch \| blob \| history
arch/arc/configs/fpga_noramfs_defconfig	[deleted file]	patch \| blob \| history
arch/arc/configs/nsim_700_defconfig	[moved from arch/arc/configs/fpga_defconfig with 100% similarity]	patch \| blob \| history
arch/arc/include/asm/irqflags.h		patch \| blob \| history
arch/arc/kernel/smp.c		patch \| blob \| history
arch/arm/include/asm/kvm_emulate.h		patch \| blob \| history
arch/arm/include/asm/kvm_host.h		patch \| blob \| history
arch/arm/include/asm/kvm_mmu.h		patch \| blob \| history
arch/arm/kvm/arm.c		patch \| blob \| history
arch/arm/kvm/guest.c		patch \| blob \| history
arch/arm/kvm/mmio.c		patch \| blob \| history
arch/arm/kvm/mmu.c		patch \| blob \| history
arch/arm/kvm/psci.c		patch \| blob \| history
arch/arm64/include/asm/kvm_emulate.h		patch \| blob \| history
arch/arm64/include/asm/kvm_host.h		patch \| blob \| history
arch/arm64/include/asm/kvm_mmu.h		patch \| blob \| history
arch/arm64/kvm/guest.c		patch \| blob \| history
arch/ia64/Kconfig		patch \| blob \| history
arch/ia64/Makefile		patch \| blob \| history
arch/ia64/include/asm/kvm_host.h	[deleted file]	patch \| blob \| history
arch/ia64/include/asm/pvclock-abi.h	[deleted file]	patch \| blob \| history
arch/ia64/include/uapi/asm/kvm.h	[deleted file]	patch \| blob \| history
arch/ia64/kvm/Kconfig	[deleted file]	patch \| blob \| history
arch/ia64/kvm/Makefile	[deleted file]	patch \| blob \| history
arch/ia64/kvm/asm-offsets.c	[deleted file]	patch \| blob \| history
arch/ia64/kvm/irq.h	[deleted file]	patch \| blob \| history
arch/ia64/kvm/kvm-ia64.c	[deleted file]	patch \| blob \| history
arch/ia64/kvm/kvm_fw.c	[deleted file]	patch \| blob \| history
arch/ia64/kvm/kvm_lib.c	[deleted file]	patch \| blob \| history
arch/ia64/kvm/kvm_minstate.h	[deleted file]	patch \| blob \| history
arch/ia64/kvm/lapic.h	[deleted file]	patch \| blob \| history
arch/ia64/kvm/memcpy.S	[deleted file]	patch \| blob \| history
arch/ia64/kvm/memset.S	[deleted file]	patch \| blob \| history
arch/ia64/kvm/misc.h	[deleted file]	patch \| blob \| history
arch/ia64/kvm/mmio.c	[deleted file]	patch \| blob \| history
arch/ia64/kvm/optvfault.S	[deleted file]	patch \| blob \| history
arch/ia64/kvm/process.c	[deleted file]	patch \| blob \| history
arch/ia64/kvm/trampoline.S	[deleted file]	patch \| blob \| history
arch/ia64/kvm/vcpu.c	[deleted file]	patch \| blob \| history
arch/ia64/kvm/vcpu.h	[deleted file]	patch \| blob \| history
arch/ia64/kvm/vmm.c	[deleted file]	patch \| blob \| history
arch/ia64/kvm/vmm_ivt.S	[deleted file]	patch \| blob \| history
arch/ia64/kvm/vti.h	[deleted file]	patch \| blob \| history
arch/ia64/kvm/vtlb.c	[deleted file]	patch \| blob \| history
arch/nios2/Makefile		patch \| blob \| history
arch/nios2/include/asm/io.h		patch \| blob \| history
arch/nios2/include/asm/uaccess.h		patch \| blob \| history
arch/powerpc/include/asm/kvm_book3s.h		patch \| blob \| history
arch/powerpc/include/asm/kvm_book3s_64.h		patch \| blob \| history
arch/powerpc/include/asm/kvm_host.h		patch \| blob \| history
arch/powerpc/include/asm/kvm_ppc.h		patch \| blob \| history
arch/powerpc/kernel/asm-offsets.c		patch \| blob \| history
arch/powerpc/kvm/Kconfig		patch \| blob \| history
arch/powerpc/kvm/book3s.c		patch \| blob \| history
arch/powerpc/kvm/book3s_32_mmu.c		patch \| blob \| history
arch/powerpc/kvm/book3s_64_mmu_hv.c		patch \| blob \| history
arch/powerpc/kvm/book3s_hv.c		patch \| blob \| history
arch/powerpc/kvm/book3s_hv_builtin.c		patch \| blob \| history
arch/powerpc/kvm/book3s_hv_interrupts.S		patch \| blob \| history
arch/powerpc/kvm/book3s_hv_ras.c		patch \| blob \| history
arch/powerpc/kvm/book3s_hv_rm_mmu.c		patch \| blob \| history
arch/powerpc/kvm/book3s_hv_rm_xics.c		patch \| blob \| history
arch/powerpc/kvm/book3s_hv_rmhandlers.S		patch \| blob \| history
arch/powerpc/kvm/book3s_paired_singles.c		patch \| blob \| history
arch/powerpc/kvm/book3s_pr.c		patch \| blob \| history
arch/powerpc/kvm/book3s_xics.c		patch \| blob \| history
arch/powerpc/kvm/book3s_xics.h		patch \| blob \| history
arch/powerpc/kvm/e500.c		patch \| blob \| history
arch/powerpc/kvm/powerpc.c		patch \| blob \| history
arch/powerpc/kvm/trace_book3s.h	[new file with mode: 0644]	patch \| blob
arch/powerpc/kvm/trace_booke.h		patch \| blob \| history
arch/powerpc/kvm/trace_hv.h	[new file with mode: 0644]	patch \| blob
arch/powerpc/kvm/trace_pr.h		patch \| blob \| history
arch/s390/include/asm/kvm_host.h		patch \| blob \| history
arch/s390/include/asm/pgalloc.h		patch \| blob \| history
arch/s390/include/asm/sigp.h		patch \| blob \| history
arch/s390/kvm/gaccess.c		patch \| blob \| history
arch/s390/kvm/intercept.c		patch \| blob \| history
arch/s390/kvm/interrupt.c		patch \| blob \| history
arch/s390/kvm/kvm-s390.c		patch \| blob \| history
arch/s390/kvm/kvm-s390.h		patch \| blob \| history
arch/s390/kvm/priv.c		patch \| blob \| history
arch/s390/kvm/sigp.c		patch \| blob \| history
arch/s390/mm/pgtable.c		patch \| blob \| history
arch/sparc/mm/srmmu.c		patch \| blob \| history
arch/x86/include/asm/kvm_host.h		patch \| blob \| history
arch/x86/include/asm/vmx.h		patch \| blob \| history
arch/x86/include/asm/xsave.h		patch \| blob \| history
arch/x86/include/uapi/asm/vmx.h		patch \| blob \| history
arch/x86/kernel/kvm.c		patch \| blob \| history
arch/x86/kernel/kvmclock.c		patch \| blob \| history
arch/x86/kernel/xsave.c		patch \| blob \| history
arch/x86/kvm/Makefile		patch \| blob \| history
arch/x86/kvm/assigned-dev.c	[moved from virt/kvm/assigned-dev.c with 97% similarity]	patch \| blob \| history
arch/x86/kvm/assigned-dev.h	[new file with mode: 0644]	patch \| blob
arch/x86/kvm/cpuid.c		patch \| blob \| history
arch/x86/kvm/emulate.c		patch \| blob \| history
arch/x86/kvm/ioapic.c	[moved from virt/kvm/ioapic.c with 98% similarity]	patch \| blob \| history
arch/x86/kvm/ioapic.h	[moved from virt/kvm/ioapic.h with 89% similarity]	patch \| blob \| history
arch/x86/kvm/iommu.c	[moved from virt/kvm/iommu.c with 96% similarity]	patch \| blob \| history
arch/x86/kvm/irq_comm.c	[moved from virt/kvm/irq_comm.c with 89% similarity]	patch \| blob \| history
arch/x86/kvm/lapic.c		patch \| blob \| history
arch/x86/kvm/lapic.h		patch \| blob \| history
arch/x86/kvm/mmu.c		patch \| blob \| history
arch/x86/kvm/svm.c		patch \| blob \| history
arch/x86/kvm/trace.h		patch \| blob \| history
arch/x86/kvm/vmx.c		patch \| blob \| history
arch/x86/kvm/x86.c		patch \| blob \| history
arch/x86/kvm/x86.h		patch \| blob \| history
drivers/bluetooth/ath3k.c		patch \| blob \| history
drivers/bluetooth/btusb.c		patch \| blob \| history
drivers/infiniband/Kconfig		patch \| blob \| history
drivers/infiniband/core/Makefile		patch \| blob \| history
drivers/infiniband/core/addr.c		patch \| blob \| history
drivers/infiniband/core/multicast.c		patch \| blob \| history
drivers/infiniband/core/umem.c		patch \| blob \| history
drivers/infiniband/core/umem_odp.c	[new file with mode: 0644]	patch \| blob
drivers/infiniband/core/umem_rbtree.c	[new file with mode: 0644]	patch \| blob
drivers/infiniband/core/uverbs.h		patch \| blob \| history
drivers/infiniband/core/uverbs_cmd.c		patch \| blob \| history
drivers/infiniband/core/uverbs_main.c		patch \| blob \| history
drivers/infiniband/core/verbs.c		patch \| blob \| history
drivers/infiniband/hw/amso1100/c2_provider.c		patch \| blob \| history
drivers/infiniband/hw/cxgb4/cm.c		patch \| blob \| history
drivers/infiniband/hw/cxgb4/device.c		patch \| blob \| history
drivers/infiniband/hw/cxgb4/mem.c		patch \| blob \| history
drivers/infiniband/hw/cxgb4/qp.c		patch \| blob \| history
drivers/infiniband/hw/ehca/ehca_mrmw.c		patch \| blob \| history
drivers/infiniband/hw/ipath/ipath_mr.c		patch \| blob \| history
drivers/infiniband/hw/mlx4/mr.c		patch \| blob \| history
drivers/infiniband/hw/mlx5/Makefile		patch \| blob \| history
drivers/infiniband/hw/mlx5/main.c		patch \| blob \| history
drivers/infiniband/hw/mlx5/mem.c		patch \| blob \| history
drivers/infiniband/hw/mlx5/mlx5_ib.h		patch \| blob \| history
drivers/infiniband/hw/mlx5/mr.c		patch \| blob \| history
drivers/infiniband/hw/mlx5/odp.c	[new file with mode: 0644]	patch \| blob
drivers/infiniband/hw/mlx5/qp.c		patch \| blob \| history
drivers/infiniband/hw/nes/nes_verbs.c		patch \| blob \| history
drivers/infiniband/hw/ocrdma/ocrdma_ah.c		patch \| blob \| history
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c		patch \| blob \| history
drivers/infiniband/hw/qib/qib_mr.c		patch \| blob \| history
drivers/infiniband/ulp/ipoib/ipoib.h		patch \| blob \| history
drivers/infiniband/ulp/ipoib/ipoib_cm.c		patch \| blob \| history
drivers/infiniband/ulp/ipoib/ipoib_ib.c		patch \| blob \| history
drivers/infiniband/ulp/ipoib/ipoib_main.c		patch \| blob \| history
drivers/infiniband/ulp/ipoib/ipoib_multicast.c		patch \| blob \| history
drivers/infiniband/ulp/ipoib/ipoib_verbs.c		patch \| blob \| history
drivers/infiniband/ulp/iser/iscsi_iser.c		patch \| blob \| history
drivers/infiniband/ulp/iser/iscsi_iser.h		patch \| blob \| history
drivers/infiniband/ulp/iser/iser_initiator.c		patch \| blob \| history
drivers/infiniband/ulp/iser/iser_memory.c		patch \| blob \| history
drivers/infiniband/ulp/iser/iser_verbs.c		patch \| blob \| history
drivers/infiniband/ulp/srp/ib_srp.c		patch \| blob \| history
drivers/media/Kconfig		patch \| blob \| history
drivers/media/Makefile		patch \| blob \| history
drivers/media/i2c/Kconfig		patch \| blob \| history
drivers/media/i2c/Makefile		patch \| blob \| history
drivers/media/pci/cx88/cx88-blackbird.c		patch \| blob \| history
drivers/media/pci/cx88/cx88-dvb.c		patch \| blob \| history
drivers/media/pci/cx88/cx88-mpeg.c		patch \| blob \| history
drivers/media/pci/cx88/cx88-vbi.c		patch \| blob \| history
drivers/media/pci/cx88/cx88-video.c		patch \| blob \| history
drivers/media/pci/cx88/cx88.h		patch \| blob \| history
drivers/media/platform/Kconfig		patch \| blob \| history
drivers/media/platform/Makefile		patch \| blob \| history
drivers/media/platform/soc_camera/rcar_vin.c		patch \| blob \| history
drivers/media/platform/vivid/vivid-vid-out.c		patch \| blob \| history
drivers/media/usb/Kconfig		patch \| blob \| history
drivers/media/usb/Makefile		patch \| blob \| history
drivers/media/v4l2-core/v4l2-ioctl.c		patch \| blob \| history
drivers/net/dsa/Kconfig		patch \| blob \| history
drivers/net/ethernet/broadcom/Kconfig		patch \| blob \| history
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c		patch \| blob \| history
drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h		patch \| blob \| history
drivers/net/ethernet/cadence/macb.c		patch \| blob \| history
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c		patch \| blob \| history
drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h		patch \| blob \| history
drivers/net/ethernet/cirrus/cs89x0.c		patch \| blob \| history
drivers/net/ethernet/emulex/benet/be_main.c		patch \| blob \| history
drivers/net/ethernet/freescale/fec_main.c		patch \| blob \| history
drivers/net/ethernet/intel/i40e/i40e_main.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx4/en_netdev.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx4/fw.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx4/fw.h		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx4/main.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/eq.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/fw.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/qp.c		patch \| blob \| history
drivers/net/ethernet/smsc/Kconfig		patch \| blob \| history
drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c		patch \| blob \| history
drivers/net/macvtap.c		patch \| blob \| history
drivers/net/phy/Kconfig		patch \| blob \| history
drivers/net/phy/Makefile		patch \| blob \| history
drivers/net/phy/fixed_phy.c	[moved from drivers/net/phy/fixed.c with 100% similarity]	patch \| blob \| history
drivers/net/tun.c		patch \| blob \| history
drivers/net/wireless/brcm80211/brcmsmac/main.c		patch \| blob \| history
drivers/net/wireless/hostap/hostap_cs.c		patch \| blob \| history
drivers/net/wireless/rtlwifi/rtl8192ce/hw.c		patch \| blob \| history
drivers/net/wireless/rtlwifi/rtl8192cu/hw.c		patch \| blob \| history
drivers/net/wireless/rtlwifi/rtl8821ae/dm.c		patch \| blob \| history
drivers/net/wireless/zd1211rw/zd_chip.c		patch \| blob \| history
drivers/net/xen-netback/common.h		patch \| blob \| history
drivers/net/xen-netback/interface.c		patch \| blob \| history
drivers/net/xen-netback/netback.c		patch \| blob \| history
drivers/net/xen-netback/xenbus.c		patch \| blob \| history
drivers/net/xen-netfront.c		patch \| blob \| history
drivers/staging/media/Kconfig		patch \| blob \| history
drivers/staging/media/Makefile		patch \| blob \| history
drivers/staging/media/parport/Kconfig	[moved from drivers/media/parport/Kconfig with 65% similarity]	patch \| blob \| history
drivers/staging/media/parport/Makefile	[moved from drivers/media/parport/Makefile with 100% similarity]	patch \| blob \| history
drivers/staging/media/parport/bw-qcam.c	[moved from drivers/media/parport/bw-qcam.c with 100% similarity]	patch \| blob \| history
drivers/staging/media/parport/c-qcam.c	[moved from drivers/media/parport/c-qcam.c with 100% similarity]	patch \| blob \| history
drivers/staging/media/parport/pms.c	[moved from drivers/media/parport/pms.c with 100% similarity]	patch \| blob \| history
drivers/staging/media/parport/w9966.c	[moved from drivers/media/parport/w9966.c with 100% similarity]	patch \| blob \| history
drivers/staging/media/tlg2300/Kconfig	[moved from drivers/media/usb/tlg2300/Kconfig with 63% similarity]	patch \| blob \| history
drivers/staging/media/tlg2300/Makefile	[moved from drivers/media/usb/tlg2300/Makefile with 100% similarity]	patch \| blob \| history
drivers/staging/media/tlg2300/pd-alsa.c	[moved from drivers/media/usb/tlg2300/pd-alsa.c with 100% similarity]	patch \| blob \| history
drivers/staging/media/tlg2300/pd-common.h	[moved from drivers/media/usb/tlg2300/pd-common.h with 100% similarity]	patch \| blob \| history
drivers/staging/media/tlg2300/pd-dvb.c	[moved from drivers/media/usb/tlg2300/pd-dvb.c with 100% similarity]	patch \| blob \| history
drivers/staging/media/tlg2300/pd-main.c	[moved from drivers/media/usb/tlg2300/pd-main.c with 100% similarity]	patch \| blob \| history
drivers/staging/media/tlg2300/pd-radio.c	[moved from drivers/media/usb/tlg2300/pd-radio.c with 100% similarity]	patch \| blob \| history
drivers/staging/media/tlg2300/pd-video.c	[moved from drivers/media/usb/tlg2300/pd-video.c with 100% similarity]	patch \| blob \| history
drivers/staging/media/tlg2300/vendorcmds.h	[moved from drivers/media/usb/tlg2300/vendorcmds.h with 100% similarity]	patch \| blob \| history
drivers/staging/media/vino/Kconfig	[new file with mode: 0644]	patch \| blob
drivers/staging/media/vino/Makefile	[new file with mode: 0644]	patch \| blob
drivers/staging/media/vino/indycam.c	[moved from drivers/media/platform/indycam.c with 100% similarity]	patch \| blob \| history
drivers/staging/media/vino/indycam.h	[moved from drivers/media/platform/indycam.h with 100% similarity]	patch \| blob \| history
drivers/staging/media/vino/saa7191.c	[moved from drivers/media/i2c/saa7191.c with 100% similarity]	patch \| blob \| history
drivers/staging/media/vino/saa7191.h	[moved from drivers/media/i2c/saa7191.h with 100% similarity]	patch \| blob \| history
drivers/staging/media/vino/vino.c	[moved from drivers/media/platform/vino.c with 100% similarity]	patch \| blob \| history
drivers/staging/media/vino/vino.h	[moved from drivers/media/platform/vino.h with 100% similarity]	patch \| blob \| history
fs/hfsplus/catalog.c		patch \| blob \| history
fs/hfsplus/dir.c		patch \| blob \| history
fs/hfsplus/hfsplus_fs.h		patch \| blob \| history
fs/hfsplus/super.c		patch \| blob \| history
fs/namespace.c		patch \| blob \| history
fs/ocfs2/alloc.c		patch \| blob \| history
fs/ocfs2/alloc.h		patch \| blob \| history
fs/ocfs2/aops.c		patch \| blob \| history
fs/ocfs2/dir.c		patch \| blob \| history
fs/ocfs2/dlm/dlmmaster.c		patch \| blob \| history
fs/ocfs2/file.c		patch \| blob \| history
fs/proc/meminfo.c		patch \| blob \| history
include/kvm/arm_arch_timer.h		patch \| blob \| history
include/kvm/arm_vgic.h		patch \| blob \| history
include/linux/cma.h		patch \| blob \| history
include/linux/kvm_host.h		patch \| blob \| history
include/linux/kvm_types.h		patch \| blob \| history
include/linux/mlx5/device.h		patch \| blob \| history
include/linux/mlx5/driver.h		patch \| blob \| history
include/linux/mlx5/qp.h		patch \| blob \| history
include/linux/phy_fixed.h		patch \| blob \| history
include/rdma/ib_umem.h		patch \| blob \| history
include/rdma/ib_umem_odp.h	[new file with mode: 0644]	patch \| blob
include/rdma/ib_verbs.h		patch \| blob \| history
include/uapi/linux/if_tun.h		patch \| blob \| history
include/uapi/linux/kvm.h		patch \| blob \| history
include/uapi/linux/v4l2-mediabus.h		patch \| blob \| history
include/uapi/rdma/ib_user_verbs.h		patch \| blob \| history
lib/show_mem.c		patch \| blob \| history
mm/cma.c		patch \| blob \| history
mm/memory.c		patch \| blob \| history
mm/mempolicy.c		patch \| blob \| history
mm/page_alloc.c		patch \| blob \| history
mm/zsmalloc.c		patch \| blob \| history
net/bluetooth/hci_conn.c		patch \| blob \| history
net/bluetooth/hci_core.c		patch \| blob \| history
net/bluetooth/hci_event.c		patch \| blob \| history
net/bluetooth/l2cap_core.c		patch \| blob \| history
net/bluetooth/mgmt.c		patch \| blob \| history
net/bluetooth/smp.c		patch \| blob \| history
net/core/rtnetlink.c		patch \| blob \| history
net/ipv4/geneve.c		patch \| blob \| history
net/ipv4/ip_gre.c		patch \| blob \| history
net/ipv4/ip_tunnel.c		patch \| blob \| history
net/mac80211/chan.c		patch \| blob \| history
net/mac80211/key.c		patch \| blob \| history
net/mac80211/mlme.c		patch \| blob \| history
net/mac80211/rx.c		patch \| blob \| history
net/netlink/af_netlink.c		patch \| blob \| history
net/rds/message.c		patch \| blob \| history
net/wireless/chan.c		patch \| blob \| history
net/wireless/nl80211.c		patch \| blob \| history
net/wireless/reg.c		patch \| blob \| history
tools/testing/selftests/Makefile		patch \| blob \| history
virt/kvm/arm/arch_timer.c		patch \| blob \| history
virt/kvm/arm/vgic.c		patch \| blob \| history
virt/kvm/eventfd.c		patch \| blob \| history
virt/kvm/kvm_main.c		patch \| blob \| history