DEFINE_GUEST_HANDLE(uint64_t);
DEFINE_GUEST_HANDLE(uint32_t);
DEFINE_GUEST_HANDLE(xen_pfn_t);
+DEFINE_GUEST_HANDLE(xen_ulong_t);
/* Maximum number of virtual CPUs in multi-processor guests. */
#define MAX_VIRT_CPUS 1
#include <xen/features.h>
#include <xen/platform_pci.h>
#include <xen/xenbus.h>
+#include <xen/page.h>
+#include <xen/xen-ops.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include <linux/interrupt.h>
#include <linux/of_irq.h>
#include <linux/of_address.h>
+#include <linux/mm.h>
+
struct start_info _xen_start_info;
struct start_info *xen_start_info = &_xen_start_info;
EXPORT_SYMBOL_GPL(xen_start_info);
DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
+/* These are unused until we support booting "pre-ballooned" */
+unsigned long xen_released_pages;
+struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
+
/* TODO: to be removed */
__read_mostly int xen_have_vector_callback;
EXPORT_SYMBOL_GPL(xen_have_vector_callback);
static __read_mostly int xen_events_irq = -1;
+/* map fgmfn of domid to lpfn in the current domain */
+static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn,
+ unsigned int domid)
+{
+ int rc;
+ struct xen_add_to_physmap_range xatp = {
+ .domid = DOMID_SELF,
+ .foreign_domid = domid,
+ .size = 1,
+ .space = XENMAPSPACE_gmfn_foreign,
+ };
+ xen_ulong_t idx = fgmfn;
+ xen_pfn_t gpfn = lpfn;
+
+ set_xen_guest_handle(xatp.idxs, &idx);
+ set_xen_guest_handle(xatp.gpfns, &gpfn);
+
+ rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp);
+ if (rc) {
+ pr_warn("Failed to map pfn to mfn rc:%d pfn:%lx mfn:%lx\n",
+ rc, lpfn, fgmfn);
+ return 1;
+ }
+ return 0;
+}
+
+struct remap_data {
+ xen_pfn_t fgmfn; /* foreign domain's gmfn */
+ pgprot_t prot;
+ domid_t domid;
+ struct vm_area_struct *vma;
+ int index;
+ struct page **pages;
+ struct xen_remap_mfn_info *info;
+};
+
+static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
+ void *data)
+{
+ struct remap_data *info = data;
+ struct page *page = info->pages[info->index++];
+ unsigned long pfn = page_to_pfn(page);
+ pte_t pte = pfn_pte(pfn, info->prot);
+
+ if (map_foreign_page(pfn, info->fgmfn, info->domid))
+ return -EFAULT;
+ set_pte_at(info->vma->vm_mm, addr, ptep, pte);
+
+ return 0;
+}
+
int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long addr,
- unsigned long mfn, int nr,
- pgprot_t prot, unsigned domid)
+ xen_pfn_t mfn, int nr,
+ pgprot_t prot, unsigned domid,
+ struct page **pages)
{
- return -ENOSYS;
+ int err;
+ struct remap_data data;
+
+ /* TBD: Batching; the current sole caller only maps one page at a time */
+ if (nr > 1)
+ return -EINVAL;
+
+ data.fgmfn = mfn;
+ data.prot = prot;
+ data.domid = domid;
+ data.vma = vma;
+ data.index = 0;
+ data.pages = pages;
+ err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
+ remap_pte_fn, &data);
+ return err;
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
+int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
+ int nr, struct page **pages)
+{
+ int i;
+
+ for (i = 0; i < nr; i++) {
+ struct xen_remove_from_physmap xrp;
+ unsigned long rc, pfn;
+
+ pfn = page_to_pfn(pages[i]);
+
+ xrp.domid = DOMID_SELF;
+ xrp.gpfn = pfn;
+ rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
+ if (rc) {
+ pr_warn("Failed to unmap pfn:%lx rc:%ld\n",
+ pfn, rc);
+ return rc;
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
+
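[Not part of the patch: as a rough illustration of how these two ARM helpers are meant to pair up, here is a minimal, hypothetical caller sketch. The vma, foreign gmfn and domid are assumed to come from a caller such as privcmd, and alloc_xenballooned_pages()/free_xenballooned_pages() are the balloon interfaces used elsewhere in this series.]

/* Hypothetical sketch only: back one page of a userspace VMA with a
 * foreign frame, then tear the mapping down again. */
static int example_map_one_foreign_page(struct vm_area_struct *vma,
					xen_pfn_t fgmfn, unsigned int domid)
{
	struct page *page;
	int rc;

	rc = alloc_xenballooned_pages(1, &page, false);
	if (rc)
		return rc;

	rc = xen_remap_domain_mfn_range(vma, vma->vm_start, fgmfn, 1,
					vma->vm_page_prot, domid, &page);
	if (rc) {
		free_xenballooned_pages(1, &page);
		return rc;
	}

	/* ... the ballooned page is now backed by the foreign frame ... */

	xen_unmap_domain_mfn_range(vma, 1, &page);
	free_xenballooned_pages(1, &page);
	return 0;
}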
/*
* see Documentation/devicetree/bindings/arm/xen.txt for the
* documentation of the Xen Device Tree format.
return 0;
}
postcore_initcall(xen_init_events);
-
-/* XXX: only until balloon is properly working */
-int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
-{
- *pages = alloc_pages(highmem ? GFP_HIGHUSER : GFP_KERNEL,
- get_order(nr_pages));
- if (*pages == NULL)
- return -ENOMEM;
- return 0;
-}
-EXPORT_SYMBOL_GPL(alloc_xenballooned_pages);
-
-void free_xenballooned_pages(int nr_pages, struct page **pages)
-{
- kfree(*pages);
- *pages = NULL;
-}
-EXPORT_SYMBOL_GPL(free_xenballooned_pages);
return _hypercall4(int, update_va_mapping, va,
new_val.pte, new_val.pte >> 32, flags);
}
+extern int __must_check xen_HYPERVISOR_event_channel_op_compat(int, void *);
static inline int
HYPERVISOR_event_channel_op(int cmd, void *arg)
{
int rc = _hypercall2(int, event_channel_op, cmd, arg);
- if (unlikely(rc == -ENOSYS)) {
- struct evtchn_op op;
- op.cmd = cmd;
- memcpy(&op.u, arg, sizeof(op.u));
- rc = _hypercall1(int, event_channel_op_compat, &op);
- memcpy(arg, &op.u, sizeof(op.u));
- }
+ if (unlikely(rc == -ENOSYS))
+ rc = xen_HYPERVISOR_event_channel_op_compat(cmd, arg);
return rc;
}
return _hypercall3(int, console_io, cmd, count, str);
}
+extern int __must_check xen_HYPERVISOR_physdev_op_compat(int, void *);
+
static inline int
HYPERVISOR_physdev_op(int cmd, void *arg)
{
int rc = _hypercall2(int, physdev_op, cmd, arg);
- if (unlikely(rc == -ENOSYS)) {
- struct physdev_op op;
- op.cmd = cmd;
- memcpy(&op.u, arg, sizeof(op.u));
- rc = _hypercall1(int, physdev_op_compat, &op);
- memcpy(arg, &op.u, sizeof(op.u));
- }
+ if (unlikely(rc == -ENOSYS))
+ rc = xen_HYPERVISOR_physdev_op_compat(cmd, arg);
return rc;
}
DEFINE_GUEST_HANDLE(uint64_t);
DEFINE_GUEST_HANDLE(uint32_t);
DEFINE_GUEST_HANDLE(xen_pfn_t);
+DEFINE_GUEST_HANDLE(xen_ulong_t);
#endif
#ifndef HYPERVISOR_VIRT_START
struct cpu_user_regs user_regs; /* User-level CPU registers */
struct trap_info trap_ctxt[256]; /* Virtual IDT */
unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
- unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
+ union {
+ struct {
+ /* PV: GDT (machine frames, # ents).*/
+ unsigned long gdt_frames[16], gdt_ents;
+ } pv;
+ struct {
+ /* PVH: GDTR addr and size */
+ unsigned long gdtaddr, gdtsz;
+ } pvh;
+ } u;
unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
/* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */
static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
{
unsigned long pfn = mfn_to_pfn(mfn);
+
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return mfn;
if (get_phys_to_machine(pfn) != mfn)
return -1; /* force !pfn_valid() */
return pfn;
bool "Xen guest support"
select PARAVIRT
select PARAVIRT_CLOCK
+ select XEN_HAVE_PVMMU
depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS)
depends on X86_CMPXCHG && X86_TSC
help
Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead.
+config XEN_X86_PVH
+ bool "Support for running as a PVH guest (EXPERIMENTAL)"
+ depends on X86_64 && XEN && EXPERIMENTAL
+ default n
+ help
+ This option enables support for running as a PVH guest (PV guest
+ using hardware extensions) under a suitably capable hypervisor.
+ This option is EXPERIMENTAL because the hypervisor interfaces
+ which it uses are not yet considered stable; therefore, backwards and
+ forwards compatibility is not yet guaranteed. If unsure, say N.
__read_mostly int xen_have_vector_callback;
EXPORT_SYMBOL_GPL(xen_have_vector_callback);
+#define xen_pvh_domain() (xen_pv_domain() && \
+ xen_feature(XENFEAT_auto_translated_physmap) && \
+ xen_have_vector_callback)
/*
* Point at some empty memory to start with. We map the real shared_info
* page as soon as fixmap is up and running.
struct xen_extraversion extra;
HYPERVISOR_xen_version(XENVER_extraversion, &extra);
- printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
- pv_info.name);
+ pr_info("Booting paravirtualized kernel %son %s\n",
+ xen_feature(XENFEAT_auto_translated_physmap) ?
+ "with PVH extensions " : "", pv_info.name);
printk(KERN_INFO "Xen version: %d.%d%s%s\n",
version >> 16, version & 0xffff, extra.extraversion,
xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
break;
}
- asm(XEN_EMULATE_PREFIX "cpuid"
- : "=a" (*ax),
- "=b" (*bx),
- "=c" (*cx),
- "=d" (*dx)
- : "0" (*ax), "2" (*cx));
+ if (xen_pvh_domain())
+ native_cpuid(ax, bx, cx, dx);
+ else
+ asm(XEN_EMULATE_PREFIX "cpuid"
+ : "=a" (*ax),
+ "=b" (*bx),
+ "=c" (*cx),
+ "=d" (*dx)
+ : "0" (*ax), "2" (*cx));
*bx &= maskebx;
*cx &= maskecx;
HYPERVISOR_shared_info =
(struct shared_info *)__va(xen_start_info->shared_info);
+ /* PVH TBD/FIXME: vcpu info placement in phase 2 */
+ if (xen_pvh_domain())
+ return;
+
#ifndef CONFIG_SMP
/* In UP this is as good a place as any to set up shared info */
xen_setup_vcpu_info_placement();
*/
static void __init xen_setup_stackprotector(void)
{
+ /* PVH TBD/FIXME: investigate setup_stack_canary_segment */
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ switch_to_new_gdt(0);
+ return;
+ }
pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
pv_cpu_ops.load_gdt = xen_load_gdt_boot;
pv_cpu_ops.load_gdt = xen_load_gdt;
}
+static void __init xen_pvh_early_guest_init(void)
+{
+ if (xen_feature(XENFEAT_hvm_callback_vector))
+ xen_have_vector_callback = 1;
+
+#ifdef CONFIG_X86_32
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ xen_raw_printk("ERROR: 32bit PVH guests are not supported\n");
+ BUG();
+ }
+#endif
+}
+
/* First C function to be called on Xen boot */
asmlinkage void __init xen_start_kernel(void)
{
xen_domain_type = XEN_PV_DOMAIN;
+ xen_setup_features();
+ xen_pvh_early_guest_init();
xen_setup_machphys_mapping();
/* Install Xen paravirt ops */
pv_info = xen_info;
pv_init_ops = xen_init_ops;
- pv_cpu_ops = xen_cpu_ops;
pv_apic_ops = xen_apic_ops;
+ if (xen_pvh_domain())
+ pv_cpu_ops.cpuid = xen_cpuid;
+ else
+ pv_cpu_ops = xen_cpu_ops;
x86_init.resources.memory_setup = xen_memory_setup;
x86_init.oem.arch_setup = xen_arch_setup;
/* Work out if we support NX */
x86_configure_nx();
- xen_setup_features();
-
/* Get mfn list */
if (!xen_feature(XENFEAT_auto_translated_physmap))
xen_build_dynamic_phys_to_machine();
/* set the limit of our address space */
xen_reserve_top();
- /* We used to do this in xen_arch_setup, but that is too late on AMD
- * were early_cpu_init (run before ->arch_setup()) calls early_amd_init
- * which pokes 0xcf8 port.
- */
- set_iopl.iopl = 1;
- rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
- if (rc != 0)
- xen_raw_printk("physdev_op failed %d\n", rc);
+ /* PVH: runs at default kernel iopl of 0 */
+ if (!xen_pvh_domain()) {
+ /*
+ * We used to do this in xen_arch_setup, but that is too late
+ * on AMD where early_cpu_init (run before ->arch_setup()) calls
+ * early_amd_init which pokes 0xcf8 port.
+ */
+ set_iopl.iopl = 1;
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
+ if (rc != 0)
+ xen_raw_printk("physdev_op failed %d\n", rc);
+ }
#ifdef CONFIG_X86_32
/* set up basic CPUID stuff */
#endif
}
+/* Use a pfn in RAM; it may move to MMIO before kexec.
+ * This function is also called for PVH dom0. */
void __ref xen_hvm_init_shared_info(void)
{
int cpu;
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
#include <xen/interface/vcpu.h>
+#include <xen/features.h>
#include <xen/events.h>
#include <asm/xen/hypercall.h>
void __init xen_init_irq_ops(void)
{
- pv_irq_ops = xen_irq_ops;
+ /* For PVH we use default pv_irq_ops settings */
+ if (!xen_feature(XENFEAT_hvm_callback_vector))
+ pv_irq_ops = xen_irq_ops;
x86_init.irqs.intr_init = xen_init_IRQ;
}
#include <xen/interface/version.h>
#include <xen/interface/memory.h>
#include <xen/hvc-console.h>
+#include <xen/balloon.h>
#include "multicalls.h"
#include "mmu.h"
__xen_set_pte(ptep, pteval);
}
+void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
+ int nr_mfns, int add_mapping)
+{
+ struct physdev_map_iomem iomem;
+
+ iomem.first_gfn = pfn;
+ iomem.first_mfn = mfn;
+ iomem.nr_mfns = nr_mfns;
+ iomem.add_mapping = add_mapping;
+
+ if (HYPERVISOR_physdev_op(PHYSDEVOP_map_iomem, &iomem))
+ BUG();
+}
+
static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval)
{
#endif
paging_init();
xen_setup_shared_info();
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return;
#ifdef CONFIG_X86_64
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
unsigned long new_mfn_list;
static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
{
struct mmuext_op op;
+
+ if (xen_feature(XENFEAT_writable_page_tables))
+ return;
+
op.cmd = cmd;
op.arg1.mfn = pfn_to_mfn(pfn);
if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
pte_t pte = pfn_pte(pfn, prot);
+ /* Recall that for PVH, the page tables are native. */
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return;
+
if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
BUG();
}
pte_t *pte = v;
int i;
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return;
+
/* All levels are converted the same way, so just treat them
as ptes. */
for (i = 0; i < PTRS_PER_PTE; i++)
(*pt_end)--;
}
}
+
/*
* Set up the initial kernel pagetable.
*
* but that's enough to get __va working. We need to fill in the rest
* of the physical mapping once some sort of allocator has been set
* up.
+ * NOTE: for PVH, the page tables are native.
*/
void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
{
* structure to attach it to, so make sure we just set kernel
* pgd.
*/
- xen_mc_batch();
- __xen_write_cr3(true, __pa(init_level4_pgt));
- xen_mc_issue(PARAVIRT_LAZY_CPU);
-
+ if (xen_feature(XENFEAT_writable_page_tables)) {
+ native_write_cr3(__pa(init_level4_pgt));
+ } else {
+ xen_mc_batch();
+ __xen_write_cr3(true, __pa(init_level4_pgt));
+ xen_mc_issue(PARAVIRT_LAZY_CPU);
+ }
/* We can't that easily rip out L3 and L2, as the Xen pagetables are
* set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for
* the initial domain. For guests using the toolstack, they are in:
void __init xen_init_mmu_ops(void)
{
- x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
x86_init.paging.pagetable_init = xen_pagetable_init;
+
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
+ return;
+ }
+ x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
pv_mmu_ops = xen_mmu_ops;
memset(dummy_mapping, 0xff, PAGE_SIZE);
}
#endif
+/* Map a foreign gmfn, fgmfn, to a local pfn, lpfn. This is used by user
+ * space on PVH dom0 when creating a new guest and needing to map domU pages.
+ */
+static int pvh_add_to_xen_p2m(unsigned long lpfn, unsigned long fgmfn,
+ unsigned int domid)
+{
+ int rc;
+ struct xen_add_to_physmap_range xatp = {
+ .domid = DOMID_SELF,
+ .foreign_domid = domid,
+ .size = 1,
+ .space = XENMAPSPACE_gmfn_foreign,
+ };
+ xen_ulong_t idx = fgmfn;
+ xen_pfn_t gpfn = lpfn;
+
+ set_xen_guest_handle(xatp.idxs, &idx);
+ set_xen_guest_handle(xatp.gpfns, &gpfn);
+
+ rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp);
+ if (rc)
+ pr_warn("d0: Failed to map pfn (0x%lx) to mfn (0x%lx) rc:%d\n",
+ lpfn, fgmfn, rc);
+ return rc;
+}
+
+static int pvh_rem_xen_p2m(unsigned long spfn, int count)
+{
+ struct xen_remove_from_physmap xrp;
+ int i, rc;
+
+ for (i = 0; i < count; i++) {
+ xrp.domid = DOMID_SELF;
+ xrp.gpfn = spfn+i;
+ rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
+ if (rc) {
+ pr_warn("Failed to unmap pfn:%lx rc:%d done:%d\n",
+ spfn+i, rc, i);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+struct pvh_remap_data {
+ unsigned long fgmfn; /* foreign domain's gmfn */
+ pgprot_t prot;
+ domid_t domid;
+ int index;
+ struct page **pages;
+};
+
+static int pvh_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
+ void *data)
+{
+ int rc;
+ struct pvh_remap_data *remap = data;
+ unsigned long pfn = page_to_pfn(remap->pages[remap->index++]);
+ pte_t pteval = pte_mkspecial(pfn_pte(pfn, remap->prot));
+
+ rc = pvh_add_to_xen_p2m(pfn, remap->fgmfn, remap->domid);
+ if (rc)
+ return rc;
+ native_set_pte(ptep, pteval);
+
+ return 0;
+}
+
+static int pvh_remap_gmfn_range(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long mfn, int nr,
+ pgprot_t prot, unsigned domid,
+ struct page **pages)
+{
+ int err;
+ struct pvh_remap_data pvhdata;
+
+ BUG_ON(!pages);
+
+ pvhdata.fgmfn = mfn;
+ pvhdata.prot = prot;
+ pvhdata.domid = domid;
+ pvhdata.index = 0;
+ pvhdata.pages = pages;
+ err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
+ pvh_map_pte_fn, &pvhdata);
+ flush_tlb_all();
+ return err;
+}
+
#define REMAP_BATCH_SIZE 16
struct remap_data {
int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long addr,
- unsigned long mfn, int nr,
- pgprot_t prot, unsigned domid)
+ xen_pfn_t mfn, int nr,
+ pgprot_t prot, unsigned domid,
+ struct page **pages)
+
{
struct remap_data rmd;
struct mmu_update mmu_update[REMAP_BATCH_SIZE];
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* We need to update the local page tables and the xen HAP */
+ return pvh_remap_gmfn_range(vma, addr, mfn, nr, prot, domid, pages);
+ }
rmd.mfn = mfn;
rmd.prot = prot;
return err;
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
+
+/* Returns: 0 on success */
+int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
+ int numpgs, struct page **pages)
+{
+ if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
+ return 0;
+
+ while (numpgs--) {
+
+ /* the mmu has already cleaned up the process mmu resources at
+ * this point (lookup_address will return NULL). */
+ unsigned long pfn = page_to_pfn(pages[numpgs]);
+
+ pvh_rem_xen_p2m(pfn, 1);
+ }
+ /* We don't need to flush tlbs because as part of pvh_rem_xen_p2m(),
+ * the hypervisor will do tlb flushes after removing the p2m entries
+ * from the EPT/NPT */
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
extern void xen_init_mmu_ops(void);
extern void xen_hvm_init_mmu_ops(void);
+extern void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
+ int nr_mfns, int add_mapping);
#endif /* _XEN_MMU_H */
{
unsigned topidx, mididx, idx;
- if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
return true;
}
#include <xen/interface/memory.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
+#include "mmu.h"
#include "xen-ops.h"
#include "vdso.h"
memblock_reserve(start, size);
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return;
+
xen_max_p2m_pfn = PFN_DOWN(start + size);
for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
unsigned long mfn = pfn_to_mfn(pfn);
.domid = DOMID_SELF
};
unsigned long len = 0;
+ int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
unsigned long pfn;
int ret;
continue;
frame = mfn;
} else {
- if (mfn != INVALID_P2M_ENTRY)
+ if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
continue;
frame = pfn;
}
*identity += set_phys_range_identity(start_pfn, end_pfn);
}
+/* For PVH, the pfns [0..MAX] are mapped to mfns in the EPT/NPT. As part of
+ * this 1:1 mapping hypercall, the mfns are released back to the dom heap.
+ * Also, we map the entire IO space, i.e., beyond max_pfn_mapped.
+ */
+static void __init xen_pvh_identity_map_chunk(unsigned long start_pfn,
+ unsigned long end_pfn, unsigned long *released,
+ unsigned long *identity, unsigned long max_pfn)
+{
+ unsigned long pfn;
+ int numpfns = 1, add_mapping = 1;
+
+ for (pfn = start_pfn; pfn < end_pfn; pfn++)
+ xen_set_clr_mmio_pvh_pte(pfn, pfn, numpfns, add_mapping);
+
+ if (start_pfn <= max_pfn) {
+ unsigned long end = min(max_pfn_mapped, end_pfn);
+ *released += end - start_pfn;
+ }
+ *identity += end_pfn - start_pfn;
+}
+
static unsigned long __init xen_set_identity_and_release(
const struct e820entry *list, size_t map_size, unsigned long nr_pages)
{
unsigned long identity = 0;
const struct e820entry *entry;
int i;
+ int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
/*
* Combine non-RAM regions and gaps until a RAM region (or the
if (entry->type == E820_RAM)
end_pfn = PFN_UP(entry->addr);
- if (start_pfn < end_pfn)
- xen_set_identity_and_release_chunk(
- start_pfn, end_pfn, nr_pages,
- &released, &identity);
-
+ if (start_pfn < end_pfn) {
+ if (xlated_phys) {
+ xen_pvh_identity_map_chunk(start_pfn,
+ end_pfn, &released, &identity,
+ nr_pages);
+ } else {
+ xen_set_identity_and_release_chunk(
+ start_pfn, end_pfn, nr_pages,
+ &released, &identity);
+ }
+ }
start = end;
}
}
#endif /* CONFIG_X86_64 */
}
-void __init xen_arch_setup(void)
+/* Non-auto-translated PV domain, i.e., it's not PVH. */
+static __init void xen_pvmmu_arch_setup(void)
{
- xen_panic_handler_init();
-
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
- if (!xen_feature(XENFEAT_auto_translated_physmap))
- HYPERVISOR_vm_assist(VMASST_CMD_enable,
- VMASST_TYPE_pae_extended_cr3);
+ HYPERVISOR_vm_assist(VMASST_CMD_enable,
+ VMASST_TYPE_pae_extended_cr3);
if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
xen_enable_sysenter();
xen_enable_syscall();
+}
+
+/* This function is not called for HVM domains. */
+void __init xen_arch_setup(void)
+{
+ xen_panic_handler_init();
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
+ xen_pvmmu_arch_setup();
#ifdef CONFIG_ACPI
if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
touch_softlockup_watchdog();
preempt_disable();
- xen_enable_sysenter();
- xen_enable_syscall();
-
+ /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
+ if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
+ xen_enable_sysenter();
+ xen_enable_syscall();
+ }
cpu = smp_processor_id();
smp_store_cpu_info(cpu);
cpu_data(cpu).x86_max_cores = 1;
BUG_ON(smp_processor_id() != 0);
native_smp_prepare_boot_cpu();
- /* We've switched to the "real" per-cpu gdt, so make sure the
- old memory can be recycled */
- make_lowmem_page_readwrite(xen_initial_gdt);
-
+ if (!xen_feature(XENFEAT_writable_page_tables)) {
+ /* We've switched to the "real" per-cpu gdt, so make sure the
+ * old memory can be recycled */
+ make_lowmem_page_readwrite(xen_initial_gdt);
+ }
xen_filter_cpu_maps();
xen_setup_vcpu_info_placement();
}
gdt = get_cpu_gdt_table(cpu);
ctxt->flags = VGCF_IN_KERNEL;
- ctxt->user_regs.ds = __USER_DS;
- ctxt->user_regs.es = __USER_DS;
ctxt->user_regs.ss = __KERNEL_DS;
#ifdef CONFIG_X86_32
ctxt->user_regs.fs = __KERNEL_PERCPU;
ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
- ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
- xen_copy_trap_info(ctxt->trap_ctxt);
+ if (xen_feature(XENFEAT_auto_translated_physmap) &&
+ xen_feature(XENFEAT_supervisor_mode_kernel)) {
+ /* Note: PVH is not supported on x86_32. */
+#ifdef CONFIG_X86_64
+ ctxt->user_regs.ds = __KERNEL_DS;
+ ctxt->user_regs.es = 0;
+ ctxt->user_regs.gs = 0;
+
+ /* GUEST_GDTR_BASE and GUEST_GDTR_LIMIT in the VMCS. */
+ ctxt->u.pvh.gdtaddr = (unsigned long)gdt;
+ ctxt->u.pvh.gdtsz = (unsigned long)(GDT_SIZE - 1);
+
+ ctxt->gs_base_user = (unsigned long)
+ per_cpu(irq_stack_union.gs_base, cpu);
+#endif
+ } else {
+ ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
+ ctxt->user_regs.ds = __USER_DS;
+ ctxt->user_regs.es = __USER_DS;
- ctxt->ldt_ents = 0;
+ xen_copy_trap_info(ctxt->trap_ctxt);
- BUG_ON((unsigned long)gdt & ~PAGE_MASK);
+ ctxt->ldt_ents = 0;
- gdt_mfn = arbitrary_virt_to_mfn(gdt);
- make_lowmem_page_readonly(gdt);
- make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
+ BUG_ON((unsigned long)gdt & ~PAGE_MASK);
- ctxt->gdt_frames[0] = gdt_mfn;
- ctxt->gdt_ents = GDT_ENTRIES;
+ gdt_mfn = arbitrary_virt_to_mfn(gdt);
+ make_lowmem_page_readonly(gdt);
+ make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
- ctxt->user_regs.cs = __KERNEL_CS;
- ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
+ ctxt->u.pv.gdt_frames[0] = gdt_mfn;
+ ctxt->u.pv.gdt_ents = GDT_ENTRIES;
- ctxt->kernel_ss = __KERNEL_DS;
- ctxt->kernel_sp = idle->thread.sp0;
+ ctxt->kernel_ss = __KERNEL_DS;
+ ctxt->kernel_sp = idle->thread.sp0;
#ifdef CONFIG_X86_32
- ctxt->event_callback_cs = __KERNEL_CS;
- ctxt->failsafe_callback_cs = __KERNEL_CS;
+ ctxt->event_callback_cs = __KERNEL_CS;
+ ctxt->failsafe_callback_cs = __KERNEL_CS;
#endif
- ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
- ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
+ ctxt->event_callback_eip =
+ (unsigned long)xen_hypervisor_callback;
+ ctxt->failsafe_callback_eip =
+ (unsigned long)xen_failsafe_callback;
+ }
+ ctxt->user_regs.cs = __KERNEL_CS;
+ ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
#include <xen/interface/elfnote.h>
#include <asm/xen/interface.h>
+#ifdef CONFIG_XEN_X86_PVH
+#define FEATURES_PVH "|writable_descriptor_tables" \
+ "|auto_translated_physmap" \
+ "|supervisor_mode_kernel" \
+ "|hvm_callback_vector"
+#else
+#define FEATURES_PVH /* Not supported */
+#endif
+
__INIT
ENTRY(startup_xen)
cld
#endif
ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
- ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb")
+ ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb"FEATURES_PVH)
ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
return;
BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
- gnttab_batch_copy(netbk->grant_copy_op, npo.copy_prod);
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
+ npo.copy_prod);
+ BUG_ON(ret != 0);
while ((skb = __skb_dequeue(&rxq)) != NULL) {
sco = (struct skb_cb_overlay *)skb->cb;
static void xen_netbk_tx_action(struct xen_netbk *netbk)
{
unsigned nr_gops;
+ int ret;
nr_gops = xen_netbk_tx_build_gops(netbk);
if (nr_gops == 0)
return;
-
- gnttab_batch_copy(netbk->tx_copy_ops, nr_gops);
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
+ netbk->tx_copy_ops, nr_gops);
+ BUG_ON(ret);
xen_netbk_tx_submit(netbk);
+
}
static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
Allow kernel fetching MCE error from Xen platform and
converting it into Linux mcelog format for mcelog tools
+config XEN_HAVE_PVMMU
+ bool
+
endmenu
ifneq ($(CONFIG_ARM),y)
-obj-y += manage.o balloon.o
+obj-y += manage.o
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
endif
-obj-y += grant-table.o features.o events.o
+obj-y += grant-table.o features.o events.o balloon.o fallback.o
obj-y += xenbus/
nostackp := $(call cc-option, -fno-stack-protector)
set_phys_to_machine(pfn, frame_list[i]);
+#ifdef CONFIG_XEN_HAVE_PVMMU
/* Link back into the page tables if not highmem. */
- if (xen_pv_domain() && !PageHighMem(page)) {
+ if (xen_pv_domain() && !PageHighMem(page) &&
+ !xen_feature(XENFEAT_auto_translated_physmap)) {
+
int ret;
ret = HYPERVISOR_update_va_mapping(
(unsigned long)__va(pfn << PAGE_SHIFT),
0);
BUG_ON(ret);
}
+#endif
/* Relinquish the page back to the allocator. */
ClearPageReserved(page);
scrub_page(page);
+#ifdef CONFIG_XEN_HAVE_PVMMU
if (xen_pv_domain() && !PageHighMem(page)) {
- ret = HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT),
- __pte_ma(0), 0);
- BUG_ON(ret);
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ ret = HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ __pte_ma(0), 0);
+ BUG_ON(ret);
+ }
}
-
+#endif
}
/* Ensure that ballooned highmem pages don't have kmaps. */
#include <xen/xen.h>
#include <xen/xenbus.h>
+#include <xen/features.h>
#include <asm/xen/hypervisor.h>
#include <asm/cpu.h>
static struct notifier_block xsn_cpu = {
.notifier_call = setup_cpu_watcher };
- if (!xen_pv_domain())
+ /* PVH TBD/FIXME: future work */
+ if (!xen_pv_domain() || xen_feature(XENFEAT_auto_translated_physmap))
return -ENODEV;
register_xenstore_notifier(&xsn_cpu);
}
EXPORT_SYMBOL_GPL(xen_set_callback_via);
-#ifdef CONFIG_XEN_PVHVM
+#ifdef CONFIG_X86
/* Vector callbacks are better than PCI interrupts to receive event
* channel notifications because we can receive vector callbacks on any
* vcpu and we don't need PCI support or APIC interactions. */
if (xen_initial_domain())
pci_xen_initial_domain();
+ if (xen_feature(XENFEAT_hvm_callback_vector)) {
+ xen_callback_vector();
+ return;
+ }
+
+ /* PVH: TBD/FIXME: debug and fix eio map to work with pvh */
+
pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map);
rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
--- /dev/null
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/bug.h>
+#include <linux/export.h>
+#include <asm/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+int xen_HYPERVISOR_event_channel_op_compat(int cmd, void *arg)
+{
+ struct evtchn_op op;
+ int rc;
+
+ op.cmd = cmd;
+ memcpy(&op.u, arg, sizeof(op.u));
+ rc = _hypercall1(int, event_channel_op_compat, &op);
+
+ switch (cmd) {
+ case EVTCHNOP_close:
+ case EVTCHNOP_send:
+ case EVTCHNOP_bind_vcpu:
+ case EVTCHNOP_unmask:
+ /* no output */
+ break;
+
+#define COPY_BACK(eop) \
+ case EVTCHNOP_##eop: \
+ memcpy(arg, &op.u.eop, sizeof(op.u.eop)); \
+ break
+
+ COPY_BACK(bind_interdomain);
+ COPY_BACK(bind_virq);
+ COPY_BACK(bind_pirq);
+ COPY_BACK(status);
+ COPY_BACK(alloc_unbound);
+ COPY_BACK(bind_ipi);
+#undef COPY_BACK
+
+ default:
+ WARN_ON(rc != -ENOSYS);
+ break;
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(xen_HYPERVISOR_event_channel_op_compat);
+
+int xen_HYPERVISOR_physdev_op_compat(int cmd, void *arg)
+{
+ struct physdev_op op;
+ int rc;
+
+ op.cmd = cmd;
+ memcpy(&op.u, arg, sizeof(op.u));
+ rc = _hypercall1(int, physdev_op_compat, &op);
+
+ switch (cmd) {
+ case PHYSDEVOP_IRQ_UNMASK_NOTIFY:
+ case PHYSDEVOP_set_iopl:
+ case PHYSDEVOP_set_iobitmap:
+ case PHYSDEVOP_apic_write:
+ /* no output */
+ break;
+
+#define COPY_BACK(pop, fld) \
+ case PHYSDEVOP_##pop: \
+ memcpy(arg, &op.u.fld, sizeof(op.u.fld)); \
+ break
+
+ COPY_BACK(irq_status_query, irq_status_query);
+ COPY_BACK(apic_read, apic_op);
+ COPY_BACK(ASSIGN_VECTOR, irq_op);
+#undef COPY_BACK
+
+ default:
+ WARN_ON(rc != -ENOSYS);
+ break;
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(xen_HYPERVISOR_physdev_op_compat);
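[Not part of the patch: a minimal, hypothetical sketch of how a caller reaches this compat path. On hypervisors old enough to lack the direct event_channel_op hypercall, the wrapper in the hypercall header sees -ENOSYS and retries through xen_HYPERVISOR_event_channel_op_compat(); callers are unchanged. The function name example_close_evtchn is illustrative only.]

/* Hypothetical sketch: close an event channel; the compat fallback is
 * invoked transparently inside HYPERVISOR_event_channel_op(). */
static void example_close_evtchn(evtchn_port_t port)
{
	struct evtchn_close close = { .port = port };

	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
		BUG();
}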
if (!xen_domain())
return -ENODEV;
- use_ptemod = xen_pv_domain();
+ use_ptemod = xen_pv_domain() &&
+ !xen_feature(XENFEAT_auto_translated_physmap);
err = misc_register(&gntdev_miscdev);
if (err != 0) {
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <linux/io.h>
-#include <linux/delay.h>
#include <linux/hardirq.h>
#include <xen/xen.h>
}
EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
-/* Handling of paged out grant targets (GNTST_eagain) */
-#define MAX_DELAY 256
-static inline void
-gnttab_retry_eagain_gop(unsigned int cmd, void *gop, int16_t *status,
- const char *func)
-{
- unsigned delay = 1;
-
- do {
- BUG_ON(HYPERVISOR_grant_table_op(cmd, gop, 1));
- if (*status == GNTST_eagain)
- msleep(delay++);
- } while ((*status == GNTST_eagain) && (delay < MAX_DELAY));
-
- if (delay >= MAX_DELAY) {
- printk(KERN_ERR "%s: %s eagain grant\n", func, current->comm);
- *status = GNTST_bad_page;
- }
-}
-
-void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count)
-{
- struct gnttab_map_grant_ref *op;
-
- if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, batch, count))
- BUG();
- for (op = batch; op < batch + count; op++)
- if (op->status == GNTST_eagain)
- gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, op,
- &op->status, __func__);
-}
-EXPORT_SYMBOL_GPL(gnttab_batch_map);
-
-void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count)
-{
- struct gnttab_copy *op;
-
- if (HYPERVISOR_grant_table_op(GNTTABOP_copy, batch, count))
- BUG();
- for (op = batch; op < batch + count; op++)
- if (op->status == GNTST_eagain)
- gnttab_retry_eagain_gop(GNTTABOP_copy, op,
- &op->status, __func__);
-}
-EXPORT_SYMBOL_GPL(gnttab_batch_copy);
-
int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count)
if (ret)
return ret;
- /* Retry eagain maps */
- for (i = 0; i < count; i++)
- if (map_ops[i].status == GNTST_eagain)
- gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i,
- &map_ops[i].status, __func__);
-
if (xen_feature(XENFEAT_auto_translated_physmap))
return ret;
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
struct gnttab_setup_table setup;
+ unsigned long start_gpfn;
xen_pfn_t *frames;
unsigned int nr_gframes = end_idx + 1;
int rc;
- if (xen_hvm_domain()) {
+ if (xen_hvm_domain() || xen_feature(XENFEAT_auto_translated_physmap)) {
struct xen_add_to_physmap xatp;
unsigned int i = end_idx;
rc = 0;
+
+ if (xen_hvm_domain())
+ start_gpfn = xen_hvm_resume_frames >> PAGE_SHIFT;
+ else
+ start_gpfn = virt_to_pfn(gnttab_shared.addr);
/*
* Loop backwards, so that the first hypercall has the largest
* index, ensuring that the table will grow only once.
xatp.domid = DOMID_SELF;
xatp.idx = i;
xatp.space = XENMAPSPACE_grant_table;
- xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
+ xatp.gpfn = start_gpfn + i;
rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
if (rc != 0) {
printk(KERN_WARNING
int rc;
struct gnttab_set_version gsv;
- if (xen_hvm_domain())
+ if (xen_hvm_domain() || xen_feature(XENFEAT_auto_translated_physmap))
gsv.version = 1;
else
gsv.version = 2;
int gnttab_resume(void)
{
unsigned int max_nr_gframes;
+ char *kmsg = "Failed to kmalloc pages for pv in hvm grant frames\n";
gnttab_request_version();
max_nr_gframes = gnttab_max_grant_frames();
if (max_nr_gframes < nr_grant_frames)
return -ENOSYS;
+ /* PVH note: xen will free existing kmalloc'd mfn in
+ * XENMEM_add_to_physmap. TBD/FIXME: use xen ballooning instead of
+ * kmalloc(). */
+ if (xen_pv_domain() && xen_feature(XENFEAT_auto_translated_physmap) &&
+ !gnttab_shared.addr) {
+ gnttab_shared.addr =
+ kmalloc(max_nr_gframes * PAGE_SIZE, GFP_KERNEL);
+ if (!gnttab_shared.addr) {
+ pr_warn("%s", kmsg);
+ return -ENOMEM;
+ }
+ }
if (xen_pv_domain())
return gnttab_map(0, nr_grant_frames - 1);
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
+#include <xen/balloon.h>
#include "privcmd.h"
MODULE_LICENSE("GPL");
+#define PRIV_VMA_LOCKED ((void *)1)
+
#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif
msg->va & PAGE_MASK,
msg->mfn, msg->npages,
vma->vm_page_prot,
- st->domain);
+ st->domain, NULL);
if (rc < 0)
return rc;
if (!xen_initial_domain())
return -EPERM;
+ /* We only support privcmd_ioctl_mmap_batch for auto translated. */
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return -ENOSYS;
+
if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
return -EFAULT;
domid_t domain;
unsigned long va;
struct vm_area_struct *vma;
+ int index;
/* A tristate:
* 0 for no errors
* 1 if at least one error has happened (and no
xen_pfn_t __user *user_mfn;
};
+/* Auto-translated dom0 note: if the domU being created is PV, then the mfn
+ * is a real mfn (addr on bus). If it's auto-translated, then the mfn is
+ * actually a pfn (input to HAP).
+ */
static int mmap_batch_fn(void *data, void *state)
{
xen_pfn_t *mfnp = data;
struct mmap_batch_state *st = state;
+ struct vm_area_struct *vma = st->vma;
+ struct page **pages = vma->vm_private_data;
+ struct page *cur_page = NULL;
int ret;
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ cur_page = pages[st->index++];
+
ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
- st->vma->vm_page_prot, st->domain);
+ st->vma->vm_page_prot, st->domain,
+ &cur_page);
/* Store error code for second pass. */
*(st->err++) = ret;
return __put_user(*mfnp, st->user_mfn++);
}
+/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
+ * the vma with the page info to use later.
+ * Returns 0 on success, otherwise -errno.
+ */
+static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
+{
+ int rc;
+ struct page **pages;
+
+ pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
+ if (pages == NULL)
+ return -ENOMEM;
+
+ rc = alloc_xenballooned_pages(numpgs, pages, 0);
+ if (rc != 0) {
+ pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
+ numpgs, rc);
+ kfree(pages);
+ return -ENOMEM;
+ }
+ BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
+ vma->vm_private_data = pages;
+
+ return 0;
+}
+
static struct vm_operations_struct privcmd_vm_ops;
static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
up_write(&mm->mmap_sem);
goto out;
}
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ ret = alloc_empty_pages(vma, m.num);
+ if (ret < 0) {
+ up_write(&mm->mmap_sem);
+ goto out;
+ }
+ }
state.domain = m.dom;
state.vma = vma;
state.va = m.addr;
+ state.index = 0;
state.global_error = 0;
state.err = err_array;
return ret;
}
+static void privcmd_close(struct vm_area_struct *vma)
+{
+ struct page **pages = vma->vm_private_data;
+ int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
+ return;
+
+ xen_unmap_domain_mfn_range(vma, numpgs, pages);
+ free_xenballooned_pages(numpgs, pages);
+ kfree(pages);
+}
+
static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
}
static struct vm_operations_struct privcmd_vm_ops = {
+ .close = privcmd_close,
.fault = privcmd_fault
};
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
- return (xchg(&vma->vm_private_data, (void *)1) == NULL);
+ return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
}
const struct file_operations xen_privcmd_fops = {
#include <xen/grant_table.h>
#include <xen/xenbus.h>
#include <xen/xen.h>
+#include <xen/features.h>
#include "xenbus_probe.h"
op.host_addr = arbitrary_virt_to_machine(pte).maddr;
- gnttab_batch_map(&op, 1);
+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+ BUG();
if (op.status != GNTST_okay) {
free_vm_area(area);
gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map, gnt_ref,
dev->otherend_id);
- gnttab_batch_map(&op, 1);
+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+ BUG();
if (op.status != GNTST_okay) {
xenbus_dev_fatal(dev, op.status,
void __init xenbus_ring_ops_init(void)
{
- if (xen_pv_domain())
+ if (xen_pv_domain() && !xen_feature(XENFEAT_auto_translated_physmap))
ring_ops = &ring_ops_pv;
else
ring_ops = &ring_ops_hvm;
struct gnttab_map_grant_ref *kunmap_ops,
struct page **pages, unsigned int count);
-/* Perform a batch of grant map/copy operations. Retry every batch slot
- * for which the hypervisor returns GNTST_eagain. This is typically due
- * to paged out target frames.
- *
- * Will retry for 1, 2, ... 255 ms, i.e. 256 times during 32 seconds.
- *
- * Return value in each iand every status field of the batch guaranteed
- * to not be GNTST_eagain.
- */
-void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count);
-void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count);
-
#endif /* __ASM_GNTTAB_H__ */
};
DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t);
+#define XENMAPSPACE_shared_info 0 /* shared info page */
+#define XENMAPSPACE_grant_table 1 /* grant table page */
+#define XENMAPSPACE_gmfn 2 /* GMFN */
+#define XENMAPSPACE_gmfn_range 3 /* GMFN range, XENMEM_add_to_physmap only. */
+#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom,
+ * XENMEM_add_to_physmap_range only.
+ */
+
/*
* Sets the GPFN at which a particular page appears in the specified guest's
* pseudophysical address space.
uint16_t size;
/* Source mapping space. */
-#define XENMAPSPACE_shared_info 0 /* shared info page */
-#define XENMAPSPACE_grant_table 1 /* grant table page */
unsigned int space;
/* Index into source mapping space. */
/*** REMOVED ***/
/*#define XENMEM_translate_gpfn_list 8*/
+#define XENMEM_add_to_physmap_range 23
+struct xen_add_to_physmap_range {
+ /* Which domain to change the mapping for. */
+ domid_t domid;
+ uint16_t space; /* => enum phys_map_space */
+
+ /* Number of pages to go through */
+ uint16_t size;
+ domid_t foreign_domid; /* IFF gmfn_foreign */
+
+ /* Indexes into space being mapped. */
+ GUEST_HANDLE(xen_ulong_t) idxs;
+
+ /* GPFN in domid where the source mapping page should appear. */
+ GUEST_HANDLE(xen_pfn_t) gpfns;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap_range);
+
/*
* Returns the pseudo-physical memory map as it was when the domain
* was started (specified by XENMEM_set_memory_map).
* during a driver critical region.
*/
extern spinlock_t xen_reservation_lock;
+
+/*
+ * Unmaps the page appearing at a particular GPFN from the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_remove_from_physmap_t.
+ */
+#define XENMEM_remove_from_physmap 15
+struct xen_remove_from_physmap {
+ /* Which domain to change the mapping for. */
+ domid_t domid;
+
+ /* GPFN of the current mapping of the page. */
+ xen_pfn_t gpfn;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
+
#endif /* __XEN_PUBLIC_MEMORY_H__ */
} u;
};
+#define PHYSDEVOP_map_iomem 30
+struct physdev_map_iomem {
+ /* IN */
+ uint64_t first_gfn;
+ uint64_t first_mfn;
+ uint32_t nr_mfns;
+ uint32_t add_mapping; /* 1 == add mapping; 0 == unmap */
+
+};
+
/*
* Notify that some PIRQ-bound event channels have been unmasked.
* ** This command is obsolete since interface version 0x00030202 and is **
#define INCLUDE_XEN_OPS_H
#include <linux/percpu.h>
+#include <asm/xen/interface.h>
DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
struct vm_area_struct;
int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long addr,
- unsigned long mfn, int nr,
- pgprot_t prot, unsigned domid);
+ xen_pfn_t mfn, int nr,
+ pgprot_t prot, unsigned domid,
+ struct page **pages);
+int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
+ int numpgs, struct page **pages);
#endif /* INCLUDE_XEN_OPS_H */