git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
x86, fpu: split FPU state from task struct - v5
author Suresh Siddha <suresh.b.siddha@intel.com>
Mon, 10 Mar 2008 22:28:04 +0000 (15:28 -0700)
committer Ingo Molnar <mingo@elte.hu>
Sat, 19 Apr 2008 17:19:55 +0000 (19:19 +0200)
Split the FPU save area from the task struct. This allows easy migration
of FPU context, and it's generally cleaner. It also allows the following
two optimizations:

1) Allocate the save area only when the application actually uses the FPU,
i.e. on the first lazy FPU trap. This could save memory for applications
that never use the FPU. The next patch implements this lazy allocation.

2) Allocate the right size for the actual CPU rather than always 512 bytes.
Patches enabling xsave/xrstor support (coming shortly) will take advantage
of this.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
16 files changed:
arch/x86/kernel/Makefile
arch/x86/kernel/i387.c
arch/x86/kernel/process.c [new file with mode: 0644]
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/traps_32.c
arch/x86/kernel/traps_64.c
arch/x86/math-emu/fpu_entry.c
arch/x86/math-emu/fpu_system.h
arch/x86/math-emu/reg_ld_str.c
include/asm-x86/i387.h
include/asm-x86/processor.h
include/asm-x86/thread_info.h
include/asm-x86/thread_info_32.h
include/asm-x86/thread_info_64.h
kernel/fork.c

index c3920ea8ac56f99020c2de9562b422c97ec2a0c3..7a2a2e93e84b48dda98d5e4951978478af7855c5 100644 (file)
@@ -29,6 +29,7 @@ obj-$(CONFIG_X86_64)  += pci-nommu_64.o bugs_64.o
 obj-y                  += tsc_$(BITS).o io_delay.o rtc.o
 
 obj-$(CONFIG_X86_TRAMPOLINE)   += trampoline.o
+obj-y                          += process.o
 obj-y                          += i387.o
 obj-y                          += ptrace.o
 obj-y                          += ds.o
index 8f8102d967b3f4111c58be6dccbd6e4192fc3864..baf632b221d43366b1f026b28add587e249e5840 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 #include <linux/regset.h>
 #include <linux/sched.h>
+#include <linux/bootmem.h>
 
 #include <asm/sigcontext.h>
 #include <asm/processor.h>
 #endif
 
 static unsigned int            mxcsr_feature_mask __read_mostly = 0xffffffffu;
+unsigned int xstate_size;
+static struct i387_fxsave_struct fx_scratch __cpuinitdata;
 
-void mxcsr_feature_mask_init(void)
+void __cpuinit mxcsr_feature_mask_init(void)
 {
        unsigned long mask = 0;
 
        clts();
        if (cpu_has_fxsr) {
-               memset(&current->thread.i387.fxsave, 0,
-                      sizeof(struct i387_fxsave_struct));
-               asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave));
-               mask = current->thread.i387.fxsave.mxcsr_mask;
+               memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
+               asm volatile("fxsave %0" : : "m" (fx_scratch));
+               mask = fx_scratch.mxcsr_mask;
                if (mask == 0)
                        mask = 0x0000ffbf;
        }
@@ -53,6 +55,17 @@ void mxcsr_feature_mask_init(void)
        stts();
 }
 
+void __init init_thread_xstate(void)
+{
+       if (cpu_has_fxsr)
+               xstate_size = sizeof(struct i387_fxsave_struct);
+#ifdef CONFIG_X86_32
+       else
+               xstate_size = sizeof(struct i387_fsave_struct);
+#endif
+       init_task.thread.xstate = alloc_bootmem(xstate_size);
+}
+
 #ifdef CONFIG_X86_64
 /*
  * Called at bootup to set up the initial FPU state that is later cloned
@@ -61,10 +74,6 @@ void mxcsr_feature_mask_init(void)
 void __cpuinit fpu_init(void)
 {
        unsigned long oldcr0 = read_cr0();
-       extern void __bad_fxsave_alignment(void);
-
-       if (offsetof(struct task_struct, thread.i387.fxsave) & 15)
-               __bad_fxsave_alignment();
 
        set_in_cr4(X86_CR4_OSFXSR);
        set_in_cr4(X86_CR4_OSXMMEXCPT);
@@ -93,18 +102,19 @@ void init_fpu(struct task_struct *tsk)
        }
 
        if (cpu_has_fxsr) {
-               memset(&tsk->thread.i387.fxsave, 0,
-                      sizeof(struct i387_fxsave_struct));
-               tsk->thread.i387.fxsave.cwd = 0x37f;
+               struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
+
+               memset(fx, 0, xstate_size);
+               fx->cwd = 0x37f;
                if (cpu_has_xmm)
-                       tsk->thread.i387.fxsave.mxcsr = MXCSR_DEFAULT;
+                       fx->mxcsr = MXCSR_DEFAULT;
        } else {
-               memset(&tsk->thread.i387.fsave, 0,
-                      sizeof(struct i387_fsave_struct));
-               tsk->thread.i387.fsave.cwd = 0xffff037fu;
-               tsk->thread.i387.fsave.swd = 0xffff0000u;
-               tsk->thread.i387.fsave.twd = 0xffffffffu;
-               tsk->thread.i387.fsave.fos = 0xffff0000u;
+               struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
+               memset(fp, 0, xstate_size);
+               fp->cwd = 0xffff037fu;
+               fp->swd = 0xffff0000u;
+               fp->twd = 0xffffffffu;
+               fp->fos = 0xffff0000u;
        }
        /*
         * Only the device not available exception or ptrace can call init_fpu.
@@ -132,7 +142,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
        init_fpu(target);
 
        return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-                                  &target->thread.i387.fxsave, 0, -1);
+                                  &target->thread.xstate->fxsave, 0, -1);
 }
 
 int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
@@ -148,12 +158,12 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
        set_stopped_child_used_math(target);
 
        ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                &target->thread.i387.fxsave, 0, -1);
+                                &target->thread.xstate->fxsave, 0, -1);
 
        /*
         * mxcsr reserved bits must be masked to zero for security reasons.
         */
-       target->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
+       target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
 
        return ret;
 }
@@ -233,7 +243,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
 static void
 convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
 {
-       struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
+       struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave;
        struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
        struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
        int i;
@@ -273,7 +283,7 @@ static void convert_to_fxsr(struct task_struct *tsk,
                            const struct user_i387_ia32_struct *env)
 
 {
-       struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
+       struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave;
        struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
        struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
        int i;
@@ -310,7 +320,8 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 
        if (!cpu_has_fxsr) {
                return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-                                          &target->thread.i387.fsave, 0, -1);
+                                          &target->thread.xstate->fsave, 0,
+                                          -1);
        }
 
        if (kbuf && pos == 0 && count == sizeof(env)) {
@@ -338,7 +349,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 
        if (!cpu_has_fxsr) {
                return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                         &target->thread.i387.fsave, 0, -1);
+                                         &target->thread.xstate->fsave, 0, -1);
        }
 
        if (pos > 0 || count < sizeof(env))
@@ -358,11 +369,11 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
 {
        struct task_struct *tsk = current;
+       struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
 
        unlazy_fpu(tsk);
-       tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd;
-       if (__copy_to_user(buf, &tsk->thread.i387.fsave,
-                          sizeof(struct i387_fsave_struct)))
+       fp->status = fp->swd;
+       if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
                return -1;
        return 1;
 }
@@ -370,6 +381,7 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
 static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
 {
        struct task_struct *tsk = current;
+       struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
        struct user_i387_ia32_struct env;
        int err = 0;
 
@@ -379,12 +391,12 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
        if (__copy_to_user(buf, &env, sizeof(env)))
                return -1;
 
-       err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status);
+       err |= __put_user(fx->swd, &buf->status);
        err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
        if (err)
                return -1;
 
-       if (__copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
+       if (__copy_to_user(&buf->_fxsr_env[0], fx,
                           sizeof(struct i387_fxsave_struct)))
                return -1;
        return 1;
@@ -417,7 +429,7 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
        struct task_struct *tsk = current;
 
        clear_fpu(tsk);
-       return __copy_from_user(&tsk->thread.i387.fsave, buf,
+       return __copy_from_user(&tsk->thread.xstate->fsave, buf,
                                sizeof(struct i387_fsave_struct));
 }
 
@@ -428,10 +440,10 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
        int err;
 
        clear_fpu(tsk);
-       err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
+       err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0],
                               sizeof(struct i387_fxsave_struct));
        /* mxcsr reserved bits must be masked to zero for security reasons */
-       tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
+       tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
        if (err || __copy_from_user(&env, buf, sizeof(env)))
                return 1;
        convert_to_fxsr(tsk, &env);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
new file mode 100644 (file)
index 0000000..ead24ef
--- /dev/null
@@ -0,0 +1,62 @@
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+/* Slab cache for the per-task FPU/extended state (xstate_size per object). */
+static struct kmem_cache *task_xstate_cachep;
+
+/*
+ * Copy the task struct from @src to @dst and give @dst its own copy of the
+ * FPU/extended state: thread.xstate is a pointer, so the plain struct copy
+ * alone would leave both tasks pointing at the same buffer.
+ *
+ * Returns 0 on success or -ENOMEM if the state buffer cannot be allocated;
+ * in that case dst->thread.xstate is left NULL.
+ */
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+{
+       *dst = *src;
+       dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
+       if (!dst->thread.xstate)
+               return -ENOMEM;
+       /* the FXSAVE/FXRSTOR data is expected to be 16-byte aligned */
+       WARN_ON((unsigned long)dst->thread.xstate & 15);
+       memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
+       return 0;
+}
+
+/*
+ * Release the task's FPU/extended state buffer together with the
+ * thread_info pages.
+ */
+void free_thread_info(struct thread_info *ti)
+{
+       /*
+        * xstate is NULL when arch_dup_task_struct() failed its allocation;
+        * do not hand a NULL object to kmem_cache_free().
+        *
+        * NOTE(review): this still dereferences ti->task; confirm that every
+        * caller (notably the dup_task_struct() error path) has ti->task set
+        * up before calling here.
+        */
+       if (ti->task->thread.xstate) {
+               kmem_cache_free(task_xstate_cachep, ti->task->thread.xstate);
+               ti->task->thread.xstate = NULL;
+       }
+
+       free_pages((unsigned long)(ti), get_order(THREAD_SIZE));
+}
+
+/*
+ * Create the slab cache that backs thread.xstate, sized by xstate_size.
+ */
+void arch_task_cache_init(void)
+{
+       task_xstate_cachep =
+               kmem_cache_create("task_xstate", xstate_size,
+                                 __alignof__(union thread_xstate),
+                                 SLAB_PANIC, NULL);
+}
index a3790a3f8a8399cf53aaad1557730ca7e2726d11..3890a5dd25f926241bb3e43d0d2a6014647a4bb5 100644 (file)
@@ -703,7 +703,7 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct
 
        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter > 5)
-               prefetch(&next->i387.fxsave);
+               prefetch(next->xstate);
 
        /*
         * Reload esp0.
index 4c13b1406c7049f3ad36b71e503263c0b5e92d61..b795e831afd65b226f590bbb1472e9d20f29367d 100644 (file)
@@ -682,7 +682,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter>5)
-               prefetch(&next->i387.fxsave);
+               prefetch(next->xstate);
 
        /*
         * Reload esp0, LDT and the page table pointer:
index dc4273010f2a2e098f075bd1b33c91bc3225e3d7..8d136a73ce8e37d9710bdbb1505b963a32533a6a 100644 (file)
@@ -1208,11 +1208,6 @@ void __init trap_init(void)
 #endif
        set_trap_gate(19, &simd_coprocessor_error);
 
-       /*
-        * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
-        * Generate a build-time error if the alignment is wrong.
-        */
-       BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15);
        if (cpu_has_fxsr) {
                printk(KERN_INFO "Enabling fast FPU save and restore... ");
                set_in_cr4(X86_CR4_OSFXSR);
@@ -1233,6 +1228,7 @@ void __init trap_init(void)
 
        set_bit(SYSCALL_VECTOR, used_vectors);
 
+       init_thread_xstate();
        /*
         * Should be a barrier for any external CPU state:
         */
index 6d883b13ef4f5114d3b7a3fad4e7ba48cb9cfb91..dc0cb497eec38d4a142d274f6eac7764b6ce6560 100644 (file)
@@ -1128,7 +1128,7 @@ asmlinkage void math_state_restore(void)
 
        if (!used_math())
                init_fpu(me);
-       restore_fpu_checking(&me->thread.i387.fxsave);
+       restore_fpu_checking(&me->thread.xstate->fxsave);
        task_thread_info(me)->status |= TS_USEDFPU;
        me->fpu_counter++;
 }
@@ -1163,6 +1163,10 @@ void __init trap_init(void)
        set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
 #endif
        
+       /*
+        * initialize the per thread extended state:
+        */
+        init_thread_xstate();
        /*
         * Should be a barrier for any external CPU state.
         */
index 4bab3b14539242ceeddf3a1580617f71c9227710..6e38d877ea7725fb8d94d2c91a0ee7daab2585d1 100644 (file)
@@ -678,7 +678,7 @@ int fpregs_soft_set(struct task_struct *target,
                    unsigned int pos, unsigned int count,
                    const void *kbuf, const void __user *ubuf)
 {
-       struct i387_soft_struct *s387 = &target->thread.i387.soft;
+       struct i387_soft_struct *s387 = &target->thread.xstate->soft;
        void *space = s387->st_space;
        int ret;
        int offset, other, i, tags, regnr, tag, newtop;
@@ -730,7 +730,7 @@ int fpregs_soft_get(struct task_struct *target,
                    unsigned int pos, unsigned int count,
                    void *kbuf, void __user *ubuf)
 {
-       struct i387_soft_struct *s387 = &target->thread.i387.soft;
+       struct i387_soft_struct *s387 = &target->thread.xstate->soft;
        const void *space = s387->st_space;
        int ret;
        int offset = (S387->ftop & 7) * 10, other = 80 - offset;
index a3ae28c49dddad063de9c177f7d0f13480df3da5..13488fa153e0c81dacc9370edc1f0a2128d0babd 100644 (file)
@@ -35,8 +35,8 @@
 #define SEG_EXPAND_DOWN(s)     (((s).b & ((1 << 11) | (1 << 10))) \
                                 == (1 << 10))
 
-#define I387                   (current->thread.i387)
-#define FPU_info               (I387.soft.info)
+#define I387                   (current->thread.xstate)
+#define FPU_info               (I387->soft.info)
 
 #define FPU_CS                 (*(unsigned short *) &(FPU_info->___cs))
 #define FPU_SS                 (*(unsigned short *) &(FPU_info->___ss))
 #define FPU_EIP                        (FPU_info->___eip)
 #define FPU_ORIG_EIP           (FPU_info->___orig_eip)
 
-#define FPU_lookahead           (I387.soft.lookahead)
+#define FPU_lookahead           (I387->soft.lookahead)
 
 /* nz if ip_offset and cs_selector are not to be set for the current
    instruction. */
-#define no_ip_update           (*(u_char *)&(I387.soft.no_update))
-#define FPU_rm                 (*(u_char *)&(I387.soft.rm))
+#define no_ip_update           (*(u_char *)&(I387->soft.no_update))
+#define FPU_rm                 (*(u_char *)&(I387->soft.rm))
 
 /* Number of bytes of data which can be legally accessed by the current
    instruction. This only needs to hold a number <= 108, so a byte will do. */
-#define access_limit           (*(u_char *)&(I387.soft.alimit))
+#define access_limit           (*(u_char *)&(I387->soft.alimit))
 
-#define partial_status         (I387.soft.swd)
-#define control_word           (I387.soft.cwd)
-#define fpu_tag_word           (I387.soft.twd)
-#define registers              (I387.soft.st_space)
-#define top                    (I387.soft.ftop)
+#define partial_status         (I387->soft.swd)
+#define control_word           (I387->soft.cwd)
+#define fpu_tag_word           (I387->soft.twd)
+#define registers              (I387->soft.st_space)
+#define top                    (I387->soft.ftop)
 
-#define instruction_address    (*(struct address *)&I387.soft.fip)
-#define operand_address                (*(struct address *)&I387.soft.foo)
+#define instruction_address    (*(struct address *)&I387->soft.fip)
+#define operand_address                (*(struct address *)&I387->soft.foo)
 
 #define FPU_access_ok(x,y,z)   if ( !access_ok(x,y,z) ) \
                                math_abort(FPU_info,SIGSEGV)
index 02af772a24db24f12d2fbd17cfbd53fc5d22515b..d597fe7423c98441f7ed52f8bcb0df3bd45646a9 100644 (file)
@@ -1180,8 +1180,8 @@ u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d)
                control_word |= 0xffff0040;
                partial_status = status_word() | 0xffff0000;
                fpu_tag_word |= 0xffff0000;
-               I387.soft.fcs &= ~0xf8000000;
-               I387.soft.fos |= 0xffff0000;
+               I387->soft.fcs &= ~0xf8000000;
+               I387->soft.fos |= 0xffff0000;
 #endif /* PECULIAR_486 */
                if (__copy_to_user(d, &control_word, 7 * 4))
                        FPU_abort;
index 54522b814f1c796e36f30c65632b30a35eb8d1a8..382a5fa9d492a1715f1c2b378139d6ad32c46951 100644 (file)
@@ -23,6 +23,7 @@ extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
 extern void init_fpu(struct task_struct *child);
 extern asmlinkage void math_state_restore(void);
+extern void init_thread_xstate(void);
 
 extern user_regset_active_fn fpregs_active, xfpregs_active;
 extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get;
@@ -117,24 +118,22 @@ static inline void __save_init_fpu(struct task_struct *tsk)
        /* Using "fxsaveq %0" would be the ideal choice, but is only supported
           starting with gas 2.16. */
        __asm__ __volatile__("fxsaveq %0"
-                            : "=m" (tsk->thread.i387.fxsave));
+                            : "=m" (tsk->thread.xstate->fxsave));
 #elif 0
        /* Using, as a workaround, the properly prefixed form below isn't
           accepted by any binutils version so far released, complaining that
           the same type of prefix is used twice if an extended register is
           needed for addressing (fix submitted to mainline 2005-11-21). */
        __asm__ __volatile__("rex64/fxsave %0"
-                            : "=m" (tsk->thread.i387.fxsave));
+                            : "=m" (tsk->thread.xstate->fxsave));
 #else
        /* This, however, we can work around by forcing the compiler to select
           an addressing mode that doesn't require extended registers. */
-       __asm__ __volatile__("rex64/fxsave %P2(%1)"
-                            : "=m" (tsk->thread.i387.fxsave)
-                            : "cdaSDb" (tsk),
-                               "i" (offsetof(__typeof__(*tsk),
-                                             thread.i387.fxsave)));
+       __asm__ __volatile__("rex64/fxsave (%1)"
+                            : "=m" (tsk->thread.xstate->fxsave)
+                            : "cdaSDb" (&tsk->thread.xstate->fxsave));
 #endif
-       clear_fpu_state(&tsk->thread.i387.fxsave);
+       clear_fpu_state(&tsk->thread.xstate->fxsave);
        task_thread_info(tsk)->status &= ~TS_USEDFPU;
 }
 
@@ -148,7 +147,7 @@ static inline int save_i387(struct _fpstate __user *buf)
        int err = 0;
 
        BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
-                       sizeof(tsk->thread.i387.fxsave));
+                       sizeof(tsk->thread.xstate->fxsave));
 
        if ((unsigned long)buf % 16)
                printk("save_i387: bad fpstate %p\n", buf);
@@ -164,7 +163,7 @@ static inline int save_i387(struct _fpstate __user *buf)
                task_thread_info(tsk)->status &= ~TS_USEDFPU;
                stts();
        } else {
-               if (__copy_to_user(buf, &tsk->thread.i387.fxsave,
+               if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
                                   sizeof(struct i387_fxsave_struct)))
                        return -1;
        }
@@ -201,7 +200,7 @@ static inline void restore_fpu(struct task_struct *tsk)
                "nop ; frstor %1",
                "fxrstor %1",
                X86_FEATURE_FXSR,
-               "m" ((tsk)->thread.i387.fxsave));
+               "m" (tsk->thread.xstate->fxsave));
 }
 
 /* We need a safe address that is cheap to find and that is already
@@ -225,8 +224,8 @@ static inline void __save_init_fpu(struct task_struct *tsk)
                "fxsave %[fx]\n"
                "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
                X86_FEATURE_FXSR,
-               [fx] "m" (tsk->thread.i387.fxsave),
-               [fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory");
+               [fx] "m" (tsk->thread.xstate->fxsave),
+               [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory");
        /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
           is pending.  Clear the x87 state here by setting it to fixed
           values. safe_address is a random variable that should be in L1 */
@@ -327,25 +326,25 @@ static inline void clear_fpu(struct task_struct *tsk)
 static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
 {
        if (cpu_has_fxsr) {
-               return tsk->thread.i387.fxsave.cwd;
+               return tsk->thread.xstate->fxsave.cwd;
        } else {
-               return (unsigned short)tsk->thread.i387.fsave.cwd;
+               return (unsigned short) tsk->thread.xstate->fsave.cwd;
        }
 }
 
 static inline unsigned short get_fpu_swd(struct task_struct *tsk)
 {
        if (cpu_has_fxsr) {
-               return tsk->thread.i387.fxsave.swd;
+               return tsk->thread.xstate->fxsave.swd;
        } else {
-               return (unsigned short)tsk->thread.i387.fsave.swd;
+               return (unsigned short) tsk->thread.xstate->fsave.swd;
        }
 }
 
 static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
 {
        if (cpu_has_xmm) {
-               return tsk->thread.i387.fxsave.mxcsr;
+               return tsk->thread.xstate->fxsave.mxcsr;
        } else {
                return MXCSR_DEFAULT;
        }
index eaf4548a23d2d8a7842599508737253f19f8e772..99d297885780084d7884b991ac32d2633b9a1f48 100644 (file)
@@ -354,7 +354,7 @@ struct i387_soft_struct {
        u32                     entry_eip;
 };
 
-union i387_union {
+union thread_xstate {
        struct i387_fsave_struct        fsave;
        struct i387_fxsave_struct       fxsave;
        struct i387_soft_struct         soft;
@@ -365,6 +365,7 @@ DECLARE_PER_CPU(struct orig_ist, orig_ist);
 #endif
 
 extern void print_cpu_info(struct cpuinfo_x86 *);
+extern unsigned int xstate_size;
 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 extern unsigned short num_cache_leaves;
@@ -397,8 +398,8 @@ struct thread_struct {
        unsigned long           cr2;
        unsigned long           trap_no;
        unsigned long           error_code;
-       /* Floating point info: */
-       union i387_union        i387 __attribute__((aligned(16)));;
+       /* floating point and extended processor state */
+       union thread_xstate     *xstate;
 #ifdef CONFIG_X86_32
        /* Virtual 86 mode info */
        struct vm86_struct __user *vm86_info;
index d5fd12f2abdbb141668c205dd424de8c91de90e4..407b88c170d3228642d2a9835593e4e96166eeb4 100644 (file)
@@ -1,5 +1,15 @@
+#ifndef _ASM_X86_THREAD_INFO_H
+#define _ASM_X86_THREAD_INFO_H
 #ifdef CONFIG_X86_32
 # include "thread_info_32.h"
 #else
 # include "thread_info_64.h"
 #endif
+
+#ifndef __ASSEMBLY__
+/* task-struct/xstate hooks implemented in arch/x86/kernel/process.c */
+extern void arch_task_cache_init(void);
+extern void free_thread_info(struct thread_info *ti);
+extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+#endif
+#endif /* _ASM_X86_THREAD_INFO_H */
index 4e053fa561a9c5e643336d10e68d835e8863e1b8..53185996209664c82588904ca3429844696ab9b2 100644 (file)
@@ -102,8 +102,6 @@ static inline struct thread_info *current_thread_info(void)
        __get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE)))
 #endif
 
-#define free_thread_info(info) free_pages((unsigned long)(info), get_order(THREAD_SIZE))
-
 #else /* !__ASSEMBLY__ */
 
 /* how to get the thread information struct from ASM */
index b17f5f6c2c5951011a02a4978c501024c9081e7b..ed664e874decb873d83bad65ba2eb5a549dab69d 100644 (file)
@@ -85,8 +85,6 @@ static inline struct thread_info *stack_thread_info(void)
 #define alloc_thread_info(tsk)                                         \
        ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
 
-#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER)
-
 #else /* !__ASSEMBLY__ */
 
 /* how to get the thread information struct from ASM */
index 9c042f901570e1b789d40fdbda111739c733cdb4..44a18192c420e85dd3ef8f904cbd9e1db573b553 100644 (file)
@@ -132,6 +132,10 @@ void __put_task_struct(struct task_struct *tsk)
                free_task(tsk);
 }
 
+void __attribute__((weak)) arch_task_cache_init(void)
+{
+}
+
 void __init fork_init(unsigned long mempages)
 {
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
@@ -144,6 +148,9 @@ void __init fork_init(unsigned long mempages)
                        ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL);
 #endif
 
+       /* do the arch specific task caches init */
+       arch_task_cache_init();
+
        /*
         * The default maximum number of threads is set to a safe
         * value: the thread structures can take up at most half
@@ -163,6 +170,13 @@ void __init fork_init(unsigned long mempages)
                init_task.signal->rlim[RLIMIT_NPROC];
 }
 
+int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
+                                              struct task_struct *src)
+{
+       *dst = *src;
+       return 0;
+}
+
 static struct task_struct *dup_task_struct(struct task_struct *orig)
 {
        struct task_struct *tsk;
@@ -181,15 +195,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
                return NULL;
        }
 
-       *tsk = *orig;
+       err = arch_dup_task_struct(tsk, orig);
+       if (err)
+               goto out;
+
        tsk->stack = ti;
 
        err = prop_local_init_single(&tsk->dirties);
-       if (err) {
-               free_thread_info(ti);
-               free_task_struct(tsk);
-               return NULL;
-       }
+       if (err)
+               goto out;
 
        setup_thread_stack(tsk, orig);
 
@@ -205,6 +219,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 #endif
        tsk->splice_pipe = NULL;
        return tsk;
+
+out:
+       free_thread_info(ti);
+       free_task_struct(tsk);
+       return NULL;
 }
 
 #ifdef CONFIG_MMU