Discussion:
[PATCH 2/8] x86_64: Move getcpu code from vsyscall_64.c to vdso/vma.c
Andy Lutomirski
2014-09-23 17:50:52 UTC
This is pure cut-and-paste. At this point, vsyscall_64.c contains
only code needed for vsyscall emulation, but some of the comments
and function names are still confused.

Signed-off-by: Andy Lutomirski <***@amacapital.net>
---
arch/x86/kernel/vsyscall_64.c | 57 ----------------------------------------
arch/x86/vdso/vma.c | 61 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 61 insertions(+), 57 deletions(-)

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 420c4688c78a..31ce584981f3 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -332,49 +332,6 @@ int in_gate_area_no_mm(unsigned long addr)
return (addr & PAGE_MASK) == VSYSCALL_ADDR;
}

-/*
- * Assume __initcall executes before all user space. Hopefully kmod
- * doesn't violate that. We'll find out if it does.
- */
-static void vsyscall_set_cpu(int cpu)
-{
- unsigned long d;
- unsigned long node = 0;
-#ifdef CONFIG_NUMA
- node = cpu_to_node(cpu);
-#endif
- if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
- write_rdtscp_aux((node << 12) | cpu);
-
- /*
- * Store cpu number in limit so that it can be loaded quickly
- * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node)
- */
- d = 0x0f40000000000ULL;
- d |= cpu;
- d |= (node & 0xf) << 12;
- d |= (node >> 4) << 48;
-
- write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
-}
-
-static void cpu_vsyscall_init(void *arg)
-{
- /* preemption should be already off */
- vsyscall_set_cpu(raw_smp_processor_id());
-}
-
-static int
-cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
-{
- long cpu = (long)arg;
-
- if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
- smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
-
- return NOTIFY_DONE;
-}
-
void __init map_vsyscall(void)
{
extern char __vsyscall_page;
@@ -387,17 +344,3 @@ void __init map_vsyscall(void)
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
(unsigned long)VSYSCALL_ADDR);
}
-
-static int __init vsyscall_init(void)
-{
- cpu_notifier_register_begin();
-
- on_each_cpu(cpu_vsyscall_init, NULL, 1);
- /* notifier priority > KVM */
- __hotcpu_notifier(cpu_vsyscall_notifier, 30);
-
- cpu_notifier_register_done();
-
- return 0;
-}
-__initcall(vsyscall_init);
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 970463b566cf..a155dca5edb5 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -10,12 +10,14 @@
#include <linux/init.h>
#include <linux/random.h>
#include <linux/elf.h>
+#include <linux/cpu.h>
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/page.h>
#include <asm/hpet.h>
+#include <asm/desc.h>

#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
@@ -238,3 +240,62 @@ static __init int vdso_setup(char *s)
}
__setup("vdso=", vdso_setup);
#endif
+
+#ifdef CONFIG_X86_64
+/*
+ * Assume __initcall executes before all user space. Hopefully kmod
+ * doesn't violate that. We'll find out if it does.
+ */
+static void vsyscall_set_cpu(int cpu)
+{
+ unsigned long d;
+ unsigned long node = 0;
+#ifdef CONFIG_NUMA
+ node = cpu_to_node(cpu);
+#endif
+ if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
+ write_rdtscp_aux((node << 12) | cpu);
+
+ /*
+ * Store cpu number in limit so that it can be loaded quickly
+ * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node)
+ */
+ d = 0x0f40000000000ULL;
+ d |= cpu;
+ d |= (node & 0xf) << 12;
+ d |= (node >> 4) << 48;
+
+ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+}
+
+static void cpu_vsyscall_init(void *arg)
+{
+ /* preemption should be already off */
+ vsyscall_set_cpu(raw_smp_processor_id());
+}
+
+static int
+cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
+{
+ long cpu = (long)arg;
+
+ if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
+ smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
+
+ return NOTIFY_DONE;
+}
+
+static int __init vsyscall_init(void)
+{
+ cpu_notifier_register_begin();
+
+ on_each_cpu(cpu_vsyscall_init, NULL, 1);
+ /* notifier priority > KVM */
+ __hotcpu_notifier(cpu_vsyscall_notifier, 30);
+
+ cpu_notifier_register_done();
+
+ return 0;
+}
+__initcall(vsyscall_init);
+#endif
--
1.9.3
Andy Lutomirski
2014-09-23 17:50:51 UTC
This code exists for the sole purpose of making the vsyscall page
look sort of like real userspace memory. Move it so that it lives
with the rest of the vsyscall code.

Signed-off-by: Andy Lutomirski <***@amacapital.net>
---
arch/x86/kernel/vsyscall_64.c | 49 +++++++++++++++++++++++++++++++++++++++++++
arch/x86/mm/init_64.c | 49 -------------------------------------------
2 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index e1e1e80fc6a6..420c4688c78a 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -284,6 +284,55 @@ sigsegv:
}

/*
+ * A pseudo VMA to allow ptrace access for the vsyscall page. This only
+ * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
+ * not need special handling anymore:
+ */
+static const char *gate_vma_name(struct vm_area_struct *vma)
+{
+ return "[vsyscall]";
+}
+static struct vm_operations_struct gate_vma_ops = {
+ .name = gate_vma_name,
+};
+static struct vm_area_struct gate_vma = {
+ .vm_start = VSYSCALL_ADDR,
+ .vm_end = VSYSCALL_ADDR + PAGE_SIZE,
+ .vm_page_prot = PAGE_READONLY_EXEC,
+ .vm_flags = VM_READ | VM_EXEC,
+ .vm_ops = &gate_vma_ops,
+};
+
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+#ifdef CONFIG_IA32_EMULATION
+ if (!mm || mm->context.ia32_compat)
+ return NULL;
+#endif
+ return &gate_vma;
+}
+
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
+{
+ struct vm_area_struct *vma = get_gate_vma(mm);
+
+ if (!vma)
+ return 0;
+
+ return (addr >= vma->vm_start) && (addr < vma->vm_end);
+}
+
+/*
+ * Use this when you have no reliable mm, typically from interrupt
+ * context. It is less reliable than using a task's mm and may give
+ * false positives.
+ */
+int in_gate_area_no_mm(unsigned long addr)
+{
+ return (addr & PAGE_MASK) == VSYSCALL_ADDR;
+}
+
+/*
* Assume __initcall executes before all user space. Hopefully kmod
* doesn't violate that. We'll find out if it does.
*/
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5621c47d7a1a..e4e132956285 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1181,55 +1181,6 @@ int kern_addr_valid(unsigned long addr)
return pfn_valid(pte_pfn(*pte));
}

-/*
- * A pseudo VMA to allow ptrace access for the vsyscall page. This only
- * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
- * not need special handling anymore:
- */
-static const char *gate_vma_name(struct vm_area_struct *vma)
-{
- return "[vsyscall]";
-}
-static struct vm_operations_struct gate_vma_ops = {
- .name = gate_vma_name,
-};
-static struct vm_area_struct gate_vma = {
- .vm_start = VSYSCALL_ADDR,
- .vm_end = VSYSCALL_ADDR + PAGE_SIZE,
- .vm_page_prot = PAGE_READONLY_EXEC,
- .vm_flags = VM_READ | VM_EXEC,
- .vm_ops = &gate_vma_ops,
-};
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-#ifdef CONFIG_IA32_EMULATION
- if (!mm || mm->context.ia32_compat)
- return NULL;
-#endif
- return &gate_vma;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
- struct vm_area_struct *vma = get_gate_vma(mm);
-
- if (!vma)
- return 0;
-
- return (addr >= vma->vm_start) && (addr < vma->vm_end);
-}
-
-/*
- * Use this when you have no reliable mm, typically from interrupt
- * context. It is less reliable than using a task's mm and may give
- * false positives.
- */
-int in_gate_area_no_mm(unsigned long addr)
-{
- return (addr & PAGE_MASK) == VSYSCALL_ADDR;
-}
-
static unsigned long probe_memory_block_size(void)
{
/* start from 2g */
--
1.9.3
Andy Lutomirski
2014-09-23 17:50:54 UTC
The first userspace attempt to read or write the PER_CPU segment
sets the accessed bit in the GDT. This is visible to userspace via
the LAR instruction, and it also pointlessly dirties a cache line.

Set the segment's accessed bit at boot so that userspace access to
the segment has no side effects.
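
For illustration (not part of the patch), here is a minimal userspace
sketch that reads the segment's access-rights byte with LAR; the 0x7b
selector is an assumption matching __PER_CPU_SEG (GDT entry 15, RPL 3)
on then-current kernels. With this patch, the accessed bit reads back
as 1 from boot instead of flipping on first use.

/* Sketch: observe the vgetcpu segment's accessed bit from user mode.
 * The 0x7b selector is assumed to be __PER_CPU_SEG (GDT entry 15, RPL 3).
 */
#include <stdio.h>

int main(void)
{
	unsigned long ar = 0;		/* access rights returned by LAR */
	unsigned char valid;		/* ZF: selector was readable */
	unsigned long sel = 0x7b;	/* assumed __PER_CPU_SEG */

	asm volatile("lar %[sel], %[ar]\n\t"
		     "setz %[valid]"
		     : [ar] "+r" (ar), [valid] "=qm" (valid)
		     : [sel] "r" (sel)
		     : "cc");

	if (valid)
		printf("access rights %#lx, accessed bit %lu\n",
		       ar, (ar >> 8) & 1);
	else
		printf("LAR failed; selector not usable from user mode\n");
	return 0;
}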

Signed-off-by: Andy Lutomirski <***@amacapital.net>
---
arch/x86/vdso/vma.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 261b1349acc9..0c7997467be0 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -264,7 +264,7 @@ static void vsyscall_set_cpu(int cpu)
d = (struct desc_struct) {
.limit0 = cpu | ((node & 0xf) << 12),
.limit = node >> 4,
- .type = 4, /* RO data, expand down */
+ .type = 5, /* RO data, expand down, accessed */
.dpl = 3, /* Visible to user code */
.s = 1, /* Not a system segment */
.p = 1, /* Present */
--
1.9.3
Andy Lutomirski
2014-09-23 17:50:55 UTC
IMO users ought not to be able to use 16-bit segments without going
through modify_ldt. Fortunately, it's impossible to break espfix64 by
loading the PER_CPU segment into SS, because PER_CPU is marked
read-only and SS cannot contain an RO segment, but marking PER_CPU
as 32-bit is less fragile.

Signed-off-by: Andy Lutomirski <***@amacapital.net>
---
arch/x86/vdso/vma.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 0c7997467be0..32ca60c8157b 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -268,6 +268,7 @@ static void vsyscall_set_cpu(int cpu)
.dpl = 3, /* Visible to user code */
.s = 1, /* Not a system segment */
.p = 1, /* Present */
+ .d = 1, /* 32-bit */
};

write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
--
1.9.3
Andy Lutomirski
2014-09-23 17:50:58 UTC
The vdso has supported alternatives for a while; use them instead of
a vvar to select the vgetcpu mode.
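
As a usage sketch (hedged: that glibc's sched_getcpu() resolves through
the vDSO getcpu on x86_64 is an assumption about the libc in use, not
something this patch introduces), the fast path can be exercised from
userspace like this:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	/* On x86_64, sched_getcpu() is expected to reach the vDSO's
	 * getcpu, which now picks RDTSCP vs. LSL via an alternative
	 * instead of reading a vvar, without entering the kernel. */
	printf("running on CPU %d\n", sched_getcpu());
	return 0;
}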

Signed-off-by: Andy Lutomirski <***@amacapital.net>
---
arch/x86/include/asm/vgtod.h | 21 +++++++++++++++++++++
arch/x86/include/asm/vsyscall.h | 29 -----------------------------
arch/x86/include/asm/vvar.h | 1 -
arch/x86/kernel/cpu/common.c | 10 ----------
arch/x86/kernel/vsyscall_64.c | 2 --
arch/x86/vdso/vclock_gettime.c | 3 +++
arch/x86/vdso/vgetcpu.c | 4 +++-
7 files changed, 27 insertions(+), 43 deletions(-)

diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 3c3366c2e37f..44e00370c438 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -70,4 +70,25 @@ static inline void gtod_write_end(struct vsyscall_gtod_data *s)
++s->seq;
}

+#ifdef CONFIG_X86_64
+
+#define VGETCPU_CPU_MASK 0xfff
+
+static inline unsigned int __getcpu(void)
+{
+ unsigned int p;
+
+ if (static_cpu_has(X86_FEATURE_RDTSCP)) {
+ /* Load per CPU data from RDTSCP */
+ native_read_tscp(&p);
+ } else {
+ /* Load per CPU data from GDT */
+ asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+ }
+
+ return p;
+}
+
+#endif /* CONFIG_X86_64 */
+
#endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index 2a46ca720afc..34f7d8857542 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -4,15 +4,6 @@
#include <linux/seqlock.h>
#include <uapi/asm/vsyscall.h>

-#define VGETCPU_RDTSCP 1
-#define VGETCPU_LSL 2
-
-/* kernel space (writeable) */
-extern int vgetcpu_mode;
-extern struct timezone sys_tz;
-
-#include <asm/vvar.h>
-
extern void map_vsyscall(void);

/*
@@ -21,24 +12,4 @@ extern void map_vsyscall(void);
*/
extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);

-#ifdef CONFIG_X86_64
-
-#define VGETCPU_CPU_MASK 0xfff
-
-static inline unsigned int __getcpu(void)
-{
- unsigned int p;
-
- if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
- /* Load per CPU data from RDTSCP */
- native_read_tscp(&p);
- } else {
- /* Load per CPU data from GDT */
- asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
- }
-
- return p;
-}
-#endif /* CONFIG_X86_64 */
-
#endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 5f6d40734a3b..3f32dfc2ab73 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -44,7 +44,6 @@ extern char __vvar_page;

/* DECLARE_VVAR(offset, type, name) */

-DECLARE_VVAR(16, int, vgetcpu_mode)
DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)

#undef DECLARE_VVAR
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e4ab2b42bd6f..c12b864c9d98 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -956,14 +956,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
}

#ifdef CONFIG_X86_64
-static void vgetcpu_set_mode(void)
-{
- if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
- vgetcpu_mode = VGETCPU_RDTSCP;
- else
- vgetcpu_mode = VGETCPU_LSL;
-}
-
/* May not be __init: called during resume */
static void syscall32_cpu_init(void)
{
@@ -1004,8 +996,6 @@ void __init identify_boot_cpu(void)
#ifdef CONFIG_X86_32
sysenter_setup();
enable_sep_cpu();
-#else
- vgetcpu_set_mode();
#endif
cpu_detect_tlb(&boot_cpu_data);
}
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 31ce584981f3..9d2c660cfb70 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -52,8 +52,6 @@
#define CREATE_TRACE_POINTS
#include "vsyscall_trace.h"

-DEFINE_VVAR(int, vgetcpu_mode);
-
static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;

static int __init vsyscall_setup(char *str)
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 9793322751e0..d14bb8faad16 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -11,6 +11,9 @@
* Check with readelf after changing.
*/

+/* Disable static_cpu_has debugging; it doesn't work in the vDSO */
+#undef CONFIG_X86_DEBUG_STATIC_CPU_HAS
+
#include <uapi/linux/time.h>
#include <asm/vgtod.h>
#include <asm/hpet.h>
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index add1d98d2477..fd8470c98ab5 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -5,10 +5,12 @@
* Fast user context implementation of getcpu()
*/

+/* Disable static_cpu_has debugging; it doesn't work in the vDSO */
+#undef CONFIG_X86_DEBUG_STATIC_CPU_HAS
+
#include <linux/kernel.h>
#include <linux/getcpu.h>
#include <linux/time.h>
-#include <asm/vsyscall.h>
#include <asm/vgtod.h>

notrace long
--
1.9.3
Andy Lutomirski
2014-09-23 17:50:56 UTC
I think that the jiffies vvar was once used for the vgetcpu cache.
That code is long gone, so let's just make jiffies a normal variable.

Signed-off-by: Andy Lutomirski <***@amacapital.net>
---
arch/x86/include/asm/vvar.h | 1 -
arch/x86/kernel/time.c | 2 +-
arch/x86/vdso/vgetcpu.c | 1 -
3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 5d2b9ad2c6d2..5f6d40734a3b 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -44,7 +44,6 @@ extern char __vvar_page;

/* DECLARE_VVAR(offset, type, name) */

-DECLARE_VVAR(0, volatile unsigned long, jiffies)
DECLARE_VVAR(16, int, vgetcpu_mode)
DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)

diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index bf7ef5ce29df..822773b657c5 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -23,7 +23,7 @@
#include <asm/time.h>

#ifdef CONFIG_X86_64
-__visible DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES;
+__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES;
#endif

unsigned long profile_pc(struct pt_regs *regs)
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index 2f94b039e55b..add1d98d2477 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -7,7 +7,6 @@

#include <linux/kernel.h>
#include <linux/getcpu.h>
-#include <linux/jiffies.h>
#include <linux/time.h>
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
--
1.9.3
Andy Lutomirski
2014-09-23 17:50:57 UTC
Now vdso/vma.c has a single initcall and no references to
"vsyscall".

Signed-off-by: Andy Lutomirski <***@amacapital.net>
---
arch/x86/vdso/vma.c | 54 ++++++++++++++++++-----------------------------------
1 file changed, 18 insertions(+), 36 deletions(-)

diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 32ca60c8157b..a280b11e2122 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -1,7 +1,8 @@
/*
- * Set up the VMAs to tell the VM about the vDSO.
* Copyright 2007 Andi Kleen, SUSE Labs.
* Subject to the GPL, v.2
+ *
+ * This contains most of the x86 vDSO kernel-side code.
*/
#include <linux/mm.h>
#include <linux/err.h>
@@ -11,18 +12,16 @@
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
-#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
+#include <asm/vvar.h>
#include <asm/page.h>
#include <asm/hpet.h>
#include <asm/desc.h>

#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
-
-extern unsigned short vdso_sync_cpuid;
#endif

void __init init_vdso_image(const struct vdso_image *image)
@@ -40,20 +39,6 @@ void __init init_vdso_image(const struct vdso_image *image)
image->alt_len));
}

-#if defined(CONFIG_X86_64)
-static int __init init_vdso(void)
-{
- init_vdso_image(&vdso_image_64);
-
-#ifdef CONFIG_X86_X32_ABI
- init_vdso_image(&vdso_image_x32);
-#endif
-
- return 0;
-}
-subsys_initcall(init_vdso);
-#endif
-
struct linux_binprm;

/* Put the vdso above the (randomized) stack with another randomized offset.
@@ -242,12 +227,9 @@ __setup("vdso=", vdso_setup);
#endif

#ifdef CONFIG_X86_64
-/*
- * Assume __initcall executes before all user space. Hopefully kmod
- * doesn't violate that. We'll find out if it does.
- */
-static void vsyscall_set_cpu(int cpu)
+static void vgetcpu_cpu_init(void *arg)
{
+ int cpu = smp_processor_id();
struct desc_struct d;
unsigned long node = 0;
#ifdef CONFIG_NUMA
@@ -274,34 +256,34 @@ static void vsyscall_set_cpu(int cpu)
write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}

-static void cpu_vsyscall_init(void *arg)
-{
- /* preemption should be already off */
- vsyscall_set_cpu(raw_smp_processor_id());
-}
-
static int
-cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
+vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
{
long cpu = (long)arg;

if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
- smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
+ smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);

return NOTIFY_DONE;
}

-static int __init vsyscall_init(void)
+static int __init init_vdso(void)
{
+ init_vdso_image(&vdso_image_64);
+
+#ifdef CONFIG_X86_X32_ABI
+ init_vdso_image(&vdso_image_x32);
+#endif
+
cpu_notifier_register_begin();

- on_each_cpu(cpu_vsyscall_init, NULL, 1);
+ on_each_cpu(vgetcpu_cpu_init, NULL, 1);
/* notifier priority > KVM */
- __hotcpu_notifier(cpu_vsyscall_notifier, 30);
+ __hotcpu_notifier(vgetcpu_cpu_notifier, 30);

cpu_notifier_register_done();

return 0;
}
-__initcall(vsyscall_init);
-#endif
+subsys_initcall(init_vdso);
+#endif /* CONFIG_X86_64 */
--
1.9.3
Andy Lutomirski
2014-09-23 17:50:53 UTC
This makes it easier to see what's going on. It produces exactly
the same segment descriptor as the old code.
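
To sanity-check the "exactly the same" claim, here is a small
standalone sketch (not kernel code) that packs both forms and compares
them. The bitfield layout below mirrors the kernel's desc_struct on
x86_64 and relies on GCC's little-endian bitfield allocation; that
layout assumption is part of this test, not of the patch.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct desc_sketch {			/* approximates desc_struct */
	uint16_t limit0;
	uint16_t base0;
	unsigned base1:8, type:4, s:1, dpl:2, p:1;
	unsigned limit:4, avl:1, l:1, d:1, g:1, base2:8;
};

int main(void)
{
	unsigned long cpu = 3, node = 21;	/* arbitrary test values */
	uint64_t oldval, newval;
	struct desc_sketch d;

	/* Old construction: magic number plus shifted fields */
	oldval = 0x0f40000000000ULL;
	oldval |= cpu;
	oldval |= (node & 0xf) << 12;
	oldval |= (node >> 4) << 48;

	/* New construction: named descriptor fields */
	d = (struct desc_sketch) {
		.limit0 = cpu | ((node & 0xf) << 12),
		.limit	= node >> 4,
		.type	= 4,	/* RO data, expand down */
		.dpl	= 3,	/* Visible to user code */
		.s	= 1,	/* Not a system segment */
		.p	= 1,	/* Present */
	};
	memcpy(&newval, &d, sizeof(newval));

	printf("old %#llx, new %#llx\n",
	       (unsigned long long)oldval, (unsigned long long)newval);
	assert(oldval == newval);
	return 0;
}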

Signed-off-by: Andy Lutomirski <***@amacapital.net>
---
arch/x86/vdso/vma.c | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index a155dca5edb5..261b1349acc9 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -248,7 +248,7 @@ __setup("vdso=", vdso_setup);
*/
static void vsyscall_set_cpu(int cpu)
{
- unsigned long d;
+ struct desc_struct d;
unsigned long node = 0;
#ifdef CONFIG_NUMA
node = cpu_to_node(cpu);
@@ -257,13 +257,18 @@ static void vsyscall_set_cpu(int cpu)
write_rdtscp_aux((node << 12) | cpu);

/*
- * Store cpu number in limit so that it can be loaded quickly
- * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node)
+ * Store cpu number in limit so that it can be loaded
+ * quickly in user space in vgetcpu. (12 bits for the CPU
+ * and 8 bits for the node)
*/
- d = 0x0f40000000000ULL;
- d |= cpu;
- d |= (node & 0xf) << 12;
- d |= (node >> 4) << 48;
+ d = (struct desc_struct) {
+ .limit0 = cpu | ((node & 0xf) << 12),
+ .limit = node >> 4,
+ .type = 4, /* RO data, expand down */
+ .dpl = 3, /* Visible to user code */
+ .s = 1, /* Not a system segment */
+ .p = 1, /* Present */
+ };

write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}
--
1.9.3
Andy Lutomirski
2014-10-20 17:44:13 UTC
Hi Ingo and Thomas,
Now that the merge window is closed, can you take a look at these?
There are a few more that follow (tidying up vsyscall emulation and
adding a config flag for the tiny folks), too, but I'd rather not keep
resending these in an ever-growing form.

Thanks,
Andy
This should all be straightforward. It's primarily a cleanup, but it
fixes what is arguably a bug in the PER_CPU mechanism.
The first two patches just rearrange code to disentangle the vdso from
the vsyscall emulation code.
The next three patches are tiny. Patch 3 rewrites the PER_CPU setup
code to make it comprehensible; it has no functional effect.
Patches 4 and 5 fix two problems with the PER_CPU segment that are
apparent after the code has been cleaned up. (Patch 5 fixes a glitch
that is almost, but not quite, a security bug.)
Patches 6 and 7 are just cleanups. Patch 6 removes the "jiffies" vvar,
which has been unused for years. Patch 7 cleans up some initialization
code.
Patch 8 removes the vvar that selects the getcpu mode. That vvar
has been unnecessary for a couple of years, ever since alternatives
patching started working in the vdso.
There is a small but nonzero chance that I will want to revert
patch 8 at some point. Reverting it would be needed to
disable the TSC (in the CR4.TSC sense) without breaking the vdso, unless
we patch the vdso text. That being said, I think it'll be a lot less
messy to just patch the vdso text if we do that.
Please consider applying these, or at least applying patches 1-7,
to tip/x86/vdso for 3.18.
Thanks,
Andy
Changes from way back when: this series has been floating around since June.
This version just disentangles the vdso and vsyscall code and does the
vdso cleanups/fixes. It does *not* contain the PER_CPU removal on
RDTSCP systems, since hpa mentioned that this is potentially
problematic, and, if we were to try it, it should soak in -next for a
*long* time. It is also heavily cleaned up since last time.
x86_64,vsyscall: Move all of the gate_area code to vsyscall_64.c
x86_64: Move getcpu code from vsyscall_64.c to vdso/vma.c
x86,vdso: Change the PER_CPU segment to use struct desc_struct
x86,vdso: Make the PER_CPU segment start out accessed
x86,vdso: Make the PER_CPU segment 32 bits
x86_64,vdso: Remove jiffies from the vvar page
x86_64,vdso: Clean up vgetcpu init and merge the vdso initcalls
x86,vdso: Replace vgetcpu_mode with static_cpu_has
arch/x86/include/asm/vgtod.h | 21 ++++++++++
arch/x86/include/asm/vsyscall.h | 29 --------------
arch/x86/include/asm/vvar.h | 2 -
arch/x86/kernel/cpu/common.c | 10 -----
arch/x86/kernel/time.c | 2 +-
arch/x86/kernel/vsyscall_64.c | 84 ++++++++++++++++++----------------------
arch/x86/mm/init_64.c | 49 ------------------------
arch/x86/vdso/vclock_gettime.c | 3 ++
arch/x86/vdso/vgetcpu.c | 5 ++-
arch/x86/vdso/vma.c | 85 ++++++++++++++++++++++++++++++++---------
10 files changed, 132 insertions(+), 158 deletions(-)
--
1.9.3
--
Andy Lutomirski
AMA Capital Management, LLC