Signed-off-by: Felix Fietkau <nbd@nbd.name>
parent
b3be33f135
commit
be74158102
@@ -0,0 +1,317 @@ |
||||
From: James Hogan <james.hogan@imgtec.com>
|
||||
Date: Mon, 25 Jan 2016 21:30:00 +0000
|
||||
Subject: [PATCH] MIPS: c-r4k: Use IPI calls for CM indexed cache ops
|
||||
|
||||
The Coherence Manager (CM) can propagate address-based ("hit") cache
|
||||
operations to other cores in the coherent system, alleviating software
|
||||
of the need to use IPI calls, however indexed cache operations are not
|
||||
propagated since doing so makes no sense for separate caches.
|
||||
|
||||
r4k_on_each_cpu() previously had a special case for CONFIG_MIPS_MT_SMP,
|
||||
intended to avoid the IPIs when the only other CPUs in the system were
|
||||
other VPEs in the same core, and hence sharing the same caches. This was
|
||||
changed by commit cccf34e9411c ("MIPS: c-r4k: Fix cache flushing for MT
|
||||
cores") to apparently handle multi-core multi-VPE systems, but it
|
||||
focussed mainly on hit cache ops, so the IPI calls were still disabled
|
||||
entirely for CM systems.
|
||||
|
||||
This doesn't normally cause problems, but tests can be written to hit
|
||||
these corner cases by using multiple threads, or changing task
|
||||
affinities to force the process to migrate cores. For example the
|
||||
failure of mprotect RW->RX to globally sync icaches (via
|
||||
flush_cache_range) can be detected by modifying and mprotecting a code
|
||||
page on one core, and migrating to a different core to execute from it.
|
||||
|
||||
Most of the functions called by r4k_on_each_cpu() perform cache
|
||||
operations exclusively with a single addressing-type (virtual address vs
|
||||
indexed), so add a type argument and modify the callers to pass in
|
||||
R4K_USER (user virtual addressing), R4K_KERN (global kernel virtual
|
||||
addressing) or R4K_INDEX (index into cache).
|
||||
|
||||
local_r4k_flush_icache_range() is split up, to allow it to be called
|
||||
from the rest of the kernel, or from r4k_flush_icache_range() where it
|
||||
will choose either indexed or hit cache operations based on the size of
|
||||
the range and the cache sizes.
|
||||
|
||||
local_r4k_flush_kernel_vmap_range() is split into two functions, each of
|
||||
which uses cache operations with a single addressing-type, with
|
||||
r4k_flush_kernel_vmap_range() making the decision whether to use indexed
|
||||
cache ops or not.
|
||||
|
||||
Signed-off-by: James Hogan <james.hogan@imgtec.com>
|
||||
Cc: Ralf Baechle <ralf@linux-mips.org>
|
||||
Cc: Paul Burton <paul.burton@imgtec.com>
|
||||
Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com>
|
||||
Cc: linux-mips@linux-mips.org
|
||||
---
|
||||
|
||||
--- a/arch/mips/mm/c-r4k.c
|
||||
+++ b/arch/mips/mm/c-r4k.c
|
||||
@@ -40,6 +40,50 @@
|
||||
#include <asm/mips-cm.h>
|
||||
|
||||
/*
|
||||
+ * Bits describing what cache ops an IPI callback function may perform.
|
||||
+ *
|
||||
+ * R4K_USER - Virtual user address based cache operations.
|
||||
+ * Ineffective on other CPUs.
|
||||
+ * R4K_KERN - Virtual kernel address based cache operations (including kmap).
|
||||
+ * Effective on other CPUs.
|
||||
+ * R4K_INDEX - Index based cache operations.
|
||||
+ * Effective on other CPUs.
|
||||
+ */
|
||||
+
|
||||
+#define R4K_USER BIT(0)
|
||||
+#define R4K_KERN BIT(1)
|
||||
+#define R4K_INDEX BIT(2)
|
||||
+
|
||||
+#ifdef CONFIG_SMP
|
||||
+/* The Coherence manager propagates address-based cache ops to other cores */
|
||||
+#define r4k_hit_globalized mips_cm_present()
|
||||
+#define r4k_index_globalized 0
|
||||
+#else
|
||||
+/* If there's only 1 CPU, then all cache ops are globalized to that 1 CPU */
|
||||
+#define r4k_hit_globalized 1
|
||||
+#define r4k_index_globalized 1
|
||||
+#endif
|
||||
+
|
||||
+/**
|
||||
+ * r4k_op_needs_ipi() - Decide if a cache op needs to be done on every core.
|
||||
+ * @type: Type of cache operations (R4K_USER, R4K_KERN or R4K_INDEX).
|
||||
+ *
|
||||
+ * Returns: 1 if the cache operation @type should be done on every core in
|
||||
+ * the system.
|
||||
+ * 0 if the cache operation @type is globalized and only needs to
|
||||
+ * be performed on a single CPU.
|
||||
+ */
|
||||
+static inline bool r4k_op_needs_ipi(unsigned int type)
|
||||
+{
|
||||
+ /*
|
||||
+ * If hardware doesn't globalize the required cache ops we must use IPIs
|
||||
+ * to do so.
|
||||
+ */
|
||||
+ return (type & R4K_KERN && !r4k_hit_globalized) ||
|
||||
+ (type & R4K_INDEX && !r4k_index_globalized);
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
* Special Variant of smp_call_function for use by cache functions:
|
||||
*
|
||||
* o No return value
|
||||
@@ -48,19 +92,11 @@
|
||||
* primary cache.
|
||||
* o doesn't disable interrupts on the local CPU
|
||||
*/
|
||||
-static inline void r4k_on_each_cpu(void (*func) (void *info), void *info)
|
||||
+static inline void r4k_on_each_cpu(unsigned int type,
|
||||
+ void (*func) (void *info), void *info)
|
||||
{
|
||||
preempt_disable();
|
||||
-
|
||||
- /*
|
||||
- * The Coherent Manager propagates address-based cache ops to other
|
||||
- * cores but not index-based ops. However, r4k_on_each_cpu is used
|
||||
- * in both cases so there is no easy way to tell what kind of op is
|
||||
- * executed to the other cores. The best we can probably do is
|
||||
- * to restrict that call when a CM is not present because both
|
||||
- * CM-based SMP protocols (CMP & CPS) restrict index-based cache ops.
|
||||
- */
|
||||
- if (!mips_cm_present())
|
||||
+ if (r4k_op_needs_ipi(type))
|
||||
smp_call_function_many(&cpu_foreign_map, func, info, 1);
|
||||
func(info);
|
||||
preempt_enable();
|
||||
@@ -456,7 +492,7 @@ static inline void local_r4k___flush_cac
|
||||
|
||||
static void r4k___flush_cache_all(void)
|
||||
{
|
||||
- r4k_on_each_cpu(local_r4k___flush_cache_all, NULL);
|
||||
+ r4k_on_each_cpu(R4K_INDEX, local_r4k___flush_cache_all, NULL);
|
||||
}
|
||||
|
||||
static inline int has_valid_asid(const struct mm_struct *mm)
|
||||
@@ -503,7 +539,7 @@ static void r4k_flush_cache_range(struct
|
||||
int exec = vma->vm_flags & VM_EXEC;
|
||||
|
||||
if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc))
|
||||
- r4k_on_each_cpu(local_r4k_flush_cache_range, vma);
|
||||
+ r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_range, vma);
|
||||
}
|
||||
|
||||
static inline void local_r4k_flush_cache_mm(void * args)
|
||||
@@ -535,7 +571,7 @@ static void r4k_flush_cache_mm(struct mm
|
||||
if (!cpu_has_dc_aliases)
|
||||
return;
|
||||
|
||||
- r4k_on_each_cpu(local_r4k_flush_cache_mm, mm);
|
||||
+ r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_mm, mm);
|
||||
}
|
||||
|
||||
struct flush_cache_page_args {
|
||||
@@ -629,7 +665,7 @@ static void r4k_flush_cache_page(struct
|
||||
args.addr = addr;
|
||||
args.pfn = pfn;
|
||||
|
||||
- r4k_on_each_cpu(local_r4k_flush_cache_page, &args);
|
||||
+ r4k_on_each_cpu(R4K_KERN, local_r4k_flush_cache_page, &args);
|
||||
}
|
||||
|
||||
static inline void local_r4k_flush_data_cache_page(void * addr)
|
||||
@@ -642,18 +678,23 @@ static void r4k_flush_data_cache_page(un
|
||||
if (in_atomic())
|
||||
local_r4k_flush_data_cache_page((void *)addr);
|
||||
else
|
||||
- r4k_on_each_cpu(local_r4k_flush_data_cache_page, (void *) addr);
|
||||
+ r4k_on_each_cpu(R4K_KERN, local_r4k_flush_data_cache_page,
|
||||
+ (void *) addr);
|
||||
}
|
||||
|
||||
struct flush_icache_range_args {
|
||||
unsigned long start;
|
||||
unsigned long end;
|
||||
+ unsigned int type;
|
||||
};
|
||||
|
||||
-static inline void local_r4k_flush_icache_range(unsigned long start, unsigned long end)
|
||||
+static inline void __local_r4k_flush_icache_range(unsigned long start,
|
||||
+ unsigned long end,
|
||||
+ unsigned int type)
|
||||
{
|
||||
if (!cpu_has_ic_fills_f_dc) {
|
||||
- if (end - start >= dcache_size) {
|
||||
+ if (type == R4K_INDEX ||
|
||||
+ (type & R4K_INDEX && end - start >= dcache_size)) {
|
||||
r4k_blast_dcache();
|
||||
} else {
|
||||
R4600_HIT_CACHEOP_WAR_IMPL;
|
||||
@@ -661,7 +702,8 @@ static inline void local_r4k_flush_icach
|
||||
}
|
||||
}
|
||||
|
||||
- if (end - start > icache_size)
|
||||
+ if (type == R4K_INDEX ||
|
||||
+ (type & R4K_INDEX && end - start > icache_size))
|
||||
r4k_blast_icache();
|
||||
else {
|
||||
switch (boot_cpu_type()) {
|
||||
@@ -687,23 +729,59 @@ static inline void local_r4k_flush_icach
|
||||
#endif
|
||||
}
|
||||
|
||||
+static inline void local_r4k_flush_icache_range(unsigned long start,
|
||||
+ unsigned long end)
|
||||
+{
|
||||
+ __local_r4k_flush_icache_range(start, end, R4K_KERN | R4K_INDEX);
|
||||
+}
|
||||
+
|
||||
static inline void local_r4k_flush_icache_range_ipi(void *args)
|
||||
{
|
||||
struct flush_icache_range_args *fir_args = args;
|
||||
unsigned long start = fir_args->start;
|
||||
unsigned long end = fir_args->end;
|
||||
+ unsigned int type = fir_args->type;
|
||||
|
||||
- local_r4k_flush_icache_range(start, end);
|
||||
+ __local_r4k_flush_icache_range(start, end, type);
|
||||
}
|
||||
|
||||
static void r4k_flush_icache_range(unsigned long start, unsigned long end)
|
||||
{
|
||||
struct flush_icache_range_args args;
|
||||
+ unsigned long size, cache_size;
|
||||
|
||||
args.start = start;
|
||||
args.end = end;
|
||||
+ args.type = R4K_KERN | R4K_INDEX;
|
||||
|
||||
- r4k_on_each_cpu(local_r4k_flush_icache_range_ipi, &args);
|
||||
+ if (in_atomic()) {
|
||||
+ /*
|
||||
+ * We can't do blocking IPI calls from atomic context, so fall
|
||||
+ * back to pure address-based cache ops if they globalize.
|
||||
+ */
|
||||
+ if (!r4k_index_globalized && r4k_hit_globalized) {
|
||||
+ args.type &= ~R4K_INDEX;
|
||||
+ } else {
|
||||
+ /* Just do it locally instead. */
|
||||
+ local_r4k_flush_icache_range(start, end);
|
||||
+ instruction_hazard();
|
||||
+ return;
|
||||
+ }
|
||||
+ } else if (!r4k_index_globalized && r4k_hit_globalized) {
|
||||
+ /*
|
||||
+ * If address-based cache ops are globalized, then we may be
|
||||
+ * able to avoid the IPI for small flushes.
|
||||
+ */
|
||||
+ size = end - start;
|
||||
+ cache_size = icache_size;
|
||||
+ if (!cpu_has_ic_fills_f_dc) {
|
||||
+ size *= 2;
|
||||
+ cache_size += dcache_size;
|
||||
+ }
|
||||
+ if (size <= cache_size)
|
||||
+ args.type &= ~R4K_INDEX;
|
||||
+ }
|
||||
+ r4k_on_each_cpu(args.type, local_r4k_flush_icache_range_ipi, &args);
|
||||
instruction_hazard();
|
||||
}
|
||||
|
||||
@@ -823,7 +901,12 @@ static void local_r4k_flush_cache_sigtra
|
||||
|
||||
static void r4k_flush_cache_sigtramp(unsigned long addr)
|
||||
{
|
||||
- r4k_on_each_cpu(local_r4k_flush_cache_sigtramp, (void *) addr);
|
||||
+ /*
|
||||
+ * FIXME this is a bit broken when !r4k_hit_globalized, since the user
|
||||
+ * code probably won't be mapped on other CPUs, so if the process is
|
||||
+ * migrated, it could end up hitting stale icache lines.
|
||||
+ */
|
||||
+ r4k_on_each_cpu(R4K_USER, local_r4k_flush_cache_sigtramp, (void *)addr);
|
||||
}
|
||||
|
||||
static void r4k_flush_icache_all(void)
|
||||
@@ -837,6 +920,15 @@ struct flush_kernel_vmap_range_args {
|
||||
int size;
|
||||
};
|
||||
|
||||
+static inline void local_r4k_flush_kernel_vmap_range_index(void *args)
|
||||
+{
|
||||
+ /*
|
||||
+ * Aliases only affect the primary caches so don't bother with
|
||||
+ * S-caches or T-caches.
|
||||
+ */
|
||||
+ r4k_blast_dcache();
|
||||
+}
|
||||
+
|
||||
static inline void local_r4k_flush_kernel_vmap_range(void *args)
|
||||
{
|
||||
struct flush_kernel_vmap_range_args *vmra = args;
|
||||
@@ -847,12 +939,8 @@ static inline void local_r4k_flush_kerne
|
||||
* Aliases only affect the primary caches so don't bother with
|
||||
* S-caches or T-caches.
|
||||
*/
|
||||
- if (cpu_has_safe_index_cacheops && size >= dcache_size)
|
||||
- r4k_blast_dcache();
|
||||
- else {
|
||||
- R4600_HIT_CACHEOP_WAR_IMPL;
|
||||
- blast_dcache_range(vaddr, vaddr + size);
|
||||
- }
|
||||
+ R4600_HIT_CACHEOP_WAR_IMPL;
|
||||
+ blast_dcache_range(vaddr, vaddr + size);
|
||||
}
|
||||
|
||||
static void r4k_flush_kernel_vmap_range(unsigned long vaddr, int size)
|
||||
@@ -862,7 +950,12 @@ static void r4k_flush_kernel_vmap_range(
|
||||
args.vaddr = (unsigned long) vaddr;
|
||||
args.size = size;
|
||||
|
||||
- r4k_on_each_cpu(local_r4k_flush_kernel_vmap_range, &args);
|
||||
+ if (cpu_has_safe_index_cacheops && size >= dcache_size)
|
||||
+ r4k_on_each_cpu(R4K_INDEX,
|
||||
+ local_r4k_flush_kernel_vmap_range_index, NULL);
|
||||
+ else
|
||||
+ r4k_on_each_cpu(R4K_KERN, local_r4k_flush_kernel_vmap_range,
|
||||
+ &args);
|
||||
}
|
||||
|
||||
static inline void rm7k_erratum31(void)
|
@@ -0,0 +1,37 @@ |
||||
From: James Hogan <james.hogan@imgtec.com>
|
||||
Date: Thu, 3 Mar 2016 21:30:42 +0000
|
||||
Subject: [PATCH] MIPS: c-r4k: Exclude sibling CPUs in SMP calls
|
||||
|
||||
When performing SMP calls to foreign cores, exclude sibling CPUs from
|
||||
the provided map, as we already handle the local core on the current
|
||||
CPU. This prevents an IPI call from for example core 0, VPE 1 to VPE 0
|
||||
on the same core.
|
||||
|
||||
Signed-off-by: James Hogan <james.hogan@imgtec.com>
|
||||
Cc: Ralf Baechle <ralf@linux-mips.org>
|
||||
Cc: Paul Burton <paul.burton@imgtec.com>
|
||||
Cc: linux-mips@linux-mips.org
|
||||
---
|
||||
|
||||
--- a/arch/mips/mm/c-r4k.c
|
||||
+++ b/arch/mips/mm/c-r4k.c
|
||||
@@ -96,8 +96,17 @@ static inline void r4k_on_each_cpu(unsig
|
||||
void (*func) (void *info), void *info)
|
||||
{
|
||||
preempt_disable();
|
||||
- if (r4k_op_needs_ipi(type))
|
||||
- smp_call_function_many(&cpu_foreign_map, func, info, 1);
|
||||
+ /* cpu_foreign_map and cpu_sibling_map[] undeclared when !CONFIG_SMP */
|
||||
+#ifdef CONFIG_SMP
|
||||
+ if (r4k_op_needs_ipi(type)) {
|
||||
+ struct cpumask mask;
|
||||
+
|
||||
+ /* exclude sibling CPUs */
|
||||
+ cpumask_andnot(&mask, &cpu_foreign_map,
|
||||
+ &cpu_sibling_map[smp_processor_id()]);
|
||||
+ smp_call_function_many(&mask, func, info, 1);
|
||||
+ }
|
||||
+#endif
|
||||
func(info);
|
||||
preempt_enable();
|
||||
}
|
@@ -1,17 +0,0 @@ |
||||
Fix crash on cache flush with the MT_SMP variant
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
|
||||
--- a/arch/mips/mm/c-r4k.c
|
||||
+++ b/arch/mips/mm/c-r4k.c
|
||||
@@ -60,8 +60,10 @@ static inline void r4k_on_each_cpu(void
|
||||
* to restrict that call when a CM is not present because both
|
||||
* CM-based SMP protocols (CMP & CPS) restrict index-based cache ops.
|
||||
*/
|
||||
+#ifndef CONFIG_MIPS_MT_SMP
|
||||
if (!mips_cm_present())
|
||||
smp_call_function_many(&cpu_foreign_map, func, info, 1);
|
||||
+#endif
|
||||
func(info);
|
||||
preempt_enable();
|
||||
}
|
Loading…
Reference in new issue