arch/arc/mm/cache.c

   1 /*
   2  * ARC Cache Management
   3  *
   4  * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
   5  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
   6  *
   7  * This program is free software; you can redistribute it and/or modify
   8  * it under the terms of the GNU General Public License version 2 as
   9  * published by the Free Software Foundation.
  10  */
  11
  12 #include <linux/module.h>
  13 #include <linux/mm.h>
  14 #include <linux/sched.h>
  15 #include <linux/cache.h>
  16 #include <linux/mmu_context.h>
  17 #include <linux/syscalls.h>
  18 #include <linux/uaccess.h>
  19 #include <linux/pagemap.h>
  20 #include <asm/cacheflush.h>
  21 #include <asm/cachectl.h>
  22 #include <asm/setup.h>
  23
  24 static int l2_line_sz;
  25 int ioc_exists;
  26 volatile int slc_enable = 1, ioc_enable = 1;
  27 unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */
  28
  29 void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr,
  30                                unsigned long sz, const int cacheop);
  31
  32 void (*__dma_cache_wback_inv)(phys_addr_t start, unsigned long sz);
  33 void (*__dma_cache_inv)(phys_addr_t start, unsigned long sz);
  34 void (*__dma_cache_wback)(phys_addr_t start, unsigned long sz);
  35
  36 char *arc_cache_mumbojumbo(int c, char *buf, int len)
  37 {
  38         int n = 0;
  39         struct cpuinfo_arc_cache *p;
  40
  41 #define PR_CACHE(p, cfg, str)                                           \
  42         if (!(p)->ver)                                                  \
  43                 n += scnprintf(buf + n, len - n, str"\t\t: N/A\n");     \
  44         else                                                            \
  45                 n += scnprintf(buf + n, len - n,                        \
  46                         str"\t\t: %uK, %dway/set, %uB Line, %s%s%s\n",  \
  47                         (p)->sz_k, (p)->assoc, (p)->line_len,           \
  48                         (p)->vipt ? "VIPT" : "PIPT",                    \
  49                         (p)->alias ? " aliasing" : "",                  \
  50                         IS_USED_CFG(cfg));
  51
  52         PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
  53         PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");
  54
  55         if (!is_isa_arcv2())
  56                 return buf;
  57
  58         p = &cpuinfo_arc700[c].slc;
  59         if (p->ver)
  60                 n += scnprintf(buf + n, len - n,
  61                                "SLC\t\t: %uK, %uB Line%s\n",
  62                                p->sz_k, p->line_len, IS_USED_RUN(slc_enable));
  63
  64         if (ioc_exists)
  65                 n += scnprintf(buf + n, len - n, "IOC\t\t:%s\n",
  66                                 IS_DISABLED_RUN(ioc_enable));
  67
  68         return buf;
  69 }
  70
  71 /*
  72  * Read the Cache Build Configuration Registers, Decode them and save into
  73  * the cpuinfo structure for later use.
  74  * No Validation done here, simply read/convert the BCRs
  75  */
  76 static void read_decode_cache_bcr_arcv2(int cpu)
  77 {
  78         struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc;
  79         struct bcr_generic uncached_space;
  80         struct bcr_generic sbcr;
  81
  82         struct bcr_slc_cfg {
  83 #ifdef CONFIG_CPU_BIG_ENDIAN
  84                 unsigned int pad:24, way:2, lsz:2, sz:4;
  85 #else
  86                 unsigned int sz:4, lsz:2, way:2, pad:24;
  87 #endif
  88         } slc_cfg;
  89
  90         struct bcr_clust_cfg {
  91 #ifdef CONFIG_CPU_BIG_ENDIAN
  92                 unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
  93 #else
  94                 unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
  95 #endif
  96         } cbcr;
  97
  98         READ_BCR(ARC_REG_SLC_BCR, sbcr);
  99         if (sbcr.ver) {
 100                 READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
 101                 p_slc->ver = sbcr.ver;
 102                 p_slc->sz_k = 128 << slc_cfg.sz;
 103                 l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
 104         }
 105
 106         READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
 107         if (cbcr.c && ioc_enable)
 108                 ioc_exists = 1;
 109
 110         /* Legacy Data Uncached BCR is deprecated from v3 onwards */
 111         READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space);
 112         if (uncached_space.ver > 2)
 113                 perip_base = read_aux_reg(AUX_NON_VOL) & 0xF0000000;
 114 }
 115
 116 void read_decode_cache_bcr(void)
 117 {
 118         struct cpuinfo_arc_cache *p_ic, *p_dc;
 119         unsigned int cpu = smp_processor_id();
 120         struct bcr_cache {
 121 #ifdef CONFIG_CPU_BIG_ENDIAN
 122                 unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
 123 #else
 124                 unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
 125 #endif
 126         } ibcr, dbcr;
 127
 128         p_ic = &cpuinfo_arc700[cpu].icache;
 129         READ_BCR(ARC_REG_IC_BCR, ibcr);
 130
 131         if (!ibcr.ver)
 132                 goto dc_chk;
 133
 134         if (ibcr.ver <= 3) {
 135                 BUG_ON(ibcr.config != 3);
 136                 p_ic->assoc = 2;                /* Fixed to 2w set assoc */
 137         } else if (ibcr.ver >= 4) {
 138                 p_ic->assoc = 1 << ibcr.config; /* 1,2,4,8 */
 139         }
 140
 141         p_ic->line_len = 8 << ibcr.line_len;
 142         p_ic->sz_k = 1 << (ibcr.sz - 1);
 143         p_ic->ver = ibcr.ver;
 144         p_ic->vipt = 1;
 145         p_ic->alias = p_ic->sz_k/p_ic->assoc/TO_KB(PAGE_SIZE) > 1;
 146
 147 dc_chk:
 148         p_dc = &cpuinfo_arc700[cpu].dcache;
 149         READ_BCR(ARC_REG_DC_BCR, dbcr);
 150
 151         if (!dbcr.ver)
 152                 goto slc_chk;
 153
 154         if (dbcr.ver <= 3) {
 155                 BUG_ON(dbcr.config != 2);
 156                 p_dc->assoc = 4;                /* Fixed to 4w set assoc */
 157                 p_dc->vipt = 1;
 158                 p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1;
 159         } else if (dbcr.ver >= 4) {
 160                 p_dc->assoc = 1 << dbcr.config; /* 1,2,4,8 */
 161                 p_dc->vipt = 0;
 162                 p_dc->alias = 0;                /* PIPT so can't VIPT alias */
 163         }
 164
 165         p_dc->line_len = 16 << dbcr.line_len;
 166         p_dc->sz_k = 1 << (dbcr.sz - 1);
 167         p_dc->ver = dbcr.ver;
 168
 169 slc_chk:
 170         if (is_isa_arcv2())
 171                 read_decode_cache_bcr_arcv2(cpu);
 172 }
 173
 174 /*
 175  * Line Operation on {I,D}-Cache
 176  */
 177
 178 #define OP_INV          0x1
 179 #define OP_FLUSH        0x2
 180 #define OP_FLUSH_N_INV  0x3
 181 #define OP_INV_IC       0x4
 182
 183 /*
 184  *              I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3)
 185  *
 186  * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
 187  * The orig Cache Management Module "CDU" only required paddr to invalidate a
 188  * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
 189  * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
 190  * the exact same line.
 191  *
 192  * However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
 193  * paddr alone could not be used to correctly index the cache.
 194  *
 195  * ------------------
 196  * MMU v1/v2 (Fixed Page Size 8k)
 197  * ------------------
 198  * The solution was to provide CDU with these additional vaddr bits. These
 199  * would be bits [x:13], x would depend on cache-geometry, 13 comes from
 200  * standard page size of 8k.
 201  * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
 202  * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
 203  * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
 204  * represent the offset within cache-line. The adv of using this "clumsy"
 205  * interface for additional info was no new reg was needed in CDU programming
 206  * model.
 207  *
 208  * 17:13 represented the max num of bits passable, actual bits needed were
 209  * fewer, based on the num-of-aliases possible.
 210  * -for 2 alias possibility, only bit 13 needed (32K cache)
 211  * -for 4 alias possibility, bits 14:13 needed (64K cache)
 212  *
 213  * ------------------
 214  * MMU v3
 215  * ------------------
 216  * This ver of MMU supports variable page sizes (1k-16k): although Linux will
 217  * only support 8k (default), 16k and 4k.
 218  * However from hardware perspective, smaller page sizes aggravate aliasing
 219  * meaning more vaddr bits needed to disambiguate the cache-line-op ;
 220  * the existing scheme of piggybacking won't work for certain configurations.
 221  * Two new registers IC_PTAG and DC_PTAG introduced.
 222  * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
 223  */
 224
 225 static inline
 226 void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
 227                           unsigned long sz, const int op)
 228 {
 229         unsigned int aux_cmd;
 230         int num_lines;
 231         const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;
 232
 233         if (op == OP_INV_IC) {
 234                 aux_cmd = ARC_REG_IC_IVIL;
 235         } else {
 236                 /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
 237                 aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
 238         }
 239
 240         /* Ensure we properly floor/ceil the non-line aligned/sized requests
 241          * and have @paddr - aligned to cache line and integral @num_lines.
 242          * This however can be avoided for page sized since:
 243          *  -@paddr will be cache-line aligned already (being page aligned)
 244          *  -@sz will be integral multiple of line size (being page sized).
 245          */
 246         if (!full_page) {
 247                 sz += paddr & ~CACHE_LINE_MASK;
 248                 paddr &= CACHE_LINE_MASK;
 249                 vaddr &= CACHE_LINE_MASK;
 250         }
 251
 252         num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
 253
 254         /* MMUv2 and before: paddr contains stuffed vaddrs bits */
 255         paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
 256
 257         while (num_lines-- > 0) {
 258                 write_aux_reg(aux_cmd, paddr);
 259                 paddr += L1_CACHE_BYTES;
 260         }
 261 }
 262
 263 /*
 264  * For ARC700 MMUv3 I-cache and D-cache flushes
 265  * Also reused for HS38 aliasing I-cache configuration
 266  */
 267 static inline
 268 void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 269                           unsigned long sz, const int op)
 270 {
 271         unsigned int aux_cmd, aux_tag;
 272         int num_lines;
 273         const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;
 274
 275         if (op == OP_INV_IC) {
 276                 aux_cmd = ARC_REG_IC_IVIL;
 277                 aux_tag = ARC_REG_IC_PTAG;
 278         } else {
 279                 aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
 280                 aux_tag = ARC_REG_DC_PTAG;
 281         }
 282
 283         /* Ensure we properly floor/ceil the non-line aligned/sized requests
 284          * and have @paddr - aligned to cache line and integral @num_lines.
 285          * This however can be avoided for page sized since:
 286          *  -@paddr will be cache-line aligned already (being page aligned)
 287          *  -@sz will be integral multiple of line size (being page sized).
 288          */
 289         if (!full_page) {
 290                 sz += paddr & ~CACHE_LINE_MASK;
 291                 paddr &= CACHE_LINE_MASK;
 292                 vaddr &= CACHE_LINE_MASK;
 293         }
 294         num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
 295
 296         /*
 297          * MMUv3, cache ops require paddr in PTAG reg
 298          * if V-P const for loop, PTAG can be written once outside loop
 299          */
 300         if (full_page)
 301                 write_aux_reg(aux_tag, paddr);
 302
 303         /*
 304          * This is technically for MMU v4, using the MMU v3 programming model
 305          * Special work for HS38 aliasing I-cache configuration with PAE40
 306          *   - upper 8 bits of paddr need to be written into PTAG_HI
 307          *   - (and needs to be written before the lower 32 bits)
 308          * Note that PTAG_HI is hoisted outside the line loop
 309          */
 310         if (is_pae40_enabled() && op == OP_INV_IC)
 311                 write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);
 312
 313         while (num_lines-- > 0) {
 314                 if (!full_page) {
 315                         write_aux_reg(aux_tag, paddr);
 316                         paddr += L1_CACHE_BYTES;
 317                 }
 318
 319                 write_aux_reg(aux_cmd, vaddr);
 320                 vaddr += L1_CACHE_BYTES;
 321         }
 322 }
 323
 324 /*
 325  * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT
 326  * Here's how cache ops are implemented
 327  *
 328  *  - D-cache: only paddr needed (in DC_IVDL/DC_FLDL)
 329  *  - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL)
 330  *  - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG
 331  *    respectively, similar to MMU v3 programming model, hence
 332  *    __cache_line_loop_v3() is used)
 333  *
 334  * If PAE40 is enabled, independent of aliasing considerations, the higher bits
 335  * needs to be written into PTAG_HI
 336  */
 337 static inline
 338 void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 339                           unsigned long sz, const int cacheop)
 340 {
 341         unsigned int aux_cmd;
 342         int num_lines;
 343         const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE;
 344
 345         if (cacheop == OP_INV_IC) {
 346                 aux_cmd = ARC_REG_IC_IVIL;
 347         } else {
 348                 /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
 349                 aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
 350         }
 351
 352         /* Ensure we properly floor/ceil the non-line aligned/sized requests
 353          * and have @paddr - aligned to cache line and integral @num_lines.
 354          * This however can be avoided for page sized since:
 355          *  -@paddr will be cache-line aligned already (being page aligned)
 356          *  -@sz will be integral multiple of line size (being page sized).
 357          */
 358         if (!full_page_op) {
 359                 sz += paddr & ~CACHE_LINE_MASK;
 360                 paddr &= CACHE_LINE_MASK;
 361         }
 362
 363         num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
 364
 365         /*
 366          * For HS38 PAE40 configuration
 367          *   - upper 8 bits of paddr need to be written into PTAG_HI
 368          *   - (and needs to be written before the lower 32 bits)
 369          */
 370         if (is_pae40_enabled()) {
 371                 if (cacheop == OP_INV_IC)
 372                         /*
 373                          * Non aliasing I-cache in HS38,
 374                          * aliasing I-cache handled in __cache_line_loop_v3()
 375                          */
 376                         write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);
 377                 else
 378                         write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32);
 379         }
 380
 381         while (num_lines-- > 0) {
 382                 write_aux_reg(aux_cmd, paddr);
 383                 paddr += L1_CACHE_BYTES;
 384         }
 385 }
 386
 387 #if (CONFIG_ARC_MMU_VER < 3)
 388 #define __cache_line_loop       __cache_line_loop_v2
 389 #elif (CONFIG_ARC_MMU_VER == 3)
 390 #define __cache_line_loop       __cache_line_loop_v3
 391 #elif (CONFIG_ARC_MMU_VER > 3)
 392 #define __cache_line_loop       __cache_line_loop_v4
 393 #endif
 394
 395 #ifdef CONFIG_ARC_HAS_DCACHE
 396
 397 /***************************************************************
 398  * Machine specific helpers for Entire D-Cache or Per Line ops
 399  */
 400
 401 static inline void __before_dc_op(const int op)
 402 {
 403         if (op == OP_FLUSH_N_INV) {
 404                 /* Dcache provides 2 cmd: FLUSH or INV
 405                  * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE
 406                  * flush-n-inv is achieved by INV cmd but with IM=1
 407                  * So toggle INV sub-mode depending on op request and default
 408                  */
 409                 const unsigned int ctl = ARC_REG_DC_CTRL;
 410                 write_aux_reg(ctl, read_aux_reg(ctl) | DC_CTRL_INV_MODE_FLUSH);
 411         }
 412 }
 413
 414 static inline void __after_dc_op(const int op)
 415 {
 416         if (op & OP_FLUSH) {
 417                 const unsigned int ctl = ARC_REG_DC_CTRL;
 418                 unsigned int reg;
 419
 420                 /* flush / flush-n-inv both wait */
 421                 while ((reg = read_aux_reg(ctl)) & DC_CTRL_FLUSH_STATUS)
 422                         ;
 423
 424                 /* Switch back to default Invalidate mode */
 425                 if (op == OP_FLUSH_N_INV)
 426                         write_aux_reg(ctl, reg & ~DC_CTRL_INV_MODE_FLUSH);
 427         }
 428 }
 429
 430 /*
 431  * Operation on Entire D-Cache
 432  * @op = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV}
 433  * Note that constant propagation ensures all the checks are gone
 434  * in generated code
 435  */
 436 static inline void __dc_entire_op(const int op)
 437 {
 438         int aux;
 439
 440         __before_dc_op(op);
 441
 442         if (op & OP_INV)        /* Inv or flush-n-inv use same cmd reg */
 443                 aux = ARC_REG_DC_IVDC;
 444         else
 445                 aux = ARC_REG_DC_FLSH;
 446
 447         write_aux_reg(aux, 0x1);
 448
 449         __after_dc_op(op);
 450 }
 451
 452 /* For kernel mappings cache operation: index is same as paddr */
 453 #define __dc_line_op_k(p, sz, op)       __dc_line_op(p, p, sz, op)
 454
 455 /*
 456  * D-Cache Line ops: Per Line INV (discard or wback+discard) or FLUSH (wback)
 457  */
 458 static inline void __dc_line_op(phys_addr_t paddr, unsigned long vaddr,
 459                                 unsigned long sz, const int op)
 460 {
 461         unsigned long flags;
 462
 463         local_irq_save(flags);
 464
 465         __before_dc_op(op);
 466
 467         __cache_line_loop(paddr, vaddr, sz, op);
 468
 469         __after_dc_op(op);
 470
 471         local_irq_restore(flags);
 472 }
 473
 474 #else
 475
 476 #define __dc_entire_op(op)
 477 #define __dc_line_op(paddr, vaddr, sz, op)
 478 #define __dc_line_op_k(paddr, sz, op)
 479
 480 #endif /* CONFIG_ARC_HAS_DCACHE */
 481
 482 #ifdef CONFIG_ARC_HAS_ICACHE
 483
 484 static inline void __ic_entire_inv(void)
 485 {
 486         write_aux_reg(ARC_REG_IC_IVIC, 1);
 487         read_aux_reg(ARC_REG_IC_CTRL);  /* blocks */
 488 }
 489
 490 static inline void
 491 __ic_line_inv_vaddr_local(phys_addr_t paddr, unsigned long vaddr,
 492                           unsigned long sz)
 493 {
 494         unsigned long flags;
 495
 496         local_irq_save(flags);
 497         (*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC);
 498         local_irq_restore(flags);
 499 }
 500
 501 #ifndef CONFIG_SMP
 502
 503 #define __ic_line_inv_vaddr(p, v, s)    __ic_line_inv_vaddr_local(p, v, s)
 504
 505 #else
 506
 507 struct ic_inv_args {
 508         phys_addr_t paddr, vaddr;
 509         int sz;
 510 };
 511
 512 static void __ic_line_inv_vaddr_helper(void *info)
 513 {
 514         struct ic_inv_args *ic_inv = info;
 515
 516         __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz);
 517 }
 518
 519 static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr,
 520                                 unsigned long sz)
 521 {
 522         struct ic_inv_args ic_inv = {
 523                 .paddr = paddr,
 524                 .vaddr = vaddr,
 525                 .sz    = sz
 526         };
 527
 528         on_each_cpu(__ic_line_inv_vaddr_helper, &ic_inv, 1);
 529 }
 530
 531 #endif  /* CONFIG_SMP */
 532
 533 #else   /* !CONFIG_ARC_HAS_ICACHE */
 534
 535 #define __ic_entire_inv()
 536 #define __ic_line_inv_vaddr(pstart, vstart, sz)
 537
 538 #endif /* CONFIG_ARC_HAS_ICACHE */
 539
 540 noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op)
 541 {
 542 #ifdef CONFIG_ISA_ARCV2
 543         /*
 544          * SLC is shared between all cores and concurrent aux operations from
 545          * multiple cores need to be serialized using a spinlock
 546          * A concurrent operation can be silently ignored and/or the old/new
 547          * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY loop
 548          * below)
 549          */
 550         static DEFINE_SPINLOCK(lock);
 551         unsigned long flags;
 552         unsigned int ctrl;
 553
 554         spin_lock_irqsave(&lock, flags);
 555
 556         /*
 557          * The Region Flush operation is specified by CTRL.RGN_OP[11..9]
 558          *  - b'000 (default) is Flush,
 559          *  - b'001 is Invalidate if CTRL.IM == 0
 560          *  - b'001 is Flush-n-Invalidate if CTRL.IM == 1
 561          */
 562         ctrl = read_aux_reg(ARC_REG_SLC_CTRL);
 563
 564         /* Don't rely on default value of IM bit */
 565         if (!(op & OP_FLUSH))           /* i.e. OP_INV */
 566                 ctrl &= ~SLC_CTRL_IM;   /* clear IM: Disable flush before Inv */
 567         else
 568                 ctrl |= SLC_CTRL_IM;
 569
 570         if (op & OP_INV)
 571                 ctrl |= SLC_CTRL_RGN_OP_INV;    /* Inv or flush-n-inv */
 572         else
 573                 ctrl &= ~SLC_CTRL_RGN_OP_INV;
 574
 575         write_aux_reg(ARC_REG_SLC_CTRL, ctrl);
 576
 577         /*
 578          * Lower bits are ignored, no need to clip
 579          * END needs to be setup before START (latter triggers the operation)
 580          * END can't be same as START, so add (l2_line_sz - 1) to sz
 581          */
 582         write_aux_reg(ARC_REG_SLC_RGN_END, (paddr + sz + l2_line_sz - 1));
 583         write_aux_reg(ARC_REG_SLC_RGN_START, paddr);
 584
 585         while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);
 586
 587         spin_unlock_irqrestore(&lock, flags);
 588 #endif
 589 }
 590
 591 /***********************************************************
 592  * Exported APIs
 593  */
 594
 595 /*
 596  * Handle cache congruency of kernel and userspace mappings of page when kernel
 597  * writes-to/reads-from
 598  *
 599  * The idea is to defer flushing of kernel mapping after a WRITE, possible if:
 600  *  -dcache is NOT aliasing, hence any U/K-mappings of page are congruent
 601  *  -U-mapping doesn't exist yet for page (finalised in update_mmu_cache)
 602  *  -In SMP, if hardware caches are coherent
 603  *
 604  * There's a corollary case, where kernel READs from a userspace mapped page.
 605  * If the U-mapping is not congruent to K-mapping, former needs flushing.
 606  */
 607 void flush_dcache_page(struct page *page)
 608 {
 609         struct address_space *mapping;
 610
 611         if (!cache_is_vipt_aliasing()) {
 612                 clear_bit(PG_dc_clean, &page->flags);
 613                 return;
 614         }
 615
 616         /* don't handle anon pages here */
 617         mapping = page_mapping(page);
 618         if (!mapping)
 619                 return;
 620
 621         /*
 622          * pagecache page, file not yet mapped to userspace
 623          * Make a note that K-mapping is dirty
 624          */
 625         if (!mapping_mapped(mapping)) {
 626                 clear_bit(PG_dc_clean, &page->flags);
 627         } else if (page_mapcount(page)) {
 628
 629                 /* kernel reading from page with U-mapping */
 630                 phys_addr_t paddr = (unsigned long)page_address(page);
 631                 unsigned long vaddr = page->index << PAGE_SHIFT;
 632
 633                 if (addr_not_cache_congruent(paddr, vaddr))
 634                         __flush_dcache_page(paddr, vaddr);
 635         }
 636 }
 637 EXPORT_SYMBOL(flush_dcache_page);
 638
 639 /*
 640  * DMA ops for systems with L1 cache only
 641  * Make memory coherent with L1 cache by flushing/invalidating L1 lines
 642  */
 643 static void __dma_cache_wback_inv_l1(phys_addr_t start, unsigned long sz)
 644 {
 645         __dc_line_op_k(start, sz, OP_FLUSH_N_INV);
 646 }
 647
 648 static void __dma_cache_inv_l1(phys_addr_t start, unsigned long sz)
 649 {
 650         __dc_line_op_k(start, sz, OP_INV);
 651 }
 652
 653 static void __dma_cache_wback_l1(phys_addr_t start, unsigned long sz)
 654 {
 655         __dc_line_op_k(start, sz, OP_FLUSH);
 656 }
 657
 658 /*
 659  * DMA ops for systems with both L1 and L2 caches, but without IOC
 660  * Both L1 and L2 lines need to be explicitly flushed/invalidated
 661  */
 662 static void __dma_cache_wback_inv_slc(phys_addr_t start, unsigned long sz)
 663 {
 664         __dc_line_op_k(start, sz, OP_FLUSH_N_INV);
 665         slc_op(start, sz, OP_FLUSH_N_INV);
 666 }
 667
 668 static void __dma_cache_inv_slc(phys_addr_t start, unsigned long sz)
 669 {
 670         __dc_line_op_k(start, sz, OP_INV);
 671         slc_op(start, sz, OP_INV);
 672 }
 673
 674 static void __dma_cache_wback_slc(phys_addr_t start, unsigned long sz)
 675 {
 676         __dc_line_op_k(start, sz, OP_FLUSH);
 677         slc_op(start, sz, OP_FLUSH);
 678 }
 679
 680 /*
 681  * DMA ops for systems with IOC
 682  * IOC hardware snoops all DMA traffic keeping the caches consistent with
 683  * memory - eliding need for any explicit cache maintenance of DMA buffers
 684  */
 685 static void __dma_cache_wback_inv_ioc(phys_addr_t start, unsigned long sz) {}
 686 static void __dma_cache_inv_ioc(phys_addr_t start, unsigned long sz) {}
 687 static void __dma_cache_wback_ioc(phys_addr_t start, unsigned long sz) {}
 688
 689 /*
 690  * Exported DMA API
 691  */
 692 void dma_cache_wback_inv(phys_addr_t start, unsigned long sz)
 693 {
 694         __dma_cache_wback_inv(start, sz);
 695 }
 696 EXPORT_SYMBOL(dma_cache_wback_inv);
 697
 698 void dma_cache_inv(phys_addr_t start, unsigned long sz)
 699 {
 700         __dma_cache_inv(start, sz);
 701 }
 702 EXPORT_SYMBOL(dma_cache_inv);
 703
 704 void dma_cache_wback(phys_addr_t start, unsigned long sz)
 705 {
 706         __dma_cache_wback(start, sz);
 707 }
 708 EXPORT_SYMBOL(dma_cache_wback);
 709
 710 /*
 711  * This is API for making I/D Caches consistent when modifying
 712  * kernel code (loadable modules, kprobes, kgdb...)
 713  * This is called on insmod, with kernel virtual address for CODE of
 714  * the module. ARC cache maintenance ops require PHY address thus we
 715  * need to convert vmalloc addr to PHY addr
 716  */
 717 void flush_icache_range(unsigned long kstart, unsigned long kend)
 718 {
 719         unsigned int tot_sz;
 720
 721         WARN(kstart < TASK_SIZE, "%s() can't handle user vaddr", __func__);
 722
 723         /* Shortcut for bigger flush ranges.
 724          * Here we don't care if this was kernel virtual or phy addr
 725          */
 726         tot_sz = kend - kstart;
 727         if (tot_sz > PAGE_SIZE) {
 728                 flush_cache_all();
 729                 return;
 730         }
 731
 732         /* Case: Kernel Phy addr (0x8000_0000 onwards) */
 733         if (likely(kstart > PAGE_OFFSET)) {
 734                 /*
 735                  * The 2nd arg despite being paddr will be used to index icache
 736                  * This is OK since no alternate virtual mappings will exist
 737                  * given the callers for this case: kprobe/kgdb in built-in
 738                  * kernel code only.
 739                  */
 740                 __sync_icache_dcache(kstart, kstart, kend - kstart);
 741                 return;
 742         }
 743
 744         /*
 745          * Case: Kernel Vaddr (0x7000_0000 to 0x7fff_ffff)
 746          * (1) ARC Cache Maintenance ops only take Phy addr, hence special
 747          *     handling of kernel vaddr.
 748          *
 749          * (2) Despite @tot_sz being < PAGE_SIZE (bigger cases handled already),
 750          *     it still needs to handle  a 2 page scenario, where the range
 751          *     straddles across 2 virtual pages and hence need for loop
 752          */
 753         while (tot_sz > 0) {
 754                 unsigned int off, sz;
 755                 unsigned long phy, pfn;
 756
 757                 off = kstart % PAGE_SIZE;
 758                 pfn = vmalloc_to_pfn((void *)kstart);
 759                 phy = (pfn << PAGE_SHIFT) + off;
 760                 sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);
 761                 __sync_icache_dcache(phy, kstart, sz);
 762                 kstart += sz;
 763                 tot_sz -= sz;
 764         }
 765 }
 766 EXPORT_SYMBOL(flush_icache_range);
 767
 768 /*
 769  * General purpose helper to make I and D cache lines consistent.
 770  * @paddr is phy addr of region
 771  * @vaddr is typically user vaddr (breakpoint) or kernel vaddr (vmalloc)
 772  *    However in one instance, when called by kprobe (for a breakpt in
 773  *    builtin kernel code) @vaddr will be paddr only, meaning CDU operation will
 774  *    use a paddr to index the cache (despite VIPT). This is fine since a
 775  *    builtin kernel page will not have any virtual mappings.
 776  *    kprobe on loadable module will be kernel vaddr.
 777  */
 778 void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len)
 779 {
 780         __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV);
 781         __ic_line_inv_vaddr(paddr, vaddr, len);
 782 }
 783
 784 /* wrapper to compile time eliminate alignment checks in flush loop */
 785 void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr)
 786 {
 787         __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
 788 }
 789
 790 /*
 791  * wrapper to clearout kernel or userspace mappings of a page
 792  * For kernel mappings @vaddr == @paddr
 793  */
 794 void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr)
 795 {
 796         __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV);
 797 }
 798
 799 noinline void flush_cache_all(void)
 800 {
 801         unsigned long flags;
 802
 803         local_irq_save(flags);
 804
 805         __ic_entire_inv();
 806         __dc_entire_op(OP_FLUSH_N_INV);
 807
 808         local_irq_restore(flags);
 809
 810 }
 811
 812 #ifdef CONFIG_ARC_CACHE_VIPT_ALIASING
 813
 814 void flush_cache_mm(struct mm_struct *mm)
 815 {
 816         flush_cache_all();
 817 }
 818
 819 void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
 820                       unsigned long pfn)
 821 {
 822         unsigned int paddr = pfn << PAGE_SHIFT;
 823
 824         u_vaddr &= PAGE_MASK;
 825
 826         __flush_dcache_page(paddr, u_vaddr);
 827
 828         if (vma->vm_flags & VM_EXEC)
 829                 __inv_icache_page(paddr, u_vaddr);
 830 }
 831
 832 void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 833                        unsigned long end)
 834 {
 835         flush_cache_all();
 836 }
 837
 838 void flush_anon_page(struct vm_area_struct *vma, struct page *page,
 839                      unsigned long u_vaddr)
 840 {
 841         /* TBD: do we really need to clear the kernel mapping */
 842         __flush_dcache_page(page_address(page), u_vaddr);
 843         __flush_dcache_page(page_address(page), page_address(page));
 844
 845 }
 846
 847 #endif
 848
 849 void copy_user_highpage(struct page *to, struct page *from,
 850         unsigned long u_vaddr, struct vm_area_struct *vma)
 851 {
 852         void *kfrom = kmap_atomic(from);
 853         void *kto = kmap_atomic(to);
 854         int clean_src_k_mappings = 0;
 855
 856         /*
 857          * If SRC page was already mapped in userspace AND it's U-mapping is
 858          * not congruent with K-mapping, sync former to physical page so that
 859          * K-mapping in memcpy below, sees the right data
 860          *
 861          * Note that while @u_vaddr refers to DST page's userspace vaddr, it is
 862          * equally valid for SRC page as well
 863          *
 864          * For !VIPT cache, all of this gets compiled out as
 865          * addr_not_cache_congruent() is 0
 866          */
 867         if (page_mapcount(from) && addr_not_cache_congruent(kfrom, u_vaddr)) {
 868                 __flush_dcache_page((unsigned long)kfrom, u_vaddr);
 869                 clean_src_k_mappings = 1;
 870         }
 871
 872         copy_page(kto, kfrom);
 873
 874         /*
 875          * Mark DST page K-mapping as dirty for a later finalization by
 876          * update_mmu_cache(). Although the finalization could have been done
 877          * here as well (given that both vaddr/paddr are available).
 878          * But update_mmu_cache() already has code to do that for other
 879          * non copied user pages (e.g. read faults which wire in pagecache page
 880          * directly).
 881          */
 882         clear_bit(PG_dc_clean, &to->flags);
 883
 884         /*
 885          * if SRC was already usermapped and non-congruent to kernel mapping
 886          * sync the kernel mapping back to physical page
 887          */
 888         if (clean_src_k_mappings) {
 889                 __flush_dcache_page((unsigned long)kfrom, (unsigned long)kfrom);
 890                 set_bit(PG_dc_clean, &from->flags);
 891         } else {
 892                 clear_bit(PG_dc_clean, &from->flags);
 893         }
 894
 895         kunmap_atomic(kto);
 896         kunmap_atomic(kfrom);
 897 }
 898
 899 void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
 900 {
 901         clear_page(to);
 902         clear_bit(PG_dc_clean, &page->flags);
 903 }
 904
 905
 906 /**********************************************************************
 907  * Explicit Cache flush request from user space via syscall
 908  * Needed for JITs which generate code on the fly
 909  */
 910 SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
 911 {
 912         /* TBD: optimize this */
 913         flush_cache_all();
 914         return 0;
 915 }
 916
 917 void arc_cache_init(void)
 918 {
 919         unsigned int __maybe_unused cpu = smp_processor_id();
 920         char str[256];
 921
 922         printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
 923
 924         if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
 925                 struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
 926
 927                 if (!ic->ver)
 928                         panic("cache support enabled but non-existent cache\n");
 929
 930                 if (ic->line_len != L1_CACHE_BYTES)
 931                         panic("ICache line [%d] != kernel Config [%d]",
 932                               ic->line_len, L1_CACHE_BYTES);
 933
 934                 if (ic->ver != CONFIG_ARC_MMU_VER)
 935                         panic("Cache ver [%d] doesn't match MMU ver [%d]\n",
 936                               ic->ver, CONFIG_ARC_MMU_VER);
 937
 938                 /*
 939                  * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG
 940                  * pair to provide vaddr/paddr respectively, just as in MMU v3
 941                  */
 942                 if (is_isa_arcv2() && ic->alias)
 943                         _cache_line_loop_ic_fn = __cache_line_loop_v3;
 944                 else
 945                         _cache_line_loop_ic_fn = __cache_line_loop;
 946         }
 947
 948         if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
 949                 struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
 950
 951                 if (!dc->ver)
 952                         panic("cache support enabled but non-existent cache\n");
 953
 954                 if (dc->line_len != L1_CACHE_BYTES)
 955                         panic("DCache line [%d] != kernel Config [%d]",
 956                               dc->line_len, L1_CACHE_BYTES);
 957
 958                 /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */
 959                 if (is_isa_arcompact()) {
 960                         int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
 961
 962                         if (dc->alias && !handled)
 963                                 panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
 964                         else if (!dc->alias && handled)
 965                                 panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
 966                 }
 967         }
 968
 969         if (is_isa_arcv2() && l2_line_sz && !slc_enable) {
 970
 971                 /* IM set : flush before invalidate */
 972                 write_aux_reg(ARC_REG_SLC_CTRL,
 973                         read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_IM);
 974
 975                 write_aux_reg(ARC_REG_SLC_INVALIDATE, 1);
 976
 977                 /* Important to wait for flush to complete */
 978                 while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);
 979                 write_aux_reg(ARC_REG_SLC_CTRL,
 980                         read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_DISABLE);
 981         }
 982
 983         if (is_isa_arcv2() && ioc_exists) {
 984                 /* IO coherency base - 0x8z */
 985                 write_aux_reg(ARC_REG_IO_COH_AP0_BASE, 0x80000);
 986                 /* IO coherency aperture size - 512Mb: 0x8z-0xAz */
 987                 write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, 0x11);
 988                 /* Enable partial writes */
 989                 write_aux_reg(ARC_REG_IO_COH_PARTIAL, 1);
 990                 /* Enable IO coherency */
 991                 write_aux_reg(ARC_REG_IO_COH_ENABLE, 1);
 992
 993                 __dma_cache_wback_inv = __dma_cache_wback_inv_ioc;
 994                 __dma_cache_inv = __dma_cache_inv_ioc;
 995                 __dma_cache_wback = __dma_cache_wback_ioc;
 996         } else if (is_isa_arcv2() && l2_line_sz && slc_enable) {
 997                 __dma_cache_wback_inv = __dma_cache_wback_inv_slc;
 998                 __dma_cache_inv = __dma_cache_inv_slc;
 999                 __dma_cache_wback = __dma_cache_wback_slc;
1000         } else {
1001                 __dma_cache_wback_inv = __dma_cache_wback_inv_l1;
1002                 __dma_cache_inv = __dma_cache_inv_l1;
1003                 __dma_cache_wback = __dma_cache_wback_l1;
1004         }
1005 }