/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
				 struct radeon_ib *ib,
				 uint64_t pe,
				 uint64_t addr, unsigned count,
				 uint32_t incr, uint32_t flags);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
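
/*
 * Illustrative note (not driver code): both helpers return millidegrees
 * Celsius, the convention used by the hwmon/thermal interfaces, so a raw
 * reading of 65 C comes back as 65000.  A hypothetical caller:
 *
 *	int mdeg = ci_get_temp(rdev);	// e.g. 65000 == 65.000 C
 *	pr_debug("GPU temp: %d C\n", mdeg / 1000);
 */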
/*
 * Indirect registers accessor
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
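
/*
 * Illustrative sketch (not driver code): the PCIE port registers sit
 * behind an index/data pair, so each access writes the target offset to
 * PCIE_INDEX and moves the payload through PCIE_DATA.  The dummy
 * (void)RREG32() readbacks flush the posted index write before the data
 * access, and the spinlock keeps the index/data sequence atomic against
 * concurrent callers.  A hypothetical read-modify-write (0x100100 is a
 * made-up offset):
 *
 *	u32 tmp = cik_pciep_rreg(rdev, 0x100100);
 *	cik_pciep_wreg(rdev, 0x100100, tmp | 0x1);
 */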
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	(0x0e00 << 16) | (0xc140 >> 2),
	(0x0e00 << 16) | (0xc150 >> 2),
	(0x0e00 << 16) | (0xc15c >> 2),
	(0x0e00 << 16) | (0xc168 >> 2),
	(0x0e00 << 16) | (0xc170 >> 2),
	(0x0e00 << 16) | (0xc178 >> 2),
	(0x0e00 << 16) | (0xc204 >> 2),
	(0x0e00 << 16) | (0xc2b4 >> 2),
	(0x0e00 << 16) | (0xc2b8 >> 2),
	(0x0e00 << 16) | (0xc2bc >> 2),
	(0x0e00 << 16) | (0xc2c0 >> 2),
	(0x0e00 << 16) | (0x8228 >> 2),
	(0x0e00 << 16) | (0x829c >> 2),
	(0x0e00 << 16) | (0x869c >> 2),
	(0x0600 << 16) | (0x98f4 >> 2),
	(0x0e00 << 16) | (0x98f8 >> 2),
	(0x0e00 << 16) | (0x9900 >> 2),
	(0x0e00 << 16) | (0xc260 >> 2),
	(0x0e00 << 16) | (0x90e8 >> 2),
	(0x0e00 << 16) | (0x3c000 >> 2),
	(0x0e00 << 16) | (0x3c00c >> 2),
	(0x0e00 << 16) | (0x8c1c >> 2),
	(0x0e00 << 16) | (0x9700 >> 2),
	(0x0e00 << 16) | (0xcd20 >> 2),
	(0x4e00 << 16) | (0xcd20 >> 2),
	(0x5e00 << 16) | (0xcd20 >> 2),
	(0x6e00 << 16) | (0xcd20 >> 2),
	(0x7e00 << 16) | (0xcd20 >> 2),
	(0x8e00 << 16) | (0xcd20 >> 2),
	(0x9e00 << 16) | (0xcd20 >> 2),
	(0xae00 << 16) | (0xcd20 >> 2),
	(0xbe00 << 16) | (0xcd20 >> 2),
	(0x0e00 << 16) | (0x89bc >> 2),
	(0x0e00 << 16) | (0x8900 >> 2),
	(0x0e00 << 16) | (0xc130 >> 2),
	(0x0e00 << 16) | (0xc134 >> 2),
	(0x0e00 << 16) | (0xc1fc >> 2),
	(0x0e00 << 16) | (0xc208 >> 2),
	(0x0e00 << 16) | (0xc264 >> 2),
	(0x0e00 << 16) | (0xc268 >> 2),
	(0x0e00 << 16) | (0xc26c >> 2),
	(0x0e00 << 16) | (0xc270 >> 2),
	(0x0e00 << 16) | (0xc274 >> 2),
	(0x0e00 << 16) | (0xc278 >> 2),
	(0x0e00 << 16) | (0xc27c >> 2),
	(0x0e00 << 16) | (0xc280 >> 2),
	(0x0e00 << 16) | (0xc284 >> 2),
	(0x0e00 << 16) | (0xc288 >> 2),
	(0x0e00 << 16) | (0xc28c >> 2),
	(0x0e00 << 16) | (0xc290 >> 2),
	(0x0e00 << 16) | (0xc294 >> 2),
	(0x0e00 << 16) | (0xc298 >> 2),
	(0x0e00 << 16) | (0xc29c >> 2),
	(0x0e00 << 16) | (0xc2a0 >> 2),
	(0x0e00 << 16) | (0xc2a4 >> 2),
	(0x0e00 << 16) | (0xc2a8 >> 2),
	(0x0e00 << 16) | (0xc2ac >> 2),
	(0x0e00 << 16) | (0xc2b0 >> 2),
	(0x0e00 << 16) | (0x301d0 >> 2),
	(0x0e00 << 16) | (0x30238 >> 2),
	(0x0e00 << 16) | (0x30250 >> 2),
	(0x0e00 << 16) | (0x30254 >> 2),
	(0x0e00 << 16) | (0x30258 >> 2),
	(0x0e00 << 16) | (0x3025c >> 2),
	(0x4e00 << 16) | (0xc900 >> 2),
	(0x5e00 << 16) | (0xc900 >> 2),
	(0x6e00 << 16) | (0xc900 >> 2),
	(0x7e00 << 16) | (0xc900 >> 2),
	(0x8e00 << 16) | (0xc900 >> 2),
	(0x9e00 << 16) | (0xc900 >> 2),
	(0xae00 << 16) | (0xc900 >> 2),
	(0xbe00 << 16) | (0xc900 >> 2),
	(0x4e00 << 16) | (0xc904 >> 2),
	(0x5e00 << 16) | (0xc904 >> 2),
	(0x6e00 << 16) | (0xc904 >> 2),
	(0x7e00 << 16) | (0xc904 >> 2),
	(0x8e00 << 16) | (0xc904 >> 2),
	(0x9e00 << 16) | (0xc904 >> 2),
	(0xae00 << 16) | (0xc904 >> 2),
	(0xbe00 << 16) | (0xc904 >> 2),
	(0x4e00 << 16) | (0xc908 >> 2),
	(0x5e00 << 16) | (0xc908 >> 2),
	(0x6e00 << 16) | (0xc908 >> 2),
	(0x7e00 << 16) | (0xc908 >> 2),
	(0x8e00 << 16) | (0xc908 >> 2),
	(0x9e00 << 16) | (0xc908 >> 2),
	(0xae00 << 16) | (0xc908 >> 2),
	(0xbe00 << 16) | (0xc908 >> 2),
	(0x4e00 << 16) | (0xc90c >> 2),
	(0x5e00 << 16) | (0xc90c >> 2),
	(0x6e00 << 16) | (0xc90c >> 2),
	(0x7e00 << 16) | (0xc90c >> 2),
	(0x8e00 << 16) | (0xc90c >> 2),
	(0x9e00 << 16) | (0xc90c >> 2),
	(0xae00 << 16) | (0xc90c >> 2),
	(0xbe00 << 16) | (0xc90c >> 2),
	(0x4e00 << 16) | (0xc910 >> 2),
	(0x5e00 << 16) | (0xc910 >> 2),
	(0x6e00 << 16) | (0xc910 >> 2),
	(0x7e00 << 16) | (0xc910 >> 2),
	(0x8e00 << 16) | (0xc910 >> 2),
	(0x9e00 << 16) | (0xc910 >> 2),
	(0xae00 << 16) | (0xc910 >> 2),
	(0xbe00 << 16) | (0xc910 >> 2),
	(0x0e00 << 16) | (0xc99c >> 2),
	(0x0e00 << 16) | (0x9834 >> 2),
	(0x0000 << 16) | (0x30f00 >> 2),
	(0x0001 << 16) | (0x30f00 >> 2),
	(0x0000 << 16) | (0x30f04 >> 2),
	(0x0001 << 16) | (0x30f04 >> 2),
	(0x0000 << 16) | (0x30f08 >> 2),
	(0x0001 << 16) | (0x30f08 >> 2),
	(0x0000 << 16) | (0x30f0c >> 2),
	(0x0001 << 16) | (0x30f0c >> 2),
	(0x0600 << 16) | (0x9b7c >> 2),
	(0x0e00 << 16) | (0x8a14 >> 2),
	(0x0e00 << 16) | (0x8a18 >> 2),
	(0x0600 << 16) | (0x30a00 >> 2),
	(0x0e00 << 16) | (0x8bf0 >> 2),
	(0x0e00 << 16) | (0x8bcc >> 2),
	(0x0e00 << 16) | (0x8b24 >> 2),
	(0x0e00 << 16) | (0x30a04 >> 2),
	(0x0600 << 16) | (0x30a10 >> 2),
	(0x0600 << 16) | (0x30a14 >> 2),
	(0x0600 << 16) | (0x30a18 >> 2),
	(0x0600 << 16) | (0x30a2c >> 2),
	(0x0e00 << 16) | (0xc700 >> 2),
	(0x0e00 << 16) | (0xc704 >> 2),
	(0x0e00 << 16) | (0xc708 >> 2),
	(0x0e00 << 16) | (0xc768 >> 2),
	(0x0400 << 16) | (0xc770 >> 2),
	(0x0400 << 16) | (0xc774 >> 2),
	(0x0400 << 16) | (0xc778 >> 2),
	(0x0400 << 16) | (0xc77c >> 2),
	(0x0400 << 16) | (0xc780 >> 2),
	(0x0400 << 16) | (0xc784 >> 2),
	(0x0400 << 16) | (0xc788 >> 2),
	(0x0400 << 16) | (0xc78c >> 2),
	(0x0400 << 16) | (0xc798 >> 2),
	(0x0400 << 16) | (0xc79c >> 2),
	(0x0400 << 16) | (0xc7a0 >> 2),
	(0x0400 << 16) | (0xc7a4 >> 2),
	(0x0400 << 16) | (0xc7a8 >> 2),
	(0x0400 << 16) | (0xc7ac >> 2),
	(0x0400 << 16) | (0xc7b0 >> 2),
	(0x0400 << 16) | (0xc7b4 >> 2),
	(0x0e00 << 16) | (0x9100 >> 2),
	(0x0e00 << 16) | (0x3c010 >> 2),
	(0x0e00 << 16) | (0x92a8 >> 2),
	(0x0e00 << 16) | (0x92ac >> 2),
	(0x0e00 << 16) | (0x92b4 >> 2),
	(0x0e00 << 16) | (0x92b8 >> 2),
	(0x0e00 << 16) | (0x92bc >> 2),
	(0x0e00 << 16) | (0x92c0 >> 2),
	(0x0e00 << 16) | (0x92c4 >> 2),
	(0x0e00 << 16) | (0x92c8 >> 2),
	(0x0e00 << 16) | (0x92cc >> 2),
	(0x0e00 << 16) | (0x92d0 >> 2),
	(0x0e00 << 16) | (0x8c00 >> 2),
	(0x0e00 << 16) | (0x8c04 >> 2),
	(0x0e00 << 16) | (0x8c20 >> 2),
	(0x0e00 << 16) | (0x8c38 >> 2),
	(0x0e00 << 16) | (0x8c3c >> 2),
	(0x0e00 << 16) | (0xae00 >> 2),
	(0x0e00 << 16) | (0x9604 >> 2),
	(0x0e00 << 16) | (0xac08 >> 2),
	(0x0e00 << 16) | (0xac0c >> 2),
	(0x0e00 << 16) | (0xac10 >> 2),
	(0x0e00 << 16) | (0xac14 >> 2),
	(0x0e00 << 16) | (0xac58 >> 2),
	(0x0e00 << 16) | (0xac68 >> 2),
	(0x0e00 << 16) | (0xac6c >> 2),
	(0x0e00 << 16) | (0xac70 >> 2),
	(0x0e00 << 16) | (0xac74 >> 2),
	(0x0e00 << 16) | (0xac78 >> 2),
	(0x0e00 << 16) | (0xac7c >> 2),
	(0x0e00 << 16) | (0xac80 >> 2),
	(0x0e00 << 16) | (0xac84 >> 2),
	(0x0e00 << 16) | (0xac88 >> 2),
	(0x0e00 << 16) | (0xac8c >> 2),
	(0x0e00 << 16) | (0x970c >> 2),
	(0x0e00 << 16) | (0x9714 >> 2),
	(0x0e00 << 16) | (0x9718 >> 2),
	(0x0e00 << 16) | (0x971c >> 2),
	(0x0e00 << 16) | (0x31068 >> 2),
	(0x4e00 << 16) | (0x31068 >> 2),
	(0x5e00 << 16) | (0x31068 >> 2),
	(0x6e00 << 16) | (0x31068 >> 2),
	(0x7e00 << 16) | (0x31068 >> 2),
	(0x8e00 << 16) | (0x31068 >> 2),
	(0x9e00 << 16) | (0x31068 >> 2),
	(0xae00 << 16) | (0x31068 >> 2),
	(0xbe00 << 16) | (0x31068 >> 2),
	(0x0e00 << 16) | (0xcd10 >> 2),
	(0x0e00 << 16) | (0xcd14 >> 2),
	(0x0e00 << 16) | (0x88b0 >> 2),
	(0x0e00 << 16) | (0x88b4 >> 2),
	(0x0e00 << 16) | (0x88b8 >> 2),
	(0x0e00 << 16) | (0x88bc >> 2),
	(0x0400 << 16) | (0x89c0 >> 2),
	(0x0e00 << 16) | (0x88c4 >> 2),
	(0x0e00 << 16) | (0x88c8 >> 2),
	(0x0e00 << 16) | (0x88d0 >> 2),
	(0x0e00 << 16) | (0x88d4 >> 2),
	(0x0e00 << 16) | (0x88d8 >> 2),
	(0x0e00 << 16) | (0x8980 >> 2),
	(0x0e00 << 16) | (0x30938 >> 2),
	(0x0e00 << 16) | (0x3093c >> 2),
	(0x0e00 << 16) | (0x30940 >> 2),
	(0x0e00 << 16) | (0x89a0 >> 2),
	(0x0e00 << 16) | (0x30900 >> 2),
	(0x0e00 << 16) | (0x30904 >> 2),
	(0x0e00 << 16) | (0x89b4 >> 2),
	(0x0e00 << 16) | (0x3c210 >> 2),
	(0x0e00 << 16) | (0x3c214 >> 2),
	(0x0e00 << 16) | (0x3c218 >> 2),
	(0x0e00 << 16) | (0x8904 >> 2),
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	(0x0e00 << 16) | (0xc140 >> 2),
	(0x0e00 << 16) | (0xc150 >> 2),
	(0x0e00 << 16) | (0xc15c >> 2),
	(0x0e00 << 16) | (0xc168 >> 2),
	(0x0e00 << 16) | (0xc170 >> 2),
	(0x0e00 << 16) | (0xc204 >> 2),
	(0x0e00 << 16) | (0xc2b4 >> 2),
	(0x0e00 << 16) | (0xc2b8 >> 2),
	(0x0e00 << 16) | (0xc2bc >> 2),
	(0x0e00 << 16) | (0xc2c0 >> 2),
	(0x0e00 << 16) | (0x8228 >> 2),
	(0x0e00 << 16) | (0x829c >> 2),
	(0x0e00 << 16) | (0x869c >> 2),
	(0x0600 << 16) | (0x98f4 >> 2),
	(0x0e00 << 16) | (0x98f8 >> 2),
	(0x0e00 << 16) | (0x9900 >> 2),
	(0x0e00 << 16) | (0xc260 >> 2),
	(0x0e00 << 16) | (0x90e8 >> 2),
	(0x0e00 << 16) | (0x3c000 >> 2),
	(0x0e00 << 16) | (0x3c00c >> 2),
	(0x0e00 << 16) | (0x8c1c >> 2),
	(0x0e00 << 16) | (0x9700 >> 2),
	(0x0e00 << 16) | (0xcd20 >> 2),
	(0x4e00 << 16) | (0xcd20 >> 2),
	(0x5e00 << 16) | (0xcd20 >> 2),
	(0x6e00 << 16) | (0xcd20 >> 2),
	(0x7e00 << 16) | (0xcd20 >> 2),
	(0x0e00 << 16) | (0x89bc >> 2),
	(0x0e00 << 16) | (0x8900 >> 2),
	(0x0e00 << 16) | (0xc130 >> 2),
	(0x0e00 << 16) | (0xc134 >> 2),
	(0x0e00 << 16) | (0xc1fc >> 2),
	(0x0e00 << 16) | (0xc208 >> 2),
	(0x0e00 << 16) | (0xc264 >> 2),
	(0x0e00 << 16) | (0xc268 >> 2),
	(0x0e00 << 16) | (0xc26c >> 2),
	(0x0e00 << 16) | (0xc270 >> 2),
	(0x0e00 << 16) | (0xc274 >> 2),
	(0x0e00 << 16) | (0xc28c >> 2),
	(0x0e00 << 16) | (0xc290 >> 2),
	(0x0e00 << 16) | (0xc294 >> 2),
	(0x0e00 << 16) | (0xc298 >> 2),
	(0x0e00 << 16) | (0xc2a0 >> 2),
	(0x0e00 << 16) | (0xc2a4 >> 2),
	(0x0e00 << 16) | (0xc2a8 >> 2),
	(0x0e00 << 16) | (0xc2ac >> 2),
	(0x0e00 << 16) | (0x301d0 >> 2),
	(0x0e00 << 16) | (0x30238 >> 2),
	(0x0e00 << 16) | (0x30250 >> 2),
	(0x0e00 << 16) | (0x30254 >> 2),
	(0x0e00 << 16) | (0x30258 >> 2),
	(0x0e00 << 16) | (0x3025c >> 2),
	(0x4e00 << 16) | (0xc900 >> 2),
	(0x5e00 << 16) | (0xc900 >> 2),
	(0x6e00 << 16) | (0xc900 >> 2),
	(0x7e00 << 16) | (0xc900 >> 2),
	(0x4e00 << 16) | (0xc904 >> 2),
	(0x5e00 << 16) | (0xc904 >> 2),
	(0x6e00 << 16) | (0xc904 >> 2),
	(0x7e00 << 16) | (0xc904 >> 2),
	(0x4e00 << 16) | (0xc908 >> 2),
	(0x5e00 << 16) | (0xc908 >> 2),
	(0x6e00 << 16) | (0xc908 >> 2),
	(0x7e00 << 16) | (0xc908 >> 2),
	(0x4e00 << 16) | (0xc90c >> 2),
	(0x5e00 << 16) | (0xc90c >> 2),
	(0x6e00 << 16) | (0xc90c >> 2),
	(0x7e00 << 16) | (0xc90c >> 2),
	(0x4e00 << 16) | (0xc910 >> 2),
	(0x5e00 << 16) | (0xc910 >> 2),
	(0x6e00 << 16) | (0xc910 >> 2),
	(0x7e00 << 16) | (0xc910 >> 2),
	(0x0e00 << 16) | (0xc99c >> 2),
	(0x0e00 << 16) | (0x9834 >> 2),
	(0x0000 << 16) | (0x30f00 >> 2),
	(0x0000 << 16) | (0x30f04 >> 2),
	(0x0000 << 16) | (0x30f08 >> 2),
	(0x0000 << 16) | (0x30f0c >> 2),
	(0x0600 << 16) | (0x9b7c >> 2),
	(0x0e00 << 16) | (0x8a14 >> 2),
	(0x0e00 << 16) | (0x8a18 >> 2),
	(0x0600 << 16) | (0x30a00 >> 2),
	(0x0e00 << 16) | (0x8bf0 >> 2),
	(0x0e00 << 16) | (0x8bcc >> 2),
	(0x0e00 << 16) | (0x8b24 >> 2),
	(0x0e00 << 16) | (0x30a04 >> 2),
	(0x0600 << 16) | (0x30a10 >> 2),
	(0x0600 << 16) | (0x30a14 >> 2),
	(0x0600 << 16) | (0x30a18 >> 2),
	(0x0600 << 16) | (0x30a2c >> 2),
	(0x0e00 << 16) | (0xc700 >> 2),
	(0x0e00 << 16) | (0xc704 >> 2),
	(0x0e00 << 16) | (0xc708 >> 2),
	(0x0e00 << 16) | (0xc768 >> 2),
	(0x0400 << 16) | (0xc770 >> 2),
	(0x0400 << 16) | (0xc774 >> 2),
	(0x0400 << 16) | (0xc798 >> 2),
	(0x0400 << 16) | (0xc79c >> 2),
	(0x0e00 << 16) | (0x9100 >> 2),
	(0x0e00 << 16) | (0x3c010 >> 2),
	(0x0e00 << 16) | (0x8c00 >> 2),
	(0x0e00 << 16) | (0x8c04 >> 2),
	(0x0e00 << 16) | (0x8c20 >> 2),
	(0x0e00 << 16) | (0x8c38 >> 2),
	(0x0e00 << 16) | (0x8c3c >> 2),
	(0x0e00 << 16) | (0xae00 >> 2),
	(0x0e00 << 16) | (0x9604 >> 2),
	(0x0e00 << 16) | (0xac08 >> 2),
	(0x0e00 << 16) | (0xac0c >> 2),
	(0x0e00 << 16) | (0xac10 >> 2),
	(0x0e00 << 16) | (0xac14 >> 2),
	(0x0e00 << 16) | (0xac58 >> 2),
	(0x0e00 << 16) | (0xac68 >> 2),
	(0x0e00 << 16) | (0xac6c >> 2),
	(0x0e00 << 16) | (0xac70 >> 2),
	(0x0e00 << 16) | (0xac74 >> 2),
	(0x0e00 << 16) | (0xac78 >> 2),
	(0x0e00 << 16) | (0xac7c >> 2),
	(0x0e00 << 16) | (0xac80 >> 2),
	(0x0e00 << 16) | (0xac84 >> 2),
	(0x0e00 << 16) | (0xac88 >> 2),
	(0x0e00 << 16) | (0xac8c >> 2),
	(0x0e00 << 16) | (0x970c >> 2),
	(0x0e00 << 16) | (0x9714 >> 2),
	(0x0e00 << 16) | (0x9718 >> 2),
	(0x0e00 << 16) | (0x971c >> 2),
	(0x0e00 << 16) | (0x31068 >> 2),
	(0x4e00 << 16) | (0x31068 >> 2),
	(0x5e00 << 16) | (0x31068 >> 2),
	(0x6e00 << 16) | (0x31068 >> 2),
	(0x7e00 << 16) | (0x31068 >> 2),
	(0x0e00 << 16) | (0xcd10 >> 2),
	(0x0e00 << 16) | (0xcd14 >> 2),
	(0x0e00 << 16) | (0x88b0 >> 2),
	(0x0e00 << 16) | (0x88b4 >> 2),
	(0x0e00 << 16) | (0x88b8 >> 2),
	(0x0e00 << 16) | (0x88bc >> 2),
	(0x0400 << 16) | (0x89c0 >> 2),
	(0x0e00 << 16) | (0x88c4 >> 2),
	(0x0e00 << 16) | (0x88c8 >> 2),
	(0x0e00 << 16) | (0x88d0 >> 2),
	(0x0e00 << 16) | (0x88d4 >> 2),
	(0x0e00 << 16) | (0x88d8 >> 2),
	(0x0e00 << 16) | (0x8980 >> 2),
	(0x0e00 << 16) | (0x30938 >> 2),
	(0x0e00 << 16) | (0x3093c >> 2),
	(0x0e00 << 16) | (0x30940 >> 2),
	(0x0e00 << 16) | (0x89a0 >> 2),
	(0x0e00 << 16) | (0x30900 >> 2),
	(0x0e00 << 16) | (0x30904 >> 2),
	(0x0e00 << 16) | (0x89b4 >> 2),
	(0x0e00 << 16) | (0x3e1fc >> 2),
	(0x0e00 << 16) | (0x3c210 >> 2),
	(0x0e00 << 16) | (0x3c214 >> 2),
	(0x0e00 << 16) | (0x3c218 >> 2),
	(0x0e00 << 16) | (0x8904 >> 2),
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
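
/*
 * Layout note on the two save/restore lists above, inferred from the
 * expressions themselves rather than from hardware documentation: each
 * entry packs an instance/broadcast select in the upper 16 bits (0x0e00
 * appears to broadcast, while 0x4e00..0xbe00 address individual SE/SH
 * instances) and a register dword offset in the lower 16 bits (the byte
 * offset >> 2).  The lists appear to be consumed when the RLC
 * save/restore buffer is set up (see sumo_rlc_init()) so the RLC can
 * save and restore these registers around power gating.
 */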
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
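
/*
 * How the golden-register tables below are consumed, as a minimal sketch
 * of radeon_program_register_sequence() (the real helper lives in
 * radeon_device.c; this is a paraphrase, not the authoritative copy).
 * Each table is a flat list of {offset, and_mask, or_mask} triplets:
 *
 *	for (i = 0; i < array_size; i += 3) {
 *		reg = registers[i + 0];
 *		and_mask = registers[i + 1];
 *		or_mask = registers[i + 2];
 *		if (and_mask == 0xffffffff)
 *			tmp = or_mask;
 *		else
 *			tmp = (RREG32(reg) & ~and_mask) | or_mask;
 *		WREG32(reg, tmp);
 *	}
 *
 * so a mask of 0xffffffff overwrites the register outright, while a
 * partial mask only touches the selected bits.
 */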
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
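
/*
 * Layout note: each pair above is an {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} tuple.  ci_mc_load_microcode() below walks the
 * table two words at a time and programs the MC I/O settings through
 * that index/data window before uploading the MC ucode itself.
 */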
/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active registers instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
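
/*
 * Illustrative usage (not a verbatim excerpt): compute-queue setup
 * selects the instance it wants to program, touches the instanced
 * registers, then drops back to the default instance when done, all
 * under the SRBM mutex so two threads cannot fight over SRBM_GFX_CNTL:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	// ... program instanced (e.g. CP_HQD_*) registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */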
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size, smc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default:
		BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}

		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			printk(KERN_ERR
			       "smc: error loading firmware \"%s\"\n",
			       fw_name);
			release_firmware(rdev->smc_fw);
			rdev->smc_fw = NULL;
			err = 0;
		} else if (rdev->smc_fw->size != smc_req_size) {
			printk(KERN_ERR
			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->smc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */

	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 4) {
		if (num_rbs == 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		} else if (num_rbs < 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		}
2237 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2238 switch (reg_offset) {
2240 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2241 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2242 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2243 NUM_BANKS(ADDR_SURF_16_BANK));
2246 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2247 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2248 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2249 NUM_BANKS(ADDR_SURF_16_BANK));
2252 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2253 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2254 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2255 NUM_BANKS(ADDR_SURF_16_BANK));
2258 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2259 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2260 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2261 NUM_BANKS(ADDR_SURF_16_BANK));
2264 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2266 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2267 NUM_BANKS(ADDR_SURF_16_BANK));
2270 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2271 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2272 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2273 NUM_BANKS(ADDR_SURF_8_BANK));
2276 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2277 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2278 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2279 NUM_BANKS(ADDR_SURF_4_BANK));
2282 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2283 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2284 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2285 NUM_BANKS(ADDR_SURF_16_BANK));
2288 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2289 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2290 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2291 NUM_BANKS(ADDR_SURF_16_BANK));
2294 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2296 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2297 NUM_BANKS(ADDR_SURF_16_BANK));
2300 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2301 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2302 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2303 NUM_BANKS(ADDR_SURF_16_BANK));
2306 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2307 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2308 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2309 NUM_BANKS(ADDR_SURF_16_BANK));
2312 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2313 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2314 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2315 NUM_BANKS(ADDR_SURF_8_BANK));
2318 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2319 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2320 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2321 NUM_BANKS(ADDR_SURF_4_BANK));
2327 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2329 } else if (num_pipe_configs == 2) {
2330 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2331 switch (reg_offset) {
2333 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2335 PIPE_CONFIG(ADDR_SURF_P2) |
2336 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2339 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2341 PIPE_CONFIG(ADDR_SURF_P2) |
2342 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2345 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2346 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2347 PIPE_CONFIG(ADDR_SURF_P2) |
2348 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2351 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2352 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2353 PIPE_CONFIG(ADDR_SURF_P2) |
2354 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2357 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2359 PIPE_CONFIG(ADDR_SURF_P2) |
2360 TILE_SPLIT(split_equal_to_row_size));
2363 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2364 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2367 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2368 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2369 PIPE_CONFIG(ADDR_SURF_P2) |
2370 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2373 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2374 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375 PIPE_CONFIG(ADDR_SURF_P2) |
2376 TILE_SPLIT(split_equal_to_row_size));
2379 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2382 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2383 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2386 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2388 PIPE_CONFIG(ADDR_SURF_P2) |
2389 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2392 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2393 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2394 PIPE_CONFIG(ADDR_SURF_P2) |
2395 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2398 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2399 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2400 PIPE_CONFIG(ADDR_SURF_P2) |
2401 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2404 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2405 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2408 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2410 PIPE_CONFIG(ADDR_SURF_P2) |
2411 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2415 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2416 PIPE_CONFIG(ADDR_SURF_P2) |
2417 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2421 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2422 PIPE_CONFIG(ADDR_SURF_P2) |
2423 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2426 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2427 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2430 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2431 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432 PIPE_CONFIG(ADDR_SURF_P2) |
2433 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438 PIPE_CONFIG(ADDR_SURF_P2) |
2439 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2443 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2444 PIPE_CONFIG(ADDR_SURF_P2) |
2445 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2451 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2452 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2454 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2455 switch (reg_offset) {
2457 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2458 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2459 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2460 NUM_BANKS(ADDR_SURF_16_BANK));
2463 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2464 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2465 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2466 NUM_BANKS(ADDR_SURF_16_BANK));
2469 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2471 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2472 NUM_BANKS(ADDR_SURF_16_BANK));
2475 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2477 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2478 NUM_BANKS(ADDR_SURF_16_BANK));
2481 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2484 NUM_BANKS(ADDR_SURF_16_BANK));
2487 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2489 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2490 NUM_BANKS(ADDR_SURF_16_BANK));
2493 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2496 NUM_BANKS(ADDR_SURF_8_BANK));
2499 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2500 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2501 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2502 NUM_BANKS(ADDR_SURF_16_BANK));
2505 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2506 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2507 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2508 NUM_BANKS(ADDR_SURF_16_BANK));
2511 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2512 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2513 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2514 NUM_BANKS(ADDR_SURF_16_BANK));
2517 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2518 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2519 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2520 NUM_BANKS(ADDR_SURF_16_BANK));
2523 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2525 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2526 NUM_BANKS(ADDR_SURF_16_BANK));
2529 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2531 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2532 NUM_BANKS(ADDR_SURF_16_BANK));
2535 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2536 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2537 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2538 NUM_BANKS(ADDR_SURF_8_BANK));
2544 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2547 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
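/*
 * Every supported branch above does the same work with different
 * constants: num_tile_mode_states tile-mode words and
 * num_secondary_tile_mode_states macrotile words are written out at
 * GB_TILE_MODE0 / GB_MACROTILE_MODE0 plus (reg_offset * 4), with the
 * tile modes also cached in rdev->config.cik.tile_mode_array for
 * later software lookups.
 */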
2551 * cik_select_se_sh - select which SE, SH to address
2553 * @rdev: radeon_device pointer
2554 * @se_num: shader engine to address
2555 * @sh_num: sh block to address
2557 * Select which SE, SH combinations to address. Certain
2558 * registers are instanced per SE or SH. 0xffffffff means
2559 * broadcast to all SEs or SHs (CIK).
2561 static void cik_select_se_sh(struct radeon_device *rdev,
2562 u32 se_num, u32 sh_num)
2564 u32 data = INSTANCE_BROADCAST_WRITES;
2566 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2567 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2568 else if (se_num == 0xffffffff)
2569 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2570 else if (sh_num == 0xffffffff)
2571 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2573 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2574 WREG32(GRBM_GFX_INDEX, data);
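/*
 * Usage sketch (illustrative): cik_select_se_sh(rdev, 1, 0) targets
 * SE1/SH0 only, while cik_select_se_sh(rdev, 0xffffffff, 0xffffffff)
 * restores broadcast so later writes hit every instance again, as
 * cik_setup_rb() below does after walking the SEs/SHs.
 */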
2578 * cik_create_bitmask - create a bitmask
2580 * @bit_width: length of the mask
2582 * Create a variable-length bit mask (CIK).
2583 * Returns the bitmask.
2585 static u32 cik_create_bitmask(u32 bit_width)
2589 for (i = 0; i < bit_width; i++) {
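/*
 * The loop builds the mask one bit per iteration, so the result is
 * simply the low bit_width bits set, e.g. cik_create_bitmask(4) == 0xf
 * (equivalent to (1 << bit_width) - 1 for bit_width < 32).
 */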
2597 * cik_get_rb_disabled - get the mask of disabled RBs
2599 * @rdev: radeon_device pointer
2600 * @max_rb_num: max RBs (render backends) for the asic
2601 * @se_num: number of SEs (shader engines) for the asic
2602 * @sh_per_se: number of SH blocks per SE for the asic
2604 * Calculates the bitmask of disabled RBs (CIK).
2605 * Returns the disabled RB bitmask.
2607 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2608 u32 max_rb_num, u32 se_num,
2613 data = RREG32(CC_RB_BACKEND_DISABLE);
2615 data &= BACKEND_DISABLE_MASK;
2618 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2620 data >>= BACKEND_DISABLE_SHIFT;
2622 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
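/*
 * The mask width is the RB count of a single SH
 * (max_rb_num / se_num / sh_per_se), so the value ultimately returned
 * is just the per-SH slice of the disabled-backend bits read above.
 */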
2628 * cik_setup_rb - setup the RBs on the asic
2630 * @rdev: radeon_device pointer
2631 * @se_num: number of SEs (shader engines) for the asic
2632 * @sh_per_se: number of SH blocks per SE for the asic
2633 * @max_rb_num: max RBs (render backends) for the asic
2635 * Configures per-SE/SH RB registers (CIK).
2637 static void cik_setup_rb(struct radeon_device *rdev,
2638 u32 se_num, u32 sh_per_se,
2643 u32 disabled_rbs = 0;
2644 u32 enabled_rbs = 0;
2646 for (i = 0; i < se_num; i++) {
2647 for (j = 0; j < sh_per_se; j++) {
2648 cik_select_se_sh(rdev, i, j);
2649 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2650 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2653 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2656 for (i = 0; i < max_rb_num; i++) {
2657 if (!(disabled_rbs & mask))
2658 enabled_rbs |= mask;
2662 for (i = 0; i < se_num; i++) {
2663 cik_select_se_sh(rdev, i, 0xffffffff);
2665 for (j = 0; j < sh_per_se; j++) {
2666 switch (enabled_rbs & 3) {
2668 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2671 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2675 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2680 WREG32(PA_SC_RASTER_CONFIG, data);
2682 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
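/*
 * Rough shape of the data flow: disabled_rbs packs
 * CIK_RB_BITMAP_WIDTH_PER_SH bits per SH in SE/SH order, enabled_rbs
 * is its complement over max_rb_num backends, and the final loop
 * consumes enabled_rbs two bits at a time (the & 3 above) to pick a
 * RASTER_CONFIG_RB_MAP_* encoding for each SH's slot in
 * PA_SC_RASTER_CONFIG.
 */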
2686 * cik_gpu_init - setup the 3D engine
2688 * @rdev: radeon_device pointer
2690 * Configures the 3D engine and tiling configuration
2691 * registers so that the 3D engine is usable.
2693 static void cik_gpu_init(struct radeon_device *rdev)
2695 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2696 u32 mc_shared_chmap, mc_arb_ramcfg;
2697 u32 hdp_host_path_cntl;
2701 switch (rdev->family) {
2703 rdev->config.cik.max_shader_engines = 2;
2704 rdev->config.cik.max_tile_pipes = 4;
2705 rdev->config.cik.max_cu_per_sh = 7;
2706 rdev->config.cik.max_sh_per_se = 1;
2707 rdev->config.cik.max_backends_per_se = 2;
2708 rdev->config.cik.max_texture_channel_caches = 4;
2709 rdev->config.cik.max_gprs = 256;
2710 rdev->config.cik.max_gs_threads = 32;
2711 rdev->config.cik.max_hw_contexts = 8;
2713 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2714 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2715 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2716 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2717 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2720 rdev->config.cik.max_shader_engines = 1;
2721 rdev->config.cik.max_tile_pipes = 4;
2722 if ((rdev->pdev->device == 0x1304) ||
2723 (rdev->pdev->device == 0x1305) ||
2724 (rdev->pdev->device == 0x130C) ||
2725 (rdev->pdev->device == 0x130F) ||
2726 (rdev->pdev->device == 0x1310) ||
2727 (rdev->pdev->device == 0x1311) ||
2728 (rdev->pdev->device == 0x131C)) {
2729 rdev->config.cik.max_cu_per_sh = 8;
2730 rdev->config.cik.max_backends_per_se = 2;
2731 } else if ((rdev->pdev->device == 0x1309) ||
2732 (rdev->pdev->device == 0x130A) ||
2733 (rdev->pdev->device == 0x130D) ||
2734 (rdev->pdev->device == 0x1313) ||
2735 (rdev->pdev->device == 0x131D)) {
2736 rdev->config.cik.max_cu_per_sh = 6;
2737 rdev->config.cik.max_backends_per_se = 2;
2738 } else if ((rdev->pdev->device == 0x1306) ||
2739 (rdev->pdev->device == 0x1307) ||
2740 (rdev->pdev->device == 0x130B) ||
2741 (rdev->pdev->device == 0x130E) ||
2742 (rdev->pdev->device == 0x1315) ||
2743 (rdev->pdev->device == 0x131B)) {
2744 rdev->config.cik.max_cu_per_sh = 4;
2745 rdev->config.cik.max_backends_per_se = 1;
2747 rdev->config.cik.max_cu_per_sh = 3;
2748 rdev->config.cik.max_backends_per_se = 1;
2750 rdev->config.cik.max_sh_per_se = 1;
2751 rdev->config.cik.max_texture_channel_caches = 4;
2752 rdev->config.cik.max_gprs = 256;
2753 rdev->config.cik.max_gs_threads = 16;
2754 rdev->config.cik.max_hw_contexts = 8;
2756 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2757 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2758 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2759 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2760 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2764 rdev->config.cik.max_shader_engines = 1;
2765 rdev->config.cik.max_tile_pipes = 2;
2766 rdev->config.cik.max_cu_per_sh = 2;
2767 rdev->config.cik.max_sh_per_se = 1;
2768 rdev->config.cik.max_backends_per_se = 1;
2769 rdev->config.cik.max_texture_channel_caches = 2;
2770 rdev->config.cik.max_gprs = 256;
2771 rdev->config.cik.max_gs_threads = 16;
2772 rdev->config.cik.max_hw_contexts = 8;
2774 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2775 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2776 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2777 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2778 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2782 /* Initialize HDP */
2783 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2784 WREG32((0x2c14 + j), 0x00000000);
2785 WREG32((0x2c18 + j), 0x00000000);
2786 WREG32((0x2c1c + j), 0x00000000);
2787 WREG32((0x2c20 + j), 0x00000000);
2788 WREG32((0x2c24 + j), 0x00000000);
2791 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2793 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2795 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2796 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2798 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2799 rdev->config.cik.mem_max_burst_length_bytes = 256;
2800 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2801 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2802 if (rdev->config.cik.mem_row_size_in_kb > 4)
2803 rdev->config.cik.mem_row_size_in_kb = 4;
2804 /* XXX use MC settings? */
2805 rdev->config.cik.shader_engine_tile_size = 32;
2806 rdev->config.cik.num_gpus = 1;
2807 rdev->config.cik.multi_gpu_tile_size = 64;
2809 /* fix up row size */
2810 gb_addr_config &= ~ROW_SIZE_MASK;
2811 switch (rdev->config.cik.mem_row_size_in_kb) {
2814 gb_addr_config |= ROW_SIZE(0);
2817 gb_addr_config |= ROW_SIZE(1);
2820 gb_addr_config |= ROW_SIZE(2);
2824 /* setup tiling info dword. gb_addr_config is not adequate since it does
2825 * not have bank info, so create a custom tiling dword.
2826 * bits 3:0 num_pipes
2827 * bits 7:4 num_banks
2828 * bits 11:8 group_size
2829 * bits 15:12 row_size
2831 rdev->config.cik.tile_config = 0;
2832 switch (rdev->config.cik.num_tile_pipes) {
2834 rdev->config.cik.tile_config |= (0 << 0);
2837 rdev->config.cik.tile_config |= (1 << 0);
2840 rdev->config.cik.tile_config |= (2 << 0);
2844 /* XXX what about 12? */
2845 rdev->config.cik.tile_config |= (3 << 0);
2848 rdev->config.cik.tile_config |=
2849 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
2850 rdev->config.cik.tile_config |=
2851 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2852 rdev->config.cik.tile_config |=
2853 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
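/*
 * Decoding the dword is plain shift-and-mask, per the bit layout
 * documented above, e.g.:
 *   num_pipes  =  tile_config        & 0xf;
 *   num_banks  = (tile_config >> 4)  & 0xf;
 *   group_size = (tile_config >> 8)  & 0xf;
 *   row_size   = (tile_config >> 12) & 0xf;
 */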
2855 WREG32(GB_ADDR_CONFIG, gb_addr_config);
2856 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2857 WREG32(DMIF_ADDR_CALC, gb_addr_config);
2858 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2859 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2860 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2861 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2862 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2864 cik_tiling_mode_table_init(rdev);
2866 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2867 rdev->config.cik.max_sh_per_se,
2868 rdev->config.cik.max_backends_per_se);
2870 /* set HW defaults for 3D engine */
2871 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2873 WREG32(SX_DEBUG_1, 0x20);
2875 WREG32(TA_CNTL_AUX, 0x00010000);
2877 tmp = RREG32(SPI_CONFIG_CNTL);
2879 WREG32(SPI_CONFIG_CNTL, tmp);
2881 WREG32(SQ_CONFIG, 1);
2883 WREG32(DB_DEBUG, 0);
2885 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2887 WREG32(DB_DEBUG2, tmp);
2889 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2891 WREG32(DB_DEBUG3, tmp);
2893 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2895 WREG32(CB_HW_CONTROL, tmp);
2897 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2899 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2900 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2901 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2902 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2904 WREG32(VGT_NUM_INSTANCES, 1);
2906 WREG32(CP_PERFMON_CNTL, 0);
2908 WREG32(SQ_CONFIG, 0);
2910 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2911 FORCE_EOV_MAX_REZ_CNT(255)));
2913 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2914 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2916 WREG32(VGT_GS_VERTEX_REUSE, 16);
2917 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2919 tmp = RREG32(HDP_MISC_CNTL);
2920 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2921 WREG32(HDP_MISC_CNTL, tmp);
2923 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2924 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2926 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2927 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2933 * GPU scratch register helper functions.
2936 * cik_scratch_init - setup driver info for CP scratch regs
2938 * @rdev: radeon_device pointer
2940 * Set up the number and offset of the CP scratch registers.
2941 * NOTE: use of CP scratch registers is a legacy interface and
2942 * is not used by default on newer asics (r6xx+). On newer asics,
2943 * memory buffers are used for fences rather than scratch regs.
2945 static void cik_scratch_init(struct radeon_device *rdev)
2949 rdev->scratch.num_reg = 7;
2950 rdev->scratch.reg_base = SCRATCH_REG0;
2951 for (i = 0; i < rdev->scratch.num_reg; i++) {
2952 rdev->scratch.free[i] = true;
2953 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
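/*
 * Consumers pair radeon_scratch_get()/radeon_scratch_free() around a
 * register use, exactly as cik_ring_test() below does for its
 * 0xDEADBEEF write/readback check.
 */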
2958 * cik_ring_test - basic gfx ring test
2960 * @rdev: radeon_device pointer
2961 * @ring: radeon_ring structure holding ring information
2963 * Allocate a scratch register and write to it using the gfx ring (CIK).
2964 * Provides a basic gfx ring test to verify that the ring is working.
2965 * Used by cik_cp_gfx_resume().
2966 * Returns 0 on success, error on failure.
2968 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2975 r = radeon_scratch_get(rdev, &scratch);
2977 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2980 WREG32(scratch, 0xCAFEDEAD);
2981 r = radeon_ring_lock(rdev, ring, 3);
2983 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2984 radeon_scratch_free(rdev, scratch);
2987 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2988 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2989 radeon_ring_write(ring, 0xDEADBEEF);
2990 radeon_ring_unlock_commit(rdev, ring);
2992 for (i = 0; i < rdev->usec_timeout; i++) {
2993 tmp = RREG32(scratch);
2994 if (tmp == 0xDEADBEEF)
2998 if (i < rdev->usec_timeout) {
2999 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3001 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3002 ring->idx, scratch, tmp);
3005 radeon_scratch_free(rdev, scratch);
3010 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3012 * @rdev: radeon_device pointer
3013 * @fence: radeon fence object
3015 * Emits a fence sequence number on the gfx ring and flushes
3018 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3019 struct radeon_fence *fence)
3021 struct radeon_ring *ring = &rdev->ring[fence->ring];
3022 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3024 /* EVENT_WRITE_EOP - flush caches, send int */
3025 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3026 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3028 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3030 radeon_ring_write(ring, addr & 0xfffffffc);
3031 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3032 radeon_ring_write(ring, fence->seq);
3033 radeon_ring_write(ring, 0);
3035 /* We should be using the new WAIT_REG_MEM special op packet here
3036 * but it causes the CP to hang
3038 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3039 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3040 WRITE_DATA_DST_SEL(0)));
3041 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3042 radeon_ring_write(ring, 0);
3043 radeon_ring_write(ring, 0);
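/*
 * In the EOP packet above, DATA_SEL(1)/INT_SEL(2) select a 32-bit
 * fence write with an interrupt once the write is confirmed; the
 * trailing WRITE_DATA to HDP_MEM_COHERENCY_FLUSH_CNTL stands in for
 * the HDP flush the avoided WAIT_REG_MEM packet would have provided.
 */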
3047 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3049 * @rdev: radeon_device pointer
3050 * @fence: radeon fence object
3052 * Emits a fence sequence number on the compute ring and flushes
3055 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3056 struct radeon_fence *fence)
3058 struct radeon_ring *ring = &rdev->ring[fence->ring];
3059 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3061 /* RELEASE_MEM - flush caches, send int */
3062 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3063 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3065 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3067 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3068 radeon_ring_write(ring, addr & 0xfffffffc);
3069 radeon_ring_write(ring, upper_32_bits(addr));
3070 radeon_ring_write(ring, fence->seq);
3071 radeon_ring_write(ring, 0);
3073 /* We should be using the new WAIT_REG_MEM special op packet here
3074 * but it causes the CP to hang
3076 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3077 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3078 WRITE_DATA_DST_SEL(0)));
3079 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3080 radeon_ring_write(ring, 0);
3081 radeon_ring_write(ring, 0);
3084 void cik_semaphore_ring_emit(struct radeon_device *rdev,
3085 struct radeon_ring *ring,
3086 struct radeon_semaphore *semaphore,
3089 uint64_t addr = semaphore->gpu_addr;
3090 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3092 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3093 radeon_ring_write(ring, addr & 0xffffffff);
3094 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
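/*
 * Both halves of a sync use this same packet: the signalling ring
 * emits it with emit_wait = false (SEM_SEL_SIGNAL) and the waiting
 * ring with emit_wait = true (SEM_SEL_WAIT), against the same
 * semaphore GPU address.
 */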
3101 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3103 * @rdev: radeon_device pointer
3104 * @ib: radeon indirect buffer object
3106 * Emits a DE (drawing engine) or CE (constant engine) IB
3107 * on the gfx ring. IBs are usually generated by userspace
3108 * acceleration drivers and submitted to the kernel for
3109 * scheduling on the ring. This function schedules the IB
3110 * on the gfx ring for execution by the GPU.
3112 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3114 struct radeon_ring *ring = &rdev->ring[ib->ring];
3115 u32 header, control = INDIRECT_BUFFER_VALID;
3117 if (ib->is_const_ib) {
3118 /* set switch buffer packet before const IB */
3119 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3120 radeon_ring_write(ring, 0);
3122 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3125 if (ring->rptr_save_reg) {
3126 next_rptr = ring->wptr + 3 + 4;
3127 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3128 radeon_ring_write(ring, ((ring->rptr_save_reg -
3129 PACKET3_SET_UCONFIG_REG_START) >> 2));
3130 radeon_ring_write(ring, next_rptr);
3131 } else if (rdev->wb.enabled) {
3132 next_rptr = ring->wptr + 5 + 4;
3133 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3134 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3135 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3136 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3137 radeon_ring_write(ring, next_rptr);
3140 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3143 control |= ib->length_dw |
3144 (ib->vm ? (ib->vm->id << 24) : 0);
3146 radeon_ring_write(ring, header);
3147 radeon_ring_write(ring,
3151 (ib->gpu_addr & 0xFFFFFFFC));
3152 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3153 radeon_ring_write(ring, control);
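/*
 * The control word therefore carries the IB length in dwords in its
 * low bits, the VM id in bits 31:24 for VM IBs (ib->vm->id << 24),
 * and the INDIRECT_BUFFER_VALID flag.
 */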
3157 * cik_ib_test - basic gfx ring IB test
3159 * @rdev: radeon_device pointer
3160 * @ring: radeon_ring structure holding ring information
3162 * Allocate an IB and execute it on the gfx ring (CIK).
3163 * Provides a basic gfx ring test to verify that IBs are working.
3164 * Returns 0 on success, error on failure.
3166 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3168 struct radeon_ib ib;
3174 r = radeon_scratch_get(rdev, &scratch);
3176 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3179 WREG32(scratch, 0xCAFEDEAD);
3180 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3182 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3185 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3186 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3187 ib.ptr[2] = 0xDEADBEEF;
3189 r = radeon_ib_schedule(rdev, &ib, NULL);
3191 radeon_scratch_free(rdev, scratch);
3192 radeon_ib_free(rdev, &ib);
3193 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3196 r = radeon_fence_wait(ib.fence, false);
3198 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3201 for (i = 0; i < rdev->usec_timeout; i++) {
3202 tmp = RREG32(scratch);
3203 if (tmp == 0xDEADBEEF)
3207 if (i < rdev->usec_timeout) {
3208 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3210 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3214 radeon_scratch_free(rdev, scratch);
3215 radeon_ib_free(rdev, &ib);
3221 * On CIK, gfx and compute now have independent command processors.
3224 * Gfx consists of a single ring and can process both gfx jobs and
3225 * compute jobs. The gfx CP consists of three microengines (ME):
3226 * PFP - Pre-Fetch Parser
3228 * CE - Constant Engine
3229 * The PFP and ME make up what is considered the Drawing Engine (DE).
3230 * The CE is an asynchronous engine used for updating buffer descriptors
3231 * used by the DE so that they can be loaded into cache in parallel
3232 * while the DE is processing state update packets.
3235 * The compute CP consists of two microengines (ME):
3236 * MEC1 - Compute MicroEngine 1
3237 * MEC2 - Compute MicroEngine 2
3238 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3239 * The queues are exposed to userspace and are programmed directly
3240 * by the compute runtime.
3243 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3245 * @rdev: radeon_device pointer
3246 * @enable: enable or disable the MEs
3248 * Halts or unhalts the gfx MEs.
3250 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3253 WREG32(CP_ME_CNTL, 0);
3255 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3256 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3262 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3264 * @rdev: radeon_device pointer
3266 * Loads the gfx PFP, ME, and CE ucode.
3267 * Returns 0 for success, -EINVAL if the ucode is not available.
3269 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3271 const __be32 *fw_data;
3274 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3277 cik_cp_gfx_enable(rdev, false);
3280 fw_data = (const __be32 *)rdev->pfp_fw->data;
3281 WREG32(CP_PFP_UCODE_ADDR, 0);
3282 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3283 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3284 WREG32(CP_PFP_UCODE_ADDR, 0);
3287 fw_data = (const __be32 *)rdev->ce_fw->data;
3288 WREG32(CP_CE_UCODE_ADDR, 0);
3289 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3290 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3291 WREG32(CP_CE_UCODE_ADDR, 0);
3294 fw_data = (const __be32 *)rdev->me_fw->data;
3295 WREG32(CP_ME_RAM_WADDR, 0);
3296 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3297 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3298 WREG32(CP_ME_RAM_WADDR, 0);
3300 WREG32(CP_PFP_UCODE_ADDR, 0);
3301 WREG32(CP_CE_UCODE_ADDR, 0);
3302 WREG32(CP_ME_RAM_WADDR, 0);
3303 WREG32(CP_ME_RAM_RADDR, 0);
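/*
 * All three engines are loaded with the same idiom: zero the ucode
 * address register, stream the big-endian firmware words through the
 * data register, then zero the address registers again before the
 * engines are re-enabled.
 */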
3308 * cik_cp_gfx_start - start the gfx ring
3310 * @rdev: radeon_device pointer
3312 * Enables the ring and loads the clear state context and other
3313 * packets required to init the ring.
3314 * Returns 0 for success, error for failure.
3316 static int cik_cp_gfx_start(struct radeon_device *rdev)
3318 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3322 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3323 WREG32(CP_ENDIAN_SWAP, 0);
3324 WREG32(CP_DEVICE_ID, 1);
3326 cik_cp_gfx_enable(rdev, true);
3328 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3330 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3334 /* init the CE partitions. CE only used for gfx on CIK */
3335 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3336 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3337 radeon_ring_write(ring, 0xc000);
3338 radeon_ring_write(ring, 0xc000);
3340 /* setup clear context state */
3341 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3342 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3344 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3345 radeon_ring_write(ring, 0x80000000);
3346 radeon_ring_write(ring, 0x80000000);
3348 for (i = 0; i < cik_default_size; i++)
3349 radeon_ring_write(ring, cik_default_state[i]);
3351 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3352 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3354 /* set clear context state */
3355 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3356 radeon_ring_write(ring, 0);
3358 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3359 radeon_ring_write(ring, 0x00000316);
3360 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3361 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3363 radeon_ring_unlock_commit(rdev, ring);
3369 * cik_cp_gfx_fini - stop the gfx ring
3371 * @rdev: radeon_device pointer
3373 * Stop the gfx ring and tear down the driver ring buffer.
3376 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3378 cik_cp_gfx_enable(rdev, false);
3379 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3383 * cik_cp_gfx_resume - setup the gfx ring buffer registers
3385 * @rdev: radeon_device pointer
3387 * Program the location and size of the gfx ring buffer
3388 * and test it to make sure it's working.
3389 * Returns 0 for success, error for failure.
3391 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3393 struct radeon_ring *ring;
3399 WREG32(CP_SEM_WAIT_TIMER, 0x0);
3400 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3402 /* Set the write pointer delay */
3403 WREG32(CP_RB_WPTR_DELAY, 0);
3405 /* set the RB to use vmid 0 */
3406 WREG32(CP_RB_VMID, 0);
3408 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3410 /* ring 0 - compute and gfx */
3411 /* Set ring buffer size */
3412 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3413 rb_bufsz = order_base_2(ring->ring_size / 8);
3414 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3416 tmp |= BUF_SWAP_32BIT;
3418 WREG32(CP_RB0_CNTL, tmp);
3420 /* Initialize the ring buffer's read and write pointers */
3421 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3423 WREG32(CP_RB0_WPTR, ring->wptr);
3425 /* set the wb address whether it's enabled or not */
3426 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3427 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3429 /* scratch register shadowing is no longer supported */
3430 WREG32(SCRATCH_UMSK, 0);
3432 if (!rdev->wb.enabled)
3433 tmp |= RB_NO_UPDATE;
3436 WREG32(CP_RB0_CNTL, tmp);
3438 rb_addr = ring->gpu_addr >> 8;
3439 WREG32(CP_RB0_BASE, rb_addr);
3440 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3442 ring->rptr = RREG32(CP_RB0_RPTR);
3444 /* start the ring */
3445 cik_cp_gfx_start(rdev);
3446 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3447 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3449 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3455 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3456 struct radeon_ring *ring)
3462 if (rdev->wb.enabled) {
3463 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3465 mutex_lock(&rdev->srbm_mutex);
3466 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3467 rptr = RREG32(CP_HQD_PQ_RPTR);
3468 cik_srbm_select(rdev, 0, 0, 0, 0);
3469 mutex_unlock(&rdev->srbm_mutex);
3475 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3476 struct radeon_ring *ring)
3480 if (rdev->wb.enabled) {
3481 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3483 mutex_lock(&rdev->srbm_mutex);
3484 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3485 wptr = RREG32(CP_HQD_PQ_WPTR);
3486 cik_srbm_select(rdev, 0, 0, 0, 0);
3487 mutex_unlock(&rdev->srbm_mutex);
3493 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3494 struct radeon_ring *ring)
3496 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
3497 WDOORBELL32(ring->doorbell_offset, ring->wptr);
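/*
 * Note the asymmetry with the getters above: setting the wptr needs
 * no SRBM selection or srbm_mutex, since the write-back buffer keeps
 * the CPU-side copy and WDOORBELL32 kicks the CP through the doorbell
 * aperture instead of a CP_HQD_PQ_WPTR register write.
 */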
3501 * cik_cp_compute_enable - enable/disable the compute CP MEs
3503 * @rdev: radeon_device pointer
3504 * @enable: enable or disable the MEs
3506 * Halts or unhalts the compute MEs.
3508 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3511 WREG32(CP_MEC_CNTL, 0);
3513 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3518 * cik_cp_compute_load_microcode - load the compute CP ME ucode
3520 * @rdev: radeon_device pointer
3522 * Loads the compute MEC1&2 ucode.
3523 * Returns 0 for success, -EINVAL if the ucode is not available.
3525 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3527 const __be32 *fw_data;
3533 cik_cp_compute_enable(rdev, false);
3536 fw_data = (const __be32 *)rdev->mec_fw->data;
3537 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3538 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3539 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3540 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3542 if (rdev->family == CHIP_KAVERI) {
3544 fw_data = (const __be32 *)rdev->mec_fw->data;
3545 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3546 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3547 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3548 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3555 * cik_cp_compute_start - start the compute queues
3557 * @rdev: radeon_device pointer
3559 * Enable the compute queues.
3560 * Returns 0 for success, error for failure.
3562 static int cik_cp_compute_start(struct radeon_device *rdev)
3564 cik_cp_compute_enable(rdev, true);
3570 * cik_cp_compute_fini - stop the compute queues
3572 * @rdev: radeon_device pointer
3574 * Stop the compute queues and tear down the driver queue state.
3577 static void cik_cp_compute_fini(struct radeon_device *rdev)
3581 cik_cp_compute_enable(rdev, false);
3583 for (i = 0; i < 2; i++) {
3585 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3587 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3589 if (rdev->ring[idx].mqd_obj) {
3590 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3591 if (unlikely(r != 0))
3592 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3594 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3595 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3597 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3598 rdev->ring[idx].mqd_obj = NULL;
3603 static void cik_mec_fini(struct radeon_device *rdev)
3607 if (rdev->mec.hpd_eop_obj) {
3608 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3609 if (unlikely(r != 0))
3610 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3611 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3612 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3614 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3615 rdev->mec.hpd_eop_obj = NULL;
3619 #define MEC_HPD_SIZE 2048
3621 static int cik_mec_init(struct radeon_device *rdev)
3627 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3628 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3630 if (rdev->family == CHIP_KAVERI)
3631 rdev->mec.num_mec = 2;
3633 rdev->mec.num_mec = 1;
3634 rdev->mec.num_pipe = 4;
3635 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3637 if (rdev->mec.hpd_eop_obj == NULL) {
3638 r = radeon_bo_create(rdev,
3639 rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3641 RADEON_GEM_DOMAIN_GTT, NULL,
3642 &rdev->mec.hpd_eop_obj);
3644 dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
3649 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3650 if (unlikely(r != 0)) {
3654 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3655 &rdev->mec.hpd_eop_gpu_addr);
3657 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
3661 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3663 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
3668 /* clear memory. Not sure if this is required or not */
3669 memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3671 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3672 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
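/*
 * Sizing example: on Kaveri this allocates
 * 2 MECs * 4 pipes * MEC_HPD_SIZE (2048) * 2 = 32 KiB of GTT for the
 * HPD EOP buffers, and half that on the single-MEC Bonaire/Kabini
 * parts.
 */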
3677 struct hqd_registers
3679 u32 cp_mqd_base_addr;
3680 u32 cp_mqd_base_addr_hi;
3683 u32 cp_hqd_persistent_state;
3684 u32 cp_hqd_pipe_priority;
3685 u32 cp_hqd_queue_priority;
3688 u32 cp_hqd_pq_base_hi;
3690 u32 cp_hqd_pq_rptr_report_addr;
3691 u32 cp_hqd_pq_rptr_report_addr_hi;
3692 u32 cp_hqd_pq_wptr_poll_addr;
3693 u32 cp_hqd_pq_wptr_poll_addr_hi;
3694 u32 cp_hqd_pq_doorbell_control;
3696 u32 cp_hqd_pq_control;
3697 u32 cp_hqd_ib_base_addr;
3698 u32 cp_hqd_ib_base_addr_hi;
3700 u32 cp_hqd_ib_control;
3701 u32 cp_hqd_iq_timer;
3703 u32 cp_hqd_dequeue_request;
3704 u32 cp_hqd_dma_offload;
3705 u32 cp_hqd_sema_cmd;
3706 u32 cp_hqd_msg_type;
3707 u32 cp_hqd_atomic0_preop_lo;
3708 u32 cp_hqd_atomic0_preop_hi;
3709 u32 cp_hqd_atomic1_preop_lo;
3710 u32 cp_hqd_atomic1_preop_hi;
3711 u32 cp_hqd_hq_scheduler0;
3712 u32 cp_hqd_hq_scheduler1;
3719 u32 dispatch_initiator;
3723 u32 pipeline_stat_enable;
3724 u32 perf_counter_enable;
3730 u32 resource_limits;
3731 u32 static_thread_mgmt01[2];
3733 u32 static_thread_mgmt23[2];
3735 u32 thread_trace_enable;
3738 u32 vgtcs_invoke_count[2];
3739 struct hqd_registers queue_state;
3741 u32 interrupt_queue[64];
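/*
 * bonaire_mqd is the in-memory image of one compute queue: the
 * queue_state member mirrors the CP_MQD_* / CP_HQD_* registers, and
 * cik_cp_compute_resume() below fills it in step with programming the
 * hardware so the CP has a consistent copy to reload from.
 */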
3745 * cik_cp_compute_resume - setup the compute queue registers
3747 * @rdev: radeon_device pointer
3749 * Program the compute queues and test them to make sure they are working.
3751 * Returns 0 for success, error for failure.
3753 static int cik_cp_compute_resume(struct radeon_device *rdev)
3757 bool use_doorbell = true;
3763 struct bonaire_mqd *mqd;
3765 r = cik_cp_compute_start(rdev);
3769 /* fix up chicken bits */
3770 tmp = RREG32(CP_CPF_DEBUG);
3772 WREG32(CP_CPF_DEBUG, tmp);
3774 /* init the pipes */
3775 mutex_lock(&rdev->srbm_mutex);
3776 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3777 int me = (i < 4) ? 1 : 2;
3778 int pipe = (i < 4) ? i : (i - 4);
3780 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3782 cik_srbm_select(rdev, me, pipe, 0, 0);
3784 /* write the EOP addr */
3785 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3786 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3788 /* set the VMID assigned */
3789 WREG32(CP_HPD_EOP_VMID, 0);
3791 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3792 tmp = RREG32(CP_HPD_EOP_CONTROL);
3793 tmp &= ~EOP_SIZE_MASK;
3794 tmp |= order_base_2(MEC_HPD_SIZE / 8);
3795 WREG32(CP_HPD_EOP_CONTROL, tmp);
3797 cik_srbm_select(rdev, 0, 0, 0, 0);
3798 mutex_unlock(&rdev->srbm_mutex);
3800 /* init the queues. Just two for now. */
3801 for (i = 0; i < 2; i++) {
3803 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3805 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3807 if (rdev->ring[idx].mqd_obj == NULL) {
3808 r = radeon_bo_create(rdev,
3809 sizeof(struct bonaire_mqd),
3811 RADEON_GEM_DOMAIN_GTT, NULL,
3812 &rdev->ring[idx].mqd_obj);
3814 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3819 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3820 if (unlikely(r != 0)) {
3821 cik_cp_compute_fini(rdev);
3824 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3827 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3828 cik_cp_compute_fini(rdev);
3831 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3833 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3834 cik_cp_compute_fini(rdev);
3838 /* doorbell offset */
3839 rdev->ring[idx].doorbell_offset =
3840 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3842 /* init the mqd struct */
3843 memset(buf, 0, sizeof(struct bonaire_mqd));
3845 mqd = (struct bonaire_mqd *)buf;
3846 mqd->header = 0xC0310800;
3847 mqd->static_thread_mgmt01[0] = 0xffffffff;
3848 mqd->static_thread_mgmt01[1] = 0xffffffff;
3849 mqd->static_thread_mgmt23[0] = 0xffffffff;
3850 mqd->static_thread_mgmt23[1] = 0xffffffff;
3852 mutex_lock(&rdev->srbm_mutex);
3853 cik_srbm_select(rdev, rdev->ring[idx].me,
3854 rdev->ring[idx].pipe,
3855 rdev->ring[idx].queue, 0);
3857 /* disable wptr polling */
3858 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3859 tmp &= ~WPTR_POLL_EN;
3860 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3862 /* enable doorbell? */
3863 mqd->queue_state.cp_hqd_pq_doorbell_control =
3864 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3866 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3868 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3869 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3870 mqd->queue_state.cp_hqd_pq_doorbell_control);
3872 /* disable the queue if it's active */
3873 mqd->queue_state.cp_hqd_dequeue_request = 0;
3874 mqd->queue_state.cp_hqd_pq_rptr = 0;
3875 mqd->queue_state.cp_hqd_pq_wptr = 0;
3876 if (RREG32(CP_HQD_ACTIVE) & 1) {
3877 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3878 for (i = 0; i < rdev->usec_timeout; i++) {
3879 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3883 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3884 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3885 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3888 /* set the pointer to the MQD */
3889 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3890 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3891 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3892 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3893 /* set MQD vmid to 0 */
3894 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3895 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3896 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3898 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3899 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3900 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3901 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3902 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3903 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3905 /* set up the HQD, this is similar to CP_RB0_CNTL */
3906 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3907 mqd->queue_state.cp_hqd_pq_control &=
3908 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3910 mqd->queue_state.cp_hqd_pq_control |=
3911 order_base_2(rdev->ring[idx].ring_size / 8);
3912 mqd->queue_state.cp_hqd_pq_control |=
3913 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3915 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3917 mqd->queue_state.cp_hqd_pq_control &=
3918 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3919 mqd->queue_state.cp_hqd_pq_control |=
3920 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3921 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3923 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3925 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3927 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3928 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3929 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3930 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3931 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3932 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3934 /* set the wb address whether it's enabled or not */
3936 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3938 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3939 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3940 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3941 upper_32_bits(wb_gpu_addr) & 0xffff;
3942 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3943 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3944 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3945 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3947 /* enable the doorbell if requested */
3949 mqd->queue_state.cp_hqd_pq_doorbell_control =
3950 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3951 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3952 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3953 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3954 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3955 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3956 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3959 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3961 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3962 mqd->queue_state.cp_hqd_pq_doorbell_control);
3964 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3965 rdev->ring[idx].wptr = 0;
3966 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3967 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3968 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3969 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3971 /* set the vmid for the queue */
3972 mqd->queue_state.cp_hqd_vmid = 0;
3973 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3975 /* activate the queue */
3976 mqd->queue_state.cp_hqd_active = 1;
3977 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3979 cik_srbm_select(rdev, 0, 0, 0, 0);
3980 mutex_unlock(&rdev->srbm_mutex);
3982 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3983 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3985 rdev->ring[idx].ready = true;
3986 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3988 rdev->ring[idx].ready = false;
3994 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3996 cik_cp_gfx_enable(rdev, enable);
3997 cik_cp_compute_enable(rdev, enable);
4000 static int cik_cp_load_microcode(struct radeon_device *rdev)
4004 r = cik_cp_gfx_load_microcode(rdev);
4007 r = cik_cp_compute_load_microcode(rdev);
4014 static void cik_cp_fini(struct radeon_device *rdev)
4016 cik_cp_gfx_fini(rdev);
4017 cik_cp_compute_fini(rdev);
4020 static int cik_cp_resume(struct radeon_device *rdev)
4024 cik_enable_gui_idle_interrupt(rdev, false);
4026 r = cik_cp_load_microcode(rdev);
4030 r = cik_cp_gfx_resume(rdev);
4033 r = cik_cp_compute_resume(rdev);
4037 cik_enable_gui_idle_interrupt(rdev, true);
4042 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4044 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
4045 RREG32(GRBM_STATUS));
4046 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
4047 RREG32(GRBM_STATUS2));
4048 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
4049 RREG32(GRBM_STATUS_SE0));
4050 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
4051 RREG32(GRBM_STATUS_SE1));
4052 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
4053 RREG32(GRBM_STATUS_SE2));
4054 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
4055 RREG32(GRBM_STATUS_SE3));
4056 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
4057 RREG32(SRBM_STATUS));
4058 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
4059 RREG32(SRBM_STATUS2));
4060 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
4061 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4062 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
4063 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4064 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4065 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
4066 RREG32(CP_STALLED_STAT1));
4067 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
4068 RREG32(CP_STALLED_STAT2));
4069 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
4070 RREG32(CP_STALLED_STAT3));
4071 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
4072 RREG32(CP_CPF_BUSY_STAT));
4073 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
4074 RREG32(CP_CPF_STALLED_STAT1));
4075 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4076 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4077 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
4078 RREG32(CP_CPC_STALLED_STAT1));
4079 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4083 * cik_gpu_check_soft_reset - check which blocks are busy
4085 * @rdev: radeon_device pointer
4087 * Check which blocks are busy and return the relevant reset
4088 * mask to be used by cik_gpu_soft_reset().
4089 * Returns a mask of the blocks to be reset.
4091 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4097 tmp = RREG32(GRBM_STATUS);
4098 if (tmp & (PA_BUSY | SC_BUSY |
4099 BCI_BUSY | SX_BUSY |
4100 TA_BUSY | VGT_BUSY |
4102 GDS_BUSY | SPI_BUSY |
4103 IA_BUSY | IA_BUSY_NO_DMA))
4104 reset_mask |= RADEON_RESET_GFX;
4106 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4107 reset_mask |= RADEON_RESET_CP;
4110 tmp = RREG32(GRBM_STATUS2);
4112 reset_mask |= RADEON_RESET_RLC;
4114 /* SDMA0_STATUS_REG */
4115 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4116 if (!(tmp & SDMA_IDLE))
4117 reset_mask |= RADEON_RESET_DMA;
4119 /* SDMA1_STATUS_REG */
4120 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4121 if (!(tmp & SDMA_IDLE))
4122 reset_mask |= RADEON_RESET_DMA1;
4125 tmp = RREG32(SRBM_STATUS2);
4126 if (tmp & SDMA_BUSY)
4127 reset_mask |= RADEON_RESET_DMA;
4129 if (tmp & SDMA1_BUSY)
4130 reset_mask |= RADEON_RESET_DMA1;
4133 tmp = RREG32(SRBM_STATUS);
4136 reset_mask |= RADEON_RESET_IH;
4139 reset_mask |= RADEON_RESET_SEM;
4141 if (tmp & GRBM_RQ_PENDING)
4142 reset_mask |= RADEON_RESET_GRBM;
4145 reset_mask |= RADEON_RESET_VMC;
4147 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4148 MCC_BUSY | MCD_BUSY))
4149 reset_mask |= RADEON_RESET_MC;
4151 if (evergreen_is_display_hung(rdev))
4152 reset_mask |= RADEON_RESET_DISPLAY;
4154 /* Skip MC reset as it's most likely not hung, just busy */
4155 if (reset_mask & RADEON_RESET_MC) {
4156 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4157 reset_mask &= ~RADEON_RESET_MC;
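/*
 * The mask returned here feeds cik_gpu_soft_reset() below, which
 * translates each RADEON_RESET_* bit into the matching GRBM/SRBM
 * soft-reset bits.
 */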
4164 * cik_gpu_soft_reset - soft reset GPU
4166 * @rdev: radeon_device pointer
4167 * @reset_mask: mask of which blocks to reset
4169 * Soft reset the blocks specified in @reset_mask.
4171 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4173 struct evergreen_mc_save save;
4174 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4177 if (reset_mask == 0)
4180 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4182 cik_print_gpu_status_regs(rdev);
4183 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4184 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4185 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4186 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4191 /* Disable GFX parsing/prefetching */
4192 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4194 /* Disable MEC parsing/prefetching */
4195 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4197 if (reset_mask & RADEON_RESET_DMA) {
4199 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4201 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4203 if (reset_mask & RADEON_RESET_DMA1) {
4205 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4207 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4210 evergreen_mc_stop(rdev, &save);
4211 if (evergreen_mc_wait_for_idle(rdev)) {
4212 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4215 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4216 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4218 if (reset_mask & RADEON_RESET_CP) {
4219 grbm_soft_reset |= SOFT_RESET_CP;
4221 srbm_soft_reset |= SOFT_RESET_GRBM;
4224 if (reset_mask & RADEON_RESET_DMA)
4225 srbm_soft_reset |= SOFT_RESET_SDMA;
4227 if (reset_mask & RADEON_RESET_DMA1)
4228 srbm_soft_reset |= SOFT_RESET_SDMA1;
4230 if (reset_mask & RADEON_RESET_DISPLAY)
4231 srbm_soft_reset |= SOFT_RESET_DC;
4233 if (reset_mask & RADEON_RESET_RLC)
4234 grbm_soft_reset |= SOFT_RESET_RLC;
4236 if (reset_mask & RADEON_RESET_SEM)
4237 srbm_soft_reset |= SOFT_RESET_SEM;
4239 if (reset_mask & RADEON_RESET_IH)
4240 srbm_soft_reset |= SOFT_RESET_IH;
4242 if (reset_mask & RADEON_RESET_GRBM)
4243 srbm_soft_reset |= SOFT_RESET_GRBM;
4245 if (reset_mask & RADEON_RESET_VMC)
4246 srbm_soft_reset |= SOFT_RESET_VMC;
4248 if (!(rdev->flags & RADEON_IS_IGP)) {
4249 if (reset_mask & RADEON_RESET_MC)
4250 srbm_soft_reset |= SOFT_RESET_MC;
4253 if (grbm_soft_reset) {
4254 tmp = RREG32(GRBM_SOFT_RESET);
4255 tmp |= grbm_soft_reset;
4256 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4257 WREG32(GRBM_SOFT_RESET, tmp);
4258 tmp = RREG32(GRBM_SOFT_RESET);
4262 tmp &= ~grbm_soft_reset;
4263 WREG32(GRBM_SOFT_RESET, tmp);
4264 tmp = RREG32(GRBM_SOFT_RESET);
4267 if (srbm_soft_reset) {
4268 tmp = RREG32(SRBM_SOFT_RESET);
4269 tmp |= srbm_soft_reset;
4270 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4271 WREG32(SRBM_SOFT_RESET, tmp);
4272 tmp = RREG32(SRBM_SOFT_RESET);
4276 tmp &= ~srbm_soft_reset;
4277 WREG32(SRBM_SOFT_RESET, tmp);
4278 tmp = RREG32(SRBM_SOFT_RESET);
4281 /* Wait a little for things to settle down */
4284 evergreen_mc_resume(rdev, &save);
4287 cik_print_gpu_status_regs(rdev);
4291 * cik_asic_reset - soft reset GPU
4293 * @rdev: radeon_device pointer
4295 * Look up which blocks are hung and attempt to reset them.
4297 * Returns 0 for success.
4299 int cik_asic_reset(struct radeon_device *rdev)
4303 reset_mask = cik_gpu_check_soft_reset(rdev);
4306 r600_set_bios_scratch_engine_hung(rdev, true);
4308 cik_gpu_soft_reset(rdev, reset_mask);
4310 reset_mask = cik_gpu_check_soft_reset(rdev);
4313 r600_set_bios_scratch_engine_hung(rdev, false);
4319 * cik_gfx_is_lockup - check if the 3D engine is locked up
4321 * @rdev: radeon_device pointer
4322 * @ring: radeon_ring structure holding ring information
4324 * Check if the 3D engine is locked up (CIK).
4325 * Returns true if the engine is locked, false if not.
4327 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4329 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4331 if (!(reset_mask & (RADEON_RESET_GFX |
4332 RADEON_RESET_COMPUTE |
4333 RADEON_RESET_CP))) {
4334 radeon_ring_lockup_update(ring);
4337 /* force CP activity */
4338 radeon_ring_force_activity(rdev, ring);
4339 return radeon_ring_test_lockup(rdev, ring);
4344 * cik_mc_program - program the GPU memory controller
4346 * @rdev: radeon_device pointer
4348 * Set the location of vram, gart, and AGP in the GPU's
4349 * physical address space (CIK).
4351 static void cik_mc_program(struct radeon_device *rdev)
4353 struct evergreen_mc_save save;
4357 /* Initialize HDP */
4358 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4359 WREG32((0x2c14 + j), 0x00000000);
4360 WREG32((0x2c18 + j), 0x00000000);
4361 WREG32((0x2c1c + j), 0x00000000);
4362 WREG32((0x2c20 + j), 0x00000000);
4363 WREG32((0x2c24 + j), 0x00000000);
4365 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4367 evergreen_mc_stop(rdev, &save);
4368 if (radeon_mc_wait_for_idle(rdev)) {
4369 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4371 /* Lockout access through VGA aperture*/
4372 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4373 /* Update configuration */
4374 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4375 rdev->mc.vram_start >> 12);
4376 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4377 rdev->mc.vram_end >> 12);
4378 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4379 rdev->vram_scratch.gpu_addr >> 12);
4380 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4381 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4382 WREG32(MC_VM_FB_LOCATION, tmp);
4383 /* XXX double check these! */
4384 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4385 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4386 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4387 WREG32(MC_VM_AGP_BASE, 0);
4388 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4389 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4390 if (radeon_mc_wait_for_idle(rdev)) {
4391 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4393 evergreen_mc_resume(rdev, &save);
4394 /* we need to own VRAM, so turn off the VGA renderer here
4395 * to stop it from overwriting our objects */
4396 rv515_vga_render_disable(rdev);
4400 * cik_mc_init - initialize the memory controller driver params
4402 * @rdev: radeon_device pointer
4404 * Look up the amount of vram, vram width, and decide how to place
4405 * vram and gart within the GPU's physical address space (CIK).
4406 * Returns 0 for success.
4408 static int cik_mc_init(struct radeon_device *rdev)
4411 int chansize, numchan;
4413 /* Get VRAM information */
4414 rdev->mc.vram_is_ddr = true;
4415 tmp = RREG32(MC_ARB_RAMCFG);
4416 if (tmp & CHANSIZE_MASK) {
4421 tmp = RREG32(MC_SHARED_CHMAP);
4422 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4452 rdev->mc.vram_width = numchan * chansize;
4453 /* Could aper size report 0? */
4454 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4455 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4456 /* size in MB on CIK */
4457 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4458 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4459 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4460 si_vram_gtt_location(rdev, &rdev->mc);
4461 radeon_update_bandwidth_info(rdev);
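/*
 * Worked-example sketch (hypothetical helper, not driver code): the bus
 * width computed above is simply channels * bits-per-channel, so e.g. a
 * board with 4 memory channels of 64 bits each reports a 256-bit
 * interface.
 */
static unsigned example_vram_width(unsigned numchan, unsigned chansize)
{
	/* e.g. numchan = 4, chansize = 64 -> 256-bit bus */
	return numchan * chansize;
}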
4468 * VMID 0 is the physical GPU addresses as used by the kernel.
4469 * VMIDs 1-15 are used for userspace clients and are handled
4470 * by the radeon vm/hsa code.
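/*
 * Register-selection sketch (hypothetical helper): each VM context has
 * its own page table base register, but contexts 0-7 and 8-15 live in
 * two separate register banks. That is why cik_pcie_gart_enable() and
 * cik_vm_flush() below switch between VM_CONTEXT0_ and
 * VM_CONTEXT8_PAGE_TABLE_BASE_ADDR depending on the VMID.
 */
static u32 example_vm_pt_base_reg(unsigned vmid)
{
	if (vmid < 8)
		return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vmid << 2);
	return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vmid - 8) << 2);
}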
4473 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4475 * @rdev: radeon_device pointer
4477 * Flush the TLB for the VMID 0 page table (CIK).
4479 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4481 /* flush hdp cache */
4482 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4484 /* bits 0-15 are the VM contexts0-15 */
4485 WREG32(VM_INVALIDATE_REQUEST, 0x1);
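/*
 * Per-VMID variant sketch (hypothetical helper): bit N of
 * VM_INVALIDATE_REQUEST flushes the TLB for VM context N only, which is
 * how cik_vm_flush() below invalidates a single client VM rather than
 * context 0.
 */
static void example_flush_single_vmid(struct radeon_device *rdev, unsigned vmid)
{
	WREG32(VM_INVALIDATE_REQUEST, 1 << vmid);
}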
4489 * cik_pcie_gart_enable - gart enable
4491 * @rdev: radeon_device pointer
4493 * This sets up the TLBs, programs the page tables for VMID0,
4494 * sets up the hw for VMIDs 1-15 which are allocated on
4495 * demand, and sets up the global locations for the LDS, GDS,
4496 * and GPUVM for FSA64 clients (CIK).
4497 * Returns 0 for success, errors for failure.
4499 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4503 if (rdev->gart.robj == NULL) {
4504 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4507 r = radeon_gart_table_vram_pin(rdev);
4510 radeon_gart_restore(rdev);
4511 /* Setup TLB control */
4512 WREG32(MC_VM_MX_L1_TLB_CNTL,
4515 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4516 ENABLE_ADVANCED_DRIVER_MODEL |
4517 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4518 /* Setup L2 cache */
4519 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4520 ENABLE_L2_FRAGMENT_PROCESSING |
4521 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4522 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4523 EFFECTIVE_L2_QUEUE_SIZE(7) |
4524 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4525 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4526 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4527 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4528 /* setup context0 */
4529 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4530 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4531 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4532 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4533 (u32)(rdev->dummy_page.addr >> 12));
4534 WREG32(VM_CONTEXT0_CNTL2, 0);
4535 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4536 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4542 /* empty context1-15 */
4543 /* FIXME start with 4G, once using 2 level pt switch to full vm size space */
4546 /* set vm size, must be a multiple of 4 */
4547 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4548 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4549 for (i = 1; i < 16; i++) {
4551 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4552 rdev->gart.table_addr >> 12);
4554 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4555 rdev->gart.table_addr >> 12);
4558 /* enable context1-15 */
4559 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4560 (u32)(rdev->dummy_page.addr >> 12));
4561 WREG32(VM_CONTEXT1_CNTL2, 4);
4562 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4563 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4564 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4565 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4566 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4567 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4568 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4569 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4570 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4571 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4572 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4573 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4574 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4576 /* TC cache setup ??? */
4577 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4578 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4579 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4581 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4582 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4583 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4584 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4585 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4587 WREG32(TC_CFG_L1_VOLATILE, 0);
4588 WREG32(TC_CFG_L2_VOLATILE, 0);
4590 if (rdev->family == CHIP_KAVERI) {
4591 u32 tmp = RREG32(CHUB_CONTROL);
4593 WREG32(CHUB_CONTROL, tmp);
4596 /* XXX SH_MEM regs */
4597 /* where to put LDS, scratch, GPUVM in FSA64 space */
4598 mutex_lock(&rdev->srbm_mutex);
4599 for (i = 0; i < 16; i++) {
4600 cik_srbm_select(rdev, 0, 0, 0, i);
4601 /* CP and shaders */
4602 WREG32(SH_MEM_CONFIG, 0);
4603 WREG32(SH_MEM_APE1_BASE, 1);
4604 WREG32(SH_MEM_APE1_LIMIT, 0);
4605 WREG32(SH_MEM_BASES, 0);
4607 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4608 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4609 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4610 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4611 /* XXX SDMA RLC - todo */
4613 cik_srbm_select(rdev, 0, 0, 0, 0);
4614 mutex_unlock(&rdev->srbm_mutex);
4616 cik_pcie_gart_tlb_flush(rdev);
4617 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4618 (unsigned)(rdev->mc.gtt_size >> 20),
4619 (unsigned long long)rdev->gart.table_addr);
4620 rdev->gart.ready = true;
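/*
 * Address-walk sketch for VMID 0 as programmed above (hypothetical
 * helper): a GPU address inside the GTT aperture indexes the single
 * level, 4K-page table pinned at rdev->gart.table_addr.
 */
static u64 example_gtt_pte_index(struct radeon_device *rdev, u64 gpu_addr)
{
	return (gpu_addr - rdev->mc.gtt_start) >> 12; /* 4K pages */
}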
4625 * cik_pcie_gart_disable - gart disable
4627 * @rdev: radeon_device pointer
4629 * This disables all VM page tables (CIK).
4631 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4633 /* Disable all tables */
4634 WREG32(VM_CONTEXT0_CNTL, 0);
4635 WREG32(VM_CONTEXT1_CNTL, 0);
4636 /* Setup TLB control */
4637 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4638 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4639 /* Setup L2 cache */
4641 ENABLE_L2_FRAGMENT_PROCESSING |
4642 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4643 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4644 EFFECTIVE_L2_QUEUE_SIZE(7) |
4645 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4646 WREG32(VM_L2_CNTL2, 0);
4647 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4648 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4649 radeon_gart_table_vram_unpin(rdev);
4653 * cik_pcie_gart_fini - vm fini callback
4655 * @rdev: radeon_device pointer
4657 * Tears down the driver GART/VM setup (CIK).
4659 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4661 cik_pcie_gart_disable(rdev);
4662 radeon_gart_table_vram_free(rdev);
4663 radeon_gart_fini(rdev);
4668 * cik_ib_parse - vm ib_parse callback
4670 * @rdev: radeon_device pointer
4671 * @ib: indirect buffer pointer
4673 * CIK uses hw IB checking so this is a nop (CIK).
4675 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4682 * VMID 0 is the physical GPU addresses as used by the kernel.
4683 * VMIDs 1-15 are used for userspace clients and are handled
4684 * by the radeon vm/hsa code.
4687 * cik_vm_init - cik vm init callback
4689 * @rdev: radeon_device pointer
4691 * Inits cik specific vm parameters (number of VMs, base of vram for
4692 * VMIDs 1-15) (CIK).
4693 * Returns 0 for success.
4695 int cik_vm_init(struct radeon_device *rdev)
4698 rdev->vm_manager.nvm = 16;
4699 /* base offset of vram pages */
4700 if (rdev->flags & RADEON_IS_IGP) {
4701 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4703 rdev->vm_manager.vram_base_offset = tmp;
4705 rdev->vm_manager.vram_base_offset = 0;
4711 * cik_vm_fini - cik vm fini callback
4713 * @rdev: radeon_device pointer
4715 * Tear down any asic specific VM setup (CIK).
4717 void cik_vm_fini(struct radeon_device *rdev)
4722 * cik_vm_decode_fault - print human readable fault info
4724 * @rdev: radeon_device pointer
4725 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4726 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4728 * Print human readable fault information (CIK).
4730 static void cik_vm_decode_fault(struct radeon_device *rdev,
4731 u32 status, u32 addr, u32 mc_client)
4733 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4734 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4735 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4736 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
4737 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
4739 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
4740 protections, vmid, addr,
4741 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4742 block, mc_client, mc_id);
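/*
 * Usage sketch (hypothetical; cik_irq_process() does this for real when
 * it sees a VM fault interrupt): latch the fault registers and decode
 * them into a human readable message.
 */
static void example_report_vm_fault(struct radeon_device *rdev)
{
	u32 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
	u32 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
	u32 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);

	cik_vm_decode_fault(rdev, status, addr, mc_client);
}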
4746 * cik_vm_flush - cik vm flush using the CP
4748 * @rdev: radeon_device pointer
4750 * Update the page table base and flush the VM TLB
4751 * using the CP (CIK).
4753 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4755 struct radeon_ring *ring = &rdev->ring[ridx];
4760 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4761 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4762 WRITE_DATA_DST_SEL(0)));
4764 radeon_ring_write(ring,
4765 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4767 radeon_ring_write(ring,
4768 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4770 radeon_ring_write(ring, 0);
4771 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4773 /* update SH_MEM_* regs */
4774 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4775 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4776 WRITE_DATA_DST_SEL(0)));
4777 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4778 radeon_ring_write(ring, 0);
4779 radeon_ring_write(ring, VMID(vm->id));
4781 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4782 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4783 WRITE_DATA_DST_SEL(0)));
4784 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4785 radeon_ring_write(ring, 0);
4787 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4788 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4789 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4790 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4792 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4793 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4794 WRITE_DATA_DST_SEL(0)));
4795 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4796 radeon_ring_write(ring, 0);
4797 radeon_ring_write(ring, VMID(0));
4800 /* We should be using the WAIT_REG_MEM packet here like in
4801 * cik_fence_ring_emit(), but it causes the CP to hang in this scenario. */
4804 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4805 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4806 WRITE_DATA_DST_SEL(0)));
4807 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4808 radeon_ring_write(ring, 0);
4809 radeon_ring_write(ring, 0);
4811 /* bits 0-15 are the VM contexts0-15 */
4812 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4813 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4814 WRITE_DATA_DST_SEL(0)));
4815 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4816 radeon_ring_write(ring, 0);
4817 radeon_ring_write(ring, 1 << vm->id);
4819 /* compute doesn't have PFP */
4820 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4821 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4822 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4823 radeon_ring_write(ring, 0x0);
4828 * cik_vm_set_page - update the page tables using CP or sDMA
4830 * @rdev: radeon_device pointer
4831 * @ib: indirect buffer to fill with commands
4832 * @pe: addr of the page entry
4833 * @addr: dst addr to write into pe
4834 * @count: number of page entries to update
4835 * @incr: increase next addr by incr bytes
4836 * @flags: access flags
4838 * Update the page tables using CP or sDMA (CIK).
4840 void cik_vm_set_page(struct radeon_device *rdev,
4841 struct radeon_ib *ib,
4843 uint64_t addr, unsigned count,
4844 uint32_t incr, uint32_t flags)
4846 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4850 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4853 ndw = 2 + count * 2;
4857 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4858 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4859 WRITE_DATA_DST_SEL(1));
4860 ib->ptr[ib->length_dw++] = pe;
4861 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4862 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4863 if (flags & RADEON_VM_PAGE_SYSTEM) {
4864 value = radeon_vm_map_gart(rdev, addr);
4865 value &= 0xFFFFFFFFFFFFF000ULL;
4866 } else if (flags & RADEON_VM_PAGE_VALID) {
4872 value |= r600_flags;
4873 ib->ptr[ib->length_dw++] = value;
4874 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4879 cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
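/*
 * PTE-composition sketch for the CP path above (hypothetical helper):
 * each entry written via WRITE_DATA is a 4K-aligned physical address
 * OR'd with the translated hw access flags, emitted as two dwords.
 */
static u64 example_make_pte(u64 phys_addr, u32 hw_flags)
{
	return (phys_addr & 0xFFFFFFFFFFFFF000ULL) | hw_flags;
}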
4885 * The RLC is a multi-purpose microengine that handles a
4886 * variety of functions, the most important of which is
4887 * the interrupt controller.
4889 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4892 u32 tmp = RREG32(CP_INT_CNTL_RING0);
4895 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4897 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4898 WREG32(CP_INT_CNTL_RING0, tmp);
4901 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4905 tmp = RREG32(RLC_LB_CNTL);
4907 tmp |= LOAD_BALANCE_ENABLE;
4909 tmp &= ~LOAD_BALANCE_ENABLE;
4910 WREG32(RLC_LB_CNTL, tmp);
4913 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
4918 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4919 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4920 cik_select_se_sh(rdev, i, j);
4921 for (k = 0; k < rdev->usec_timeout; k++) {
4922 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4928 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4930 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4931 for (k = 0; k < rdev->usec_timeout; k++) {
4932 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4938 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4942 tmp = RREG32(RLC_CNTL);
4944 WREG32(RLC_CNTL, rlc);
4947 static u32 cik_halt_rlc(struct radeon_device *rdev)
4951 orig = data = RREG32(RLC_CNTL);
4953 if (data & RLC_ENABLE) {
4956 data &= ~RLC_ENABLE;
4957 WREG32(RLC_CNTL, data);
4959 for (i = 0; i < rdev->usec_timeout; i++) {
4960 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
4965 cik_wait_for_rlc_serdes(rdev);
4971 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
4975 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
4976 WREG32(RLC_GPR_REG2, tmp);
4978 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
4979 for (i = 0; i < rdev->usec_timeout; i++) {
4980 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
4985 for (i = 0; i < rdev->usec_timeout; i++) {
4986 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
4992 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
4996 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
4997 WREG32(RLC_GPR_REG2, tmp);
5001 * cik_rlc_stop - stop the RLC ME
5003 * @rdev: radeon_device pointer
5005 * Halt the RLC ME (MicroEngine) (CIK).
5007 static void cik_rlc_stop(struct radeon_device *rdev)
5009 WREG32(RLC_CNTL, 0);
5011 cik_enable_gui_idle_interrupt(rdev, false);
5013 cik_wait_for_rlc_serdes(rdev);
5017 * cik_rlc_start - start the RLC ME
5019 * @rdev: radeon_device pointer
5021 * Unhalt the RLC ME (MicroEngine) (CIK).
5023 static void cik_rlc_start(struct radeon_device *rdev)
5025 WREG32(RLC_CNTL, RLC_ENABLE);
5027 cik_enable_gui_idle_interrupt(rdev, true);
5033 * cik_rlc_resume - setup the RLC hw
5035 * @rdev: radeon_device pointer
5037 * Initialize the RLC registers, load the ucode,
5038 * and start the RLC (CIK).
5039 * Returns 0 for success, -EINVAL if the ucode is not available.
5041 static int cik_rlc_resume(struct radeon_device *rdev)
5044 const __be32 *fw_data;
5049 switch (rdev->family) {
5052 size = BONAIRE_RLC_UCODE_SIZE;
5055 size = KV_RLC_UCODE_SIZE;
5058 size = KB_RLC_UCODE_SIZE;
5065 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5066 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5074 WREG32(RLC_LB_CNTR_INIT, 0);
5075 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5077 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5078 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5079 WREG32(RLC_LB_PARAMS, 0x00600408);
5080 WREG32(RLC_LB_CNTL, 0x80000004);
5082 WREG32(RLC_MC_CNTL, 0);
5083 WREG32(RLC_UCODE_CNTL, 0);
5085 fw_data = (const __be32 *)rdev->rlc_fw->data;
5086 WREG32(RLC_GPM_UCODE_ADDR, 0);
5087 for (i = 0; i < size; i++)
5088 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5089 WREG32(RLC_GPM_UCODE_ADDR, 0);
5091 /* XXX - find out what chips support lbpw */
5092 cik_enable_lbpw(rdev, false);
5094 if (rdev->family == CHIP_BONAIRE)
5095 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5097 cik_rlc_start(rdev);
5102 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5104 u32 data, orig, tmp, tmp2;
5106 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5108 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5109 cik_enable_gui_idle_interrupt(rdev, true);
5111 tmp = cik_halt_rlc(rdev);
5113 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5114 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5115 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5116 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5117 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5119 cik_update_rlc(rdev, tmp);
5121 data |= CGCG_EN | CGLS_EN;
5123 cik_enable_gui_idle_interrupt(rdev, false);
5125 RREG32(CB_CGTT_SCLK_CTRL);
5126 RREG32(CB_CGTT_SCLK_CTRL);
5127 RREG32(CB_CGTT_SCLK_CTRL);
5128 RREG32(CB_CGTT_SCLK_CTRL);
5130 data &= ~(CGCG_EN | CGLS_EN);
5134 WREG32(RLC_CGCG_CGLS_CTRL, data);
5138 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5140 u32 data, orig, tmp = 0;
5142 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5143 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5144 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5145 orig = data = RREG32(CP_MEM_SLP_CNTL);
5146 data |= CP_MEM_LS_EN;
5148 WREG32(CP_MEM_SLP_CNTL, data);
5152 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5155 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5157 tmp = cik_halt_rlc(rdev);
5159 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5160 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5161 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5162 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5163 WREG32(RLC_SERDES_WR_CTRL, data);
5165 cik_update_rlc(rdev, tmp);
5167 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5168 orig = data = RREG32(CGTS_SM_CTRL_REG);
5169 data &= ~SM_MODE_MASK;
5170 data |= SM_MODE(0x2);
5171 data |= SM_MODE_ENABLE;
5172 data &= ~CGTS_OVERRIDE;
5173 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5174 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5175 data &= ~CGTS_LS_OVERRIDE;
5176 data &= ~ON_MONITOR_ADD_MASK;
5177 data |= ON_MONITOR_ADD_EN;
5178 data |= ON_MONITOR_ADD(0x96);
5180 WREG32(CGTS_SM_CTRL_REG, data);
5183 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5186 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5188 data = RREG32(RLC_MEM_SLP_CNTL);
5189 if (data & RLC_MEM_LS_EN) {
5190 data &= ~RLC_MEM_LS_EN;
5191 WREG32(RLC_MEM_SLP_CNTL, data);
5194 data = RREG32(CP_MEM_SLP_CNTL);
5195 if (data & CP_MEM_LS_EN) {
5196 data &= ~CP_MEM_LS_EN;
5197 WREG32(CP_MEM_SLP_CNTL, data);
5200 orig = data = RREG32(CGTS_SM_CTRL_REG);
5201 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5203 WREG32(CGTS_SM_CTRL_REG, data);
5205 tmp = cik_halt_rlc(rdev);
5207 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5208 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5209 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5210 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5211 WREG32(RLC_SERDES_WR_CTRL, data);
5213 cik_update_rlc(rdev, tmp);
5217 static const u32 mc_cg_registers[] =
5230 static void cik_enable_mc_ls(struct radeon_device *rdev,
5236 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5237 orig = data = RREG32(mc_cg_registers[i]);
5238 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5239 data |= MC_LS_ENABLE;
5241 data &= ~MC_LS_ENABLE;
5243 WREG32(mc_cg_registers[i], data);
5247 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5253 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5254 orig = data = RREG32(mc_cg_registers[i]);
5255 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5256 data |= MC_CG_ENABLE;
5258 data &= ~MC_CG_ENABLE;
5260 WREG32(mc_cg_registers[i], data);
5264 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5269 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5270 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5271 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5273 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5276 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5278 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5281 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5285 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5290 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5291 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5294 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5296 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5299 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5301 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5304 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5306 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5309 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5313 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5318 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5319 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5321 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5323 orig = data = RREG32(UVD_CGC_CTRL);
5326 WREG32(UVD_CGC_CTRL, data);
5328 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5330 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5332 orig = data = RREG32(UVD_CGC_CTRL);
5335 WREG32(UVD_CGC_CTRL, data);
5339 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5344 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5346 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5347 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5348 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5350 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5351 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5354 WREG32_PCIE_PORT(PCIE_CNTL2, data);
5357 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5362 orig = data = RREG32(HDP_HOST_PATH_CNTL);
5364 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5365 data &= ~CLOCK_GATING_DIS;
5367 data |= CLOCK_GATING_DIS;
5370 WREG32(HDP_HOST_PATH_CNTL, data);
5373 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5378 orig = data = RREG32(HDP_MEM_POWER_LS);
5380 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5381 data |= HDP_LS_ENABLE;
5383 data &= ~HDP_LS_ENABLE;
5386 WREG32(HDP_MEM_POWER_LS, data);
5389 void cik_update_cg(struct radeon_device *rdev,
5390 u32 block, bool enable)
5393 if (block & RADEON_CG_BLOCK_GFX) {
5394 cik_enable_gui_idle_interrupt(rdev, false);
5395 /* order matters! */
5397 cik_enable_mgcg(rdev, true);
5398 cik_enable_cgcg(rdev, true);
5400 cik_enable_cgcg(rdev, false);
5401 cik_enable_mgcg(rdev, false);
5403 cik_enable_gui_idle_interrupt(rdev, true);
5406 if (block & RADEON_CG_BLOCK_MC) {
5407 if (!(rdev->flags & RADEON_IS_IGP)) {
5408 cik_enable_mc_mgcg(rdev, enable);
5409 cik_enable_mc_ls(rdev, enable);
5413 if (block & RADEON_CG_BLOCK_SDMA) {
5414 cik_enable_sdma_mgcg(rdev, enable);
5415 cik_enable_sdma_mgls(rdev, enable);
5418 if (block & RADEON_CG_BLOCK_BIF) {
5419 cik_enable_bif_mgls(rdev, enable);
5422 if (block & RADEON_CG_BLOCK_UVD) {
5424 cik_enable_uvd_mgcg(rdev, enable);
5427 if (block & RADEON_CG_BLOCK_HDP) {
5428 cik_enable_hdp_mgcg(rdev, enable);
5429 cik_enable_hdp_ls(rdev, enable);
5433 static void cik_init_cg(struct radeon_device *rdev)
5436 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
5439 si_init_uvd_internal_cg(rdev);
5441 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5442 RADEON_CG_BLOCK_SDMA |
5443 RADEON_CG_BLOCK_BIF |
5444 RADEON_CG_BLOCK_UVD |
5445 RADEON_CG_BLOCK_HDP), true);
5448 static void cik_fini_cg(struct radeon_device *rdev)
5450 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5451 RADEON_CG_BLOCK_SDMA |
5452 RADEON_CG_BLOCK_BIF |
5453 RADEON_CG_BLOCK_UVD |
5454 RADEON_CG_BLOCK_HDP), false);
5456 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
5459 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5464 orig = data = RREG32(RLC_PG_CNTL);
5465 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5466 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5468 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5470 WREG32(RLC_PG_CNTL, data);
5473 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5478 orig = data = RREG32(RLC_PG_CNTL);
5479 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5480 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5482 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5484 WREG32(RLC_PG_CNTL, data);
5487 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5491 orig = data = RREG32(RLC_PG_CNTL);
5492 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5493 data &= ~DISABLE_CP_PG;
5495 data |= DISABLE_CP_PG;
5497 WREG32(RLC_PG_CNTL, data);
5500 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5504 orig = data = RREG32(RLC_PG_CNTL);
5505 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5506 data &= ~DISABLE_GDS_PG;
5508 data |= DISABLE_GDS_PG;
5510 WREG32(RLC_PG_CNTL, data);
5513 #define CP_ME_TABLE_SIZE 96
5514 #define CP_ME_TABLE_OFFSET 2048
5515 #define CP_MEC_TABLE_OFFSET 4096
5517 void cik_init_cp_pg_table(struct radeon_device *rdev)
5519 const __be32 *fw_data;
5520 volatile u32 *dst_ptr;
5521 int me, i, max_me = 4;
5525 if (rdev->family == CHIP_KAVERI)
5528 if (rdev->rlc.cp_table_ptr == NULL)
5531 /* write the cp table buffer */
5532 dst_ptr = rdev->rlc.cp_table_ptr;
5533 for (me = 0; me < max_me; me++) {
5535 fw_data = (const __be32 *)rdev->ce_fw->data;
5536 table_offset = CP_ME_TABLE_OFFSET;
5537 } else if (me == 1) {
5538 fw_data = (const __be32 *)rdev->pfp_fw->data;
5539 table_offset = CP_ME_TABLE_OFFSET;
5540 } else if (me == 2) {
5541 fw_data = (const __be32 *)rdev->me_fw->data;
5542 table_offset = CP_ME_TABLE_OFFSET;
5544 fw_data = (const __be32 *)rdev->mec_fw->data;
5545 table_offset = CP_MEC_TABLE_OFFSET;
5548 for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
5549 dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
5551 bo_offset += CP_ME_TABLE_SIZE;
5555 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
5560 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5561 orig = data = RREG32(RLC_PG_CNTL);
5562 data |= GFX_PG_ENABLE;
5564 WREG32(RLC_PG_CNTL, data);
5566 orig = data = RREG32(RLC_AUTO_PG_CTRL);
5569 WREG32(RLC_AUTO_PG_CTRL, data);
5571 orig = data = RREG32(RLC_PG_CNTL);
5572 data &= ~GFX_PG_ENABLE;
5574 WREG32(RLC_PG_CNTL, data);
5576 orig = data = RREG32(RLC_AUTO_PG_CTRL);
5577 data &= ~AUTO_PG_EN;
5579 WREG32(RLC_AUTO_PG_CTRL, data);
5581 data = RREG32(DB_RENDER_CONTROL);
5585 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5587 u32 mask = 0, tmp, tmp1;
5590 cik_select_se_sh(rdev, se, sh);
5591 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5592 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5593 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5600 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
5605 return (~tmp) & mask;
5608 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5610 u32 i, j, k, active_cu_number = 0;
5611 u32 mask, counter, cu_bitmap;
5614 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5615 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5619 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
5620 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5628 active_cu_number += counter;
5629 tmp |= (cu_bitmap << (i * 16 + j * 8));
5633 WREG32(RLC_PG_AO_CU_MASK, tmp);
5635 tmp = RREG32(RLC_MAX_PG_CU);
5636 tmp &= ~MAX_PU_CU_MASK;
5637 tmp |= MAX_PU_CU(active_cu_number);
5638 WREG32(RLC_MAX_PG_CU, tmp);
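/*
 * Packing sketch (hypothetical helper): the always-on CU mask built
 * above reserves 16 bits per shader engine and 8 bits per shader array
 * within it, matching the (i * 16 + j * 8) shift in the loop.
 */
static u32 example_ao_cu_shift(u32 se, u32 sh)
{
	return se * 16 + sh * 8;
}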
5641 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5646 orig = data = RREG32(RLC_PG_CNTL);
5647 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5648 data |= STATIC_PER_CU_PG_ENABLE;
5650 data &= ~STATIC_PER_CU_PG_ENABLE;
5652 WREG32(RLC_PG_CNTL, data);
5655 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5660 orig = data = RREG32(RLC_PG_CNTL);
5661 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5662 data |= DYN_PER_CU_PG_ENABLE;
5664 data &= ~DYN_PER_CU_PG_ENABLE;
5666 WREG32(RLC_PG_CNTL, data);
5669 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
5670 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
5672 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
5677 if (rdev->rlc.cs_data) {
5678 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5679 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
5680 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
5681 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
5683 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5684 for (i = 0; i < 3; i++)
5685 WREG32(RLC_GPM_SCRATCH_DATA, 0);
5687 if (rdev->rlc.reg_list) {
5688 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
5689 for (i = 0; i < rdev->rlc.reg_list_size; i++)
5690 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
5693 orig = data = RREG32(RLC_PG_CNTL);
5696 WREG32(RLC_PG_CNTL, data);
5698 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5699 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
5701 data = RREG32(CP_RB_WPTR_POLL_CNTL);
5702 data &= ~IDLE_POLL_COUNT_MASK;
5703 data |= IDLE_POLL_COUNT(0x60);
5704 WREG32(CP_RB_WPTR_POLL_CNTL, data);
5707 WREG32(RLC_PG_DELAY, data);
5709 data = RREG32(RLC_PG_DELAY_2);
5712 WREG32(RLC_PG_DELAY_2, data);
5714 data = RREG32(RLC_AUTO_PG_CTRL);
5715 data &= ~GRBM_REG_SGIT_MASK;
5716 data |= GRBM_REG_SGIT(0x700);
5717 WREG32(RLC_AUTO_PG_CTRL, data);
5721 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
5723 cik_enable_gfx_cgpg(rdev, enable);
5724 cik_enable_gfx_static_mgpg(rdev, enable);
5725 cik_enable_gfx_dynamic_mgpg(rdev, enable);
5728 u32 cik_get_csb_size(struct radeon_device *rdev)
5731 const struct cs_section_def *sect = NULL;
5732 const struct cs_extent_def *ext = NULL;
5734 if (rdev->rlc.cs_data == NULL)
5737 /* begin clear state */
5739 /* context control state */
5742 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5743 for (ext = sect->section; ext->extent != NULL; ++ext) {
5744 if (sect->id == SECT_CONTEXT)
5745 count += 2 + ext->reg_count;
5750 /* pa_sc_raster_config/pa_sc_raster_config1 */
5752 /* end clear state */
5760 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5763 const struct cs_section_def *sect = NULL;
5764 const struct cs_extent_def *ext = NULL;
5766 if (rdev->rlc.cs_data == NULL)
5771 buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5772 buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;
5774 buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
5775 buffer[count++] = 0x80000000;
5776 buffer[count++] = 0x80000000;
5778 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5779 for (ext = sect->section; ext->extent != NULL; ++ext) {
5780 if (sect->id == SECT_CONTEXT) {
5781 buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
5782 buffer[count++] = ext->reg_index - 0xa000;
5783 for (i = 0; i < ext->reg_count; i++)
5784 buffer[count++] = ext->extent[i];
5791 buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
5792 buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
5793 switch (rdev->family) {
5795 buffer[count++] = 0x16000012;
5796 buffer[count++] = 0x00000000;
5799 buffer[count++] = 0x00000000; /* XXX */
5800 buffer[count++] = 0x00000000;
5803 buffer[count++] = 0x00000000; /* XXX */
5804 buffer[count++] = 0x00000000;
5807 buffer[count++] = 0x00000000;
5808 buffer[count++] = 0x00000000;
5812 buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5813 buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;
5815 buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
5816 buffer[count++] = 0;
5819 static void cik_init_pg(struct radeon_device *rdev)
5821 if (rdev->pg_flags) {
5822 cik_enable_sck_slowdown_on_pu(rdev, true);
5823 cik_enable_sck_slowdown_on_pd(rdev, true);
5824 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5825 cik_init_gfx_cgpg(rdev);
5826 cik_enable_cp_pg(rdev, true);
5827 cik_enable_gds_pg(rdev, true);
5829 cik_init_ao_cu_mask(rdev);
5830 cik_update_gfx_pg(rdev, true);
5834 static void cik_fini_pg(struct radeon_device *rdev)
5836 if (rdev->pg_flags) {
5837 cik_update_gfx_pg(rdev, false);
5838 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5839 cik_enable_cp_pg(rdev, false);
5840 cik_enable_gds_pg(rdev, false);
5847 * Starting with r6xx, interrupts are handled via a ring buffer.
5848 * Ring buffers are areas of GPU accessible memory that the GPU
5849 * writes interrupt vectors into and the host reads vectors out of.
5850 * There is a rptr (read pointer) that determines where the
5851 * host is currently reading, and a wptr (write pointer)
5852 * which determines where the GPU has written. When the
5853 * pointers are equal, the ring is idle. When the GPU
5854 * writes vectors to the ring buffer, it increments the
5855 * wptr. When there is an interrupt, the host then starts
5856 * fetching vectors and processing them until the pointers are
5857 * equal again, at which point it updates the rptr.
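/*
 * Consumption sketch (hypothetical; cik_irq_process() below is the real
 * consumer): walk from rptr to wptr in 16-byte IV entries, then publish
 * the new rptr back to the hw.
 */
static void example_drain_ih(struct radeon_device *rdev, u32 wptr)
{
	u32 rptr = rdev->ih.rptr;

	while (rptr != wptr) {
		/* a real handler would decode and dispatch the vector here */
		rptr = (rptr + 16) & rdev->ih.ptr_mask;
	}
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rptr);
}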
5861 * cik_enable_interrupts - Enable the interrupt ring buffer
5863 * @rdev: radeon_device pointer
5865 * Enable the interrupt ring buffer (CIK).
5867 static void cik_enable_interrupts(struct radeon_device *rdev)
5869 u32 ih_cntl = RREG32(IH_CNTL);
5870 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5872 ih_cntl |= ENABLE_INTR;
5873 ih_rb_cntl |= IH_RB_ENABLE;
5874 WREG32(IH_CNTL, ih_cntl);
5875 WREG32(IH_RB_CNTL, ih_rb_cntl);
5876 rdev->ih.enabled = true;
5880 * cik_disable_interrupts - Disable the interrupt ring buffer
5882 * @rdev: radeon_device pointer
5884 * Disable the interrupt ring buffer (CIK).
5886 static void cik_disable_interrupts(struct radeon_device *rdev)
5888 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5889 u32 ih_cntl = RREG32(IH_CNTL);
5891 ih_rb_cntl &= ~IH_RB_ENABLE;
5892 ih_cntl &= ~ENABLE_INTR;
5893 WREG32(IH_RB_CNTL, ih_rb_cntl);
5894 WREG32(IH_CNTL, ih_cntl);
5895 /* set rptr, wptr to 0 */
5896 WREG32(IH_RB_RPTR, 0);
5897 WREG32(IH_RB_WPTR, 0);
5898 rdev->ih.enabled = false;
5903 * cik_disable_interrupt_state - Disable all interrupt sources
5905 * @rdev: radeon_device pointer
5907 * Clear all interrupt enable bits used by the driver (CIK).
5909 static void cik_disable_interrupt_state(struct radeon_device *rdev)
5914 tmp = RREG32(CP_INT_CNTL_RING0) &
5915 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5916 WREG32(CP_INT_CNTL_RING0, tmp);
5918 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5919 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5920 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5921 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5922 /* compute queues */
5923 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
5924 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
5925 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
5926 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
5927 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
5928 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
5929 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
5930 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
5932 WREG32(GRBM_INT_CNTL, 0);
5933 /* vline/vblank, etc. */
5934 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5935 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5936 if (rdev->num_crtc >= 4) {
5937 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5938 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5940 if (rdev->num_crtc >= 6) {
5941 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5942 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5946 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5948 /* digital hotplug */
5949 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5950 WREG32(DC_HPD1_INT_CONTROL, tmp);
5951 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5952 WREG32(DC_HPD2_INT_CONTROL, tmp);
5953 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5954 WREG32(DC_HPD3_INT_CONTROL, tmp);
5955 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5956 WREG32(DC_HPD4_INT_CONTROL, tmp);
5957 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5958 WREG32(DC_HPD5_INT_CONTROL, tmp);
5959 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5960 WREG32(DC_HPD6_INT_CONTROL, tmp);
5965 * cik_irq_init - init and enable the interrupt ring
5967 * @rdev: radeon_device pointer
5969 * Allocate a ring buffer for the interrupt controller,
5970 * enable the RLC, disable interrupts, enable the IH
5971 * ring buffer and enable it (CIK).
5972 * Called at device load and resume.
5973 * Returns 0 for success, errors for failure.
5975 static int cik_irq_init(struct radeon_device *rdev)
5979 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5982 ret = r600_ih_ring_alloc(rdev);
5987 cik_disable_interrupts(rdev);
5990 ret = cik_rlc_resume(rdev);
5992 r600_ih_ring_fini(rdev);
5996 /* setup interrupt control */
5997 /* XXX this should actually be a bus address, not an MC address. same on older asics */
5998 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5999 interrupt_cntl = RREG32(INTERRUPT_CNTL);
6000 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6001 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6003 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6004 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6005 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6006 WREG32(INTERRUPT_CNTL, interrupt_cntl);
6008 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6009 rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6011 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6012 IH_WPTR_OVERFLOW_CLEAR |
6015 if (rdev->wb.enabled)
6016 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6018 /* set the writeback address whether it's enabled or not */
6019 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6020 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6022 WREG32(IH_RB_CNTL, ih_rb_cntl);
6024 /* set rptr, wptr to 0 */
6025 WREG32(IH_RB_RPTR, 0);
6026 WREG32(IH_RB_WPTR, 0);
6028 /* Default settings for IH_CNTL (disabled at first) */
6029 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6030 /* RPTR_REARM only works if msi's are enabled */
6031 if (rdev->msi_enabled)
6032 ih_cntl |= RPTR_REARM;
6033 WREG32(IH_CNTL, ih_cntl);
6035 /* force the active interrupt state to all disabled */
6036 cik_disable_interrupt_state(rdev);
6038 pci_set_master(rdev->pdev);
6041 cik_enable_interrupts(rdev);
6047 * cik_irq_set - enable/disable interrupt sources
6049 * @rdev: radeon_device pointer
6051 * Enable interrupt sources on the GPU (vblanks, hpd, etc.) (CIK).
6053 * Returns 0 for success, errors for failure.
6055 int cik_irq_set(struct radeon_device *rdev)
6058 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6059 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6060 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6061 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6062 u32 grbm_int_cntl = 0;
6063 u32 dma_cntl, dma_cntl1;
6066 if (!rdev->irq.installed) {
6067 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6070 /* don't enable anything if the ih is disabled */
6071 if (!rdev->ih.enabled) {
6072 cik_disable_interrupts(rdev);
6073 /* force the active interrupt state to all disabled */
6074 cik_disable_interrupt_state(rdev);
6078 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6079 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6080 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6082 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6083 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6084 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6085 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6086 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6087 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6089 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6090 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6092 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6093 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6094 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6095 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6096 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6097 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6098 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6099 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6101 if (rdev->flags & RADEON_IS_IGP)
6102 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6103 ~(THERM_INTH_MASK | THERM_INTL_MASK);
6105 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6106 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6108 /* enable CP interrupts on all rings */
6109 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6110 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6111 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6113 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6114 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6115 DRM_DEBUG("cik_irq_set: sw int cp1\n");
6116 if (ring->me == 1) {
6117 switch (ring->pipe) {
6119 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6122 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6125 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6128 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6131 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6134 } else if (ring->me == 2) {
6135 switch (ring->pipe) {
6137 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6140 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6143 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6146 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6149 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6153 DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
6156 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6157 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6158 DRM_DEBUG("cik_irq_set: sw int cp2\n");
6159 if (ring->me == 1) {
6160 switch (ring->pipe) {
6162 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6165 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6168 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6171 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6174 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6177 } else if (ring->me == 2) {
6178 switch (ring->pipe) {
6180 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6183 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6186 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6189 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6192 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6196 DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
6200 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6201 DRM_DEBUG("cik_irq_set: sw int dma\n");
6202 dma_cntl |= TRAP_ENABLE;
6205 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6206 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6207 dma_cntl1 |= TRAP_ENABLE;
6210 if (rdev->irq.crtc_vblank_int[0] ||
6211 atomic_read(&rdev->irq.pflip[0])) {
6212 DRM_DEBUG("cik_irq_set: vblank 0\n");
6213 crtc1 |= VBLANK_INTERRUPT_MASK;
6215 if (rdev->irq.crtc_vblank_int[1] ||
6216 atomic_read(&rdev->irq.pflip[1])) {
6217 DRM_DEBUG("cik_irq_set: vblank 1\n");
6218 crtc2 |= VBLANK_INTERRUPT_MASK;
6220 if (rdev->irq.crtc_vblank_int[2] ||
6221 atomic_read(&rdev->irq.pflip[2])) {
6222 DRM_DEBUG("cik_irq_set: vblank 2\n");
6223 crtc3 |= VBLANK_INTERRUPT_MASK;
6225 if (rdev->irq.crtc_vblank_int[3] ||
6226 atomic_read(&rdev->irq.pflip[3])) {
6227 DRM_DEBUG("cik_irq_set: vblank 3\n");
6228 crtc4 |= VBLANK_INTERRUPT_MASK;
6230 if (rdev->irq.crtc_vblank_int[4] ||
6231 atomic_read(&rdev->irq.pflip[4])) {
6232 DRM_DEBUG("cik_irq_set: vblank 4\n");
6233 crtc5 |= VBLANK_INTERRUPT_MASK;
6235 if (rdev->irq.crtc_vblank_int[5] ||
6236 atomic_read(&rdev->irq.pflip[5])) {
6237 DRM_DEBUG("cik_irq_set: vblank 5\n");
6238 crtc6 |= VBLANK_INTERRUPT_MASK;
6240 if (rdev->irq.hpd[0]) {
6241 DRM_DEBUG("cik_irq_set: hpd 1\n");
6242 hpd1 |= DC_HPDx_INT_EN;
6244 if (rdev->irq.hpd[1]) {
6245 DRM_DEBUG("cik_irq_set: hpd 2\n");
6246 hpd2 |= DC_HPDx_INT_EN;
6248 if (rdev->irq.hpd[2]) {
6249 DRM_DEBUG("cik_irq_set: hpd 3\n");
6250 hpd3 |= DC_HPDx_INT_EN;
6252 if (rdev->irq.hpd[3]) {
6253 DRM_DEBUG("cik_irq_set: hpd 4\n");
6254 hpd4 |= DC_HPDx_INT_EN;
6256 if (rdev->irq.hpd[4]) {
6257 DRM_DEBUG("cik_irq_set: hpd 5\n");
6258 hpd5 |= DC_HPDx_INT_EN;
6260 if (rdev->irq.hpd[5]) {
6261 DRM_DEBUG("cik_irq_set: hpd 6\n");
6262 hpd6 |= DC_HPDx_INT_EN;
6265 if (rdev->irq.dpm_thermal) {
6266 DRM_DEBUG("dpm thermal\n");
6267 if (rdev->flags & RADEON_IS_IGP)
6268 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6270 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6273 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6275 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6276 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6278 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6279 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6280 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6281 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6282 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6283 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6284 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6285 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6287 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6289 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6290 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6291 if (rdev->num_crtc >= 4) {
6292 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6293 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6295 if (rdev->num_crtc >= 6) {
6296 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6297 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6300 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6301 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6302 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6303 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6304 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6305 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6307 if (rdev->flags & RADEON_IS_IGP)
6308 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6310 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6316 * cik_irq_ack - ack interrupt sources
6318 * @rdev: radeon_device pointer
6320 * Ack interrupt sources on the GPU (vblanks, hpd,
6321 * etc.) (CIK). Certain interrupt sources are sw
6322 * generated and do not require an explicit ack.
6324 static inline void cik_irq_ack(struct radeon_device *rdev)
6328 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6329 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6330 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6331 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6332 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6333 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6334 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6336 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6337 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6338 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6339 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6340 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6341 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6342 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6343 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6345 if (rdev->num_crtc >= 4) {
6346 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6347 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6348 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6349 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6350 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6351 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6352 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6353 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6356 if (rdev->num_crtc >= 6) {
6357 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6358 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6359 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6360 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6361 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6362 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6363 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6364 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6367 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6368 tmp = RREG32(DC_HPD1_INT_CONTROL);
6369 tmp |= DC_HPDx_INT_ACK;
6370 WREG32(DC_HPD1_INT_CONTROL, tmp);
6372 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6373 tmp = RREG32(DC_HPD2_INT_CONTROL);
6374 tmp |= DC_HPDx_INT_ACK;
6375 WREG32(DC_HPD2_INT_CONTROL, tmp);
6377 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6378 tmp = RREG32(DC_HPD3_INT_CONTROL);
6379 tmp |= DC_HPDx_INT_ACK;
6380 WREG32(DC_HPD3_INT_CONTROL, tmp);
6382 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6383 tmp = RREG32(DC_HPD4_INT_CONTROL);
6384 tmp |= DC_HPDx_INT_ACK;
6385 WREG32(DC_HPD4_INT_CONTROL, tmp);
6387 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6388 tmp = RREG32(DC_HPD5_INT_CONTROL);
6389 tmp |= DC_HPDx_INT_ACK;
6390 WREG32(DC_HPD5_INT_CONTROL, tmp);
6392 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6393 tmp = RREG32(DC_HPD6_INT_CONTROL);
6394 tmp |= DC_HPDx_INT_ACK;
6395 WREG32(DC_HPD6_INT_CONTROL, tmp);
6400 * cik_irq_disable - disable interrupts
6402 * @rdev: radeon_device pointer
6404 * Disable interrupts on the hw (CIK).
6406 static void cik_irq_disable(struct radeon_device *rdev)
6408 cik_disable_interrupts(rdev);
6409 /* Wait and acknowledge irq */
6412 cik_disable_interrupt_state(rdev);
6416 * cik_irq_suspend - disable interrupts for suspend
6418 * @rdev: radeon_device pointer
6420 * Disable interrupts and stop the RLC (CIK).
6423 static void cik_irq_suspend(struct radeon_device *rdev)
6425 cik_irq_disable(rdev);
6430 * cik_irq_fini - tear down interrupt support
6432 * @rdev: radeon_device pointer
6434 * Disable interrupts on the hw and free the IH ring buffer (CIK).
6436 * Used for driver unload.
6438 static void cik_irq_fini(struct radeon_device *rdev)
6440 cik_irq_suspend(rdev);
6441 r600_ih_ring_fini(rdev);
6445 * cik_get_ih_wptr - get the IH ring buffer wptr
6447 * @rdev: radeon_device pointer
6449 * Get the IH ring buffer wptr from either the register
6450 * or the writeback memory buffer (CIK). Also check for
6451 * ring buffer overflow and deal with it.
6452 * Used by cik_irq_process().
6453 * Returns the value of the wptr.
6455 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6459 if (rdev->wb.enabled)
6460 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6462 wptr = RREG32(IH_RB_WPTR);
6464 if (wptr & RB_OVERFLOW) {
6465 /* When a ring buffer overflow happens, start parsing interrupts
6466 * from the last not-overwritten vector (wptr + 16). Hopefully
6467 * this should allow us to catch up.
6469 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6470 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6471 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6472 tmp = RREG32(IH_RB_CNTL);
6473 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6474 WREG32(IH_RB_CNTL, tmp);
6476 return (wptr & rdev->ih.ptr_mask);
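/* Worked example (illustrative): cik_init() below sizes the IH ring at
 * 64 KB, so ptr_mask is 0xffff and the ring holds 4096 16-byte vectors.
 * On overflow, rptr is resynced to (wptr + 16) & ptr_mask, just past the
 * oldest vector that may have been clobbered, and the overflow flag is
 * cleared via IH_WPTR_OVERFLOW_CLEAR.
 */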
6480 * Each IV ring entry is 128 bits:
6481 * [7:0] - interrupt source id
6482 * [31:8] - reserved
6483 * [59:32] - interrupt source data
6484 * [63:60] - reserved
6485 * [71:64] - RINGID
6486 * CP:
6487 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6488 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6489 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6490 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6491 * PIPE_ID - ME0 0=3D
6492 * - ME1&2 compute dispatcher (4 pipes each)
6493 * SDMA:
6494 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
6495 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
6496 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6497 * [79:72] - VMID
6498 * [95:80] - PASID
6499 * [127:96] - reserved
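/* A minimal decode sketch (illustrative; it mirrors the masks used in
 * cik_irq_process() below, where dw[] is one 16-byte vector read from
 * the IH ring as four little-endian dwords):
 *
 *	u32 src_id   = le32_to_cpu(dw[0]) & 0xff;        (bits [7:0])
 *	u32 src_data = le32_to_cpu(dw[1]) & 0xfffffff;   (bits [59:32])
 *	u32 ring_id  = le32_to_cpu(dw[2]) & 0xff;        (bits [71:64])
 *	u32 vm_id    = (le32_to_cpu(dw[2]) >> 8) & 0xff; (bits [79:72])
 */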
6502 * cik_irq_process - interrupt handler
6504 * @rdev: radeon_device pointer
6506 * Interrupt handler (CIK). Walk the IH ring,
6507 * ack interrupts and schedule work to handle
6509 * Returns irq process return code.
6511 int cik_irq_process(struct radeon_device *rdev)
6513 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6514 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6517 u32 src_id, src_data, ring_id;
6518 u8 me_id, pipe_id, queue_id;
6520 bool queue_hotplug = false;
6521 bool queue_reset = false;
6522 u32 addr, status, mc_client;
6523 bool queue_thermal = false;
6525 if (!rdev->ih.enabled || rdev->shutdown)
6528 wptr = cik_get_ih_wptr(rdev);
6531 /* is somebody else already processing irqs? */
6532 if (atomic_xchg(&rdev->ih.lock, 1))
6535 rptr = rdev->ih.rptr;
6536 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6538 /* Order reading of wptr vs. reading of IH ring data */
6541 /* display interrupts */
6544 while (rptr != wptr) {
6545 /* wptr/rptr are in bytes! */
6546 ring_index = rptr / 4;
6547 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6548 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6549 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6552 case 1: /* D1 vblank/vline */
6554 case 0: /* D1 vblank */
6555 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6556 if (rdev->irq.crtc_vblank_int[0]) {
6557 drm_handle_vblank(rdev->ddev, 0);
6558 rdev->pm.vblank_sync = true;
6559 wake_up(&rdev->irq.vblank_queue);
6561 if (atomic_read(&rdev->irq.pflip[0]))
6562 radeon_crtc_handle_flip(rdev, 0);
6563 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6564 DRM_DEBUG("IH: D1 vblank\n");
6567 case 1: /* D1 vline */
6568 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6569 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6570 DRM_DEBUG("IH: D1 vline\n");
6574 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6578 case 2: /* D2 vblank/vline */
6580 case 0: /* D2 vblank */
6581 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6582 if (rdev->irq.crtc_vblank_int[1]) {
6583 drm_handle_vblank(rdev->ddev, 1);
6584 rdev->pm.vblank_sync = true;
6585 wake_up(&rdev->irq.vblank_queue);
6587 if (atomic_read(&rdev->irq.pflip[1]))
6588 radeon_crtc_handle_flip(rdev, 1);
6589 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6590 DRM_DEBUG("IH: D2 vblank\n");
6593 case 1: /* D2 vline */
6594 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6595 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6596 DRM_DEBUG("IH: D2 vline\n");
6600 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6604 case 3: /* D3 vblank/vline */
6606 case 0: /* D3 vblank */
6607 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6608 if (rdev->irq.crtc_vblank_int[2]) {
6609 drm_handle_vblank(rdev->ddev, 2);
6610 rdev->pm.vblank_sync = true;
6611 wake_up(&rdev->irq.vblank_queue);
6613 if (atomic_read(&rdev->irq.pflip[2]))
6614 radeon_crtc_handle_flip(rdev, 2);
6615 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6616 DRM_DEBUG("IH: D3 vblank\n");
6619 case 1: /* D3 vline */
6620 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6621 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6622 DRM_DEBUG("IH: D3 vline\n");
6626 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6630 case 4: /* D4 vblank/vline */
6632 case 0: /* D4 vblank */
6633 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6634 if (rdev->irq.crtc_vblank_int[3]) {
6635 drm_handle_vblank(rdev->ddev, 3);
6636 rdev->pm.vblank_sync = true;
6637 wake_up(&rdev->irq.vblank_queue);
6639 if (atomic_read(&rdev->irq.pflip[3]))
6640 radeon_crtc_handle_flip(rdev, 3);
6641 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6642 DRM_DEBUG("IH: D4 vblank\n");
6645 case 1: /* D4 vline */
6646 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6647 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6648 DRM_DEBUG("IH: D4 vline\n");
6652 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6656 case 5: /* D5 vblank/vline */
6658 case 0: /* D5 vblank */
6659 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6660 if (rdev->irq.crtc_vblank_int[4]) {
6661 drm_handle_vblank(rdev->ddev, 4);
6662 rdev->pm.vblank_sync = true;
6663 wake_up(&rdev->irq.vblank_queue);
6665 if (atomic_read(&rdev->irq.pflip[4]))
6666 radeon_crtc_handle_flip(rdev, 4);
6667 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6668 DRM_DEBUG("IH: D5 vblank\n");
6671 case 1: /* D5 vline */
6672 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6673 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6674 DRM_DEBUG("IH: D5 vline\n");
6678 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6682 case 6: /* D6 vblank/vline */
6684 case 0: /* D6 vblank */
6685 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6686 if (rdev->irq.crtc_vblank_int[5]) {
6687 drm_handle_vblank(rdev->ddev, 5);
6688 rdev->pm.vblank_sync = true;
6689 wake_up(&rdev->irq.vblank_queue);
6691 if (atomic_read(&rdev->irq.pflip[5]))
6692 radeon_crtc_handle_flip(rdev, 5);
6693 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6694 DRM_DEBUG("IH: D6 vblank\n");
6697 case 1: /* D6 vline */
6698 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6699 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6700 DRM_DEBUG("IH: D6 vline\n");
6704 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6708 case 42: /* HPD hotplug */
6711 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6712 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6713 queue_hotplug = true;
6714 DRM_DEBUG("IH: HPD1\n");
6718 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6719 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6720 queue_hotplug = true;
6721 DRM_DEBUG("IH: HPD2\n");
6725 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6726 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6727 queue_hotplug = true;
6728 DRM_DEBUG("IH: HPD3\n");
6732 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6733 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6734 queue_hotplug = true;
6735 DRM_DEBUG("IH: HPD4\n");
6739 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6740 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6741 queue_hotplug = true;
6742 DRM_DEBUG("IH: HPD5\n");
6746 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6747 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6748 queue_hotplug = true;
6749 DRM_DEBUG("IH: HPD6\n");
6753 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6758 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6759 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6763 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6764 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6765 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6766 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6767 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
6769 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6771 cik_vm_decode_fault(rdev, status, addr, mc_client);
6772 /* reset addr and status */
6773 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6775 case 176: /* GFX RB CP_INT */
6776 case 177: /* GFX IB CP_INT */
6777 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6779 case 181: /* CP EOP event */
6780 DRM_DEBUG("IH: CP EOP\n");
6781 /* XXX check the bitfield order! */
6782 me_id = (ring_id & 0x60) >> 5;
6783 pipe_id = (ring_id & 0x18) >> 3;
6784 queue_id = (ring_id & 0x7) >> 0;
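/* ring_id layout assumed from the masks above (note the XXX caveat):
 * bits [6:5] = ME, bits [4:3] = pipe, bits [2:0] = queue.
 */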
6787 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6791 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
6792 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6793 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
6794 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6798 case 184: /* CP Privileged reg access */
6799 DRM_ERROR("Illegal register access in command stream\n");
6800 /* XXX check the bitfield order! */
6801 me_id = (ring_id & 0x60) >> 5;
6802 pipe_id = (ring_id & 0x18) >> 3;
6803 queue_id = (ring_id & 0x7) >> 0;
6806 /* This results in a full GPU reset, but all we need to do is soft
6807 * reset the CP for gfx
6821 case 185: /* CP Privileged inst */
6822 DRM_ERROR("Illegal instruction in command stream\n");
6823 /* XXX check the bitfield order! */
6824 me_id = (ring_id & 0x60) >> 5;
6825 pipe_id = (ring_id & 0x18) >> 3;
6826 queue_id = (ring_id & 0x7) >> 0;
6829 /* This results in a full GPU reset, but all we need to do is soft
6830 * reset the CP for gfx
6844 case 224: /* SDMA trap event */
6845 /* XXX check the bitfield order! */
6846 me_id = (ring_id & 0x3) >> 0;
6847 queue_id = (ring_id & 0xc) >> 2;
6848 DRM_DEBUG("IH: SDMA trap\n");
6853 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6866 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6878 case 230: /* thermal low to high */
6879 DRM_DEBUG("IH: thermal low to high\n");
6880 rdev->pm.dpm.thermal.high_to_low = false;
6881 queue_thermal = true;
6883 case 231: /* thermal high to low */
6884 DRM_DEBUG("IH: thermal high to low\n");
6885 rdev->pm.dpm.thermal.high_to_low = true;
6886 queue_thermal = true;
6888 case 233: /* GUI IDLE */
6889 DRM_DEBUG("IH: GUI idle\n");
6891 case 241: /* SDMA Privileged inst */
6892 case 247: /* SDMA Privileged inst */
6893 DRM_ERROR("Illegal instruction in SDMA command stream\n");
6894 /* XXX check the bitfield order! */
6895 me_id = (ring_id & 0x3) >> 0;
6896 queue_id = (ring_id & 0xc) >> 2;
6931 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6935 /* wptr/rptr are in bytes! */
6937 rptr &= rdev->ih.ptr_mask;
6940 schedule_work(&rdev->hotplug_work);
6942 schedule_work(&rdev->reset_work);
6944 schedule_work(&rdev->pm.dpm.thermal.work);
6945 rdev->ih.rptr = rptr;
6946 WREG32(IH_RB_RPTR, rdev->ih.rptr);
6947 atomic_set(&rdev->ih.lock, 0);
6949 /* make sure wptr hasn't changed while processing */
6950 wptr = cik_get_ih_wptr(rdev);
6958 * startup/shutdown callbacks
6961 * cik_startup - program the asic to a functional state
6963 * @rdev: radeon_device pointer
6965 * Programs the asic to a functional state (CIK).
6966 * Called by cik_init() and cik_resume().
6967 * Returns 0 for success, error for failure.
6969 static int cik_startup(struct radeon_device *rdev)
6971 struct radeon_ring *ring;
6974 /* enable pcie gen2/3 link */
6975 cik_pcie_gen3_enable(rdev);
6977 cik_program_aspm(rdev);
6979 /* scratch needs to be initialized before MC */
6980 r = r600_vram_scratch_init(rdev);
6984 cik_mc_program(rdev);
6986 if (rdev->flags & RADEON_IS_IGP) {
6987 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6988 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
6989 r = cik_init_microcode(rdev);
6991 DRM_ERROR("Failed to load firmware!\n");
6996 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6997 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
6999 r = cik_init_microcode(rdev);
7001 DRM_ERROR("Failed to load firmware!\n");
7006 r = ci_mc_load_microcode(rdev);
7008 DRM_ERROR("Failed to load MC firmware!\n");
7013 r = cik_pcie_gart_enable(rdev);
7018 /* allocate rlc buffers */
7019 if (rdev->flags & RADEON_IS_IGP) {
7020 if (rdev->family == CHIP_KAVERI) {
7021 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7022 rdev->rlc.reg_list_size =
7023 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7025 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7026 rdev->rlc.reg_list_size =
7027 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7030 rdev->rlc.cs_data = ci_cs_data;
7031 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7032 r = sumo_rlc_init(rdev);
7034 DRM_ERROR("Failed to init rlc BOs!\n");
7038 /* allocate wb buffer */
7039 r = radeon_wb_init(rdev);
7043 /* allocate mec buffers */
7044 r = cik_mec_init(rdev);
7046 DRM_ERROR("Failed to init MEC BOs!\n");
7050 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7052 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7056 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7058 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7062 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7064 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7068 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7070 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7074 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7076 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7080 r = radeon_uvd_resume(rdev);
7082 r = uvd_v4_2_resume(rdev);
7084 r = radeon_fence_driver_start_ring(rdev,
7085 R600_RING_TYPE_UVD_INDEX);
7087 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7091 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7094 if (!rdev->irq.installed) {
7095 r = radeon_irq_kms_init(rdev);
7100 r = cik_irq_init(rdev);
7102 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7103 radeon_irq_kms_fini(rdev);
7108 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7109 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7110 CP_RB0_RPTR, CP_RB0_WPTR,
7115 /* set up the compute queues */
7116 /* type-2 packets are deprecated on MEC, use type-3 instead */
7117 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7118 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7119 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7120 PACKET3(PACKET3_NOP, 0x3FFF));
7123 ring->me = 1; /* first MEC */
7124 ring->pipe = 0; /* first pipe */
7125 ring->queue = 0; /* first queue */
7126 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7128 /* type-2 packets are deprecated on MEC, use type-3 instead */
7129 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7130 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7131 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7132 PACKET3(PACKET3_NOP, 0x3FFF));
7135 /* dGPUs only have 1 MEC */
7136 ring->me = 1; /* first MEC */
7137 ring->pipe = 0; /* first pipe */
7138 ring->queue = 1; /* second queue */
7139 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
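/* Assumption for context: the (me, pipe, queue) tuple set up here is what
 * the CP compute resume path uses to pick the hardware queue descriptor
 * (HQD) and doorbell that back this ring on the MEC.
 */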
7141 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7142 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7143 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7144 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7145 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7149 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7150 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7151 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7152 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7153 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7157 r = cik_cp_resume(rdev);
7161 r = cik_sdma_resume(rdev);
7165 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7166 if (ring->ring_size) {
7167 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7168 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7171 r = uvd_v1_0_init(rdev);
7173 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7176 r = radeon_ib_pool_init(rdev);
7178 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7182 r = radeon_vm_manager_init(rdev);
7184 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7188 r = dce6_audio_init(rdev);
7196 * cik_resume - resume the asic to a functional state
7198 * @rdev: radeon_device pointer
7200 * Programs the asic to a functional state (CIK).
7202 * Returns 0 for success, error for failure.
7204 int cik_resume(struct radeon_device *rdev)
7209 atom_asic_init(rdev->mode_info.atom_context);
7211 /* init golden registers */
7212 cik_init_golden_registers(rdev);
7214 rdev->accel_working = true;
7215 r = cik_startup(rdev);
7217 DRM_ERROR("cik startup failed on resume\n");
7218 rdev->accel_working = false;
7227 * cik_suspend - suspend the asic
7229 * @rdev: radeon_device pointer
7231 * Bring the chip into a state suitable for suspend (CIK).
7232 * Called at suspend.
7233 * Returns 0 for success.
7235 int cik_suspend(struct radeon_device *rdev)
7237 dce6_audio_fini(rdev);
7238 radeon_vm_manager_fini(rdev);
7239 cik_cp_enable(rdev, false);
7240 cik_sdma_enable(rdev, false);
7241 uvd_v1_0_fini(rdev);
7242 radeon_uvd_suspend(rdev);
7245 cik_irq_suspend(rdev);
7246 radeon_wb_disable(rdev);
7247 cik_pcie_gart_disable(rdev);
7251 /* Plan is to move initialization into that function and use
7252 * helper functions so that radeon_device_init pretty much
7253 * does nothing more than calling asic specific functions. This
7254 * should also allow us to remove a bunch of callback functions
7258 * cik_init - asic specific driver and hw init
7260 * @rdev: radeon_device pointer
7262 * Setup asic specific driver variables and program the hw
7263 * to a functional state (CIK).
7264 * Called at driver startup.
7265 * Returns 0 for success, errors for failure.
7267 int cik_init(struct radeon_device *rdev)
7269 struct radeon_ring *ring;
7273 if (!radeon_get_bios(rdev)) {
7274 if (ASIC_IS_AVIVO(rdev))
7277 /* Must be an ATOMBIOS */
7278 if (!rdev->is_atom_bios) {
7279 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
7282 r = radeon_atombios_init(rdev);
7286 /* Post card if necessary */
7287 if (!radeon_card_posted(rdev)) {
7289 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7292 DRM_INFO("GPU not posted. posting now...\n");
7293 atom_asic_init(rdev->mode_info.atom_context);
7295 /* init golden registers */
7296 cik_init_golden_registers(rdev);
7297 /* Initialize scratch registers */
7298 cik_scratch_init(rdev);
7299 /* Initialize surface registers */
7300 radeon_surface_init(rdev);
7301 /* Initialize clocks */
7302 radeon_get_clock_info(rdev->ddev);
7305 r = radeon_fence_driver_init(rdev);
7309 /* initialize memory controller */
7310 r = cik_mc_init(rdev);
7313 /* Memory manager */
7314 r = radeon_bo_init(rdev);
7318 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7319 ring->ring_obj = NULL;
7320 r600_ring_init(rdev, ring, 1024 * 1024);
7322 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7323 ring->ring_obj = NULL;
7324 r600_ring_init(rdev, ring, 1024 * 1024);
7325 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7329 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7330 ring->ring_obj = NULL;
7331 r600_ring_init(rdev, ring, 1024 * 1024);
7332 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7336 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7337 ring->ring_obj = NULL;
7338 r600_ring_init(rdev, ring, 256 * 1024);
7340 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7341 ring->ring_obj = NULL;
7342 r600_ring_init(rdev, ring, 256 * 1024);
7344 r = radeon_uvd_init(rdev);
7346 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7347 ring->ring_obj = NULL;
7348 r600_ring_init(rdev, ring, 4096);
7351 rdev->ih.ring_obj = NULL;
7352 r600_ih_ring_init(rdev, 64 * 1024);
7354 r = r600_pcie_gart_init(rdev);
7358 rdev->accel_working = true;
7359 r = cik_startup(rdev);
7361 dev_err(rdev->dev, "disabling GPU acceleration\n");
7363 cik_sdma_fini(rdev);
7365 sumo_rlc_fini(rdev);
7367 radeon_wb_fini(rdev);
7368 radeon_ib_pool_fini(rdev);
7369 radeon_vm_manager_fini(rdev);
7370 radeon_irq_kms_fini(rdev);
7371 cik_pcie_gart_fini(rdev);
7372 rdev->accel_working = false;
7375 /* Don't start up if the MC ucode is missing.
7376 * The default clocks and voltages before the MC ucode
7377 * is loaded are not sufficient for advanced operations.
7379 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7380 DRM_ERROR("radeon: MC ucode required for CIK+.\n");
7388 * cik_fini - asic specific driver and hw fini
7390 * @rdev: radeon_device pointer
7392 * Tear down the asic specific driver variables and program the hw
7393 * to an idle state (CIK).
7394 * Called at driver unload.
7396 void cik_fini(struct radeon_device *rdev)
7399 cik_sdma_fini(rdev);
7403 sumo_rlc_fini(rdev);
7405 radeon_wb_fini(rdev);
7406 radeon_vm_manager_fini(rdev);
7407 radeon_ib_pool_fini(rdev);
7408 radeon_irq_kms_fini(rdev);
7409 uvd_v1_0_fini(rdev);
7410 radeon_uvd_fini(rdev);
7411 cik_pcie_gart_fini(rdev);
7412 r600_vram_scratch_fini(rdev);
7413 radeon_gem_fini(rdev);
7414 radeon_fence_driver_fini(rdev);
7415 radeon_bo_fini(rdev);
7416 radeon_atombios_fini(rdev);
7421 /* display watermark setup */
7423 * dce8_line_buffer_adjust - Set up the line buffer
7425 * @rdev: radeon_device pointer
7426 * @radeon_crtc: the selected display controller
7427 * @mode: the current display mode on the selected display
7430 * Set up the line buffer allocation for
7431 * the selected display controller (CIK).
7432 * Returns the line buffer size in pixels.
7434 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7435 struct radeon_crtc *radeon_crtc,
7436 struct drm_display_mode *mode)
7438 u32 tmp, buffer_alloc, i;
7439 u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
7442 * There are 6 line buffers, one for each display controller.
7443 * There are 3 partitions per LB. Select the number of partitions
7444 * to enable based on the display width. For display widths larger
7445 * than 4096, you need to use 2 display controllers and combine
7446 * them using the stereo blender.
7448 if (radeon_crtc->base.enabled && mode) {
7449 if (mode->crtc_hdisplay < 1920) {
7452 } else if (mode->crtc_hdisplay < 2560) {
7455 } else if (mode->crtc_hdisplay < 4096) {
7457 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7459 DRM_DEBUG_KMS("Mode too big for LB!\n");
7461 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7468 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7469 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7471 WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
7472 DMIF_BUFFERS_ALLOCATED(buffer_alloc));
7473 for (i = 0; i < rdev->usec_timeout; i++) {
7474 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
7475 DMIF_BUFFERS_ALLOCATED_COMPLETED)
7480 if (radeon_crtc->base.enabled && mode) {
7492 /* controller not enabled, so no lb used */
7497 * cik_get_number_of_dram_channels - get the number of dram channels
7499 * @rdev: radeon_device pointer
7501 * Look up the number of video ram channels (CIK).
7502 * Used for display watermark bandwidth calculations
7503 * Returns the number of dram channels
7505 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7507 u32 tmp = RREG32(MC_SHARED_CHMAP);
7509 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7532 struct dce8_wm_params {
7533 u32 dram_channels; /* number of dram channels */
7534 u32 yclk; /* bandwidth per dram data pin in kHz */
7535 u32 sclk; /* engine clock in kHz */
7536 u32 disp_clk; /* display clock in kHz */
7537 u32 src_width; /* viewport width */
7538 u32 active_time; /* active display time in ns */
7539 u32 blank_time; /* blank time in ns */
7540 bool interlaced; /* mode is interlaced */
7541 fixed20_12 vsc; /* vertical scale ratio */
7542 u32 num_heads; /* number of active crtcs */
7543 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
7544 u32 lb_size; /* line buffer allocated to pipe */
7545 u32 vtaps; /* vertical scaler taps */
7549 * dce8_dram_bandwidth - get the dram bandwidth
7551 * @wm: watermark calculation data
7553 * Calculate the raw dram bandwidth (CIK).
7554 * Used for display watermark bandwidth calculations
7555 * Returns the dram bandwidth in MBytes/s
7557 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7559 /* Calculate raw DRAM Bandwidth */
7560 fixed20_12 dram_efficiency; /* 0.7 */
7561 fixed20_12 yclk, dram_channels, bandwidth;
7564 a.full = dfixed_const(1000);
7565 yclk.full = dfixed_const(wm->yclk);
7566 yclk.full = dfixed_div(yclk, a);
7567 dram_channels.full = dfixed_const(wm->dram_channels * 4);
7568 a.full = dfixed_const(10);
7569 dram_efficiency.full = dfixed_const(7);
7570 dram_efficiency.full = dfixed_div(dram_efficiency, a);
7571 bandwidth.full = dfixed_mul(dram_channels, yclk);
7572 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7574 return dfixed_trunc(bandwidth);
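/* Illustrative numbers: yclk = 1000000 kHz (1 GHz per pin) and 4 dram
 * channels give 4 * 4 bytes * 1000 MHz = 16000 MB/s raw, derated by the
 * 0.7 efficiency factor to 11200 MB/s.
 */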
7578 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7580 * @wm: watermark calculation data
7582 * Calculate the dram bandwidth used for display (CIK).
7583 * Used for display watermark bandwidth calculations
7584 * Returns the dram bandwidth for display in MBytes/s
7586 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7588 /* Calculate DRAM Bandwidth and the part allocated to display. */
7589 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7590 fixed20_12 yclk, dram_channels, bandwidth;
7593 a.full = dfixed_const(1000);
7594 yclk.full = dfixed_const(wm->yclk);
7595 yclk.full = dfixed_div(yclk, a);
7596 dram_channels.full = dfixed_const(wm->dram_channels * 4);
7597 a.full = dfixed_const(10);
7598 disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
7599 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7600 bandwidth.full = dfixed_mul(dram_channels, yclk);
7601 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7603 return dfixed_trunc(bandwidth);
7607 * dce8_data_return_bandwidth - get the data return bandwidth
7609 * @wm: watermark calculation data
7611 * Calculate the data return bandwidth used for display (CIK).
7612 * Used for display watermark bandwidth calculations
7613 * Returns the data return bandwidth in MBytes/s
7615 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7617 /* Calculate the display Data return Bandwidth */
7618 fixed20_12 return_efficiency; /* 0.8 */
7619 fixed20_12 sclk, bandwidth;
7622 a.full = dfixed_const(1000);
7623 sclk.full = dfixed_const(wm->sclk);
7624 sclk.full = dfixed_div(sclk, a);
7625 a.full = dfixed_const(10);
7626 return_efficiency.full = dfixed_const(8);
7627 return_efficiency.full = dfixed_div(return_efficiency, a);
7628 a.full = dfixed_const(32);
7629 bandwidth.full = dfixed_mul(a, sclk);
7630 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7632 return dfixed_trunc(bandwidth);
7636 * dce8_dmif_request_bandwidth - get the dmif bandwidth
7638 * @wm: watermark calculation data
7640 * Calculate the dmif bandwidth used for display (CIK).
7641 * Used for display watermark bandwidth calculations
7642 * Returns the dmif bandwidth in MBytes/s
7644 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7646 /* Calculate the DMIF Request Bandwidth */
7647 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7648 fixed20_12 disp_clk, bandwidth;
7651 a.full = dfixed_const(1000);
7652 disp_clk.full = dfixed_const(wm->disp_clk);
7653 disp_clk.full = dfixed_div(disp_clk, a);
7654 a.full = dfixed_const(32);
7655 b.full = dfixed_mul(a, disp_clk);
7657 a.full = dfixed_const(10);
7658 disp_clk_request_efficiency.full = dfixed_const(8);
7659 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7661 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7663 return dfixed_trunc(bandwidth);
7667 * dce8_available_bandwidth - get the min available bandwidth
7669 * @wm: watermark calculation data
7671 * Calculate the min available bandwidth used for display (CIK).
7672 * Used for display watermark bandwidth calculations
7673 * Returns the min available bandwidth in MBytes/s
7675 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7677 /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
7678 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7679 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7680 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7682 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7686 * dce8_average_bandwidth - get the average available bandwidth
7688 * @wm: watermark calculation data
7690 * Calculate the average available bandwidth used for display (CIK).
7691 * Used for display watermark bandwidth calculations
7692 * Returns the average available bandwidth in MBytes/s
7694 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
7696 /* Calculate the display mode Average Bandwidth
7697 * DisplayMode should contain the source and destination dimensions,
7701 fixed20_12 line_time;
7702 fixed20_12 src_width;
7703 fixed20_12 bandwidth;
7706 a.full = dfixed_const(1000);
7707 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7708 line_time.full = dfixed_div(line_time, a);
7709 bpp.full = dfixed_const(wm->bytes_per_pixel);
7710 src_width.full = dfixed_const(wm->src_width);
7711 bandwidth.full = dfixed_mul(src_width, bpp);
7712 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7713 bandwidth.full = dfixed_div(bandwidth, line_time);
7715 return dfixed_trunc(bandwidth);
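/* Illustrative: a 1920-pixel-wide source at 4 bytes/pixel with vsc = 1
 * and a ~13.2 us line time averages 1920 * 4 / 13.2 ~= 582 MB/s.
 */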
7719 * dce8_latency_watermark - get the latency watermark
7721 * @wm: watermark calculation data
7723 * Calculate the latency watermark (CIK).
7724 * Used for display watermark bandwidth calculations
7725 * Returns the latency watermark in ns
7727 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
7729 /* First calculate the latency in ns */
7730 u32 mc_latency = 2000; /* 2000 ns. */
7731 u32 available_bandwidth = dce8_available_bandwidth(wm);
7732 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
7733 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
7734 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
7735 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
7736 (wm->num_heads * cursor_line_pair_return_time);
7737 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
7738 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
7739 u32 tmp, dmif_size = 12288;
7742 if (wm->num_heads == 0)
7745 a.full = dfixed_const(2);
7746 b.full = dfixed_const(1);
7747 if ((wm->vsc.full > a.full) ||
7748 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
7750 ((wm->vsc.full >= a.full) && wm->interlaced))
7751 max_src_lines_per_dst_line = 4;
7753 max_src_lines_per_dst_line = 2;
7755 a.full = dfixed_const(available_bandwidth);
7756 b.full = dfixed_const(wm->num_heads);
7757 a.full = dfixed_div(a, b);
7759 b.full = dfixed_const(mc_latency + 512);
7760 c.full = dfixed_const(wm->disp_clk);
7761 b.full = dfixed_div(b, c);
7763 c.full = dfixed_const(dmif_size);
7764 b.full = dfixed_div(c, b);
7766 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
7768 b.full = dfixed_const(1000);
7769 c.full = dfixed_const(wm->disp_clk);
7770 b.full = dfixed_div(c, b);
7771 c.full = dfixed_const(wm->bytes_per_pixel);
7772 b.full = dfixed_mul(b, c);
7774 lb_fill_bw = min(tmp, dfixed_trunc(b));
7776 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
7777 b.full = dfixed_const(1000);
7778 c.full = dfixed_const(lb_fill_bw);
7779 b.full = dfixed_div(c, b);
7780 a.full = dfixed_div(a, b);
7781 line_fill_time = dfixed_trunc(a);
7783 if (line_fill_time < wm->active_time)
7786 return latency + (line_fill_time - wm->active_time);
7791 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7792 * average and available dram bandwidth
7794 * @wm: watermark calculation data
7796 * Check if the display average bandwidth fits in the display
7797 * dram bandwidth (CIK).
7798 * Used for display watermark bandwidth calculations
7799 * Returns true if the display fits, false if not.
7801 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7803 if (dce8_average_bandwidth(wm) <=
7804 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7811 * dce8_average_bandwidth_vs_available_bandwidth - check
7812 * average and available bandwidth
7814 * @wm: watermark calculation data
7816 * Check if the display average bandwidth fits in the display
7817 * available bandwidth (CIK).
7818 * Used for display watermark bandwidth calculations
7819 * Returns true if the display fits, false if not.
7821 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
7823 if (dce8_average_bandwidth(wm) <=
7824 (dce8_available_bandwidth(wm) / wm->num_heads))
7831 * dce8_check_latency_hiding - check latency hiding
7833 * @wm: watermark calculation data
7835 * Check latency hiding (CIK).
7836 * Used for display watermark bandwidth calculations
7837 * Returns true if the display fits, false if not.
7839 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7841 u32 lb_partitions = wm->lb_size / wm->src_width;
7842 u32 line_time = wm->active_time + wm->blank_time;
7843 u32 latency_tolerant_lines;
7847 a.full = dfixed_const(1);
7848 if (wm->vsc.full > a.full)
7849 latency_tolerant_lines = 1;
7851 if (lb_partitions <= (wm->vtaps + 1))
7852 latency_tolerant_lines = 1;
7854 latency_tolerant_lines = 2;
7857 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
7859 if (dce8_latency_watermark(wm) <= latency_hiding)
7866 * dce8_program_watermarks - program display watermarks
7868 * @rdev: radeon_device pointer
7869 * @radeon_crtc: the selected display controller
7870 * @lb_size: line buffer size
7871 * @num_heads: number of display controllers in use
7873 * Calculate and program the display watermarks for the
7874 * selected display controller (CIK).
7876 static void dce8_program_watermarks(struct radeon_device *rdev,
7877 struct radeon_crtc *radeon_crtc,
7878 u32 lb_size, u32 num_heads)
7880 struct drm_display_mode *mode = &radeon_crtc->base.mode;
7881 struct dce8_wm_params wm_low, wm_high;
7884 u32 latency_watermark_a = 0, latency_watermark_b = 0;
7887 if (radeon_crtc->base.enabled && num_heads && mode) {
7888 pixel_period = 1000000 / (u32)mode->clock;
7889 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
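/* Illustrative: 1080p60 has mode->clock = 148500 (kHz), so pixel_period
 * = 1000000 / 148500 = 6 ns in integer math, and with crtc_htotal = 2200
 * line_time = min(2200 * 6, 65535) = 13200 ns.
 */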
7891 /* watermark for high clocks */
7892 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7893 rdev->pm.dpm_enabled) {
7895 radeon_dpm_get_mclk(rdev, false) * 10;
7897 radeon_dpm_get_sclk(rdev, false) * 10;
7899 wm_high.yclk = rdev->pm.current_mclk * 10;
7900 wm_high.sclk = rdev->pm.current_sclk * 10;
7903 wm_high.disp_clk = mode->clock;
7904 wm_high.src_width = mode->crtc_hdisplay;
7905 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
7906 wm_high.blank_time = line_time - wm_high.active_time;
7907 wm_high.interlaced = false;
7908 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7909 wm_high.interlaced = true;
7910 wm_high.vsc = radeon_crtc->vsc;
7912 if (radeon_crtc->rmx_type != RMX_OFF)
7914 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
7915 wm_high.lb_size = lb_size;
7916 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
7917 wm_high.num_heads = num_heads;
7919 /* set for high clocks */
7920 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
7922 /* possibly force display priority to high */
7923 /* should really do this at mode validation time... */
7924 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
7925 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
7926 !dce8_check_latency_hiding(&wm_high) ||
7927 (rdev->disp_priority == 2)) {
7928 DRM_DEBUG_KMS("force priority to high\n");
7931 /* watermark for low clocks */
7932 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7933 rdev->pm.dpm_enabled) {
7935 radeon_dpm_get_mclk(rdev, true) * 10;
7937 radeon_dpm_get_sclk(rdev, true) * 10;
7939 wm_low.yclk = rdev->pm.current_mclk * 10;
7940 wm_low.sclk = rdev->pm.current_sclk * 10;
7943 wm_low.disp_clk = mode->clock;
7944 wm_low.src_width = mode->crtc_hdisplay;
7945 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
7946 wm_low.blank_time = line_time - wm_low.active_time;
7947 wm_low.interlaced = false;
7948 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7949 wm_low.interlaced = true;
7950 wm_low.vsc = radeon_crtc->vsc;
7952 if (radeon_crtc->rmx_type != RMX_OFF)
7954 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
7955 wm_low.lb_size = lb_size;
7956 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
7957 wm_low.num_heads = num_heads;
7959 /* set for low clocks */
7960 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
7962 /* possibly force display priority to high */
7963 /* should really do this at mode validation time... */
7964 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
7965 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
7966 !dce8_check_latency_hiding(&wm_low) ||
7967 (rdev->disp_priority == 2)) {
7968 DRM_DEBUG_KMS("force priority to high\n");
7973 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7975 tmp &= ~LATENCY_WATERMARK_MASK(3);
7976 tmp |= LATENCY_WATERMARK_MASK(1);
7977 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7978 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7979 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
7980 LATENCY_HIGH_WATERMARK(line_time)));
7982 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7983 tmp &= ~LATENCY_WATERMARK_MASK(3);
7984 tmp |= LATENCY_WATERMARK_MASK(2);
7985 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7986 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7987 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
7988 LATENCY_HIGH_WATERMARK(line_time)));
7989 /* restore original selection */
7990 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
7992 /* save values for DPM */
7993 radeon_crtc->line_time = line_time;
7994 radeon_crtc->wm_high = latency_watermark_a;
7995 radeon_crtc->wm_low = latency_watermark_b;
7999 * dce8_bandwidth_update - program display watermarks
8001 * @rdev: radeon_device pointer
8003 * Calculate and program the display watermarks and line
8004 * buffer allocation (CIK).
8006 void dce8_bandwidth_update(struct radeon_device *rdev)
8008 struct drm_display_mode *mode = NULL;
8009 u32 num_heads = 0, lb_size;
8012 radeon_update_display_priority(rdev);
8014 for (i = 0; i < rdev->num_crtc; i++) {
8015 if (rdev->mode_info.crtcs[i]->base.enabled)
8018 for (i = 0; i < rdev->num_crtc; i++) {
8019 mode = &rdev->mode_info.crtcs[i]->base.mode;
8020 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8021 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8026 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8028 * @rdev: radeon_device pointer
8030 * Fetches a GPU clock counter snapshot (CIK).
8031 * Returns the 64 bit clock counter snapshot.
8033 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8037 mutex_lock(&rdev->gpu_clock_mutex);
8038 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8039 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8040 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8041 mutex_unlock(&rdev->gpu_clock_mutex);
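/* The RLC_CAPTURE_GPU_CLOCK_COUNT write presumably latches the counter so
 * the LSB/MSB pair reads back as one consistent 64-bit value under
 * gpu_clock_mutex.  Hypothetical usage for coarse profiling:
 *
 *	uint64_t t0 = cik_get_gpu_clock_counter(rdev);
 *	(submit work and wait for it)
 *	uint64_t cycles = cik_get_gpu_clock_counter(rdev) - t0;
 */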
8045 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8046 u32 cntl_reg, u32 status_reg)
8049 struct atom_clock_dividers dividers;
8052 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8053 clock, false, &dividers);
8057 tmp = RREG32_SMC(cntl_reg);
8058 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8059 tmp |= dividers.post_divider;
8060 WREG32_SMC(cntl_reg, tmp);
8062 for (i = 0; i < 100; i++) {
8063 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8073 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8077 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8081 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
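/* Hedged usage sketch: callers elsewhere in the driver request UVD clocks
 * in kHz through this asic hook, e.g. something like
 * cik_set_uvd_clocks(rdev, 53300, 40000) when idling the block.
 */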
8085 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8087 struct pci_dev *root = rdev->pdev->bus->self;
8088 int bridge_pos, gpu_pos;
8089 u32 speed_cntl, mask, current_data_rate;
8093 if (radeon_pcie_gen2 == 0)
8096 if (rdev->flags & RADEON_IS_IGP)
8099 if (!(rdev->flags & RADEON_IS_PCIE))
8102 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8106 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8109 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8110 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8111 LC_CURRENT_DATA_RATE_SHIFT;
8112 if (mask & DRM_PCIE_SPEED_80) {
8113 if (current_data_rate == 2) {
8114 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8117 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8118 } else if (mask & DRM_PCIE_SPEED_50) {
8119 if (current_data_rate == 1) {
8120 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8123 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8126 bridge_pos = pci_pcie_cap(root);
8130 gpu_pos = pci_pcie_cap(rdev->pdev);
8134 if (mask & DRM_PCIE_SPEED_80) {
8135 /* re-try equalization if gen3 is not already enabled */
8136 if (current_data_rate != 2) {
8137 u16 bridge_cfg, gpu_cfg;
8138 u16 bridge_cfg2, gpu_cfg2;
8139 u32 max_lw, current_lw, tmp;
8141 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8142 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8144 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8145 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8147 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8148 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8150 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8151 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8152 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8154 if (current_lw < max_lw) {
8155 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8156 if (tmp & LC_RENEGOTIATION_SUPPORT) {
8157 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8158 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8159 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8160 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8164 for (i = 0; i < 10; i++) {
8166 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8167 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8170 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8171 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8173 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8174 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8176 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8177 tmp |= LC_SET_QUIESCE;
8178 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8180 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8182 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8187 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8188 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8189 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8190 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8192 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8193 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8194 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8195 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8198 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8199 tmp16 &= ~((1 << 4) | (7 << 9));
8200 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8201 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8203 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8204 tmp16 &= ~((1 << 4) | (7 << 9));
8205 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8206 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8208 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8209 tmp &= ~LC_SET_QUIESCE;
8210 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8215 /* set the link speed */
8216 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8217 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8218 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8220 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8222 if (mask & DRM_PCIE_SPEED_80)
8223 tmp16 |= 3; /* gen3 */
8224 else if (mask & DRM_PCIE_SPEED_50)
8225 tmp16 |= 2; /* gen2 */
8227 tmp16 |= 1; /* gen1 */
8228 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
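/* The low bits of PCI_EXP_LNKCTL2 hold the PCIe target link speed:
 * 1 = 2.5 GT/s (gen1), 2 = 5.0 GT/s (gen2), 3 = 8.0 GT/s (gen3).
 */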
8230 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8231 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8232 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8234 for (i = 0; i < rdev->usec_timeout; i++) {
8235 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8236 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8242 static void cik_program_aspm(struct radeon_device *rdev)
8245 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8246 bool disable_clkreq = false;
8248 if (radeon_aspm == 0)
8251 /* XXX double check IGPs */
8252 if (rdev->flags & RADEON_IS_IGP)
8255 if (!(rdev->flags & RADEON_IS_PCIE))
8258 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8259 data &= ~LC_XMIT_N_FTS_MASK;
8260 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8262 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8264 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8265 data |= LC_GO_TO_RECOVERY;
8267 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8269 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8270 data |= P_IGNORE_EDB_ERR;
8272 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8274 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8275 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8276 data |= LC_PMI_TO_L1_DIS;
8278 data |= LC_L0S_INACTIVITY(7);
8281 data |= LC_L1_INACTIVITY(7);
8282 data &= ~LC_PMI_TO_L1_DIS;
8284 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8286 if (!disable_plloff_in_l1) {
8287 bool clk_req_support;
8289 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8290 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8291 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8293 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8295 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8296 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8297 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8299 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8301 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8302 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8303 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8305 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8307 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8308 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8309 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8311 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8313 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8314 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8315 data |= LC_DYN_LANES_PWR_STATE(3);
8317 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8319 if (!disable_clkreq) {
8320 struct pci_dev *root = rdev->pdev->bus->self;
8323 clk_req_support = false;
8324 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8325 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8326 clk_req_support = true;
8328 clk_req_support = false;
8331 if (clk_req_support) {
8332 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8333 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8335 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8337 orig = data = RREG32_SMC(THM_CLK_CNTL);
8338 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8339 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8341 WREG32_SMC(THM_CLK_CNTL, data);
8343 orig = data = RREG32_SMC(MISC_CLK_CTRL);
8344 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8345 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8347 WREG32_SMC(MISC_CLK_CTRL, data);
8349 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8350 data &= ~BCLK_AS_XCLK;
8352 WREG32_SMC(CG_CLKPIN_CNTL, data);
8354 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8355 data &= ~FORCE_BIF_REFCLK_EN;
8357 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8359 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8360 data &= ~MPLL_CLKOUT_SEL_MASK;
8361 data |= MPLL_CLKOUT_SEL(4);
8363 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8368 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8371 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8372 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8374 WREG32_PCIE_PORT(PCIE_CNTL2, data);
8377 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8378 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8379 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8380 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8381 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8382 data &= ~LC_L0S_INACTIVITY_MASK;
8384 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);