git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - drivers/gpu/drm/radeon/cik.c
Merge branch 'drm-next-3.12' of git://people.freedesktop.org/~agd5f/linux into drm...
[karo-tx-linux.git] / drivers / gpu / drm / radeon / cik.c
index 8928bd109c1647b684293cb49ed91198419eb33c..a3bba05872769fa77d1561681de9639aae95acf9 100644 (file)
 #include "cikd.h"
 #include "atom.h"
 #include "cik_blit_shaders.h"
-
-/* GFX */
-#define CIK_PFP_UCODE_SIZE 2144
-#define CIK_ME_UCODE_SIZE 2144
-#define CIK_CE_UCODE_SIZE 2144
-/* compute */
-#define CIK_MEC_UCODE_SIZE 4192
-/* interrupts */
-#define BONAIRE_RLC_UCODE_SIZE 2048
-#define KB_RLC_UCODE_SIZE 2560
-#define KV_RLC_UCODE_SIZE 2560
-/* gddr controller */
-#define CIK_MC_UCODE_SIZE 7866
-/* sdma */
-#define CIK_SDMA_UCODE_SIZE 1050
-#define CIK_SDMA_UCODE_VERSION 64
+#include "radeon_ucode.h"
+#include "clearstate_ci.h"
 
 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
@@ -54,6 +40,7 @@ MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
+MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
@@ -72,10 +59,61 @@ extern void r600_ih_ring_fini(struct radeon_device *rdev);
 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
+extern void sumo_rlc_fini(struct radeon_device *rdev);
+extern int sumo_rlc_init(struct radeon_device *rdev);
 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
-extern void si_rlc_fini(struct radeon_device *rdev);
-extern int si_rlc_init(struct radeon_device *rdev);
+extern void si_rlc_reset(struct radeon_device *rdev);
+extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
+extern int cik_sdma_resume(struct radeon_device *rdev);
+extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
+extern void cik_sdma_fini(struct radeon_device *rdev);
+extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
+                                struct radeon_ib *ib,
+                                uint64_t pe,
+                                uint64_t addr, unsigned count,
+                                uint32_t incr, uint32_t flags);
 static void cik_rlc_stop(struct radeon_device *rdev);
+static void cik_pcie_gen3_enable(struct radeon_device *rdev);
+static void cik_program_aspm(struct radeon_device *rdev);
+static void cik_init_pg(struct radeon_device *rdev);
+static void cik_init_cg(struct radeon_device *rdev);
+
+/*
+ * get temperature in millidegrees
+ *
+ * Reads CG_MULT_THERMAL_STATUS through RREG32_SMC and extracts the
+ * CTF_TEMP field (CTF_TEMP_MASK / CTF_TEMP_SHIFT).  When bit 9 (0x200)
+ * of that field is set the reading is pinned to 255; otherwise the low
+ * nine bits (0x1ff) are used.  The value is then multiplied by 1000 and
+ * returned.
+ *
+ * NOTE(review): bit 9 looks like an out-of-range flag and the raw unit
+ * is presumably whole degrees Celsius - confirm against the register
+ * documentation.
+ */
+int ci_get_temp(struct radeon_device *rdev)
+{
+       u32 temp;
+       int actual_temp = 0;
+
+       temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
+               CTF_TEMP_SHIFT;
+
+       if (temp & 0x200)
+               actual_temp = 255;
+       else
+               actual_temp = temp & 0x1ff;
+
+       actual_temp = actual_temp * 1000;
+
+       return actual_temp;
+}
+
+/*
+ * get temperature in millidegrees
+ *
+ * Reads the raw value at 0xC0300E0C through RREG32_SMC.  A non-zero
+ * reading is converted as (raw / 8) - 49; a zero reading yields 0.  The
+ * value is then multiplied by 1000 and returned.
+ *
+ * NOTE(review): the /8 and -49 scaling is taken as-is from the code; the
+ * register name and its encoding are not visible here - confirm against
+ * the hardware documentation.
+ */
+int kv_get_temp(struct radeon_device *rdev)
+{
+       u32 temp;
+       int actual_temp = 0;
+
+       temp = RREG32_SMC(0xC0300E0C);
+
+       if (temp)
+               actual_temp = (temp / 8) - 49;
+       else
+               actual_temp = 0;
+
+       actual_temp = actual_temp * 1000;
+
+       return actual_temp;
+}
 
 /*
  * Indirect registers accessor
@@ -98,6 +136,778 @@ void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
        (void)RREG32(PCIE_DATA);
 }
 
+/*
+ * "spectre" RLC save/restore register list.
+ *
+ * Layout as written below: most entries are two words - a key of the
+ * form (selector << 16) | (register byte offset >> 2) followed by a
+ * 0x00000000 word.  Bare 0x3 and 0x5 words split the list into sections;
+ * the entries after the 0x5 word are keys only, with no trailing zero.
+ *
+ * NOTE(review): the consumer of this table is not visible in this chunk;
+ * the meaning of the upper 16 selector bits and of the 0x3/0x5 marker
+ * words is an assumption to confirm against the RLC setup code.
+ */
+static const u32 spectre_rlc_save_restore_register_list[] =
+{
+       (0x0e00 << 16) | (0xc12c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc140 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc150 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc15c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc168 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc170 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc178 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc204 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2b4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2b8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2bc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2c0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8228 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x829c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x869c >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x98f4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x98f8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9900 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc260 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x90e8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c000 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c00c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c1c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9700 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x8e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x9e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0xae00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0xbe00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x89bc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8900 >> 2),
+       0x00000000,
+       0x3,
+       (0x0e00 << 16) | (0xc130 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc134 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc1fc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc208 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc264 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc268 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc26c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc270 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc274 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc278 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc27c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc280 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc284 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc288 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc28c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc290 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc294 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc298 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc29c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2a0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2a4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2a8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2ac  >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2b0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x301d0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30238 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30250 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30254 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30258 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3025c >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x8e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x9e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0xae00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0xbe00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x8e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x9e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0xae00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0xbe00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x8e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x9e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0xae00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0xbe00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x8e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x9e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0xae00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0xbe00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x8e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x9e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0xae00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0xbe00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc99c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9834 >> 2),
+       0x00000000,
+       (0x0000 << 16) | (0x30f00 >> 2),
+       0x00000000,
+       (0x0001 << 16) | (0x30f00 >> 2),
+       0x00000000,
+       (0x0000 << 16) | (0x30f04 >> 2),
+       0x00000000,
+       (0x0001 << 16) | (0x30f04 >> 2),
+       0x00000000,
+       (0x0000 << 16) | (0x30f08 >> 2),
+       0x00000000,
+       (0x0001 << 16) | (0x30f08 >> 2),
+       0x00000000,
+       (0x0000 << 16) | (0x30f0c >> 2),
+       0x00000000,
+       (0x0001 << 16) | (0x30f0c >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x9b7c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8a14 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8a18 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a00 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8bf0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8bcc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8b24 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30a04 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a10 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a14 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a18 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a2c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc700 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc704 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc708 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc768 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc770 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc774 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc778 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc77c >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc780 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc784 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc788 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc78c >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc798 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc79c >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc7a0 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc7a4 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc7a8 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc7ac >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc7b0 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc7b4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9100 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c010 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92a8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92ac >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92b4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92b8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92bc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92c0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92c4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92c8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92cc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92d0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c00 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c04 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c20 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c38 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c3c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xae00 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9604 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac08 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac0c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac10 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac14 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac58 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac68 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac6c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac70 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac74 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac78 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac7c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac80 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac84 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac88 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac8c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x970c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9714 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9718 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x971c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x8e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x9e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0xae00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0xbe00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xcd10 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xcd14 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88b0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88b4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88b8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88bc >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0x89c0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88c4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88c8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88d0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88d4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88d8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8980 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30938 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3093c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30940 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x89a0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30900 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30904 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x89b4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c210 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c214 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c218 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8904 >> 2),
+       0x00000000,
+       0x5,
+       (0x0e00 << 16) | (0x8c28 >> 2),
+       (0x0e00 << 16) | (0x8c2c >> 2),
+       (0x0e00 << 16) | (0x8c30 >> 2),
+       (0x0e00 << 16) | (0x8c34 >> 2),
+       (0x0e00 << 16) | (0x9600 >> 2),
+};
+
+/*
+ * "kalindi" RLC save/restore register list.
+ *
+ * Layout as written below: most entries are two words - a key of the
+ * form (selector << 16) | (register byte offset >> 2) followed by a
+ * 0x00000000 word.  Bare 0x3 and 0x5 words split the list into sections;
+ * the entries after the 0x5 word are keys only, with no trailing zero.
+ *
+ * NOTE(review): the consumer of this table is not visible in this chunk;
+ * the meaning of the upper 16 selector bits and of the 0x3/0x5 marker
+ * words is an assumption to confirm against the RLC setup code.
+ */
+static const u32 kalindi_rlc_save_restore_register_list[] =
+{
+       (0x0e00 << 16) | (0xc12c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc140 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc150 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc15c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc168 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc170 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc204 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2b4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2b8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2bc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2c0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8228 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x829c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x869c >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x98f4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x98f8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9900 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc260 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x90e8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c000 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c00c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c1c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9700 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x89bc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8900 >> 2),
+       0x00000000,
+       0x3,
+       (0x0e00 << 16) | (0xc130 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc134 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc1fc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc208 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc264 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc268 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc26c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc270 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc274 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc28c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc290 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc294 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc298 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2a0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2a4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2a8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2ac >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x301d0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30238 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30250 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30254 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30258 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3025c >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc99c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9834 >> 2),
+       0x00000000,
+       (0x0000 << 16) | (0x30f00 >> 2),
+       0x00000000,
+       (0x0000 << 16) | (0x30f04 >> 2),
+       0x00000000,
+       (0x0000 << 16) | (0x30f08 >> 2),
+       0x00000000,
+       (0x0000 << 16) | (0x30f0c >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x9b7c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8a14 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8a18 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a00 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8bf0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8bcc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8b24 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30a04 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a10 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a14 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a18 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a2c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc700 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc704 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc708 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc768 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc770 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc774 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc798 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc79c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9100 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c010 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c00 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c04 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c20 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c38 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c3c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xae00 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9604 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac08 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac0c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac10 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac14 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac58 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac68 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac6c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac70 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac74 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac78 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac7c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac80 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac84 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac88 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac8c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x970c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9714 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9718 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x971c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xcd10 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xcd14 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88b0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88b4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88b8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88bc >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0x89c0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88c4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88c8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88d0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88d4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88d8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8980 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30938 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3093c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30940 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x89a0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30900 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30904 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x89b4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3e1fc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c210 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c214 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c218 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8904 >> 2),
+       0x00000000,
+       0x5,
+       (0x0e00 << 16) | (0x8c28 >> 2),
+       (0x0e00 << 16) | (0x8c2c >> 2),
+       (0x0e00 << 16) | (0x8c30 >> 2),
+       (0x0e00 << 16) | (0x8c34 >> 2),
+       (0x0e00 << 16) | (0x9600 >> 2),
+};
+
 static const u32 bonaire_golden_spm_registers[] =
 {
        0x30800, 0xe0ffffff, 0xe0000000
@@ -744,7 +1554,7 @@ static int cik_init_microcode(struct radeon_device *rdev)
        const char *chip_name;
        size_t pfp_req_size, me_req_size, ce_req_size,
                mec_req_size, rlc_req_size, mc_req_size,
-               sdma_req_size;
+               sdma_req_size, smc_req_size;
        char fw_name[30];
        int err;
 
@@ -760,6 +1570,7 @@ static int cik_init_microcode(struct radeon_device *rdev)
                rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
                mc_req_size = CIK_MC_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
+               smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
                break;
        case CHIP_KAVERI:
                chip_name = "KAVERI";
@@ -851,7 +1662,7 @@ static int cik_init_microcode(struct radeon_device *rdev)
                err = -EINVAL;
        }
 
-       /* No MC ucode on APUs */
+       /* No SMC, MC ucode on APUs */
        if (!(rdev->flags & RADEON_IS_IGP)) {
                snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
                err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
@@ -863,6 +1674,21 @@ static int cik_init_microcode(struct radeon_device *rdev)
                               rdev->mc_fw->size, fw_name);
                        err = -EINVAL;
                }
+
+               snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
+               err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
+               if (err) {
+                       printk(KERN_ERR
+                              "smc: error loading firmware \"%s\"\n",
+                              fw_name);
+                       release_firmware(rdev->smc_fw);
+                       rdev->smc_fw = NULL;
+               } else if (rdev->smc_fw->size != smc_req_size) {
+                       printk(KERN_ERR
+                              "cik_smc: Bogus length %zu in firmware \"%s\"\n",
+                              rdev->smc_fw->size, fw_name);
+                       err = -EINVAL;
+               }
        }
 
 out:
@@ -881,6 +1707,8 @@ out:
                rdev->rlc_fw = NULL;
                release_firmware(rdev->mc_fw);
                rdev->mc_fw = NULL;
+               release_firmware(rdev->smc_fw);
+               rdev->smc_fw = NULL;
        }
        return err;
 }
@@ -1880,7 +2708,46 @@ static void cik_gpu_init(struct radeon_device *rdev)
                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_KAVERI:
-               /* TODO */
+               rdev->config.cik.max_shader_engines = 1;
+               rdev->config.cik.max_tile_pipes = 4;
+               if ((rdev->pdev->device == 0x1304) ||
+                   (rdev->pdev->device == 0x1305) ||
+                   (rdev->pdev->device == 0x130C) ||
+                   (rdev->pdev->device == 0x130F) ||
+                   (rdev->pdev->device == 0x1310) ||
+                   (rdev->pdev->device == 0x1311) ||
+                   (rdev->pdev->device == 0x131C)) {
+                       rdev->config.cik.max_cu_per_sh = 8;
+                       rdev->config.cik.max_backends_per_se = 2;
+               } else if ((rdev->pdev->device == 0x1309) ||
+                          (rdev->pdev->device == 0x130A) ||
+                          (rdev->pdev->device == 0x130D) ||
+                          (rdev->pdev->device == 0x1313)) {
+                       rdev->config.cik.max_cu_per_sh = 6;
+                       rdev->config.cik.max_backends_per_se = 2;
+               } else if ((rdev->pdev->device == 0x1306) ||
+                          (rdev->pdev->device == 0x1307) ||
+                          (rdev->pdev->device == 0x130B) ||
+                          (rdev->pdev->device == 0x130E) ||
+                          (rdev->pdev->device == 0x1315) ||
+                          (rdev->pdev->device == 0x131B)) {
+                       rdev->config.cik.max_cu_per_sh = 4;
+                       rdev->config.cik.max_backends_per_se = 1;
+               } else {
+                       rdev->config.cik.max_cu_per_sh = 3;
+                       rdev->config.cik.max_backends_per_se = 1;
+               }
+               rdev->config.cik.max_sh_per_se = 1;
+               rdev->config.cik.max_texture_channel_caches = 4;
+               rdev->config.cik.max_gprs = 256;
+               rdev->config.cik.max_gs_threads = 16;
+               rdev->config.cik.max_hw_contexts = 8;
+
+               rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
+               rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
+               rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
+               rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
+               gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_KABINI:
        default:
@@ -2535,8 +3402,8 @@ static int cik_cp_gfx_resume(struct radeon_device *rdev)
        /* ring 0 - compute and gfx */
        /* Set ring buffer size */
        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-       rb_bufsz = drm_order(ring->ring_size / 8);
-       tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+       rb_bufsz = order_base_2(ring->ring_size / 8);
+       tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 #ifdef __BIG_ENDIAN
        tmp |= BUF_SWAP_32BIT;
 #endif
@@ -2593,7 +3460,6 @@ u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
                cik_srbm_select(rdev, 0, 0, 0, 0);
                mutex_unlock(&rdev->srbm_mutex);
        }
-       rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
 
        return rptr;
 }
@@ -2612,7 +3478,6 @@ u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
                cik_srbm_select(rdev, 0, 0, 0, 0);
                mutex_unlock(&rdev->srbm_mutex);
        }
-       wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
 
        return wptr;
 }
@@ -2620,10 +3485,8 @@ u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
                               struct radeon_ring *ring)
 {
-       u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
-
-       rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
-       WDOORBELL32(ring->doorbell_offset, wptr);
+       rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
+       WDOORBELL32(ring->doorbell_offset, ring->wptr);
 }
 
 /**
@@ -2920,7 +3783,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
                /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
                tmp = RREG32(CP_HPD_EOP_CONTROL);
                tmp &= ~EOP_SIZE_MASK;
-               tmp |= drm_order(MEC_HPD_SIZE / 8);
+               tmp |= order_base_2(MEC_HPD_SIZE / 8);
                WREG32(CP_HPD_EOP_CONTROL, tmp);
        }
        cik_srbm_select(rdev, 0, 0, 0, 0);
@@ -3037,9 +3900,9 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
                        ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
 
                mqd->queue_state.cp_hqd_pq_control |=
-                       drm_order(rdev->ring[idx].ring_size / 8);
+                       order_base_2(rdev->ring[idx].ring_size / 8);
                mqd->queue_state.cp_hqd_pq_control |=
-                       (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
+                       (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
 #ifdef __BIG_ENDIAN
                mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
 #endif
@@ -3150,13 +4013,6 @@ static int cik_cp_resume(struct radeon_device *rdev)
 {
        int r;
 
-       /* Reset all cp blocks */
-       WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
-       RREG32(GRBM_SOFT_RESET);
-       mdelay(15);
-       WREG32(GRBM_SOFT_RESET, 0);
-       RREG32(GRBM_SOFT_RESET);
-
        r = cik_cp_load_microcode(rdev);
        if (r)
                return r;
@@ -3171,1711 +4027,1803 @@ static int cik_cp_resume(struct radeon_device *rdev)
        return 0;
 }
 
-/*
- * sDMA - System DMA
- * Starting with CIK, the GPU has new asynchronous
- * DMA engines.  These engines are used for compute
- * and gfx.  There are two DMA engines (SDMA0, SDMA1)
- * and each one supports 1 ring buffer used for gfx
- * and 2 queues used for compute.
- *
- * The programming model is very similar to the CP
- * (ring buffer, IBs, etc.), but sDMA has it's own
- * packet format that is different from the PM4 format
- * used by the CP. sDMA supports copying data, writing
- * embedded data, solid fills, and a number of other
- * things.  It also has support for tiling/detiling of
- * buffers.
- */
+static void cik_print_gpu_status_regs(struct radeon_device *rdev)
+{
+       dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
+               RREG32(GRBM_STATUS));
+       dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
+               RREG32(GRBM_STATUS2));
+       dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
+               RREG32(GRBM_STATUS_SE0));
+       dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
+               RREG32(GRBM_STATUS_SE1));
+       dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
+               RREG32(GRBM_STATUS_SE2));
+       dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
+               RREG32(GRBM_STATUS_SE3));
+       dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
+               RREG32(SRBM_STATUS));
+       dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
+               RREG32(SRBM_STATUS2));
+       dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
+               RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
+       dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
+                RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
+       dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
+       dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
+                RREG32(CP_STALLED_STAT1));
+       dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
+                RREG32(CP_STALLED_STAT2));
+       dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
+                RREG32(CP_STALLED_STAT3));
+       dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
+                RREG32(CP_CPF_BUSY_STAT));
+       dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
+                RREG32(CP_CPF_STALLED_STAT1));
+       dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
+       dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
+       dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
+                RREG32(CP_CPC_STALLED_STAT1));
+       dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
+}
+
 /**
- * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
+ * cik_gpu_check_soft_reset - check which blocks are busy
  *
  * @rdev: radeon_device pointer
- * @ib: IB object to schedule
  *
- * Schedule an IB in the DMA ring (CIK).
+ * Check which blocks are busy and return the relevant reset
+ * mask to be used by cik_gpu_soft_reset().
+ * Returns a mask of the blocks to be reset.
  */
-void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
-                             struct radeon_ib *ib)
+u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
 {
-       struct radeon_ring *ring = &rdev->ring[ib->ring];
-       u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
+       u32 reset_mask = 0;
+       u32 tmp;
 
-       if (rdev->wb.enabled) {
-               u32 next_rptr = ring->wptr + 5;
-               while ((next_rptr & 7) != 4)
-                       next_rptr++;
-               next_rptr += 4;
-               radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
-               radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-               radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-               radeon_ring_write(ring, 1); /* number of DWs to follow */
-               radeon_ring_write(ring, next_rptr);
-       }
+       /* GRBM_STATUS */
+       tmp = RREG32(GRBM_STATUS);
+       if (tmp & (PA_BUSY | SC_BUSY |
+                  BCI_BUSY | SX_BUSY |
+                  TA_BUSY | VGT_BUSY |
+                  DB_BUSY | CB_BUSY |
+                  GDS_BUSY | SPI_BUSY |
+                  IA_BUSY | IA_BUSY_NO_DMA))
+               reset_mask |= RADEON_RESET_GFX;
 
-       /* IB packet must end on a 8 DW boundary */
-       while ((ring->wptr & 7) != 4)
-               radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
-       radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
-       radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
-       radeon_ring_write(ring, ib->length_dw);
+       if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
+               reset_mask |= RADEON_RESET_CP;
 
-}
+       /* GRBM_STATUS2 */
+       tmp = RREG32(GRBM_STATUS2);
+       if (tmp & RLC_BUSY)
+               reset_mask |= RADEON_RESET_RLC;
 
-/**
- * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
- *
- * @rdev: radeon_device pointer
- * @fence: radeon fence object
- *
- * Add a DMA fence packet to the ring to write
- * the fence seq number and DMA trap packet to generate
- * an interrupt if needed (CIK).
- */
-void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
-                             struct radeon_fence *fence)
-{
-       struct radeon_ring *ring = &rdev->ring[fence->ring];
-       u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
-       u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
-                         SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
-       u32 ref_and_mask;
+       /* SDMA0_STATUS_REG */
+       tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
+       if (!(tmp & SDMA_IDLE))
+               reset_mask |= RADEON_RESET_DMA;
 
-       if (fence->ring == R600_RING_TYPE_DMA_INDEX)
-               ref_and_mask = SDMA0;
-       else
-               ref_and_mask = SDMA1;
+       /* SDMA1_STATUS_REG */
+       tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
+       if (!(tmp & SDMA_IDLE))
+               reset_mask |= RADEON_RESET_DMA1;
 
-       /* write the fence */
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
-       radeon_ring_write(ring, addr & 0xffffffff);
-       radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
-       radeon_ring_write(ring, fence->seq);
-       /* generate an interrupt */
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
-       /* flush HDP */
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
-       radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
-       radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
-       radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
-       radeon_ring_write(ring, ref_and_mask); /* MASK */
-       radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
-}
+       /* SRBM_STATUS2 */
+       tmp = RREG32(SRBM_STATUS2);
+       if (tmp & SDMA_BUSY)
+               reset_mask |= RADEON_RESET_DMA;
 
-/**
- * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
- *
- * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
- * @semaphore: radeon semaphore object
- * @emit_wait: wait or signal semaphore
- *
- * Add a DMA semaphore packet to the ring wait on or signal
- * other rings (CIK).
- */
-void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
-                                 struct radeon_ring *ring,
-                                 struct radeon_semaphore *semaphore,
-                                 bool emit_wait)
-{
-       u64 addr = semaphore->gpu_addr;
-       u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
+       if (tmp & SDMA1_BUSY)
+               reset_mask |= RADEON_RESET_DMA1;
 
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
-       radeon_ring_write(ring, addr & 0xfffffff8);
-       radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
-}
+       /* SRBM_STATUS */
+       tmp = RREG32(SRBM_STATUS);
 
-/**
- * cik_sdma_gfx_stop - stop the gfx async dma engines
- *
- * @rdev: radeon_device pointer
- *
- * Stop the gfx async dma ring buffers (CIK).
- */
-static void cik_sdma_gfx_stop(struct radeon_device *rdev)
-{
-       u32 rb_cntl, reg_offset;
-       int i;
+       if (tmp & IH_BUSY)
+               reset_mask |= RADEON_RESET_IH;
 
-       radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+       if (tmp & SEM_BUSY)
+               reset_mask |= RADEON_RESET_SEM;
 
-       for (i = 0; i < 2; i++) {
-               if (i == 0)
-                       reg_offset = SDMA0_REGISTER_OFFSET;
-               else
-                       reg_offset = SDMA1_REGISTER_OFFSET;
-               rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
-               rb_cntl &= ~SDMA_RB_ENABLE;
-               WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
-               WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
-       }
-}
+       if (tmp & GRBM_RQ_PENDING)
+               reset_mask |= RADEON_RESET_GRBM;
 
-/**
- * cik_sdma_rlc_stop - stop the compute async dma engines
- *
- * @rdev: radeon_device pointer
- *
- * Stop the compute async dma queues (CIK).
- */
-static void cik_sdma_rlc_stop(struct radeon_device *rdev)
-{
-       /* XXX todo */
-}
+       if (tmp & VMC_BUSY)
+               reset_mask |= RADEON_RESET_VMC;
 
-/**
- * cik_sdma_enable - stop the async dma engines
- *
- * @rdev: radeon_device pointer
- * @enable: enable/disable the DMA MEs.
- *
- * Halt or unhalt the async dma engines (CIK).
- */
-static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
-{
-       u32 me_cntl, reg_offset;
-       int i;
+       if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
+                  MCC_BUSY | MCD_BUSY))
+               reset_mask |= RADEON_RESET_MC;
 
-       for (i = 0; i < 2; i++) {
-               if (i == 0)
-                       reg_offset = SDMA0_REGISTER_OFFSET;
-               else
-                       reg_offset = SDMA1_REGISTER_OFFSET;
-               me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
-               if (enable)
-                       me_cntl &= ~SDMA_HALT;
-               else
-                       me_cntl |= SDMA_HALT;
-               WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
+       if (evergreen_is_display_hung(rdev))
+               reset_mask |= RADEON_RESET_DISPLAY;
+
+       /* Skip MC reset as it's most likely not hung, just busy */
+       if (reset_mask & RADEON_RESET_MC) {
+               DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
+               reset_mask &= ~RADEON_RESET_MC;
        }
+
+       return reset_mask;
 }
 
 /**
- * cik_sdma_gfx_resume - setup and start the async dma engines
+ * cik_gpu_soft_reset - soft reset GPU
  *
  * @rdev: radeon_device pointer
+ * @reset_mask: mask of which blocks to reset
  *
- * Set up the gfx DMA ring buffers and enable them (CIK).
- * Returns 0 for success, error for failure.
+ * Soft reset the blocks specified in @reset_mask.
  */
-static int cik_sdma_gfx_resume(struct radeon_device *rdev)
+static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
 {
-       struct radeon_ring *ring;
-       u32 rb_cntl, ib_cntl;
-       u32 rb_bufsz;
-       u32 reg_offset, wb_offset;
-       int i, r;
+       struct evergreen_mc_save save;
+       u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
+       u32 tmp;
 
-       for (i = 0; i < 2; i++) {
-               if (i == 0) {
-                       ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
-                       reg_offset = SDMA0_REGISTER_OFFSET;
-                       wb_offset = R600_WB_DMA_RPTR_OFFSET;
-               } else {
-                       ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
-                       reg_offset = SDMA1_REGISTER_OFFSET;
-                       wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
-               }
+       if (reset_mask == 0)
+               return;
 
-               WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
-               WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
+       dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
 
-               /* Set ring buffer size in dwords */
-               rb_bufsz = drm_order(ring->ring_size / 4);
-               rb_cntl = rb_bufsz << 1;
-#ifdef __BIG_ENDIAN
-               rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
-#endif
-               WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
+       cik_print_gpu_status_regs(rdev);
+       dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
+                RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
+       dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+                RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
 
-               /* Initialize the ring buffer's read and write pointers */
-               WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
-               WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
+       /* stop the rlc */
+       cik_rlc_stop(rdev);
 
-               /* set the wb address whether it's enabled or not */
-               WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
-                      upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
-               WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
-                      ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
+       /* Disable GFX parsing/prefetching */
+       WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
 
-               if (rdev->wb.enabled)
-                       rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
+       /* Disable MEC parsing/prefetching */
+       WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
 
-               WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
-               WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
+       if (reset_mask & RADEON_RESET_DMA) {
+               /* sdma0 */
+               tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
+               tmp |= SDMA_HALT;
+               WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
+       }
+       if (reset_mask & RADEON_RESET_DMA1) {
+               /* sdma1 */
+               tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
+               tmp |= SDMA_HALT;
+               WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
+       }
 
-               ring->wptr = 0;
-               WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
+       evergreen_mc_stop(rdev, &save);
+       if (evergreen_mc_wait_for_idle(rdev)) {
+               dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+       }
 
-               ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
+       if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
+               grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
 
-               /* enable DMA RB */
-               WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
+       if (reset_mask & RADEON_RESET_CP) {
+               grbm_soft_reset |= SOFT_RESET_CP;
 
-               ib_cntl = SDMA_IB_ENABLE;
-#ifdef __BIG_ENDIAN
-               ib_cntl |= SDMA_IB_SWAP_ENABLE;
-#endif
-               /* enable DMA IBs */
-               WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
+               srbm_soft_reset |= SOFT_RESET_GRBM;
+       }
 
-               ring->ready = true;
+       if (reset_mask & RADEON_RESET_DMA)
+               srbm_soft_reset |= SOFT_RESET_SDMA;
 
-               r = radeon_ring_test(rdev, ring->idx, ring);
-               if (r) {
-                       ring->ready = false;
-                       return r;
-               }
+       if (reset_mask & RADEON_RESET_DMA1)
+               srbm_soft_reset |= SOFT_RESET_SDMA1;
+
+       if (reset_mask & RADEON_RESET_DISPLAY)
+               srbm_soft_reset |= SOFT_RESET_DC;
+
+       if (reset_mask & RADEON_RESET_RLC)
+               grbm_soft_reset |= SOFT_RESET_RLC;
+
+       if (reset_mask & RADEON_RESET_SEM)
+               srbm_soft_reset |= SOFT_RESET_SEM;
+
+       if (reset_mask & RADEON_RESET_IH)
+               srbm_soft_reset |= SOFT_RESET_IH;
+
+       if (reset_mask & RADEON_RESET_GRBM)
+               srbm_soft_reset |= SOFT_RESET_GRBM;
+
+       if (reset_mask & RADEON_RESET_VMC)
+               srbm_soft_reset |= SOFT_RESET_VMC;
+
+       if (!(rdev->flags & RADEON_IS_IGP)) {
+               if (reset_mask & RADEON_RESET_MC)
+                       srbm_soft_reset |= SOFT_RESET_MC;
        }
 
-       radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+       if (grbm_soft_reset) {
+               tmp = RREG32(GRBM_SOFT_RESET);
+               tmp |= grbm_soft_reset;
+               dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+               WREG32(GRBM_SOFT_RESET, tmp);
+               tmp = RREG32(GRBM_SOFT_RESET);
 
-       return 0;
-}
+               udelay(50);
 
-/**
- * cik_sdma_rlc_resume - setup and start the async dma engines
- *
- * @rdev: radeon_device pointer
- *
- * Set up the compute DMA queues and enable them (CIK).
- * Returns 0 for success, error for failure.
- */
-static int cik_sdma_rlc_resume(struct radeon_device *rdev)
-{
-       /* XXX todo */
-       return 0;
-}
+               tmp &= ~grbm_soft_reset;
+               WREG32(GRBM_SOFT_RESET, tmp);
+               tmp = RREG32(GRBM_SOFT_RESET);
+       }
 
-/**
- * cik_sdma_load_microcode - load the sDMA ME ucode
- *
- * @rdev: radeon_device pointer
- *
- * Loads the sDMA0/1 ucode.
- * Returns 0 for success, -EINVAL if the ucode is not available.
- */
-static int cik_sdma_load_microcode(struct radeon_device *rdev)
-{
-       const __be32 *fw_data;
-       int i;
+       if (srbm_soft_reset) {
+               tmp = RREG32(SRBM_SOFT_RESET);
+               tmp |= srbm_soft_reset;
+               dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+               WREG32(SRBM_SOFT_RESET, tmp);
+               tmp = RREG32(SRBM_SOFT_RESET);
 
-       if (!rdev->sdma_fw)
-               return -EINVAL;
+               udelay(50);
 
-       /* stop the gfx rings and rlc compute queues */
-       cik_sdma_gfx_stop(rdev);
-       cik_sdma_rlc_stop(rdev);
+               tmp &= ~srbm_soft_reset;
+               WREG32(SRBM_SOFT_RESET, tmp);
+               tmp = RREG32(SRBM_SOFT_RESET);
+       }
 
-       /* halt the MEs */
-       cik_sdma_enable(rdev, false);
+       /* Wait a little for things to settle down */
+       udelay(50);
 
-       /* sdma0 */
-       fw_data = (const __be32 *)rdev->sdma_fw->data;
-       WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
-       for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
-               WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
-       WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
-
-       /* sdma1 */
-       fw_data = (const __be32 *)rdev->sdma_fw->data;
-       WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
-       for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
-               WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
-       WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
-
-       WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
-       WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
-       return 0;
+       evergreen_mc_resume(rdev, &save);
+       udelay(50);
+
+       cik_print_gpu_status_regs(rdev);
 }
 
 /**
- * cik_sdma_resume - setup and start the async dma engines
+ * cik_asic_reset - soft reset GPU
  *
  * @rdev: radeon_device pointer
  *
- * Set up the DMA engines and enable them (CIK).
- * Returns 0 for success, error for failure.
+ * Look up which blocks are hung and attempt
+ * to reset them.
+ * Returns 0 for success.
  */
-static int cik_sdma_resume(struct radeon_device *rdev)
+int cik_asic_reset(struct radeon_device *rdev)
 {
-       int r;
+       u32 reset_mask;
 
-       /* Reset dma */
-       WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
-       RREG32(SRBM_SOFT_RESET);
-       udelay(50);
-       WREG32(SRBM_SOFT_RESET, 0);
-       RREG32(SRBM_SOFT_RESET);
+       reset_mask = cik_gpu_check_soft_reset(rdev);
 
-       r = cik_sdma_load_microcode(rdev);
-       if (r)
-               return r;
+       if (reset_mask)
+               r600_set_bios_scratch_engine_hung(rdev, true);
 
-       /* unhalt the MEs */
-       cik_sdma_enable(rdev, true);
+       cik_gpu_soft_reset(rdev, reset_mask);
 
-       /* start the gfx rings and rlc compute queues */
-       r = cik_sdma_gfx_resume(rdev);
-       if (r)
-               return r;
-       r = cik_sdma_rlc_resume(rdev);
-       if (r)
-               return r;
+       reset_mask = cik_gpu_check_soft_reset(rdev);
+
+       if (!reset_mask)
+               r600_set_bios_scratch_engine_hung(rdev, false);
 
        return 0;
 }
 
 /**
- * cik_sdma_fini - tear down the async dma engines
+ * cik_gfx_is_lockup - check if the 3D engine is locked up
  *
  * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
  *
- * Stop the async dma engines and free the rings (CIK).
+ * Check if the 3D engine is locked up (CIK).
+ * Returns true if the engine is locked, false if not.
  */
-static void cik_sdma_fini(struct radeon_device *rdev)
+bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 {
-       /* stop the gfx rings and rlc compute queues */
-       cik_sdma_gfx_stop(rdev);
-       cik_sdma_rlc_stop(rdev);
-       /* halt the MEs */
-       cik_sdma_enable(rdev, false);
-       radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
-       radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
-       /* XXX - compute dma queue tear down */
-}
-
-/**
- * cik_copy_dma - copy pages using the DMA engine
- *
- * @rdev: radeon_device pointer
- * @src_offset: src GPU address
- * @dst_offset: dst GPU address
- * @num_gpu_pages: number of GPU pages to xfer
- * @fence: radeon fence object
- *
- * Copy GPU paging using the DMA engine (CIK).
- * Used by the radeon ttm implementation to move pages if
- * registered as the asic copy callback.
- */
-int cik_copy_dma(struct radeon_device *rdev,
-                uint64_t src_offset, uint64_t dst_offset,
-                unsigned num_gpu_pages,
-                struct radeon_fence **fence)
-{
-       struct radeon_semaphore *sem = NULL;
-       int ring_index = rdev->asic->copy.dma_ring_index;
-       struct radeon_ring *ring = &rdev->ring[ring_index];
-       u32 size_in_bytes, cur_size_in_bytes;
-       int i, num_loops;
-       int r = 0;
-
-       r = radeon_semaphore_create(rdev, &sem);
-       if (r) {
-               DRM_ERROR("radeon: moving bo (%d).\n", r);
-               return r;
-       }
-
-       size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
-       num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
-       r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
-       if (r) {
-               DRM_ERROR("radeon: moving bo (%d).\n", r);
-               radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
-       }
+       u32 reset_mask = cik_gpu_check_soft_reset(rdev);
 
-       if (radeon_fence_need_sync(*fence, ring->idx)) {
-               radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
-                                           ring->idx);
-               radeon_fence_note_sync(*fence, ring->idx);
-       } else {
-               radeon_semaphore_free(rdev, &sem, NULL);
-       }
-
-       for (i = 0; i < num_loops; i++) {
-               cur_size_in_bytes = size_in_bytes;
-               if (cur_size_in_bytes > 0x1fffff)
-                       cur_size_in_bytes = 0x1fffff;
-               size_in_bytes -= cur_size_in_bytes;
-               radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
-               radeon_ring_write(ring, cur_size_in_bytes);
-               radeon_ring_write(ring, 0); /* src/dst endian swap */
-               radeon_ring_write(ring, src_offset & 0xffffffff);
-               radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
-               radeon_ring_write(ring, dst_offset & 0xfffffffc);
-               radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
-               src_offset += cur_size_in_bytes;
-               dst_offset += cur_size_in_bytes;
-       }
-
-       r = radeon_fence_emit(rdev, fence, ring->idx);
-       if (r) {
-               radeon_ring_unlock_undo(rdev, ring);
-               return r;
+       if (!(reset_mask & (RADEON_RESET_GFX |
+                           RADEON_RESET_COMPUTE |
+                           RADEON_RESET_CP))) {
+               radeon_ring_lockup_update(ring);
+               return false;
        }
-
-       radeon_ring_unlock_commit(rdev, ring);
-       radeon_semaphore_free(rdev, &sem, *fence);
-
-       return r;
+       /* force CP activities */
+       radeon_ring_force_activity(rdev, ring);
+       return radeon_ring_test_lockup(rdev, ring);
 }
 
+/* MC */
 /**
- * cik_sdma_ring_test - simple async dma engine test
+ * cik_mc_program - program the GPU memory controller
  *
  * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
  *
- * Test the DMA engine by writing using it to write an
- * value to memory. (CIK).
- * Returns 0 for success, error for failure.
+ * Set the location of vram, gart, and AGP in the GPU's
+ * physical address space (CIK).
  */
-int cik_sdma_ring_test(struct radeon_device *rdev,
-                      struct radeon_ring *ring)
+static void cik_mc_program(struct radeon_device *rdev)
 {
-       unsigned i;
-       int r;
-       void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+       struct evergreen_mc_save save;
        u32 tmp;
+       int i, j;
 
-       if (!ptr) {
-               DRM_ERROR("invalid vram scratch pointer\n");
-               return -EINVAL;
-       }
-
-       tmp = 0xCAFEDEAD;
-       writel(tmp, ptr);
-
-       r = radeon_ring_lock(rdev, ring, 4);
-       if (r) {
-               DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
-               return r;
+       /* Initialize HDP */
+       for (i = 0, j = 0; i < 32; i++, j += 0x18) {
+               WREG32((0x2c14 + j), 0x00000000);
+               WREG32((0x2c18 + j), 0x00000000);
+               WREG32((0x2c1c + j), 0x00000000);
+               WREG32((0x2c20 + j), 0x00000000);
+               WREG32((0x2c24 + j), 0x00000000);
        }
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
-       radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
-       radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
-       radeon_ring_write(ring, 1); /* number of DWs to follow */
-       radeon_ring_write(ring, 0xDEADBEEF);
-       radeon_ring_unlock_commit(rdev, ring);
+       WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
 
-       for (i = 0; i < rdev->usec_timeout; i++) {
-               tmp = readl(ptr);
-               if (tmp == 0xDEADBEEF)
-                       break;
-               DRM_UDELAY(1);
+       evergreen_mc_stop(rdev, &save);
+       if (radeon_mc_wait_for_idle(rdev)) {
+               dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
        }
-
-       if (i < rdev->usec_timeout) {
-               DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
-       } else {
-               DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
-                         ring->idx, tmp);
-               r = -EINVAL;
+       /* Lock out access through the VGA aperture */
+       WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
+       /* Update configuration */
+       WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
+              rdev->mc.vram_start >> 12);
+       WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+              rdev->mc.vram_end >> 12);
+       WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
+              rdev->vram_scratch.gpu_addr >> 12);
+       tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
+       tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
+       WREG32(MC_VM_FB_LOCATION, tmp);
+       /* XXX double check these! */
+       WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
+       WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
+       WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
+       WREG32(MC_VM_AGP_BASE, 0);
+       WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
+       WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
+       if (radeon_mc_wait_for_idle(rdev)) {
+               dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
        }
-       return r;
+       evergreen_mc_resume(rdev, &save);
+       /* we need to own VRAM, so turn off the VGA renderer here
+        * to stop it overwriting our objects */
+       rv515_vga_render_disable(rdev);
 }
 
 /**
- * cik_sdma_ib_test - test an IB on the DMA engine
+ * cik_mc_init - initialize the memory controller driver params
  *
  * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
  *
- * Test a simple IB in the DMA ring (CIK).
- * Returns 0 on success, error on failure.
+ * Look up the amount of vram, vram width, and decide how to place
+ * vram and gart within the GPU's physical address space (CIK).
+ * Returns 0 for success.
  */
-int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+static int cik_mc_init(struct radeon_device *rdev)
 {
-       struct radeon_ib ib;
-       unsigned i;
-       int r;
-       void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
-       u32 tmp = 0;
-
-       if (!ptr) {
-               DRM_ERROR("invalid vram scratch pointer\n");
-               return -EINVAL;
-       }
-
-       tmp = 0xCAFEDEAD;
-       writel(tmp, ptr);
-
-       r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
-       if (r) {
-               DRM_ERROR("radeon: failed to get ib (%d).\n", r);
-               return r;
-       }
-
-       ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
-       ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
-       ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
-       ib.ptr[3] = 1;
-       ib.ptr[4] = 0xDEADBEEF;
-       ib.length_dw = 5;
+       u32 tmp;
+       int chansize, numchan;
 
-       r = radeon_ib_schedule(rdev, &ib, NULL);
-       if (r) {
-               radeon_ib_free(rdev, &ib);
-               DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
-               return r;
-       }
-       r = radeon_fence_wait(ib.fence, false);
-       if (r) {
-               DRM_ERROR("radeon: fence wait failed (%d).\n", r);
-               return r;
-       }
-       for (i = 0; i < rdev->usec_timeout; i++) {
-               tmp = readl(ptr);
-               if (tmp == 0xDEADBEEF)
-                       break;
-               DRM_UDELAY(1);
-       }
-       if (i < rdev->usec_timeout) {
-               DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
+       /* Get VRAM informations */
+       rdev->mc.vram_is_ddr = true;
+       tmp = RREG32(MC_ARB_RAMCFG);
+       if (tmp & CHANSIZE_MASK) {
+               chansize = 64;
        } else {
-               DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
-               r = -EINVAL;
+               chansize = 32;
        }
-       radeon_ib_free(rdev, &ib);
-       return r;
-}
+       tmp = RREG32(MC_SHARED_CHMAP);
+       switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
+       case 0:
+       default:
+               numchan = 1;
+               break;
+       case 1:
+               numchan = 2;
+               break;
+       case 2:
+               numchan = 4;
+               break;
+       case 3:
+               numchan = 8;
+               break;
+       case 4:
+               numchan = 3;
+               break;
+       case 5:
+               numchan = 6;
+               break;
+       case 6:
+               numchan = 10;
+               break;
+       case 7:
+               numchan = 12;
+               break;
+       case 8:
+               numchan = 16;
+               break;
+       }
+       rdev->mc.vram_width = numchan * chansize;
+       /* Could aper size report 0 ? */
+       rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
+       rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
+       /* size in MB on si */
+       rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
+       rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
+       rdev->mc.visible_vram_size = rdev->mc.aper_size;
+       si_vram_gtt_location(rdev, &rdev->mc);
+       radeon_update_bandwidth_info(rdev);
 
+       return 0;
+}
 
-static void cik_print_gpu_status_regs(struct radeon_device *rdev)
+/*
+ * GART
+ * VMID 0 is the physical GPU addresses as used by the kernel.
+ * VMIDs 1-15 are used for userspace clients and are handled
+ * by the radeon vm/hsa code.
+ */
+/**
+ * cik_pcie_gart_tlb_flush - gart tlb flush callback
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Flush the TLB for the VMID 0 page table (CIK).
+ * The HDP cache is flushed first, then an invalidate is requested
+ * for VM context 0 only.
+ */
+void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
 {
-       dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
-               RREG32(GRBM_STATUS));
-       dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
-               RREG32(GRBM_STATUS2));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
-               RREG32(GRBM_STATUS_SE0));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
-               RREG32(GRBM_STATUS_SE1));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
-               RREG32(GRBM_STATUS_SE2));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
-               RREG32(GRBM_STATUS_SE3));
-       dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
-               RREG32(SRBM_STATUS));
-       dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
-               RREG32(SRBM_STATUS2));
-       dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
-               RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
-       dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
-                RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
-       dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
-       dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
-                RREG32(CP_STALLED_STAT1));
-       dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
-                RREG32(CP_STALLED_STAT2));
-       dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
-                RREG32(CP_STALLED_STAT3));
-       dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
-                RREG32(CP_CPF_BUSY_STAT));
-       dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
-                RREG32(CP_CPF_STALLED_STAT1));
-       dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
-       dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
-       dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
-                RREG32(CP_CPC_STALLED_STAT1));
-       dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
+       /* flush hdp cache */
+       WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+
+       /* bits 0-15 are the VM contexts 0-15; 0x1 (bit 0) selects context 0 */
+       WREG32(VM_INVALIDATE_REQUEST, 0x1);
+}
+
+/**
+ * cik_pcie_gart_enable - gart enable
+ *
+ * @rdev: radeon_device pointer
+ *
+ * This sets up the TLBs, programs the page tables for VMID0,
+ * sets up the hw for VMIDs 1-15 which are allocated on
+ * demand, and sets up the global locations for the LDS, GDS,
+ * and GPUVM for FSA64 clients (CIK).
+ * The GART table object (rdev->gart.robj) must already exist; it is
+ * pinned in VRAM here and rdev->gart.ready is set on success.
+ * Returns 0 for success, errors for failure.
+ */
+static int cik_pcie_gart_enable(struct radeon_device *rdev)
+{
+       int r, i;
+
+       if (rdev->gart.robj == NULL) {
+               dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
+               return -EINVAL;
+       }
+       r = radeon_gart_table_vram_pin(rdev);
+       if (r)
+               return r;
+       radeon_gart_restore(rdev);
+       /* Setup TLB control */
+       WREG32(MC_VM_MX_L1_TLB_CNTL,
+              (0xA << 7) |
+              ENABLE_L1_TLB |
+              SYSTEM_ACCESS_MODE_NOT_IN_SYS |
+              ENABLE_ADVANCED_DRIVER_MODEL |
+              SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
+       /* Setup L2 cache */
+       WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
+              ENABLE_L2_FRAGMENT_PROCESSING |
+              ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
+              ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
+              EFFECTIVE_L2_QUEUE_SIZE(7) |
+              CONTEXT1_IDENTITY_ACCESS_MODE(1));
+       WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
+       WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
+              L2_CACHE_BIGK_FRAGMENT_SIZE(6));
+       /* setup context0: page table depth 0 covering gtt_start..gtt_end,
+        * with the dummy page as the protection fault default address */
+       WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
+       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
+       WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
+       WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
+                       (u32)(rdev->dummy_page.addr >> 12));
+       WREG32(VM_CONTEXT0_CNTL2, 0);
+       WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
+                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
+
+       /* NOTE(review): raw register offsets with no symbolic names here;
+        * their purpose cannot be told from this file */
+       WREG32(0x15D4, 0);
+       WREG32(0x15D8, 0);
+       WREG32(0x15DC, 0);
+
+       /* empty context1-15: every base address register is pointed at the
+        * GART table for now (cik_vm_flush() writes the per-VM page
+        * directory address into the same registers later) */
+       /* FIXME start with 4G, once using 2 level pt switch to full
+        * vm size space
+        */
+       /* set vm size, must be a multiple of 4 */
+       WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
+       WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
+       /* contexts 1-7 are addressed relative to the context0 base register,
+        * contexts 8-15 relative to the context8 base register */
+       for (i = 1; i < 16; i++) {
+               if (i < 8)
+                       WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
+                              rdev->gart.table_addr >> 12);
+               else
+                       WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
+                              rdev->gart.table_addr >> 12);
+       }
+
+       /* enable context1-15: page table depth 1, dummy page as the fault
+        * default address, and both the INTERRUPT and DEFAULT flags set for
+        * every protection fault class */
+       WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
+              (u32)(rdev->dummy_page.addr >> 12));
+       WREG32(VM_CONTEXT1_CNTL2, 4);
+       WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
+                               RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+                               RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
+                               DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+                               DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
+                               PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
+                               PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
+                               VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
+                               VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
+                               READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
+                               READ_PROTECTION_FAULT_ENABLE_DEFAULT |
+                               WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+                               WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
+
+       /* TC cache setup ??? */
+       WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
+       WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
+       WREG32(TC_CFG_L1_STORE_POLICY, 0);
+
+       WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
+       WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
+       WREG32(TC_CFG_L2_STORE_POLICY0, 0);
+       WREG32(TC_CFG_L2_STORE_POLICY1, 0);
+       WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
+
+       WREG32(TC_CFG_L1_VOLATILE, 0);
+       WREG32(TC_CFG_L2_VOLATILE, 0);
+
+       /* Kaveri only: clear the BYPASS_VM bit in CHUB_CONTROL */
+       if (rdev->family == CHIP_KAVERI) {
+               u32 tmp = RREG32(CHUB_CONTROL);
+               tmp &= ~BYPASS_VM;
+               WREG32(CHUB_CONTROL, tmp);
+       }
+
+       /* XXX SH_MEM regs */
+       /* where to put LDS, scratch, GPUVM in FSA64 space */
+       /* the same values are written once per cik_srbm_select() setting
+        * 0..15 (presumably the last argument is the VMID - confirm), with
+        * srbm_mutex held for the whole sequence */
+       mutex_lock(&rdev->srbm_mutex);
+       for (i = 0; i < 16; i++) {
+               cik_srbm_select(rdev, 0, 0, 0, i);
+               /* CP and shaders */
+               WREG32(SH_MEM_CONFIG, 0);
+               WREG32(SH_MEM_APE1_BASE, 1);
+               WREG32(SH_MEM_APE1_LIMIT, 0);
+               WREG32(SH_MEM_BASES, 0);
+               /* SDMA GFX */
+               WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
+               WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
+               WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
+               WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
+               /* XXX SDMA RLC - todo */
+       }
+       /* put the selection back to 0 before dropping the lock */
+       cik_srbm_select(rdev, 0, 0, 0, 0);
+       mutex_unlock(&rdev->srbm_mutex);
+
+       cik_pcie_gart_tlb_flush(rdev);
+       DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+                (unsigned)(rdev->mc.gtt_size >> 20),
+                (unsigned long long)rdev->gart.table_addr);
+       rdev->gart.ready = true;
+       return 0;
+}
+
+/**
+ * cik_pcie_gart_disable - gart disable
+ *
+ * @rdev: radeon_device pointer
+ *
+ * This disables all VM page tables (CIK).
+ * The TLB and L2 control registers are rewritten without their enable
+ * bits and the GART table is unpinned from VRAM.
+ */
+static void cik_pcie_gart_disable(struct radeon_device *rdev)
+{
+       /* Disable all tables */
+       WREG32(VM_CONTEXT0_CNTL, 0);
+       WREG32(VM_CONTEXT1_CNTL, 0);
+       /* Setup TLB control (ENABLE_L1_TLB is not set here) */
+       WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
+              SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
+       /* Setup L2 cache (ENABLE_L2_CACHE is not set here) */
+       WREG32(VM_L2_CNTL,
+              ENABLE_L2_FRAGMENT_PROCESSING |
+              ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
+              ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
+              EFFECTIVE_L2_QUEUE_SIZE(7) |
+              CONTEXT1_IDENTITY_ACCESS_MODE(1));
+       WREG32(VM_L2_CNTL2, 0);
+       WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
+              L2_CACHE_BIGK_FRAGMENT_SIZE(6));
+       radeon_gart_table_vram_unpin(rdev);
+}
+
+/**
+ * cik_pcie_gart_fini - vm fini callback
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Tears down the driver GART/VM setup (CIK).
+ * The hardware is disabled first, then the VRAM table is freed and
+ * the common GART state is finalized.
+ */
+static void cik_pcie_gart_fini(struct radeon_device *rdev)
+{
+       cik_pcie_gart_disable(rdev);
+       radeon_gart_table_vram_free(rdev);
+       radeon_gart_fini(rdev);
+}
+
+/* vm parser */
+/**
+ * cik_ib_parse - vm ib_parse callback
+ *
+ * @rdev: radeon_device pointer
+ * @ib: indirect buffer pointer
+ *
+ * CIK uses hw IB checking so this is a nop (CIK).
+ * Neither argument is examined.
+ * Returns 0 always.
+ */
+int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+       return 0;
+}
+
+/*
+ * vm
+ * VMID 0 is the physical GPU addresses as used by the kernel.
+ * VMIDs 1-15 are used for userspace clients and are handled
+ * by the radeon vm/hsa code.
+ */
+/**
+ * cik_vm_init - cik vm init callback
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Inits cik specific vm parameters (number of VMs, base of vram for
+ * VMIDs 1-15) (CIK).
+ * Returns 0 for success.
+ */
+int cik_vm_init(struct radeon_device *rdev)
+{
+       /* number of VMs */
+       rdev->vm_manager.nvm = 16;
+       /* base offset of vram pages: on IGPs it is the MC_VM_FB_OFFSET
+        * register value shifted left by 22 bits, otherwise 0 */
+       if (rdev->flags & RADEON_IS_IGP) {
+               u64 tmp = RREG32(MC_VM_FB_OFFSET);
+               tmp <<= 22;
+               rdev->vm_manager.vram_base_offset = tmp;
+       } else
+               rdev->vm_manager.vram_base_offset = 0;
+
+       return 0;
+}
+
+/**
+ * cik_vm_fini - cik vm fini callback
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Tear down any asic specific VM setup (CIK).
+ * Currently empty: cik_vm_init() only stores values in rdev->vm_manager.
+ */
+void cik_vm_fini(struct radeon_device *rdev)
+{
+}
+
+/**
+ * cik_vm_decode_fault - print human readable fault info
+ *
+ * @rdev: radeon_device pointer
+ * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
+ * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
+ * @mc_client: memory client tag; its four bytes are printed as text
+ *
+ * Print human readable fault information (CIK).
+ */
+static void cik_vm_decode_fault(struct radeon_device *rdev,
+                               u32 status, u32 addr, u32 mc_client)
+{
+       u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
+       u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
+       u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
+       const char *raw = (const char *)&mc_client;
+       char block[sizeof(mc_client) + 1];
+       unsigned int i;
+
+       /* mc_client is four bytes with no terminator, so passing its address
+        * to %s would read past the end of the variable; copy the bytes into
+        * a NUL-terminated buffer instead.
+        * NOTE(review): bytes are kept in memory order, exactly as before;
+        * confirm that is the intended character order for the tag.
+        */
+       for (i = 0; i < sizeof(mc_client); i++)
+               block[i] = raw[i];
+       block[sizeof(mc_client)] = '\0';
+
+       printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
+              protections, vmid, addr,
+              (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
+              block, mc_id);
 }
 
 /**
- * cik_gpu_check_soft_reset - check which blocks are busy
+ * cik_vm_flush - cik vm flush using the CP
  *
  * @rdev: radeon_device pointer
+ * @ridx: index into rdev->ring of the ring the packets are written to
+ * @vm: VM whose page directory address and id are programmed; nothing
+ *      is emitted when this is NULL
  *
- * Check which blocks are busy and return the relevant reset
- * mask to be used by cik_gpu_soft_reset().
- * Returns a mask of the blocks to be reset.
+ * Update the page table base and flush the VM TLB
+ * using the CP (CIK).
  */
-static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
+void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
 {
-       u32 reset_mask = 0;
-       u32 tmp;
-
-       /* GRBM_STATUS */
-       tmp = RREG32(GRBM_STATUS);
-       if (tmp & (PA_BUSY | SC_BUSY |
-                  BCI_BUSY | SX_BUSY |
-                  TA_BUSY | VGT_BUSY |
-                  DB_BUSY | CB_BUSY |
-                  GDS_BUSY | SPI_BUSY |
-                  IA_BUSY | IA_BUSY_NO_DMA))
-               reset_mask |= RADEON_RESET_GFX;
-
-       if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
-               reset_mask |= RADEON_RESET_CP;
-
-       /* GRBM_STATUS2 */
-       tmp = RREG32(GRBM_STATUS2);
-       if (tmp & RLC_BUSY)
-               reset_mask |= RADEON_RESET_RLC;
-
-       /* SDMA0_STATUS_REG */
-       tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
-       if (!(tmp & SDMA_IDLE))
-               reset_mask |= RADEON_RESET_DMA;
-
-       /* SDMA1_STATUS_REG */
-       tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
-       if (!(tmp & SDMA_IDLE))
-               reset_mask |= RADEON_RESET_DMA1;
-
-       /* SRBM_STATUS2 */
-       tmp = RREG32(SRBM_STATUS2);
-       if (tmp & SDMA_BUSY)
-               reset_mask |= RADEON_RESET_DMA;
+       struct radeon_ring *ring = &rdev->ring[ridx];
 
-       if (tmp & SDMA1_BUSY)
-               reset_mask |= RADEON_RESET_DMA1;
+       if (vm == NULL)
+               return;
 
-       /* SRBM_STATUS */
-       tmp = RREG32(SRBM_STATUS);
+       /* write the page directory address into this VM's base register;
+        * ids 0-7 and 8-15 live in two separate register ranges */
+       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+                                WRITE_DATA_DST_SEL(0)));
+       if (vm->id < 8) {
+               radeon_ring_write(ring,
+                                 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
+       } else {
+               radeon_ring_write(ring,
+                                 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
+       }
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
 
-       if (tmp & IH_BUSY)
-               reset_mask |= RADEON_RESET_IH;
+       /* update SH_MEM_* regs: write VMID(vm->id) to SRBM_GFX_CNTL, write
+        * the four SH_MEM values, then write VMID(0) back */
+       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+                                WRITE_DATA_DST_SEL(0)));
+       radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, VMID(vm->id));
 
-       if (tmp & SEM_BUSY)
-               reset_mask |= RADEON_RESET_SEM;
+       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
+       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+                                WRITE_DATA_DST_SEL(0)));
+       radeon_ring_write(ring, SH_MEM_BASES >> 2);
+       radeon_ring_write(ring, 0);
 
-       if (tmp & GRBM_RQ_PENDING)
-               reset_mask |= RADEON_RESET_GRBM;
+       radeon_ring_write(ring, 0); /* SH_MEM_BASES */
+       radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
+       radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
+       radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
 
-       if (tmp & VMC_BUSY)
-               reset_mask |= RADEON_RESET_VMC;
+       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+                                WRITE_DATA_DST_SEL(0)));
+       radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, VMID(0));
 
-       if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
-                  MCC_BUSY | MCD_BUSY))
-               reset_mask |= RADEON_RESET_MC;
+       /* HDP flush */
+       /* We should be using the WAIT_REG_MEM packet here like in
+        * cik_fence_ring_emit(), but it causes the CP to hang in this
+        * context...
+        */
+       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+                                WRITE_DATA_DST_SEL(0)));
+       radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, 0);
 
-       if (evergreen_is_display_hung(rdev))
-               reset_mask |= RADEON_RESET_DISPLAY;
+       /* bits 0-15 are the VM contexts 0-15; only this VM's bit is set */
+       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+                                WRITE_DATA_DST_SEL(0)));
+       radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, 1 << vm->id);
 
-       /* Skip MC reset as it's mostly likely not hung, just busy */
-       if (reset_mask & RADEON_RESET_MC) {
-               DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
-               reset_mask &= ~RADEON_RESET_MC;
+       /* compute doesn't have PFP */
+       if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
+               /* sync PFP to ME, otherwise we might get invalid PFP reads */
+               radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+               radeon_ring_write(ring, 0x0);
        }
-
-       return reset_mask;
 }
 
 /**
- * cik_gpu_soft_reset - soft reset GPU
+ * cik_vm_set_page - update the page tables using CP or sDMA
  *
  * @rdev: radeon_device pointer
- * @reset_mask: mask of which blocks to reset
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
  *
- * Soft reset the blocks specified in @reset_mask.
+ * Update the page tables using CP or sDMA (CIK).
+ * The CP path is taken when rdev->asic->vm.pt_ring_index is the GFX
+ * ring; otherwise the work is handed to cik_sdma_vm_set_page().
  */
-static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
+void cik_vm_set_page(struct radeon_device *rdev,
+                    struct radeon_ib *ib,
+                    uint64_t pe,
+                    uint64_t addr, unsigned count,
+                    uint32_t incr, uint32_t flags)
 {
-       struct evergreen_mc_save save;
-       u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
-       u32 tmp;
-
-       if (reset_mask == 0)
-               return;
-
-       dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
-
-       cik_print_gpu_status_regs(rdev);
-       dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
-                RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
-       dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
-                RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
+       uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
+       uint64_t value;
+       unsigned ndw;
 
-       /* stop the rlc */
-       cik_rlc_stop(rdev);
+       if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
+               /* CP */
+               /* each entry takes two dwords plus two for the destination
+                * address; one packet is capped at 0x3FFE dwords, so the
+                * outer loop emits as many packets as needed */
+               while (count) {
+                       ndw = 2 + count * 2;
+                       if (ndw > 0x3FFE)
+                               ndw = 0x3FFE;
 
-       /* Disable GFX parsing/prefetching */
-       WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
+                       ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
+                       ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
+                                                   WRITE_DATA_DST_SEL(1));
+                       ib->ptr[ib->length_dw++] = pe;
+                       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+                       for (; ndw > 2; ndw -= 2, --count, pe += 8) {
+                               /* system pages go through the GART lookup and
+                                * are masked to a 4K boundary; valid pages use
+                                * addr as is; anything else gets address 0 */
+                               if (flags & RADEON_VM_PAGE_SYSTEM) {
+                                       value = radeon_vm_map_gart(rdev, addr);
+                                       value &= 0xFFFFFFFFFFFFF000ULL;
+                               } else if (flags & RADEON_VM_PAGE_VALID) {
+                                       value = addr;
+                               } else {
+                                       value = 0;
+                               }
+                               addr += incr;
+                               value |= r600_flags;
+                               ib->ptr[ib->length_dw++] = value;
+                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
+                       }
+               }
+       } else {
+               /* DMA */
+               cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
+       }
+}
 
-       /* Disable MEC parsing/prefetching */
-       WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
+/*
+ * RLC
+ * The RLC is a multi-purpose microengine that handles a
+ * variety of functions, the most important of which is
+ * the interrupt controller.
+ */
+/**
+ * cik_enable_gui_idle_interrupt - toggle the context busy/empty interrupts
+ *
+ * @rdev: radeon_device pointer
+ * @enable: true to set, false to clear the enable bits
+ *
+ * Read-modify-write of CP_INT_CNTL_RING0 that sets or clears
+ * CNTX_BUSY_INT_ENABLE and CNTX_EMPTY_INT_ENABLE (CIK).
+ */
+static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
+                                         bool enable)
+{
+       u32 tmp = RREG32(CP_INT_CNTL_RING0);
 
-       if (reset_mask & RADEON_RESET_DMA) {
-               /* sdma0 */
-               tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
-               tmp |= SDMA_HALT;
-               WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
-       }
-       if (reset_mask & RADEON_RESET_DMA1) {
-               /* sdma1 */
-               tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
-               tmp |= SDMA_HALT;
-               WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
-       }
+       if (enable)
+               tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+       else
+               tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+       WREG32(CP_INT_CNTL_RING0, tmp);
+}
 
-       evergreen_mc_stop(rdev, &save);
-       if (evergreen_mc_wait_for_idle(rdev)) {
-               dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
-       }
+/**
+ * cik_enable_lbpw - toggle RLC load balancing
+ *
+ * @rdev: radeon_device pointer
+ * @enable: true to set, false to clear LOAD_BALANCE_ENABLE
+ *
+ * Read-modify-write of RLC_LB_CNTL that sets or clears the
+ * LOAD_BALANCE_ENABLE bit (CIK).
+ */
+static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
+{
+       u32 tmp;
 
-       if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
-               grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
+       tmp = RREG32(RLC_LB_CNTL);
+       if (enable)
+               tmp |= LOAD_BALANCE_ENABLE;
+       else
+               tmp &= ~LOAD_BALANCE_ENABLE;
+       WREG32(RLC_LB_CNTL, tmp);
+}
 
-       if (reset_mask & RADEON_RESET_CP) {
-               grbm_soft_reset |= SOFT_RESET_CP;
+/**
+ * cik_wait_for_rlc_serdes - wait for the RLC serdes masters to go idle
+ *
+ * @rdev: radeon_device pointer
+ *
+ * For every shader engine / SH pair, poll RLC_SERDES_CU_MASTER_BUSY
+ * until it reads 0, then poll the SE/GC/TC0/TC1 bits of
+ * RLC_SERDES_NONCU_MASTER_BUSY until they are all clear (CIK).
+ * Each poll gives up after rdev->usec_timeout iterations of 1us; a
+ * timeout is not reported to the caller.
+ */
+static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
+{
+       u32 i, j, k;
+       u32 mask;
 
-               srbm_soft_reset |= SOFT_RESET_GRBM;
+       for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
+               for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
+                       cik_select_se_sh(rdev, i, j);
+                       for (k = 0; k < rdev->usec_timeout; k++) {
+                               if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
+                                       break;
+                               udelay(1);
+                       }
+               }
        }
+       /* presumably 0xffffffff selects all SEs/SHs again - confirm against
+        * cik_select_se_sh() */
+       cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 
-       if (reset_mask & RADEON_RESET_DMA)
-               srbm_soft_reset |= SOFT_RESET_SDMA;
+       mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
+       for (k = 0; k < rdev->usec_timeout; k++) {
+               if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
+                       break;
+               udelay(1);
+       }
+}
 
-       if (reset_mask & RADEON_RESET_DMA1)
-               srbm_soft_reset |= SOFT_RESET_SDMA1;
+/**
+ * cik_update_rlc - write RLC_CNTL if it differs from the requested value
+ *
+ * @rdev: radeon_device pointer
+ * @rlc: value RLC_CNTL should hold
+ *
+ * Reads RLC_CNTL and writes @rlc only when the current value is
+ * different (CIK).
+ */
+static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
+{
+       u32 tmp;
 
-       if (reset_mask & RADEON_RESET_DISPLAY)
-               srbm_soft_reset |= SOFT_RESET_DC;
+       tmp = RREG32(RLC_CNTL);
+       if (tmp != rlc)
+               WREG32(RLC_CNTL, rlc);
+}
 
-       if (reset_mask & RADEON_RESET_RLC)
-               grbm_soft_reset |= SOFT_RESET_RLC;
+/**
+ * cik_halt_rlc - disable the RLC if it is running
+ *
+ * @rdev: radeon_device pointer
+ *
+ * If RLC_ENABLE is set in RLC_CNTL, clear it, poll RLC_GPM_STAT until
+ * RLC_GPM_BUSY drops (at most rdev->usec_timeout iterations of 1us)
+ * and wait for the serdes masters to go idle (CIK).
+ * Returns the RLC_CNTL value read on entry.
+ */
+static u32 cik_halt_rlc(struct radeon_device *rdev)
+{
+       u32 data, orig;
 
-       if (reset_mask & RADEON_RESET_SEM)
-               srbm_soft_reset |= SOFT_RESET_SEM;
+       orig = data = RREG32(RLC_CNTL);
 
-       if (reset_mask & RADEON_RESET_IH)
-               srbm_soft_reset |= SOFT_RESET_IH;
+       if (data & RLC_ENABLE) {
+               u32 i;
 
-       if (reset_mask & RADEON_RESET_GRBM)
-               srbm_soft_reset |= SOFT_RESET_GRBM;
+               data &= ~RLC_ENABLE;
+               WREG32(RLC_CNTL, data);
 
-       if (reset_mask & RADEON_RESET_VMC)
-               srbm_soft_reset |= SOFT_RESET_VMC;
+               for (i = 0; i < rdev->usec_timeout; i++) {
+                       if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
+                               break;
+                       udelay(1);
+               }
 
-       if (!(rdev->flags & RADEON_IS_IGP)) {
-               if (reset_mask & RADEON_RESET_MC)
-                       srbm_soft_reset |= SOFT_RESET_MC;
+               cik_wait_for_rlc_serdes(rdev);
        }
 
-       if (grbm_soft_reset) {
-               tmp = RREG32(GRBM_SOFT_RESET);
-               tmp |= grbm_soft_reset;
-               dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
-               WREG32(GRBM_SOFT_RESET, tmp);
-               tmp = RREG32(GRBM_SOFT_RESET);
-
-               udelay(50);
-
-               tmp &= ~grbm_soft_reset;
-               WREG32(GRBM_SOFT_RESET, tmp);
-               tmp = RREG32(GRBM_SOFT_RESET);
-       }
+       return orig;
+}
 
-       if (srbm_soft_reset) {
-               tmp = RREG32(SRBM_SOFT_RESET);
-               tmp |= srbm_soft_reset;
-               dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
-               WREG32(SRBM_SOFT_RESET, tmp);
-               tmp = RREG32(SRBM_SOFT_RESET);
+/**
+ * cik_enter_rlc_safe_mode - request RLC safe mode and wait for it
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Write REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE) to RLC_GPR_REG2, poll
+ * RLC_GPM_STAT until both GFX_POWER_STATUS and GFX_CLOCK_STATUS are
+ * set, then poll RLC_GPR_REG2 until the REQ bit is clear (CIK).
+ * Each poll gives up after rdev->usec_timeout iterations of 1us; a
+ * timeout is not reported to the caller.
+ */
+void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
+{
+       u32 tmp, i, mask;
 
-               udelay(50);
+       tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
+       WREG32(RLC_GPR_REG2, tmp);
 
-               tmp &= ~srbm_soft_reset;
-               WREG32(SRBM_SOFT_RESET, tmp);
-               tmp = RREG32(SRBM_SOFT_RESET);
+       mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               if ((RREG32(RLC_GPM_STAT) & mask) == mask)
+                       break;
+               udelay(1);
        }
 
-       /* Wait a little for things to settle down */
-       udelay(50);
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
+                       break;
+               udelay(1);
+       }
+}
 
-       evergreen_mc_resume(rdev, &save);
-       udelay(50);
+/**
+ * cik_exit_rlc_safe_mode - request exit from RLC safe mode
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Write REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE) to RLC_GPR_REG2 (CIK).
+ * Unlike cik_enter_rlc_safe_mode(), nothing is polled afterwards.
+ */
+void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
+{
+       u32 tmp;
 
-       cik_print_gpu_status_regs(rdev);
+       tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
+       WREG32(RLC_GPR_REG2, tmp);
 }
 
 /**
- * cik_asic_reset - soft reset GPU
+ * cik_rlc_stop - stop the RLC ME
  *
  * @rdev: radeon_device pointer
  *
- * Look up which blocks are hung and attempt
- * to reset them.
- * Returns 0 for success.
+ * Halt the RLC ME (MicroEngine) (CIK).
+ * RLC_CNTL is written with 0, the context busy/empty interrupts are
+ * disabled and the serdes masters are waited on.
  */
-int cik_asic_reset(struct radeon_device *rdev)
+static void cik_rlc_stop(struct radeon_device *rdev)
 {
-       u32 reset_mask;
-
-       reset_mask = cik_gpu_check_soft_reset(rdev);
-
-       if (reset_mask)
-               r600_set_bios_scratch_engine_hung(rdev, true);
-
-       cik_gpu_soft_reset(rdev, reset_mask);
-
-       reset_mask = cik_gpu_check_soft_reset(rdev);
+       WREG32(RLC_CNTL, 0);
 
-       if (!reset_mask)
-               r600_set_bios_scratch_engine_hung(rdev, false);
+       cik_enable_gui_idle_interrupt(rdev, false);
 
-       return 0;
+       cik_wait_for_rlc_serdes(rdev);
 }
 
 /**
- * cik_gfx_is_lockup - check if the 3D engine is locked up
+ * cik_rlc_start - start the RLC ME
  *
  * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
  *
- * Check if the 3D engine is locked up (CIK).
- * Returns true if the engine is locked, false if not.
+ * Unhalt the RLC ME (MicroEngine) (CIK).
+ * Writes RLC_ENABLE to RLC_CNTL, turns the GUI idle interrupt back on
+ * and then delays 50us before returning.
  */
-bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+static void cik_rlc_start(struct radeon_device *rdev)
 {
-       u32 reset_mask = cik_gpu_check_soft_reset(rdev);
+       WREG32(RLC_CNTL, RLC_ENABLE);
 
-       if (!(reset_mask & (RADEON_RESET_GFX |
-                           RADEON_RESET_COMPUTE |
-                           RADEON_RESET_CP))) {
-               radeon_ring_lockup_update(ring);
-               return false;
-       }
-       /* force CP activities */
-       radeon_ring_force_activity(rdev, ring);
-       return radeon_ring_test_lockup(rdev, ring);
+       cik_enable_gui_idle_interrupt(rdev, true);
+
+       udelay(50); /* fixed delay after enabling; presumably settle time - confirm */
 }
 
 /**
- * cik_sdma_is_lockup - Check if the DMA engine is locked up
+ * cik_rlc_resume - setup the RLC hw
  *
  * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
  *
- * Check if the async DMA engine is locked up (CIK).
- * Returns true if the engine appears to be locked up, false if not.
+ * Initialize the RLC registers, load the ucode,
+ * and start the RLC (CIK).
+ * Order: cik_rlc_stop(), clear the low two bits of RLC_CGCG_CGLS_CTRL,
+ * si_rlc_reset(), cik_init_pg(), cik_init_cg(), program the RLC_LB_*
+ * registers, stream the ucode words, then cik_rlc_start().
+ * Returns 0 for success, -EINVAL if the ucode is not available.
  */
-bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+static int cik_rlc_resume(struct radeon_device *rdev)
 {
-       u32 reset_mask = cik_gpu_check_soft_reset(rdev);
-       u32 mask;
+       u32 i, size, tmp;
+       const __be32 *fw_data;
 
-       if (ring->idx == R600_RING_TYPE_DMA_INDEX)
-               mask = RADEON_RESET_DMA;
-       else
-               mask = RADEON_RESET_DMA1;
+       if (!rdev->rlc_fw)
+               return -EINVAL;
 
-       if (!(reset_mask & mask)) {
-               radeon_ring_lockup_update(ring);
-               return false;
+       switch (rdev->family) { /* pick the per-family ucode length, counted in 32-bit words */
+       case CHIP_BONAIRE:
+       default: /* any family not listed below uses the Bonaire size */
+               size = BONAIRE_RLC_UCODE_SIZE;
+               break;
+       case CHIP_KAVERI:
+               size = KV_RLC_UCODE_SIZE;
+               break;
+       case CHIP_KABINI:
+               size = KB_RLC_UCODE_SIZE;
+               break;
        }
-       /* force ring activities */
-       radeon_ring_force_activity(rdev, ring);
-       return radeon_ring_test_lockup(rdev, ring);
+
+       cik_rlc_stop(rdev);
+
+       /* disable CG */
+       tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
+       WREG32(RLC_CGCG_CGLS_CTRL, tmp);
+
+       si_rlc_reset(rdev);
+
+       cik_init_pg(rdev);
+
+       cik_init_cg(rdev);
+
+       WREG32(RLC_LB_CNTR_INIT, 0);
+       WREG32(RLC_LB_CNTR_MAX, 0x00008000);
+
+       cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+       WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
+       WREG32(RLC_LB_PARAMS, 0x00600408);
+       WREG32(RLC_LB_CNTL, 0x80000004);
+
+       WREG32(RLC_MC_CNTL, 0);
+       WREG32(RLC_UCODE_CNTL, 0);
+
+       fw_data = (const __be32 *)rdev->rlc_fw->data; /* image is read as big-endian 32-bit words */
+               WREG32(RLC_GPM_UCODE_ADDR, 0); /* NOTE(review): over-indented; this statement is unconditional */
+       for (i = 0; i < size; i++) /* NOTE(review): size is not compared with rlc_fw->size here; presumably validated when the firmware was loaded - confirm */
+               WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
+       WREG32(RLC_GPM_UCODE_ADDR, 0); /* address register is written with 0 again after the data loop */
+
+       /* XXX - find out what chips support lbpw */
+       cik_enable_lbpw(rdev, false);
+
+       if (rdev->family == CHIP_BONAIRE) /* only Bonaire gets this extra write */
+               WREG32(RLC_DRIVER_DMA_STATUS, 0);
+
+       cik_rlc_start(rdev);
+
+       return 0;
 }
 
-/* MC */
-/**
- * cik_mc_program - program the GPU memory controller
- *
- * @rdev: radeon_device pointer
- *
- * Set the location of vram, gart, and AGP in the GPU's
- * physical address space (CIK).
- */
-static void cik_mc_program(struct radeon_device *rdev)
+static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
 {
-       struct evergreen_mc_save save;
-       u32 tmp;
-       int i, j;
+       u32 data, orig, tmp, tmp2; /* purpose: set or clear CGCG_EN | CGLS_EN in RLC_CGCG_CGLS_CTRL */
 
-       /* Initialize HDP */
-       for (i = 0, j = 0; i < 32; i++, j += 0x18) {
-               WREG32((0x2c14 + j), 0x00000000);
-               WREG32((0x2c18 + j), 0x00000000);
-               WREG32((0x2c1c + j), 0x00000000);
-               WREG32((0x2c20 + j), 0x00000000);
-               WREG32((0x2c24 + j), 0x00000000);
+       orig = data = RREG32(RLC_CGCG_CGLS_CTRL); /* keep the original so the register is only rewritten on change */
+
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) { /* enable only when the caller asks and the flag is set */
+               cik_enable_gui_idle_interrupt(rdev, true);
+
+               tmp = cik_halt_rlc(rdev); /* return value is handed back to cik_update_rlc() below */
+
+               cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+               WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
+               WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
+               tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
+               WREG32(RLC_SERDES_WR_CTRL, tmp2);
+
+               cik_update_rlc(rdev, tmp);
+
+               data |= CGCG_EN | CGLS_EN;
+       } else { /* taken for enable == false and also when the flag is not set */
+               cik_enable_gui_idle_interrupt(rdev, false);
+
+               RREG32(CB_CGTT_SCLK_CTRL); /* four reads whose results are discarded; presumably a deliberate hardware delay - confirm */
+               RREG32(CB_CGTT_SCLK_CTRL);
+               RREG32(CB_CGTT_SCLK_CTRL);
+               RREG32(CB_CGTT_SCLK_CTRL);
+
+               data &= ~(CGCG_EN | CGLS_EN);
        }
-       WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
 
-       evergreen_mc_stop(rdev, &save);
-       if (radeon_mc_wait_for_idle(rdev)) {
-               dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+       if (orig != data)
+               WREG32(RLC_CGCG_CGLS_CTRL, data);
+
+}
+/* Switch GFX MGCG on or off; the enable path also needs RADEON_CG_SUPPORT_GFX_MGCG in rdev->cg_flags. */
+static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
+{
+       u32 data, orig, tmp = 0;
+
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
+               if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) { /* CP_MEM_LS_EN is set only when both the MGLS and CP_LS flags are present */
+                       if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
+                               orig = data = RREG32(CP_MEM_SLP_CNTL);
+                               data |= CP_MEM_LS_EN;
+                               if (orig != data)
+                                       WREG32(CP_MEM_SLP_CNTL, data);
+                       }
+               }
+
+               orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
+               data &= 0xfffffffd; /* clear bit 1; the disable path below sets the same bit */
+               if (orig != data)
+                       WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
+
+               tmp = cik_halt_rlc(rdev); /* return value is handed back to cik_update_rlc() below */
+
+               cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+               WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
+               WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
+               data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
+               WREG32(RLC_SERDES_WR_CTRL, data);
+
+               cik_update_rlc(rdev, tmp);
+
+               if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) { /* optional CGTS_SM_CTRL_REG programming */
+                       orig = data = RREG32(CGTS_SM_CTRL_REG);
+                       data &= ~SM_MODE_MASK;
+                       data |= SM_MODE(0x2);
+                       data |= SM_MODE_ENABLE;
+                       data &= ~CGTS_OVERRIDE;
+                       if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
+                           (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
+                               data &= ~CGTS_LS_OVERRIDE;
+                       data &= ~ON_MONITOR_ADD_MASK;
+                       data |= ON_MONITOR_ADD_EN;
+                       data |= ON_MONITOR_ADD(0x96);
+                       if (orig != data)
+                               WREG32(CGTS_SM_CTRL_REG, data);
+               }
+       } else { /* disable path: taken for enable == false and also when the MGCG flag is not set */
+               orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
+               data |= 0x00000002; /* set bit 1 of the override register */
+               if (orig != data)
+                       WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
+
+               data = RREG32(RLC_MEM_SLP_CNTL);
+               if (data & RLC_MEM_LS_EN) { /* cleared whenever found set, independent of cg_flags */
+                       data &= ~RLC_MEM_LS_EN;
+                       WREG32(RLC_MEM_SLP_CNTL, data);
+               }
+
+               data = RREG32(CP_MEM_SLP_CNTL);
+               if (data & CP_MEM_LS_EN) { /* cleared whenever found set, independent of cg_flags */
+                       data &= ~CP_MEM_LS_EN;
+                       WREG32(CP_MEM_SLP_CNTL, data);
+               }
+
+               orig = data = RREG32(CGTS_SM_CTRL_REG);
+               data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
+               if (orig != data)
+                       WREG32(CGTS_SM_CTRL_REG, data);
+
+               tmp = cik_halt_rlc(rdev);
+
+               cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+               WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
+               WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
+               data = BPM_ADDR_MASK | MGCG_OVERRIDE_1; /* enable path writes MGCG_OVERRIDE_0 here instead */
+               WREG32(RLC_SERDES_WR_CTRL, data);
+
+               cik_update_rlc(rdev, tmp);
        }
-       /* Lockout access through VGA aperture*/
-       WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
-       /* Update configuration */
-       WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
-              rdev->mc.vram_start >> 12);
-       WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
-              rdev->mc.vram_end >> 12);
-       WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
-              rdev->vram_scratch.gpu_addr >> 12);
-       tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
-       tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
-       WREG32(MC_VM_FB_LOCATION, tmp);
-       /* XXX double check these! */
-       WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
-       WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
-       WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
-       WREG32(MC_VM_AGP_BASE, 0);
-       WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
-       WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
-       if (radeon_mc_wait_for_idle(rdev)) {
-               dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+}
+/* Register list walked by cik_enable_mc_ls() and cik_enable_mc_mgcg(). */
+static const u32 mc_cg_registers[] =
+{
+       MC_HUB_MISC_HUB_CG,
+       MC_HUB_MISC_SIP_CG,
+       MC_HUB_MISC_VM_CG,
+       MC_XPB_CLK_GAT,
+       ATC_MISC_CG,
+       MC_CITF_MISC_WR_CG,
+       MC_CITF_MISC_RD_CG,
+       MC_CITF_MISC_VM_CG,
+       VM_L2_CG,
+};
+/* Set MC_LS_ENABLE in every mc_cg_registers[] entry when enable and RADEON_CG_SUPPORT_MC_LS are both true; otherwise clear it. */
+static void cik_enable_mc_ls(struct radeon_device *rdev,
+                            bool enable)
+{
+       int i;
+       u32 orig, data;
+
+       for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
+               orig = data = RREG32(mc_cg_registers[i]);
+               if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
+                       data |= MC_LS_ENABLE;
+               else
+                       data &= ~MC_LS_ENABLE;
+               if (data != orig) /* skip the write when the bit already has the wanted value */
+                       WREG32(mc_cg_registers[i], data);
+       }
+}
+/* Set MC_CG_ENABLE in every mc_cg_registers[] entry when enable and RADEON_CG_SUPPORT_MC_MGCG are both true; otherwise clear it. */
+static void cik_enable_mc_mgcg(struct radeon_device *rdev,
+                              bool enable)
+{
+       int i;
+       u32 orig, data;
+
+       for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
+               orig = data = RREG32(mc_cg_registers[i]);
+               if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
+                       data |= MC_CG_ENABLE;
+               else
+                       data &= ~MC_CG_ENABLE;
+               if (data != orig) /* skip the write when the bit already has the wanted value */
+                       WREG32(mc_cg_registers[i], data);
        }
-       evergreen_mc_resume(rdev, &save);
-       /* we need to own VRAM, so turn off the VGA renderer here
-        * to stop it overwriting our objects */
-       rv515_vga_render_disable(rdev);
 }
 
-/**
- * cik_mc_init - initialize the memory controller driver params
- *
- * @rdev: radeon_device pointer
- *
- * Look up the amount of vram, vram width, and decide how to place
- * vram and gart within the GPU's physical address space (CIK).
- * Returns 0 for success.
- */
-static int cik_mc_init(struct radeon_device *rdev)
+static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
+                                bool enable)
 {
-       u32 tmp;
-       int chansize, numchan;
+       u32 orig, data; /* purpose: program SDMA0_CLK_CTRL at both the SDMA0 and SDMA1 register offsets */
 
-       /* Get VRAM informations */
-       rdev->mc.vram_is_ddr = true;
-       tmp = RREG32(MC_ARB_RAMCFG);
-       if (tmp & CHANSIZE_MASK) {
-               chansize = 64;
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
+               WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100); /* whole register is overwritten, not read-modify-write */
+               WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
        } else {
-               chansize = 32;
-       }
-       tmp = RREG32(MC_SHARED_CHMAP);
-       switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
-       case 0:
-       default:
-               numchan = 1;
-               break;
-       case 1:
-               numchan = 2;
-               break;
-       case 2:
-               numchan = 4;
-               break;
-       case 3:
-               numchan = 8;
-               break;
-       case 4:
-               numchan = 3;
-               break;
-       case 5:
-               numchan = 6;
-               break;
-       case 6:
-               numchan = 10;
-               break;
-       case 7:
-               numchan = 12;
-               break;
-       case 8:
-               numchan = 16;
-               break;
-       }
-       rdev->mc.vram_width = numchan * chansize;
-       /* Could aper size report 0 ? */
-       rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
-       rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
-       /* size in MB on si */
-       rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
-       rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
-       rdev->mc.visible_vram_size = rdev->mc.aper_size;
-       si_vram_gtt_location(rdev, &rdev->mc);
-       radeon_update_bandwidth_info(rdev);
+               orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
+               data |= 0xff000000; /* disable path only sets the top byte and leaves the other bits as read */
+               if (data != orig)
+                       WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
 
-       return 0;
+               orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
+               data |= 0xff000000;
+               if (data != orig)
+                       WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
+       }
 }
 
-/*
- * GART
- * VMID 0 is the physical GPU addresses as used by the kernel.
- * VMIDs 1-15 are used for userspace clients and are handled
- * by the radeon vm/hsa code.
- */
-/**
- * cik_pcie_gart_tlb_flush - gart tlb flush callback
- *
- * @rdev: radeon_device pointer
- *
- * Flush the TLB for the VMID 0 page table (CIK).
- */
-void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
+static void cik_enable_sdma_mgls(struct radeon_device *rdev,
+                                bool enable)
 {
-       /* flush hdp cache */
-       WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+       u32 orig, data; /* purpose: set or clear bit 0x100 of SDMA0_POWER_CNTL at both SDMA register offsets */
+
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
+               orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
+               data |= 0x100;
+               if (orig != data)
+                       WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
+
+               orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
+               data |= 0x100;
+               if (orig != data)
+                       WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
+       } else { /* taken for enable == false and also when the SDMA_LS flag is not set */
+               orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
+               data &= ~0x100;
+               if (orig != data)
+                       WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
 
-       /* bits 0-15 are the VM contexts0-15 */
-       WREG32(VM_INVALIDATE_REQUEST, 0x1);
+               orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
+               data &= ~0x100;
+               if (orig != data)
+                       WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
+       }
 }
 
-/**
- * cik_pcie_gart_enable - gart enable
- *
- * @rdev: radeon_device pointer
- *
- * This sets up the TLBs, programs the page tables for VMID0,
- * sets up the hw for VMIDs 1-15 which are allocated on
- * demand, and sets up the global locations for the LDS, GDS,
- * and GPUVM for FSA64 clients (CIK).
- * Returns 0 for success, errors for failure.
- */
-static int cik_pcie_gart_enable(struct radeon_device *rdev)
+static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
+                               bool enable)
 {
-       int r, i;
+       u32 orig, data; /* purpose: program UVD_CGC_MEM_CTRL (low 12 bits) and the DCM bit of UVD_CGC_CTRL */
 
-       if (rdev->gart.robj == NULL) {
-               dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
-               return -EINVAL;
-       }
-       r = radeon_gart_table_vram_pin(rdev);
-       if (r)
-               return r;
-       radeon_gart_restore(rdev);
-       /* Setup TLB control */
-       WREG32(MC_VM_MX_L1_TLB_CNTL,
-              (0xA << 7) |
-              ENABLE_L1_TLB |
-              SYSTEM_ACCESS_MODE_NOT_IN_SYS |
-              ENABLE_ADVANCED_DRIVER_MODEL |
-              SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
-       /* Setup L2 cache */
-       WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
-              ENABLE_L2_FRAGMENT_PROCESSING |
-              ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
-              ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
-              EFFECTIVE_L2_QUEUE_SIZE(7) |
-              CONTEXT1_IDENTITY_ACCESS_MODE(1));
-       WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
-       WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
-              L2_CACHE_BIGK_FRAGMENT_SIZE(6));
-       /* setup context0 */
-       WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
-       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
-       WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
-       WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
-                       (u32)(rdev->dummy_page.addr >> 12));
-       WREG32(VM_CONTEXT0_CNTL2, 0);
-       WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
-                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
+               data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); /* NOTE(review): value read here is overwritten on the next line */
+               data = 0xfff; /* NOTE(review): plain assignment, so the upper bits are written as 0; the disable path uses &= ~0xfff - confirm |= was not intended */
+               WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
 
-       WREG32(0x15D4, 0);
-       WREG32(0x15D8, 0);
-       WREG32(0x15DC, 0);
+               orig = data = RREG32(UVD_CGC_CTRL);
+               data |= DCM;
+               if (orig != data)
+                       WREG32(UVD_CGC_CTRL, data);
+       } else {
+               data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
+               data &= ~0xfff; /* clear the low 12 bits, keep the rest as read */
+               WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data); /* written unconditionally, unlike the orig != data pattern below */
 
-       /* empty context1-15 */
-       /* FIXME start with 4G, once using 2 level pt switch to full
-        * vm size space
-        */
-       /* set vm size, must be a multiple of 4 */
-       WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
-       WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
-       for (i = 1; i < 16; i++) {
-               if (i < 8)
-                       WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
-                              rdev->gart.table_addr >> 12);
-               else
-                       WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
-                              rdev->gart.table_addr >> 12);
+               orig = data = RREG32(UVD_CGC_CTRL);
+               data &= ~DCM;
+               if (orig != data)
+                       WREG32(UVD_CGC_CTRL, data);
        }
+}
 
-       /* enable context1-15 */
-       WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
-              (u32)(rdev->dummy_page.addr >> 12));
-       WREG32(VM_CONTEXT1_CNTL2, 4);
-       WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
-                               RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
-                               RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
-                               DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
-                               DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
-                               PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
-                               PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
-                               VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
-                               VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
-                               READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
-                               READ_PROTECTION_FAULT_ENABLE_DEFAULT |
-                               WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
-                               WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
+static void cik_enable_bif_mgls(struct radeon_device *rdev,
+                              bool enable)
+{ /* Set or clear the four *_MEM_*LS_EN bits in PCIE_CNTL2 (accessed through the PCIE port macros). */
+       u32 orig, data;
 
-       /* TC cache setup ??? */
-       WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
-       WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
-       WREG32(TC_CFG_L1_STORE_POLICY, 0);
+       orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
 
-       WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
-       WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
-       WREG32(TC_CFG_L2_STORE_POLICY0, 0);
-       WREG32(TC_CFG_L2_STORE_POLICY1, 0);
-       WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS)) /* bits are set only when both conditions hold */
+               data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
+                       REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
+       else
+               data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
+                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
 
-       WREG32(TC_CFG_L1_VOLATILE, 0);
-       WREG32(TC_CFG_L2_VOLATILE, 0);
+       if (orig != data) /* write back only on change */
+               WREG32_PCIE_PORT(PCIE_CNTL2, data);
+}
 
-       if (rdev->family == CHIP_KAVERI) {
-               u32 tmp = RREG32(CHUB_CONTROL);
-               tmp &= ~BYPASS_VM;
-               WREG32(CHUB_CONTROL, tmp);
-       }
+static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
+                               bool enable)
+{ /* Clear CLOCK_GATING_DIS in HDP_HOST_PATH_CNTL to enable, set it to disable. */
+       u32 orig, data;
 
-       /* XXX SH_MEM regs */
-       /* where to put LDS, scratch, GPUVM in FSA64 space */
-       mutex_lock(&rdev->srbm_mutex);
-       for (i = 0; i < 16; i++) {
-               cik_srbm_select(rdev, 0, 0, 0, i);
-               /* CP and shaders */
-               WREG32(SH_MEM_CONFIG, 0);
-               WREG32(SH_MEM_APE1_BASE, 1);
-               WREG32(SH_MEM_APE1_LIMIT, 0);
-               WREG32(SH_MEM_BASES, 0);
-               /* SDMA GFX */
-               WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
-               WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
-               WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
-               WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
-               /* XXX SDMA RLC - todo */
-       }
-       cik_srbm_select(rdev, 0, 0, 0, 0);
-       mutex_unlock(&rdev->srbm_mutex);
+       orig = data = RREG32(HDP_HOST_PATH_CNTL);
 
-       cik_pcie_gart_tlb_flush(rdev);
-       DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
-                (unsigned)(rdev->mc.gtt_size >> 20),
-                (unsigned long long)rdev->gart.table_addr);
-       rdev->gart.ready = true;
-       return 0;
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
+               data &= ~CLOCK_GATING_DIS; /* the bit is a disable flag, so enabling clears it */
+       else
+               data |= CLOCK_GATING_DIS;
+
+       if (orig != data) /* write back only on change */
+               WREG32(HDP_HOST_PATH_CNTL, data);
 }
 
-/**
- * cik_pcie_gart_disable - gart disable
- *
- * @rdev: radeon_device pointer
- *
- * This disables all VM page table (CIK).
- */
-static void cik_pcie_gart_disable(struct radeon_device *rdev)
+static void cik_enable_hdp_ls(struct radeon_device *rdev,
+                             bool enable)
 {
-       /* Disable all tables */
-       WREG32(VM_CONTEXT0_CNTL, 0);
-       WREG32(VM_CONTEXT1_CNTL, 0);
-       /* Setup TLB control */
-       WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
-              SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
-       /* Setup L2 cache */
-       WREG32(VM_L2_CNTL,
-              ENABLE_L2_FRAGMENT_PROCESSING |
-              ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
-              ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
-              EFFECTIVE_L2_QUEUE_SIZE(7) |
-              CONTEXT1_IDENTITY_ACCESS_MODE(1));
-       WREG32(VM_L2_CNTL2, 0);
-       WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
-              L2_CACHE_BIGK_FRAGMENT_SIZE(6));
-       radeon_gart_table_vram_unpin(rdev);
+       u32 orig, data; /* purpose: set or clear HDP_LS_ENABLE in HDP_MEM_POWER_LS */
+
+       orig = data = RREG32(HDP_MEM_POWER_LS);
+
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS)) /* set only when both conditions hold */
+               data |= HDP_LS_ENABLE;
+       else
+               data &= ~HDP_LS_ENABLE;
+
+       if (orig != data) /* write back only on change */
+               WREG32(HDP_MEM_POWER_LS, data);
+}
+/* Apply enable/disable to every clock-gating block whose RADEON_CG_BLOCK_* bit is set in @block. */
+void cik_update_cg(struct radeon_device *rdev,
+                  u32 block, bool enable)
+{
+       if (block & RADEON_CG_BLOCK_GFX) {
+               /* order matters! */
+               if (enable) { /* enabling: mgcg first, then cgcg */
+                       cik_enable_mgcg(rdev, true);
+                       cik_enable_cgcg(rdev, true);
+               } else { /* disabling: reverse order, cgcg first */
+                       cik_enable_cgcg(rdev, false);
+                       cik_enable_mgcg(rdev, false);
+               }
+       }
+
+       if (block & RADEON_CG_BLOCK_MC) {
+               if (!(rdev->flags & RADEON_IS_IGP)) { /* MC gating is left untouched when RADEON_IS_IGP is set */
+                       cik_enable_mc_mgcg(rdev, enable);
+                       cik_enable_mc_ls(rdev, enable);
+               }
+       }
+
+       if (block & RADEON_CG_BLOCK_SDMA) {
+               cik_enable_sdma_mgcg(rdev, enable);
+               cik_enable_sdma_mgls(rdev, enable);
+       }
+
+       if (block & RADEON_CG_BLOCK_BIF) {
+               cik_enable_bif_mgls(rdev, enable);
+       }
+
+       if (block & RADEON_CG_BLOCK_UVD) {
+               if (rdev->has_uvd) /* UVD registers are only touched when rdev->has_uvd is set */
+                       cik_enable_uvd_mgcg(rdev, enable);
+       }
+
+       if (block & RADEON_CG_BLOCK_HDP) {
+               cik_enable_hdp_mgcg(rdev, enable);
+               cik_enable_hdp_ls(rdev, enable);
+       }
 }
 
-/**
- * cik_pcie_gart_fini - vm fini callback
- *
- * @rdev: radeon_device pointer
- *
- * Tears down the driver GART/VM setup (CIK).
- */
-static void cik_pcie_gart_fini(struct radeon_device *rdev)
+static void cik_init_cg(struct radeon_device *rdev)
 {
-       cik_pcie_gart_disable(rdev);
-       radeon_gart_table_vram_free(rdev);
-       radeon_gart_fini(rdev);
+       /* Enable clock gating: GFX first, then the remaining blocks; cik_fini_cg() disables in the opposite order. */
+       cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
+
+       if (rdev->has_uvd) /* extra UVD setup runs before the UVD block is enabled below */
+               si_init_uvd_internal_cg(rdev);
+
+       cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
+                            RADEON_CG_BLOCK_SDMA |
+                            RADEON_CG_BLOCK_BIF |
+                            RADEON_CG_BLOCK_UVD |
+                            RADEON_CG_BLOCK_HDP), true);
 }
 
-/* vm parser */
-/**
- * cik_ib_parse - vm ib_parse callback
- *
- * @rdev: radeon_device pointer
- * @ib: indirect buffer pointer
- *
- * CIK uses hw IB checking so this is a nop (CIK).
- */
-int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
+static void cik_fini_cg(struct radeon_device *rdev)
 {
-       return 0;
+       cik_update_cg(rdev, (RADEON_CG_BLOCK_MC | /* disable the non-GFX blocks first */
+                            RADEON_CG_BLOCK_SDMA |
+                            RADEON_CG_BLOCK_BIF |
+                            RADEON_CG_BLOCK_UVD |
+                            RADEON_CG_BLOCK_HDP), false);
+
+       cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); /* GFX last, the reverse of cik_init_cg() */
 }
 
-/*
- * vm
- * VMID 0 is the physical GPU addresses as used by the kernel.
- * VMIDs 1-15 are used for userspace clients and are handled
- * by the radeon vm/hsa code.
- */
-/**
- * cik_vm_init - cik vm init callback
- *
- * @rdev: radeon_device pointer
- *
- * Inits cik specific vm parameters (number of VMs, base of vram for
- * VMIDs 1-15) (CIK).
- * Returns 0 for success.
- */
-int cik_vm_init(struct radeon_device *rdev)
+static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
+                                         bool enable)
 {
-       /* number of VMs */
-       rdev->vm_manager.nvm = 16;
-       /* base offset of vram pages */
-       if (rdev->flags & RADEON_IS_IGP) {
-               u64 tmp = RREG32(MC_VM_FB_OFFSET);
-               tmp <<= 22;
-               rdev->vm_manager.vram_base_offset = tmp;
-       } else
-               rdev->vm_manager.vram_base_offset = 0;
+       u32 data, orig; /* purpose: set or clear SMU_CLK_SLOWDOWN_ON_PU_ENABLE in RLC_PG_CNTL */
 
-       return 0;
+       orig = data = RREG32(RLC_PG_CNTL);
+       if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS)) /* gated by pg_flags, not cg_flags */
+               data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
+       else
+               data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
+       if (orig != data) /* write back only on change */
+               WREG32(RLC_PG_CNTL, data);
 }
 
-/**
- * cik_vm_fini - cik vm fini callback
- *
- * @rdev: radeon_device pointer
- *
- * Tear down any asic specific VM setup (CIK).
- */
-void cik_vm_fini(struct radeon_device *rdev)
+static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
+                                         bool enable)
 {
+       u32 data, orig; /* purpose: set or clear SMU_CLK_SLOWDOWN_ON_PD_ENABLE in RLC_PG_CNTL */
+
+       orig = data = RREG32(RLC_PG_CNTL);
+       if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS)) /* same flag as the _on_pu variant */
+               data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
+       else
+               data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
+       if (orig != data) /* write back only on change */
+               WREG32(RLC_PG_CNTL, data);
 }
 
-/**
- * cik_vm_decode_fault - print human readable fault info
- *
- * @rdev: radeon_device pointer
- * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
- * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
- *
- * Print human readable fault information (CIK).
- */
-static void cik_vm_decode_fault(struct radeon_device *rdev,
-                               u32 status, u32 addr, u32 mc_client)
+static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
 {
-       u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
-       u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
-       u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
-       char *block = (char *)&mc_client;
+       u32 data, orig; /* purpose: clear DISABLE_CP_PG in RLC_PG_CNTL to enable, set it to disable */
 
-       printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
-              protections, vmid, addr,
-              (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
-              block, mc_id);
+       orig = data = RREG32(RLC_PG_CNTL);
+       if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
+               data &= ~DISABLE_CP_PG; /* the bit is a disable flag, so enabling clears it */
+       else
+               data |= DISABLE_CP_PG;
+       if (orig != data) /* write back only on change */
+               WREG32(RLC_PG_CNTL, data);
 }
 
-/**
- * cik_vm_flush - cik vm flush using the CP
- *
- * @rdev: radeon_device pointer
- *
- * Update the page table base and flush the VM TLB
- * using the CP (CIK).
- */
-void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
 {
-       struct radeon_ring *ring = &rdev->ring[ridx];
+       u32 data, orig; /* purpose: clear DISABLE_GDS_PG in RLC_PG_CNTL to enable, set it to disable */
 
-       if (vm == NULL)
+       orig = data = RREG32(RLC_PG_CNTL);
+       if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
+               data &= ~DISABLE_GDS_PG; /* the bit is a disable flag, so enabling clears it */
+       else
+               data |= DISABLE_GDS_PG;
+       if (orig != data) /* write back only on change */
+               WREG32(RLC_PG_CNTL, data);
+}
+/* Used by cik_init_cp_pg_table(); all three values index arrays of 32-bit words. */
+#define CP_ME_TABLE_SIZE    96 /* words copied per microengine */
+#define CP_ME_TABLE_OFFSET  2048 /* start index in the ce/pfp/me firmware images */
+#define CP_MEC_TABLE_OFFSET 4096 /* start index in the mec firmware image */
+
+/**
+ * cik_init_cp_pg_table - fill the RLC cp table buffer from the CP firmware
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Copies CP_ME_TABLE_SIZE dwords per slot into rdev->rlc.cp_table_ptr,
+ * converting each dword from big endian.  Slot 0 is read from ce_fw,
+ * slot 1 from pfp_fw and slot 2 from me_fw, all at CP_ME_TABLE_OFFSET;
+ * every further slot is read from mec_fw at CP_MEC_TABLE_OFFSET.
+ * Four slots are written, five on CHIP_KAVERI.
+ * Returns without doing anything when the table buffer is not mapped.
+ */
+void cik_init_cp_pg_table(struct radeon_device *rdev)
+{
+       const __be32 *fw_data;
+       volatile u32 *dst_ptr;
+       int me, i, max_me = 4;
+       u32 bo_offset = 0;
+       u32 table_offset;
+
+       if (rdev->family == CHIP_KAVERI)
+               max_me = 5;
+
+       if (rdev->rlc.cp_table_ptr == NULL)
                 return;
 
-       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-                                WRITE_DATA_DST_SEL(0)));
-       if (vm->id < 8) {
-               radeon_ring_write(ring,
-                                 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
-       } else {
-               radeon_ring_write(ring,
-                                 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
+       /* write the cp table buffer */
+       dst_ptr = rdev->rlc.cp_table_ptr;
+       for (me = 0; me < max_me; me++) {
+               if (me == 0) {
+                       fw_data = (const __be32 *)rdev->ce_fw->data;
+                       table_offset = CP_ME_TABLE_OFFSET;
+               } else if (me == 1) {
+                       fw_data = (const __be32 *)rdev->pfp_fw->data;
+                       table_offset = CP_ME_TABLE_OFFSET;
+               } else if (me == 2) {
+                       fw_data = (const __be32 *)rdev->me_fw->data;
+                       table_offset = CP_ME_TABLE_OFFSET;
+               } else {
+                       /* me >= 3: every remaining slot reads the same mec_fw table */
+                       fw_data = (const __be32 *)rdev->mec_fw->data;
+                       table_offset = CP_MEC_TABLE_OFFSET;
+               }
+
+               /* table_offset and bo_offset are dword indices, not byte offsets */
+               for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
+                       dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
+               }
+               bo_offset += CP_ME_TABLE_SIZE;
         }
-       radeon_ring_write(ring, 0);
-       radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+}
 
-       /* update SH_MEM_* regs */
-       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-                                WRITE_DATA_DST_SEL(0)));
-       radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
-       radeon_ring_write(ring, 0);
-       radeon_ring_write(ring, VMID(vm->id));
+/**
+ * cik_enable_gfx_cgpg - set or clear GFX_PG_ENABLE and AUTO_PG_EN
+ *
+ * @rdev: radeon_device pointer
+ * @enable: true to request that both bits be set
+ *
+ * When @enable is true and rdev->pg_flags contains
+ * RADEON_PG_SUPPORT_GFX_CG, sets GFX_PG_ENABLE in RLC_PG_CNTL and
+ * AUTO_PG_EN in RLC_AUTO_PG_CTRL.  Otherwise clears both bits and then
+ * reads DB_RENDER_CONTROL.  Each register is only written back when
+ * its value actually changed.
+ */
+static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
+                               bool enable)
+{
+       u32 data, orig;
+
+       if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG)) {
+               orig = data = RREG32(RLC_PG_CNTL);
+               data |= GFX_PG_ENABLE;
+               if (orig != data)
+                       WREG32(RLC_PG_CNTL, data);
+
+               orig = data = RREG32(RLC_AUTO_PG_CTRL);
+               data |= AUTO_PG_EN;
+               if (orig != data)
+                       WREG32(RLC_AUTO_PG_CTRL, data);
+       } else {
+               orig = data = RREG32(RLC_PG_CNTL);
+               data &= ~GFX_PG_ENABLE;
+               if (orig != data)
+                       WREG32(RLC_PG_CNTL, data);
 
-       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
-       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-                                WRITE_DATA_DST_SEL(0)));
-       radeon_ring_write(ring, SH_MEM_BASES >> 2);
-       radeon_ring_write(ring, 0);
+               orig = data = RREG32(RLC_AUTO_PG_CTRL);
+               data &= ~AUTO_PG_EN;
+               if (orig != data)
+                       WREG32(RLC_AUTO_PG_CTRL, data);
 
-       radeon_ring_write(ring, 0); /* SH_MEM_BASES */
-       radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
-       radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
-       radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
+               /*
+                * NOTE(review): the value read here is never used; presumably
+                * the register read itself is the point -- confirm before
+                * removing.
+                */
+               data = RREG32(DB_RENDER_CONTROL);
+       }
+}
 
-       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-                                WRITE_DATA_DST_SEL(0)));
-       radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
-       radeon_ring_write(ring, 0);
-       radeon_ring_write(ring, VMID(0));
+/**
+ * cik_get_cu_active_bitmap - build a per-SH CU bitmap from the shader array config
+ *
+ * @rdev: radeon_device pointer
+ * @se: shader engine index passed to cik_select_se_sh()
+ * @sh: shader array index passed to cik_select_se_sh()
+ *
+ * Selects the given SE/SH, reads CC_GC_SHADER_ARRAY_CONFIG and
+ * GC_USER_SHADER_ARRAY_CONFIG, then restores the selection to
+ * 0xffffffff/0xffffffff.  The upper 16 bits of the two registers are
+ * OR'ed together and inverted.
+ * Returns the inverted bits, limited to the low
+ * rdev->config.cik.max_cu_per_sh bits.
+ */
+static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
+{
+       u32 mask = 0, tmp, tmp1;
+       int i;
 
-       /* HDP flush */
-       /* We should be using the WAIT_REG_MEM packet here like in
-        * cik_fence_ring_emit(), but it causes the CP to hang in this
-        * context...
-        */
-       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-                                WRITE_DATA_DST_SEL(0)));
-       radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
-       radeon_ring_write(ring, 0);
-       radeon_ring_write(ring, 0);
+       cik_select_se_sh(rdev, se, sh);
+       tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
+       tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
+       cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 
-       /* bits 0-15 are the VM contexts0-15 */
-       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-                                WRITE_DATA_DST_SEL(0)));
-       radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
-       radeon_ring_write(ring, 0);
-       radeon_ring_write(ring, 1 << vm->id);
+       tmp &= 0xffff0000;
 
-       /* compute doesn't have PFP */
-       if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
-               /* sync PFP to ME, otherwise we might get invalid PFP reads */
-               radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
-               radeon_ring_write(ring, 0x0);
+       /* the low 16 bits of tmp1 are dropped by the shift below */
+       tmp |= tmp1;
+       tmp >>= 16;
+
+       /* mask = max_cu_per_sh low bits set */
+       for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
+               mask <<= 1;
+               mask |= 1;
         }
+
+       return (~tmp) & mask;
 }
 
-/**
- * cik_vm_set_page - update the page tables using sDMA
- *
- * @rdev: radeon_device pointer
- * @ib: indirect buffer to fill with commands
- * @pe: addr of the page entry
- * @addr: dst addr to write into pe
- * @count: number of page entries to update
- * @incr: increase next addr by incr bytes
- * @flags: access flags
- *
- * Update the page tables using CP or sDMA (CIK).
- */
-void cik_vm_set_page(struct radeon_device *rdev,
-                    struct radeon_ib *ib,
-                    uint64_t pe,
-                    uint64_t addr, unsigned count,
-                    uint32_t incr, uint32_t flags)
+/**
+ * cik_init_ao_cu_mask - program RLC_PG_AO_CU_MASK and the MAX_PU_CU field
+ *
+ * @rdev: radeon_device pointer
+ *
+ * For every shader engine i and shader array j, takes the first two CUs
+ * flagged by cik_get_cu_active_bitmap() and places their bits in the
+ * mask at bit offset (i * 16 + j * 8).  The combined mask is written to
+ * RLC_PG_AO_CU_MASK.  The total number of flagged CUs across all SE/SH
+ * pairs is written to the MAX_PU_CU field of RLC_MAX_PG_CU.
+ */
+static void cik_init_ao_cu_mask(struct radeon_device *rdev)
 {
-       uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
-       uint64_t value;
-       unsigned ndw;
-
-       if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
-               /* CP */
-               while (count) {
-                       ndw = 2 + count * 2;
-                       if (ndw > 0x3FFE)
-                               ndw = 0x3FFE;
+       u32 i, j, k, active_cu_number = 0;
+       u32 mask, counter, cu_bitmap;
+       u32 tmp = 0;
 
-                       ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
-                       ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
-                                                   WRITE_DATA_DST_SEL(1));
-                       ib->ptr[ib->length_dw++] = pe;
-                       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-                       for (; ndw > 2; ndw -= 2, --count, pe += 8) {
-                               if (flags & RADEON_VM_PAGE_SYSTEM) {
-                                       value = radeon_vm_map_gart(rdev, addr);
-                                       value &= 0xFFFFFFFFFFFFF000ULL;
-                               } else if (flags & RADEON_VM_PAGE_VALID) {
-                                       value = addr;
-                               } else {
-                                       value = 0;
-                               }
-                               addr += incr;
-                               value |= r600_flags;
-                               ib->ptr[ib->length_dw++] = value;
-                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
-                       }
-               }
-       } else {
-               /* DMA */
-               if (flags & RADEON_VM_PAGE_SYSTEM) {
-                       while (count) {
-                               ndw = count * 2;
-                               if (ndw > 0xFFFFE)
-                                       ndw = 0xFFFFE;
-
-                               /* for non-physically contiguous pages (system) */
-                               ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
-                               ib->ptr[ib->length_dw++] = pe;
-                               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-                               ib->ptr[ib->length_dw++] = ndw;
-                               for (; ndw > 0; ndw -= 2, --count, pe += 8) {
-                                       if (flags & RADEON_VM_PAGE_SYSTEM) {
-                                               value = radeon_vm_map_gart(rdev, addr);
-                                               value &= 0xFFFFFFFFFFFFF000ULL;
-                                       } else if (flags & RADEON_VM_PAGE_VALID) {
-                                               value = addr;
-                                       } else {
-                                               value = 0;
-                                       }
-                                       addr += incr;
-                                       value |= r600_flags;
-                                       ib->ptr[ib->length_dw++] = value;
-                                       ib->ptr[ib->length_dw++] = upper_32_bits(value);
+       for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
+               for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
+                       mask = 1;
+                       cu_bitmap = 0;
+                       counter = 0;
+                       for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
+                               /*
+                                * NOTE(review): the lookup does not depend on k, so
+                                * it is repeated (with its register selects and
+                                * reads) for every CU; presumably it could be done
+                                * once per (i, j) -- confirm.
+                                */
+                               if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
+                                       /* only the first two flagged CUs enter the mask */
+                                       if (counter < 2)
+                                               cu_bitmap |= mask;
+                                       counter ++;
                                 }
+                               mask <<= 1;
                         }
-               } else {
-                       while (count) {
-                               ndw = count;
-                               if (ndw > 0x7FFFF)
-                                       ndw = 0x7FFFF;
 
-                               if (flags & RADEON_VM_PAGE_VALID)
-                                       value = addr;
-                               else
-                                       value = 0;
-                               /* for physically contiguous pages (vram) */
-                               ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
-                               ib->ptr[ib->length_dw++] = pe; /* dst addr */
-                               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-                               ib->ptr[ib->length_dw++] = r600_flags; /* mask */
-                               ib->ptr[ib->length_dw++] = 0;
-                               ib->ptr[ib->length_dw++] = value; /* value */
-                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
-                               ib->ptr[ib->length_dw++] = incr; /* increment size */
-                               ib->ptr[ib->length_dw++] = 0;
-                               ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-                               pe += ndw * 8;
-                               addr += ndw * incr;
-                               count -= ndw;
-                       }
+                       /* counter includes CUs beyond the two kept in cu_bitmap */
+                       active_cu_number += counter;
+                       tmp |= (cu_bitmap << (i * 16 + j * 8));
                 }
-               while (ib->length_dw & 0x7)
-                       ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
         }
+
+       WREG32(RLC_PG_AO_CU_MASK, tmp);
+
+       tmp = RREG32(RLC_MAX_PG_CU);
+       tmp &= ~MAX_PU_CU_MASK;
+       tmp |= MAX_PU_CU(active_cu_number);
+       WREG32(RLC_MAX_PG_CU, tmp);
 }
 
-/**
- * cik_dma_vm_flush - cik vm flush using sDMA
- *
- * @rdev: radeon_device pointer
- *
- * Update the page table base and flush the VM TLB
- * using sDMA (CIK).
- */
-void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+/**
+ * cik_enable_gfx_static_mgpg - update STATIC_PER_CU_PG_ENABLE in RLC_PG_CNTL
+ *
+ * @rdev: radeon_device pointer
+ * @enable: true to request that the bit be set
+ *
+ * Sets STATIC_PER_CU_PG_ENABLE when @enable is true and rdev->pg_flags
+ * contains RADEON_PG_SUPPORT_GFX_SMG; clears it in every other case.
+ * The register is only written back when its value actually changed.
+ */
+static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
+                                      bool enable)
 {
-       struct radeon_ring *ring = &rdev->ring[ridx];
-       u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
-                         SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
-       u32 ref_and_mask;
+       u32 data, orig;
+
+       orig = data = RREG32(RLC_PG_CNTL);
+       if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
+               data |= STATIC_PER_CU_PG_ENABLE;
+       else
+               data &= ~STATIC_PER_CU_PG_ENABLE;
+       if (orig != data)
+               WREG32(RLC_PG_CNTL, data);
+}
 
-       if (vm == NULL)
-               return;
+/**
+ * cik_enable_gfx_dynamic_mgpg - update DYN_PER_CU_PG_ENABLE in RLC_PG_CNTL
+ *
+ * @rdev: radeon_device pointer
+ * @enable: true to request that the bit be set
+ *
+ * Sets DYN_PER_CU_PG_ENABLE when @enable is true and rdev->pg_flags
+ * contains RADEON_PG_SUPPORT_GFX_DMG; clears it in every other case.
+ * The register is only written back when its value actually changed.
+ */
+static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
+                                       bool enable)
+{
+       u32 data, orig;
 
-       if (ridx == R600_RING_TYPE_DMA_INDEX)
-               ref_and_mask = SDMA0;
+       orig = data = RREG32(RLC_PG_CNTL);
+       if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
+               data |= DYN_PER_CU_PG_ENABLE;
         else
-               ref_and_mask = SDMA1;
+               data &= ~DYN_PER_CU_PG_ENABLE;
+       if (orig != data)
+               WREG32(RLC_PG_CNTL, data);
+}
 
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-       if (vm->id < 8) {
-               radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
+#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
+#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
+
+/**
+ * cik_init_gfx_cgpg - RLC register setup done from cik_init_pg()
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Writes the clear state descriptor (address high, address low, size)
+ * through RLC_GPM_SCRATCH_ADDR/RLC_GPM_SCRATCH_DATA starting at
+ * RLC_CLEAR_STATE_DESCRIPTOR_OFFSET, or three zeros when there is no
+ * rdev->rlc.cs_data.  When rdev->rlc.reg_list is set, writes that list
+ * starting at RLC_SAVE_AND_RESTORE_STARTING_OFFSET.  Then sets
+ * GFX_PG_SRC, programs the save/restore and cp table addresses
+ * (shifted right by 8) and fixed poll count and delay values.
+ *
+ * NOTE(review): several values are written back to back to
+ * RLC_GPM_SCRATCH_DATA after one address write; presumably the data
+ * port advances the scratch address itself -- confirm.
+ */
+static void cik_init_gfx_cgpg(struct radeon_device *rdev)
+{
+       u32 data, orig;
+       u32 i;
+
+       if (rdev->rlc.cs_data) {
+               WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
+               WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
+               WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
+               WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
         } else {
-               radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
+               WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
+               for (i = 0; i < 3; i++)
+                       WREG32(RLC_GPM_SCRATCH_DATA, 0);
+       }
+       if (rdev->rlc.reg_list) {
+               WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
+               for (i = 0; i < rdev->rlc.reg_list_size; i++)
+                       WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
         }
-       radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
-
-       /* update SH_MEM_* regs */
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-       radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
-       radeon_ring_write(ring, VMID(vm->id));
 
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-       radeon_ring_write(ring, SH_MEM_BASES >> 2);
-       radeon_ring_write(ring, 0);
+       orig = data = RREG32(RLC_PG_CNTL);
+       data |= GFX_PG_SRC;
+       if (orig != data)
+               WREG32(RLC_PG_CNTL, data);
 
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-       radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
-       radeon_ring_write(ring, 0);
+       /* both registers take the GPU address shifted right by 8 */
+       WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
+       WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
 
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-       radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
-       radeon_ring_write(ring, 1);
+       data = RREG32(CP_RB_WPTR_POLL_CNTL);
+       data &= ~IDLE_POLL_COUNT_MASK;
+       data |= IDLE_POLL_COUNT(0x60);
+       WREG32(CP_RB_WPTR_POLL_CNTL, data);
 
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-       radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
-       radeon_ring_write(ring, 0);
+       data = 0x10101010;
+       WREG32(RLC_PG_DELAY, data);
 
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-       radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
-       radeon_ring_write(ring, VMID(0));
+       /* replace only the low byte of RLC_PG_DELAY_2 */
+       data = RREG32(RLC_PG_DELAY_2);
+       data &= ~0xff;
+       data |= 0x3;
+       WREG32(RLC_PG_DELAY_2, data);
 
-       /* flush HDP */
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
-       radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
-       radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
-       radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
-       radeon_ring_write(ring, ref_and_mask); /* MASK */
-       radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
+       data = RREG32(RLC_AUTO_PG_CTRL);
+       data &= ~GRBM_REG_SGIT_MASK;
+       data |= GRBM_REG_SGIT(0x700);
+       WREG32(RLC_AUTO_PG_CTRL, data);
 
-       /* flush TLB */
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-       radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
-       radeon_ring_write(ring, 1 << vm->id);
 }
 
-/*
- * RLC
- * The RLC is a multi-purpose microengine that handles a
- * variety of functions, the most important of which is
- * the interrupt controller.
- */
-/**
- * cik_rlc_stop - stop the RLC ME
- *
- * @rdev: radeon_device pointer
- *
- * Halt the RLC ME (MicroEngine) (CIK).
- */
-static void cik_rlc_stop(struct radeon_device *rdev)
+/**
+ * cik_update_gfx_pg - apply one enable/disable request to all gfx PG helpers
+ *
+ * @rdev: radeon_device pointer
+ * @enable: passed unchanged to each helper
+ *
+ * Calls cik_enable_gfx_cgpg(), cik_enable_gfx_static_mgpg() and
+ * cik_enable_gfx_dynamic_mgpg() in that order.  Each helper checks its
+ * own RADEON_PG_SUPPORT_* flag in rdev->pg_flags.
+ */
+static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
 {
-       int i, j, k;
-       u32 mask, tmp;
-
-       tmp = RREG32(CP_INT_CNTL_RING0);
-       tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
-       WREG32(CP_INT_CNTL_RING0, tmp);
-
-       RREG32(CB_CGTT_SCLK_CTRL);
-       RREG32(CB_CGTT_SCLK_CTRL);
-       RREG32(CB_CGTT_SCLK_CTRL);
-       RREG32(CB_CGTT_SCLK_CTRL);
+       cik_enable_gfx_cgpg(rdev, enable);
+       cik_enable_gfx_static_mgpg(rdev, enable);
+       cik_enable_gfx_dynamic_mgpg(rdev, enable);
+}
 
-       tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
-       WREG32(RLC_CGCG_CGLS_CTRL, tmp);
+/**
+ * cik_get_csb_size - count the dwords of the clear state buffer
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Adds up the dwords of the packet sequence that cik_get_csb_buffer()
+ * emits: fixed-size packets plus, for every extent of every section in
+ * rdev->rlc.cs_data, a two dword header and ext->reg_count values.
+ * Returns the dword count, or 0 when rdev->rlc.cs_data is NULL or an
+ * extent belongs to a section whose id is not SECT_CONTEXT.
+ */
+u32 cik_get_csb_size(struct radeon_device *rdev)
+{
+       u32 count = 0;
+       const struct cs_section_def *sect = NULL;
+       const struct cs_extent_def *ext = NULL;
 
-       WREG32(RLC_CNTL, 0);
+       if (rdev->rlc.cs_data == NULL)
+               return 0;
 
-       for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
-               for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
-                       cik_select_se_sh(rdev, i, j);
-                       for (k = 0; k < rdev->usec_timeout; k++) {
-                               if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
-                                       break;
-                               udelay(1);
-                       }
+       /* begin clear state */
+       count += 2;
+       /* context control state */
+       count += 3;
+
+       /* both lists end at an entry with a NULL section/extent pointer */
+       for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
+               for (ext = sect->section; ext->extent != NULL; ++ext) {
+                       if (sect->id == SECT_CONTEXT)
+                               count += 2 + ext->reg_count;
+                       else
+                               return 0;
                 }
         }
-       cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+       /* pa_sc_raster_config/pa_sc_raster_config1 */
+       count += 4;
+       /* end clear state */
+       count += 2;
+       /* clear state */
+       count += 2;
 
-       mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
-       for (k = 0; k < rdev->usec_timeout; k++) {
-               if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
-                       break;
-               udelay(1);
-       }
+       return count;
 }
 
-/**
- * cik_rlc_start - start the RLC ME
- *
- * @rdev: radeon_device pointer
- *
- * Unhalt the RLC ME (MicroEngine) (CIK).
- */
-static void cik_rlc_start(struct radeon_device *rdev)
+/**
+ * cik_get_csb_buffer - write the clear state packet sequence into a buffer
+ *
+ * @rdev: radeon_device pointer
+ * @buffer: destination for the packet dwords
+ *
+ * Emits, in order: PREAMBLE_CNTL (begin clear state), CONTEXT_CONTROL,
+ * one SET_CONTEXT_REG packet per extent of rdev->rlc.cs_data, a
+ * SET_CONTEXT_REG packet for PA_SC_RASTER_CONFIG with two family
+ * specific values, PREAMBLE_CNTL (end clear state) and CLEAR_STATE.
+ * Does nothing when rdev->rlc.cs_data or @buffer is NULL.  If an extent
+ * belongs to a section whose id is not SECT_CONTEXT, returns at once
+ * and leaves the dwords written so far in @buffer.
+ *
+ * NOTE(review): no bound is checked here; presumably @buffer must hold
+ * at least cik_get_csb_size() dwords -- confirm against the caller.
+ */
+void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
 {
-       u32 tmp;
-
-       WREG32(RLC_CNTL, RLC_ENABLE);
-
-       tmp = RREG32(CP_INT_CNTL_RING0);
-       tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
-       WREG32(CP_INT_CNTL_RING0, tmp);
-
-       udelay(50);
-}
+       u32 count = 0, i;
+       const struct cs_section_def *sect = NULL;
+       const struct cs_extent_def *ext = NULL;
 
-/**
- * cik_rlc_resume - setup the RLC hw
- *
- * @rdev: radeon_device pointer
- *
- * Initialize the RLC registers, load the ucode,
- * and start the RLC (CIK).
- * Returns 0 for success, -EINVAL if the ucode is not available.
- */
-static int cik_rlc_resume(struct radeon_device *rdev)
-{
-       u32 i, size;
-       u32 clear_state_info[3];
-       const __be32 *fw_data;
+       if (rdev->rlc.cs_data == NULL)
+               return;
+       if (buffer == NULL)
+               return;
 
-       if (!rdev->rlc_fw)
-               return -EINVAL;
+       buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
+       buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;
+
+       buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
+       buffer[count++] = 0x80000000;
+       buffer[count++] = 0x80000000;
+
+       for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
+               for (ext = sect->section; ext->extent != NULL; ++ext) {
+                       if (sect->id == SECT_CONTEXT) {
+                               buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
+                               /*
+                                * NOTE(review): 0xa000 is presumably the same base
+                                * as PACKET3_SET_CONTEXT_REG_START used below --
+                                * confirm.
+                                */
+                               buffer[count++] = ext->reg_index - 0xa000;
+                               for (i = 0; i < ext->reg_count; i++)
+                                       buffer[count++] = ext->extent[i];
+                       } else {
+                               return;
+                       }
+               }
+       }
 
+       /* two values follow: pa_sc_raster_config and pa_sc_raster_config1 */
+       buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
+       buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
         switch (rdev->family) {
         case CHIP_BONAIRE:
-       default:
-               size = BONAIRE_RLC_UCODE_SIZE;
+               buffer[count++] = 0x16000012;
+               buffer[count++] = 0x00000000;
                 break;
         case CHIP_KAVERI:
-               size = KV_RLC_UCODE_SIZE;
+               buffer[count++] = 0x00000000; /* XXX */
+               buffer[count++] = 0x00000000;
                 break;
         case CHIP_KABINI:
-               size = KB_RLC_UCODE_SIZE;
+               buffer[count++] = 0x00000000; /* XXX */
+               buffer[count++] = 0x00000000;
+               break;
+       default:
+               buffer[count++] = 0x00000000;
+               buffer[count++] = 0x00000000;
                 break;
         }
 
-       cik_rlc_stop(rdev);
-
-       WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
-       RREG32(GRBM_SOFT_RESET);
-       udelay(50);
-       WREG32(GRBM_SOFT_RESET, 0);
-       RREG32(GRBM_SOFT_RESET);
-       udelay(50);
-
-       WREG32(RLC_LB_CNTR_INIT, 0);
-       WREG32(RLC_LB_CNTR_MAX, 0x00008000);
-
-       cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
-       WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
-       WREG32(RLC_LB_PARAMS, 0x00600408);
-       WREG32(RLC_LB_CNTL, 0x80000004);
-
-       WREG32(RLC_MC_CNTL, 0);
-       WREG32(RLC_UCODE_CNTL, 0);
-
-       fw_data = (const __be32 *)rdev->rlc_fw->data;
-               WREG32(RLC_GPM_UCODE_ADDR, 0);
-       for (i = 0; i < size; i++)
-               WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
-       WREG32(RLC_GPM_UCODE_ADDR, 0);
+       buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
+       buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;
 
-       /* XXX */
-       clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
-       clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
-       clear_state_info[2] = 0;//cik_default_size;
-       WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
-       for (i = 0; i < 3; i++)
-               WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
-       WREG32(RLC_DRIVER_DMA_STATUS, 0);
+       buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
+       buffer[count++] = 0;
+}
 
-       cik_rlc_start(rdev);
+/**
+ * cik_init_pg - run the PG setup sequence when any pg_flags bit is set
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Does nothing when rdev->pg_flags is 0.  Otherwise enables the sck
+ * slowdown on pu/pd helpers, runs the gfx cgpg init and enables the
+ * CP/GDS helpers when RADEON_PG_SUPPORT_GFX_CG is set, then programs the
+ * AO CU mask and calls cik_update_gfx_pg() with enable = true.
+ */
+static void cik_init_pg(struct radeon_device *rdev)
+{
+       if (rdev->pg_flags) {
+               cik_enable_sck_slowdown_on_pu(rdev, true);
+               cik_enable_sck_slowdown_on_pd(rdev, true);
+               if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) {
+                       cik_init_gfx_cgpg(rdev);
+                       cik_enable_cp_pg(rdev, true);
+                       cik_enable_gds_pg(rdev, true);
+               }
+               cik_init_ao_cu_mask(rdev);
+               cik_update_gfx_pg(rdev, true);
+       }
+}
 
-       return 0;
+/**
+ * cik_fini_pg - disable what cik_init_pg() enabled
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Does nothing when rdev->pg_flags is 0.  Otherwise calls
+ * cik_update_gfx_pg() with enable = false and, when
+ * RADEON_PG_SUPPORT_GFX_CG is set, the CP and GDS helpers with
+ * enable = false.  The sck slowdown settings are not touched here.
+ */
+static void cik_fini_pg(struct radeon_device *rdev)
+{
+       if (rdev->pg_flags) {
+               cik_update_gfx_pg(rdev, false);
+               if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) {
+                       cik_enable_cp_pg(rdev, false);
+                       cik_enable_gds_pg(rdev, false);
+               }
+       }
 }
 
 /*
@@ -5040,7 +5988,7 @@ static int cik_irq_init(struct radeon_device *rdev)
        WREG32(INTERRUPT_CNTL, interrupt_cntl);
 
        WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
-       rb_bufsz = drm_order(rdev->ih.ring_size / 4);
+       rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
 
        ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
                      IH_WPTR_OVERFLOW_CLEAR |
@@ -5096,6 +6044,7 @@ int cik_irq_set(struct radeon_device *rdev)
        u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
        u32 grbm_int_cntl = 0;
        u32 dma_cntl, dma_cntl1;
+       u32 thermal_int;
 
        if (!rdev->irq.installed) {
                WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -5128,6 +6077,13 @@ int cik_irq_set(struct radeon_device *rdev)
        cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
        cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
 
+       if (rdev->flags & RADEON_IS_IGP)
+               thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
+                       ~(THERM_INTH_MASK | THERM_INTL_MASK);
+       else
+               thermal_int = RREG32_SMC(CG_THERMAL_INT) &
+                       ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
+
        /* enable CP interrupts on all rings */
        if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
                DRM_DEBUG("cik_irq_set: sw int gfx\n");
@@ -5285,6 +6241,14 @@ int cik_irq_set(struct radeon_device *rdev)
                hpd6 |= DC_HPDx_INT_EN;
        }
 
+       if (rdev->irq.dpm_thermal) {
+               DRM_DEBUG("dpm thermal\n");
+               if (rdev->flags & RADEON_IS_IGP)
+                       thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
+               else
+                       thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
+       }
+
        WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
 
        WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
@@ -5319,6 +6283,11 @@ int cik_irq_set(struct radeon_device *rdev)
        WREG32(DC_HPD5_INT_CONTROL, hpd5);
        WREG32(DC_HPD6_INT_CONTROL, hpd6);
 
+       if (rdev->flags & RADEON_IS_IGP)
+               WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
+       else
+               WREG32_SMC(CG_THERMAL_INT, thermal_int);
+
        return 0;
 }
 
@@ -5530,6 +6499,7 @@ int cik_irq_process(struct radeon_device *rdev)
        bool queue_hotplug = false;
        bool queue_reset = false;
        u32 addr, status, mc_client;
+       bool queue_thermal = false;
 
        if (!rdev->ih.enabled || rdev->shutdown)
                return IRQ_NONE;
@@ -5763,6 +6733,10 @@ restart_ih:
                                break;
                        }
                        break;
+               case 124: /* UVD */
+                       DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
+                       radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
+                       break;
                case 146:
                case 147:
                        addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
@@ -5880,6 +6854,19 @@ restart_ih:
                                break;
                        }
                        break;
+               case 230: /* thermal low to high */
+                       DRM_DEBUG("IH: thermal low to high\n");
+                       rdev->pm.dpm.thermal.high_to_low = false;
+                       queue_thermal = true;
+                       break;
+               case 231: /* thermal high to low */
+                       DRM_DEBUG("IH: thermal high to low\n");
+                       rdev->pm.dpm.thermal.high_to_low = true;
+                       queue_thermal = true;
+                       break;
+               case 233: /* GUI IDLE */
+                       DRM_DEBUG("IH: GUI idle\n");
+                       break;
                case 241: /* SDMA Privileged inst */
                case 247: /* SDMA Privileged inst */
                        DRM_ERROR("Illegal instruction in SDMA command stream\n");
@@ -5919,9 +6906,6 @@ restart_ih:
                                break;
                        }
                        break;
-               case 233: /* GUI IDLE */
-                       DRM_DEBUG("IH: GUI idle\n");
-                       break;
                default:
                        DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                        break;
@@ -5935,6 +6919,8 @@ restart_ih:
                schedule_work(&rdev->hotplug_work);
        if (queue_reset)
                schedule_work(&rdev->reset_work);
+       if (queue_thermal)
+               schedule_work(&rdev->pm.dpm.thermal.work);
        rdev->ih.rptr = rptr;
        WREG32(IH_RB_RPTR, rdev->ih.rptr);
        atomic_set(&rdev->ih.lock, 0);
@@ -5964,6 +6950,16 @@ static int cik_startup(struct radeon_device *rdev)
        struct radeon_ring *ring;
        int r;
 
+       /* enable pcie gen2/3 link */
+       cik_pcie_gen3_enable(rdev);
+       /* enable aspm */
+       cik_program_aspm(rdev);
+
+       /* scratch needs to be initialized before MC */
+       r = r600_vram_scratch_init(rdev);
+       if (r)
+               return r;
+
        cik_mc_program(rdev);
 
        if (rdev->flags & RADEON_IS_IGP) {
@@ -5993,17 +6989,26 @@ static int cik_startup(struct radeon_device *rdev)
                }
        }
 
-       r = r600_vram_scratch_init(rdev);
-       if (r)
-               return r;
-
        r = cik_pcie_gart_enable(rdev);
        if (r)
                return r;
        cik_gpu_init(rdev);
 
        /* allocate rlc buffers */
-       r = si_rlc_init(rdev);
+       if (rdev->flags & RADEON_IS_IGP) {
+               if (rdev->family == CHIP_KAVERI) {
+                       rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
+                       rdev->rlc.reg_list_size =
+                               (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
+               } else {
+                       rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
+                       rdev->rlc.reg_list_size =
+                               (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
+               }
+       }
+       rdev->rlc.cs_data = ci_cs_data;
+       rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
+       r = sumo_rlc_init(rdev);
        if (r) {
                DRM_ERROR("Failed to init rlc BOs!\n");
                return r;
@@ -6051,12 +7056,15 @@ static int cik_startup(struct radeon_device *rdev)
                return r;
        }
 
-       r = cik_uvd_resume(rdev);
+       r = radeon_uvd_resume(rdev);
        if (!r) {
-               r = radeon_fence_driver_start_ring(rdev,
-                                                  R600_RING_TYPE_UVD_INDEX);
-               if (r)
-                       dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+               r = uvd_v4_2_resume(rdev);
+               if (!r) {
+                       r = radeon_fence_driver_start_ring(rdev,
+                                                          R600_RING_TYPE_UVD_INDEX);
+                       if (r)
+                               dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+               }
        }
        if (r)
                rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
@@ -6079,7 +7087,7 @@ static int cik_startup(struct radeon_device *rdev)
        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
                             CP_RB0_RPTR, CP_RB0_WPTR,
-                            0, 0xfffff, RADEON_CP_PACKET2);
+                            RADEON_CP_PACKET2);
        if (r)
                return r;
 
@@ -6088,7 +7096,7 @@ static int cik_startup(struct radeon_device *rdev)
        ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
                             CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
-                            0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
+                            PACKET3(PACKET3_NOP, 0x3FFF));
        if (r)
                return r;
        ring->me = 1; /* first MEC */
@@ -6100,7 +7108,7 @@ static int cik_startup(struct radeon_device *rdev)
        ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
                             CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
-                            0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
+                            PACKET3(PACKET3_NOP, 0x3FFF));
        if (r)
                return r;
        /* dGPU only have 1 MEC */
@@ -6113,7 +7121,7 @@ static int cik_startup(struct radeon_device *rdev)
        r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
                             SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
                             SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
-                            2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+                            SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
        if (r)
                return r;
 
@@ -6121,7 +7129,7 @@ static int cik_startup(struct radeon_device *rdev)
        r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
                             SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
                             SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
-                            2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+                            SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
        if (r)
                return r;
 
@@ -6135,12 +7143,11 @@ static int cik_startup(struct radeon_device *rdev)
 
        ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
        if (ring->ring_size) {
-               r = radeon_ring_init(rdev, ring, ring->ring_size,
-                                    R600_WB_UVD_RPTR_OFFSET,
+               r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
                                     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
-                                    0, 0xfffff, RADEON_CP_PACKET2);
+                                    RADEON_CP_PACKET2);
                if (!r)
-                       r = r600_uvd_init(rdev);
+                       r = uvd_v1_0_init(rdev);
                if (r)
                        DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
        }
@@ -6157,6 +7164,10 @@ static int cik_startup(struct radeon_device *rdev)
                return r;
        }
 
+       r = dce6_audio_init(rdev);
+       if (r)
+               return r;
+
        return 0;
 }
 
@@ -6202,11 +7213,14 @@ int cik_resume(struct radeon_device *rdev)
  */
 int cik_suspend(struct radeon_device *rdev)
 {
+       dce6_audio_fini(rdev);
        radeon_vm_manager_fini(rdev);
        cik_cp_enable(rdev, false);
        cik_sdma_enable(rdev, false);
-       r600_uvd_stop(rdev);
+       uvd_v1_0_fini(rdev);
        radeon_uvd_suspend(rdev);
+       cik_fini_pg(rdev);
+       cik_fini_cg(rdev);
        cik_irq_suspend(rdev);
        radeon_wb_disable(rdev);
        cik_pcie_gart_disable(rdev);
@@ -6327,7 +7341,7 @@ int cik_init(struct radeon_device *rdev)
                cik_cp_fini(rdev);
                cik_sdma_fini(rdev);
                cik_irq_fini(rdev);
-               si_rlc_fini(rdev);
+               sumo_rlc_fini(rdev);
                cik_mec_fini(rdev);
                radeon_wb_fini(rdev);
                radeon_ib_pool_fini(rdev);
@@ -6362,14 +7376,16 @@ void cik_fini(struct radeon_device *rdev)
 {
        cik_cp_fini(rdev);
        cik_sdma_fini(rdev);
+       cik_fini_pg(rdev);
+       cik_fini_cg(rdev);
        cik_irq_fini(rdev);
-       si_rlc_fini(rdev);
+       sumo_rlc_fini(rdev);
        cik_mec_fini(rdev);
        radeon_wb_fini(rdev);
        radeon_vm_manager_fini(rdev);
        radeon_ib_pool_fini(rdev);
        radeon_irq_kms_fini(rdev);
-       r600_uvd_stop(rdev);
+       uvd_v1_0_fini(rdev);
        radeon_uvd_fini(rdev);
        cik_pcie_gart_fini(rdev);
        r600_vram_scratch_fini(rdev);
@@ -6398,8 +7414,8 @@ static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
                                   struct radeon_crtc *radeon_crtc,
                                   struct drm_display_mode *mode)
 {
-       u32 tmp;
-
+       u32 tmp, buffer_alloc, i;
+       u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
        /*
         * Line Buffer Setup
         * There are 6 line buffers, one for each display controllers.
@@ -6409,22 +7425,37 @@ static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
         * them using the stereo blender.
         */
        if (radeon_crtc->base.enabled && mode) {
-               if (mode->crtc_hdisplay < 1920)
+               if (mode->crtc_hdisplay < 1920) {
                        tmp = 1;
-               else if (mode->crtc_hdisplay < 2560)
+                       buffer_alloc = 2;
+               } else if (mode->crtc_hdisplay < 2560) {
                        tmp = 2;
-               else if (mode->crtc_hdisplay < 4096)
+                       buffer_alloc = 2;
+               } else if (mode->crtc_hdisplay < 4096) {
                        tmp = 0;
-               else {
+                       buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
+               } else {
                        DRM_DEBUG_KMS("Mode too big for LB!\n");
                        tmp = 0;
+                       buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
                }
-       } else
+       } else {
                tmp = 1;
+               buffer_alloc = 0;
+       }
 
        WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
               LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
 
+       WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
+              DMIF_BUFFERS_ALLOCATED(buffer_alloc));
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
+                   DMIF_BUFFERS_ALLOCATED_COMPLETED)
+                       break;
+               udelay(1);
+       }
+
        if (radeon_crtc->base.enabled && mode) {
                switch (tmp) {
                case 0:
@@ -6826,7 +7857,7 @@ static void dce8_program_watermarks(struct radeon_device *rdev,
                                    u32 lb_size, u32 num_heads)
 {
        struct drm_display_mode *mode = &radeon_crtc->base.mode;
-       struct dce8_wm_params wm;
+       struct dce8_wm_params wm_low, wm_high;
        u32 pixel_period;
        u32 line_time = 0;
        u32 latency_watermark_a = 0, latency_watermark_b = 0;
@@ -6836,35 +7867,82 @@ static void dce8_program_watermarks(struct radeon_device *rdev,
                pixel_period = 1000000 / (u32)mode->clock;
                line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
 
-               wm.yclk = rdev->pm.current_mclk * 10;
-               wm.sclk = rdev->pm.current_sclk * 10;
-               wm.disp_clk = mode->clock;
-               wm.src_width = mode->crtc_hdisplay;
-               wm.active_time = mode->crtc_hdisplay * pixel_period;
-               wm.blank_time = line_time - wm.active_time;
-               wm.interlaced = false;
+               /* watermark for high clocks */
+               if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
+                   rdev->pm.dpm_enabled) {
+                       wm_high.yclk =
+                               radeon_dpm_get_mclk(rdev, false) * 10;
+                       wm_high.sclk =
+                               radeon_dpm_get_sclk(rdev, false) * 10;
+               } else {
+                       wm_high.yclk = rdev->pm.current_mclk * 10;
+                       wm_high.sclk = rdev->pm.current_sclk * 10;
+               }
+
+               wm_high.disp_clk = mode->clock;
+               wm_high.src_width = mode->crtc_hdisplay;
+               wm_high.active_time = mode->crtc_hdisplay * pixel_period;
+               wm_high.blank_time = line_time - wm_high.active_time;
+               wm_high.interlaced = false;
                if (mode->flags & DRM_MODE_FLAG_INTERLACE)
-                       wm.interlaced = true;
-               wm.vsc = radeon_crtc->vsc;
-               wm.vtaps = 1;
+                       wm_high.interlaced = true;
+               wm_high.vsc = radeon_crtc->vsc;
+               wm_high.vtaps = 1;
                if (radeon_crtc->rmx_type != RMX_OFF)
-                       wm.vtaps = 2;
-               wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
-               wm.lb_size = lb_size;
-               wm.dram_channels = cik_get_number_of_dram_channels(rdev);
-               wm.num_heads = num_heads;
+                       wm_high.vtaps = 2;
+               wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
+               wm_high.lb_size = lb_size;
+               wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
+               wm_high.num_heads = num_heads;
 
                /* set for high clocks */
-               latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
+               latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
+
+               /* possibly force display priority to high */
+               /* should really do this at mode validation time... */
+               if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
+                   !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
+                   !dce8_check_latency_hiding(&wm_high) ||
+                   (rdev->disp_priority == 2)) {
+                       DRM_DEBUG_KMS("force priority to high\n");
+               }
+
+               /* watermark for low clocks */
+               if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
+                   rdev->pm.dpm_enabled) {
+                       wm_low.yclk =
+                               radeon_dpm_get_mclk(rdev, true) * 10;
+                       wm_low.sclk =
+                               radeon_dpm_get_sclk(rdev, true) * 10;
+               } else {
+                       wm_low.yclk = rdev->pm.current_mclk * 10;
+                       wm_low.sclk = rdev->pm.current_sclk * 10;
+               }
+
+               wm_low.disp_clk = mode->clock;
+               wm_low.src_width = mode->crtc_hdisplay;
+               wm_low.active_time = mode->crtc_hdisplay * pixel_period;
+               wm_low.blank_time = line_time - wm_low.active_time;
+               wm_low.interlaced = false;
+               if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+                       wm_low.interlaced = true;
+               wm_low.vsc = radeon_crtc->vsc;
+               wm_low.vtaps = 1;
+               if (radeon_crtc->rmx_type != RMX_OFF)
+                       wm_low.vtaps = 2;
+               wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
+               wm_low.lb_size = lb_size;
+               wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
+               wm_low.num_heads = num_heads;
+
                /* set for low clocks */
-               /* wm.yclk = low clk; wm.sclk = low clk */
-               latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
+               latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
 
                /* possibly force display priority to high */
                /* should really do this at mode validation time... */
-               if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
-                   !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
-                   !dce8_check_latency_hiding(&wm) ||
+               if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
+                   !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
+                   !dce8_check_latency_hiding(&wm_low) ||
                    (rdev->disp_priority == 2)) {
                        DRM_DEBUG_KMS("force priority to high\n");
                }
@@ -6889,6 +7967,11 @@ static void dce8_program_watermarks(struct radeon_device *rdev,
                LATENCY_HIGH_WATERMARK(line_time)));
        /* restore original selection */
        WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
+
+       /* save values for DPM */
+       radeon_crtc->line_time = line_time;
+       radeon_crtc->wm_high = latency_watermark_a;
+       radeon_crtc->wm_low = latency_watermark_b;
 }
 
 /**
@@ -6978,39 +8061,307 @@ int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
        return r;
 }
 
-int cik_uvd_resume(struct radeon_device *rdev)
+/**
+ * cik_pcie_gen3_enable - try to raise the PCIE link to gen2/gen3 speed
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Does nothing when radeon.pcie_gen2=0, on IGPs, on non-PCIE parts, when
+ * there is no upstream bridge, when the speed cap mask reports neither
+ * 5.0 nor 8.0 GT/s, or when the link already runs at the target rate.
+ * For gen3 the equalization sequence is retried (up to 10 times) before
+ * the target link speed is written to LNKCTL2 and a software initiated
+ * speed change is requested; completion is polled for at most
+ * rdev->usec_timeout microseconds.
+ */
+static void cik_pcie_gen3_enable(struct radeon_device *rdev)
 {
-       uint64_t addr;
-       uint32_t size;
-       int r;
+       struct pci_dev *root = rdev->pdev->bus->self;
+       int bridge_pos, gpu_pos;
+       u32 speed_cntl, mask, current_data_rate;
+       int ret, i;
+       u16 tmp16;
 
-       r = radeon_uvd_resume(rdev);
-       if (r)
-               return r;
+       if (radeon_pcie_gen2 == 0)
+               return;
 
-       /* programm the VCPU memory controller bits 0-27 */
-       addr = rdev->uvd.gpu_addr >> 3;
-       size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
-       WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
-       WREG32(UVD_VCPU_CACHE_SIZE0, size);
+       if (rdev->flags & RADEON_IS_IGP)
+               return;
 
-       addr += size;
-       size = RADEON_UVD_STACK_SIZE >> 3;
-       WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
-       WREG32(UVD_VCPU_CACHE_SIZE1, size);
+       if (!(rdev->flags & RADEON_IS_PCIE))
+               return;
+
+       /*
+        * bus->self is NULL when the GPU has no upstream bridge (e.g. it
+        * sits directly on a root bus); root is handed to pci_pcie_cap()
+        * and the config space accessors below, so bail out early.
+        */
+       if (!root)
+               return;
 
-       addr += size;
-       size = RADEON_UVD_HEAP_SIZE >> 3;
-       WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
-       WREG32(UVD_VCPU_CACHE_SIZE2, size);
+       ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
+       if (ret != 0)
+               return;
 
-       /* bits 28-31 */
-       addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
-       WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
+       if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
+               return;
 
-       /* bits 32-39 */
-       addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
-       WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
+       speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+       current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
+               LC_CURRENT_DATA_RATE_SHIFT;
+       if (mask & DRM_PCIE_SPEED_80) {
+               if (current_data_rate == 2) {
+                       DRM_INFO("PCIE gen 3 link speeds already enabled\n");
+                       return;
+               }
+               DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
+       } else if (mask & DRM_PCIE_SPEED_50) {
+               if (current_data_rate == 1) {
+                       DRM_INFO("PCIE gen 2 link speeds already enabled\n");
+                       return;
+               }
+               DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
+       }
 
-       return 0;
+       bridge_pos = pci_pcie_cap(root);
+       if (!bridge_pos)
+               return;
+
+       gpu_pos = pci_pcie_cap(rdev->pdev);
+       if (!gpu_pos)
+               return;
+
+       if (mask & DRM_PCIE_SPEED_80) {
+               /* re-try equalization if gen3 is not already enabled */
+               if (current_data_rate != 2) {
+                       u16 bridge_cfg, gpu_cfg;
+                       u16 bridge_cfg2, gpu_cfg2;
+                       u32 max_lw, current_lw, tmp;
+
+                       pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
+                       pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+
+                       tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
+                       pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+
+                       tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
+                       pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+
+                       tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
+                       max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
+                       current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
+
+                       /* ask for the full detected width if the link came up narrower */
+                       if (current_lw < max_lw) {
+                               tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+                               if (tmp & LC_RENEGOTIATION_SUPPORT) {
+                                       tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
+                                       tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
+                                       tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
+                                       WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
+                               }
+                       }
+
+                       for (i = 0; i < 10; i++) {
+                               /* check status */
+                               pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+                               if (tmp16 & PCI_EXP_DEVSTA_TRPND)
+                                       break;
+
+                               /* snapshot link control so it can be restored after the redo */
+                               pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
+                               pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+
+                               pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
+                               pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+
+                               tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+                               tmp |= LC_SET_QUIESCE;
+                               WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+
+                               tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+                               tmp |= LC_REDO_EQ;
+                               WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+
+                               mdelay(100);
+
+                               /* linkctl */
+                               pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+                               tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+                               tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
+                               pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+
+                               pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+                               tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+                               tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
+                               pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+
+                               /* linkctl2 */
+                               pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
+                               tmp16 &= ~((1 << 4) | (7 << 9));
+                               tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
+                               pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
+
+                               pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
+                               tmp16 &= ~((1 << 4) | (7 << 9));
+                               tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
+                               pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+
+                               tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+                               tmp &= ~LC_SET_QUIESCE;
+                               WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+                       }
+               }
+       }
+
+       /* set the link speed */
+       speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
+       speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
+       WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+
+       /* low nibble of LNKCTL2 selects the target link speed */
+       pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
+       tmp16 &= ~0xf;
+       if (mask & DRM_PCIE_SPEED_80)
+               tmp16 |= 3; /* gen3 */
+       else if (mask & DRM_PCIE_SPEED_50)
+               tmp16 |= 2; /* gen2 */
+       else
+               tmp16 |= 1; /* gen1 */
+       pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+
+       speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+       speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
+       WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+
+       /* wait for the initiate bit to read back as clear */
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+               if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
+                       break;
+               udelay(1);
+       }
+}
+
+/**
+ * cik_program_aspm - program the PCIE ASPM (L0s/L1) related registers
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Does nothing when radeon.aspm=0, on IGPs or on non-PCIE parts.
+ * Otherwise sets up the L0s/L1 inactivity timers, the PLL power states
+ * used while in L1 and, when the upstream bridge advertises clock power
+ * management in its link capabilities, the clock selections needed to
+ * allow power down in L1/L23.  The disable_* locals are compile-time
+ * switches that are all false here.
+ */
+static void cik_program_aspm(struct radeon_device *rdev)
+{
+       u32 data, orig;
+       bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
+       bool disable_clkreq = false;
+
+       if (radeon_aspm == 0)
+               return;
+
+       /* XXX double check IGPs */
+       if (rdev->flags & RADEON_IS_IGP)
+               return;
+
+       if (!(rdev->flags & RADEON_IS_PCIE))
+               return;
+
+       orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
+       data &= ~LC_XMIT_N_FTS_MASK;
+       data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
+       if (orig != data)
+               WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
+
+       orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
+       data |= LC_GO_TO_RECOVERY;
+       if (orig != data)
+               WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
+
+       orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
+       data |= P_IGNORE_EDB_ERR;
+       if (orig != data)
+               WREG32_PCIE_PORT(PCIE_P_CNTL, data);
+
+       /* PCIE_LC_CNTL is written in one of the two branches below */
+       orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
+       data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
+       data |= LC_PMI_TO_L1_DIS;
+       if (!disable_l0s)
+               data |= LC_L0S_INACTIVITY(7);
+
+       if (!disable_l1) {
+               data |= LC_L1_INACTIVITY(7);
+               data &= ~LC_PMI_TO_L1_DIS;
+               if (orig != data)
+                       WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+
+               if (!disable_plloff_in_l1) {
+                       struct pci_dev *root = rdev->pdev->bus->self;
+                       bool clk_req_support = false;
+
+                       orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
+                       data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
+                       data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+                       if (orig != data)
+                               WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
+
+                       orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
+                       data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
+                       data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+                       if (orig != data)
+                               WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
+
+                       orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
+                       data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
+                       data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+                       if (orig != data)
+                               WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
+
+                       orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
+                       data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
+                       data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+                       if (orig != data)
+                               WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
+
+                       orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+                       data &= ~LC_DYN_LANES_PWR_STATE_MASK;
+                       data |= LC_DYN_LANES_PWR_STATE(3);
+                       if (orig != data)
+                               WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
+
+                       /*
+                        * Clock PM support is read from the upstream bridge.
+                        * bus->self is NULL when there is no such bridge
+                        * (e.g. device on a root bus); leave CLKREQ# off
+                        * then instead of dereferencing a NULL pci_dev.
+                        */
+                       if (!disable_clkreq && root) {
+                               u32 lnkcap = 0;
+
+                               pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
+                               if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
+                                       clk_req_support = true;
+                       }
+
+                       if (clk_req_support) {
+                               orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
+                               data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
+                               if (orig != data)
+                                       WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
+
+                               orig = data = RREG32_SMC(THM_CLK_CNTL);
+                               data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
+                               data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
+                               if (orig != data)
+                                       WREG32_SMC(THM_CLK_CNTL, data);
+
+                               orig = data = RREG32_SMC(MISC_CLK_CTRL);
+                               data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
+                               data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
+                               if (orig != data)
+                                       WREG32_SMC(MISC_CLK_CTRL, data);
+
+                               orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
+                               data &= ~BCLK_AS_XCLK;
+                               if (orig != data)
+                                       WREG32_SMC(CG_CLKPIN_CNTL, data);
+
+                               orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
+                               data &= ~FORCE_BIF_REFCLK_EN;
+                               if (orig != data)
+                                       WREG32_SMC(CG_CLKPIN_CNTL_2, data);
+
+                               orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
+                               data &= ~MPLL_CLKOUT_SEL_MASK;
+                               data |= MPLL_CLKOUT_SEL(4);
+                               if (orig != data)
+                                       WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
+                       }
+               }
+       } else {
+               if (orig != data)
+                       WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+       }
+
+       orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
+       data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
+       if (orig != data)
+               WREG32_PCIE_PORT(PCIE_CNTL2, data);
+
+       if (!disable_l0s) {
+               /* clear the L0s inactivity timer again when N_FTS is saturated and both lane directions are reversed */
+               data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
+               if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
+                       data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
+                       if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
+                               orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
+                               data &= ~LC_L0S_INACTIVITY_MASK;
+                               if (orig != data)
+                                       WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+                       }
+               }
+       }
 }