cgroup: introduce cgroup_subsys->legacy_name
[cascardo/linux.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37
38 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
47
48 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
49 MODULE_FIRMWARE("radeon/bonaire_me.bin");
50 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
51 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
52 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
53 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
54 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
55 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
56
57 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
66
67 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
68 MODULE_FIRMWARE("radeon/hawaii_me.bin");
69 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
70 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
71 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
72 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
73 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
74 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
75
76 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
80 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
81 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
82
83 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
84 MODULE_FIRMWARE("radeon/kaveri_me.bin");
85 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
86 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
87 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
88 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
89 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
90
91 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
92 MODULE_FIRMWARE("radeon/KABINI_me.bin");
93 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
94 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
95 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
96 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
97
98 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
99 MODULE_FIRMWARE("radeon/kabini_me.bin");
100 MODULE_FIRMWARE("radeon/kabini_ce.bin");
101 MODULE_FIRMWARE("radeon/kabini_mec.bin");
102 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
103 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
104
105 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
109 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
110 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
111
112 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
113 MODULE_FIRMWARE("radeon/mullins_me.bin");
114 MODULE_FIRMWARE("radeon/mullins_ce.bin");
115 MODULE_FIRMWARE("radeon/mullins_mec.bin");
116 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
117 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
118
119 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
120 extern void r600_ih_ring_fini(struct radeon_device *rdev);
121 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
122 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
123 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
124 extern void sumo_rlc_fini(struct radeon_device *rdev);
125 extern int sumo_rlc_init(struct radeon_device *rdev);
126 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
127 extern void si_rlc_reset(struct radeon_device *rdev);
128 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
129 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
130 extern int cik_sdma_resume(struct radeon_device *rdev);
131 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
132 extern void cik_sdma_fini(struct radeon_device *rdev);
133 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
134 static void cik_rlc_stop(struct radeon_device *rdev);
135 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
136 static void cik_program_aspm(struct radeon_device *rdev);
137 static void cik_init_pg(struct radeon_device *rdev);
138 static void cik_init_cg(struct radeon_device *rdev);
139 static void cik_fini_pg(struct radeon_device *rdev);
140 static void cik_fini_cg(struct radeon_device *rdev);
141 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
142                                           bool enable);
143
144 /**
145  * cik_get_allowed_info_register - fetch the register for the info ioctl
146  *
147  * @rdev: radeon_device pointer
148  * @reg: register offset in bytes
149  * @val: register value
150  *
151  * Returns 0 for success or -EINVAL for an invalid register
152  *
153  */
154 int cik_get_allowed_info_register(struct radeon_device *rdev,
155                                   u32 reg, u32 *val)
156 {
157         switch (reg) {
158         case GRBM_STATUS:
159         case GRBM_STATUS2:
160         case GRBM_STATUS_SE0:
161         case GRBM_STATUS_SE1:
162         case GRBM_STATUS_SE2:
163         case GRBM_STATUS_SE3:
164         case SRBM_STATUS:
165         case SRBM_STATUS2:
166         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
167         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
168         case UVD_STATUS:
169         /* TODO VCE */
170                 *val = RREG32(reg);
171                 return 0;
172         default:
173                 return -EINVAL;
174         }
175 }
176
177 /*
178  * Indirect registers accessor
179  */
180 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
181 {
182         unsigned long flags;
183         u32 r;
184
185         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
186         WREG32(CIK_DIDT_IND_INDEX, (reg));
187         r = RREG32(CIK_DIDT_IND_DATA);
188         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
189         return r;
190 }
191
192 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
193 {
194         unsigned long flags;
195
196         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
197         WREG32(CIK_DIDT_IND_INDEX, (reg));
198         WREG32(CIK_DIDT_IND_DATA, (v));
199         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
200 }
201
202 /* get temperature in millidegrees */
203 int ci_get_temp(struct radeon_device *rdev)
204 {
205         u32 temp;
206         int actual_temp = 0;
207
208         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
209                 CTF_TEMP_SHIFT;
210
211         if (temp & 0x200)
212                 actual_temp = 255;
213         else
214                 actual_temp = temp & 0x1ff;
215
216         actual_temp = actual_temp * 1000;
217
218         return actual_temp;
219 }
220
221 /* get temperature in millidegrees */
222 int kv_get_temp(struct radeon_device *rdev)
223 {
224         u32 temp;
225         int actual_temp = 0;
226
227         temp = RREG32_SMC(0xC0300E0C);
228
229         if (temp)
230                 actual_temp = (temp / 8) - 49;
231         else
232                 actual_temp = 0;
233
234         actual_temp = actual_temp * 1000;
235
236         return actual_temp;
237 }
238
239 /*
240  * Indirect registers accessor
241  */
242 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
243 {
244         unsigned long flags;
245         u32 r;
246
247         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
248         WREG32(PCIE_INDEX, reg);
249         (void)RREG32(PCIE_INDEX);
250         r = RREG32(PCIE_DATA);
251         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
252         return r;
253 }
254
255 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
256 {
257         unsigned long flags;
258
259         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
260         WREG32(PCIE_INDEX, reg);
261         (void)RREG32(PCIE_INDEX);
262         WREG32(PCIE_DATA, v);
263         (void)RREG32(PCIE_DATA);
264         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
265 }
266
267 static const u32 spectre_rlc_save_restore_register_list[] =
268 {
269         (0x0e00 << 16) | (0xc12c >> 2),
270         0x00000000,
271         (0x0e00 << 16) | (0xc140 >> 2),
272         0x00000000,
273         (0x0e00 << 16) | (0xc150 >> 2),
274         0x00000000,
275         (0x0e00 << 16) | (0xc15c >> 2),
276         0x00000000,
277         (0x0e00 << 16) | (0xc168 >> 2),
278         0x00000000,
279         (0x0e00 << 16) | (0xc170 >> 2),
280         0x00000000,
281         (0x0e00 << 16) | (0xc178 >> 2),
282         0x00000000,
283         (0x0e00 << 16) | (0xc204 >> 2),
284         0x00000000,
285         (0x0e00 << 16) | (0xc2b4 >> 2),
286         0x00000000,
287         (0x0e00 << 16) | (0xc2b8 >> 2),
288         0x00000000,
289         (0x0e00 << 16) | (0xc2bc >> 2),
290         0x00000000,
291         (0x0e00 << 16) | (0xc2c0 >> 2),
292         0x00000000,
293         (0x0e00 << 16) | (0x8228 >> 2),
294         0x00000000,
295         (0x0e00 << 16) | (0x829c >> 2),
296         0x00000000,
297         (0x0e00 << 16) | (0x869c >> 2),
298         0x00000000,
299         (0x0600 << 16) | (0x98f4 >> 2),
300         0x00000000,
301         (0x0e00 << 16) | (0x98f8 >> 2),
302         0x00000000,
303         (0x0e00 << 16) | (0x9900 >> 2),
304         0x00000000,
305         (0x0e00 << 16) | (0xc260 >> 2),
306         0x00000000,
307         (0x0e00 << 16) | (0x90e8 >> 2),
308         0x00000000,
309         (0x0e00 << 16) | (0x3c000 >> 2),
310         0x00000000,
311         (0x0e00 << 16) | (0x3c00c >> 2),
312         0x00000000,
313         (0x0e00 << 16) | (0x8c1c >> 2),
314         0x00000000,
315         (0x0e00 << 16) | (0x9700 >> 2),
316         0x00000000,
317         (0x0e00 << 16) | (0xcd20 >> 2),
318         0x00000000,
319         (0x4e00 << 16) | (0xcd20 >> 2),
320         0x00000000,
321         (0x5e00 << 16) | (0xcd20 >> 2),
322         0x00000000,
323         (0x6e00 << 16) | (0xcd20 >> 2),
324         0x00000000,
325         (0x7e00 << 16) | (0xcd20 >> 2),
326         0x00000000,
327         (0x8e00 << 16) | (0xcd20 >> 2),
328         0x00000000,
329         (0x9e00 << 16) | (0xcd20 >> 2),
330         0x00000000,
331         (0xae00 << 16) | (0xcd20 >> 2),
332         0x00000000,
333         (0xbe00 << 16) | (0xcd20 >> 2),
334         0x00000000,
335         (0x0e00 << 16) | (0x89bc >> 2),
336         0x00000000,
337         (0x0e00 << 16) | (0x8900 >> 2),
338         0x00000000,
339         0x3,
340         (0x0e00 << 16) | (0xc130 >> 2),
341         0x00000000,
342         (0x0e00 << 16) | (0xc134 >> 2),
343         0x00000000,
344         (0x0e00 << 16) | (0xc1fc >> 2),
345         0x00000000,
346         (0x0e00 << 16) | (0xc208 >> 2),
347         0x00000000,
348         (0x0e00 << 16) | (0xc264 >> 2),
349         0x00000000,
350         (0x0e00 << 16) | (0xc268 >> 2),
351         0x00000000,
352         (0x0e00 << 16) | (0xc26c >> 2),
353         0x00000000,
354         (0x0e00 << 16) | (0xc270 >> 2),
355         0x00000000,
356         (0x0e00 << 16) | (0xc274 >> 2),
357         0x00000000,
358         (0x0e00 << 16) | (0xc278 >> 2),
359         0x00000000,
360         (0x0e00 << 16) | (0xc27c >> 2),
361         0x00000000,
362         (0x0e00 << 16) | (0xc280 >> 2),
363         0x00000000,
364         (0x0e00 << 16) | (0xc284 >> 2),
365         0x00000000,
366         (0x0e00 << 16) | (0xc288 >> 2),
367         0x00000000,
368         (0x0e00 << 16) | (0xc28c >> 2),
369         0x00000000,
370         (0x0e00 << 16) | (0xc290 >> 2),
371         0x00000000,
372         (0x0e00 << 16) | (0xc294 >> 2),
373         0x00000000,
374         (0x0e00 << 16) | (0xc298 >> 2),
375         0x00000000,
376         (0x0e00 << 16) | (0xc29c >> 2),
377         0x00000000,
378         (0x0e00 << 16) | (0xc2a0 >> 2),
379         0x00000000,
380         (0x0e00 << 16) | (0xc2a4 >> 2),
381         0x00000000,
382         (0x0e00 << 16) | (0xc2a8 >> 2),
383         0x00000000,
384         (0x0e00 << 16) | (0xc2ac  >> 2),
385         0x00000000,
386         (0x0e00 << 16) | (0xc2b0 >> 2),
387         0x00000000,
388         (0x0e00 << 16) | (0x301d0 >> 2),
389         0x00000000,
390         (0x0e00 << 16) | (0x30238 >> 2),
391         0x00000000,
392         (0x0e00 << 16) | (0x30250 >> 2),
393         0x00000000,
394         (0x0e00 << 16) | (0x30254 >> 2),
395         0x00000000,
396         (0x0e00 << 16) | (0x30258 >> 2),
397         0x00000000,
398         (0x0e00 << 16) | (0x3025c >> 2),
399         0x00000000,
400         (0x4e00 << 16) | (0xc900 >> 2),
401         0x00000000,
402         (0x5e00 << 16) | (0xc900 >> 2),
403         0x00000000,
404         (0x6e00 << 16) | (0xc900 >> 2),
405         0x00000000,
406         (0x7e00 << 16) | (0xc900 >> 2),
407         0x00000000,
408         (0x8e00 << 16) | (0xc900 >> 2),
409         0x00000000,
410         (0x9e00 << 16) | (0xc900 >> 2),
411         0x00000000,
412         (0xae00 << 16) | (0xc900 >> 2),
413         0x00000000,
414         (0xbe00 << 16) | (0xc900 >> 2),
415         0x00000000,
416         (0x4e00 << 16) | (0xc904 >> 2),
417         0x00000000,
418         (0x5e00 << 16) | (0xc904 >> 2),
419         0x00000000,
420         (0x6e00 << 16) | (0xc904 >> 2),
421         0x00000000,
422         (0x7e00 << 16) | (0xc904 >> 2),
423         0x00000000,
424         (0x8e00 << 16) | (0xc904 >> 2),
425         0x00000000,
426         (0x9e00 << 16) | (0xc904 >> 2),
427         0x00000000,
428         (0xae00 << 16) | (0xc904 >> 2),
429         0x00000000,
430         (0xbe00 << 16) | (0xc904 >> 2),
431         0x00000000,
432         (0x4e00 << 16) | (0xc908 >> 2),
433         0x00000000,
434         (0x5e00 << 16) | (0xc908 >> 2),
435         0x00000000,
436         (0x6e00 << 16) | (0xc908 >> 2),
437         0x00000000,
438         (0x7e00 << 16) | (0xc908 >> 2),
439         0x00000000,
440         (0x8e00 << 16) | (0xc908 >> 2),
441         0x00000000,
442         (0x9e00 << 16) | (0xc908 >> 2),
443         0x00000000,
444         (0xae00 << 16) | (0xc908 >> 2),
445         0x00000000,
446         (0xbe00 << 16) | (0xc908 >> 2),
447         0x00000000,
448         (0x4e00 << 16) | (0xc90c >> 2),
449         0x00000000,
450         (0x5e00 << 16) | (0xc90c >> 2),
451         0x00000000,
452         (0x6e00 << 16) | (0xc90c >> 2),
453         0x00000000,
454         (0x7e00 << 16) | (0xc90c >> 2),
455         0x00000000,
456         (0x8e00 << 16) | (0xc90c >> 2),
457         0x00000000,
458         (0x9e00 << 16) | (0xc90c >> 2),
459         0x00000000,
460         (0xae00 << 16) | (0xc90c >> 2),
461         0x00000000,
462         (0xbe00 << 16) | (0xc90c >> 2),
463         0x00000000,
464         (0x4e00 << 16) | (0xc910 >> 2),
465         0x00000000,
466         (0x5e00 << 16) | (0xc910 >> 2),
467         0x00000000,
468         (0x6e00 << 16) | (0xc910 >> 2),
469         0x00000000,
470         (0x7e00 << 16) | (0xc910 >> 2),
471         0x00000000,
472         (0x8e00 << 16) | (0xc910 >> 2),
473         0x00000000,
474         (0x9e00 << 16) | (0xc910 >> 2),
475         0x00000000,
476         (0xae00 << 16) | (0xc910 >> 2),
477         0x00000000,
478         (0xbe00 << 16) | (0xc910 >> 2),
479         0x00000000,
480         (0x0e00 << 16) | (0xc99c >> 2),
481         0x00000000,
482         (0x0e00 << 16) | (0x9834 >> 2),
483         0x00000000,
484         (0x0000 << 16) | (0x30f00 >> 2),
485         0x00000000,
486         (0x0001 << 16) | (0x30f00 >> 2),
487         0x00000000,
488         (0x0000 << 16) | (0x30f04 >> 2),
489         0x00000000,
490         (0x0001 << 16) | (0x30f04 >> 2),
491         0x00000000,
492         (0x0000 << 16) | (0x30f08 >> 2),
493         0x00000000,
494         (0x0001 << 16) | (0x30f08 >> 2),
495         0x00000000,
496         (0x0000 << 16) | (0x30f0c >> 2),
497         0x00000000,
498         (0x0001 << 16) | (0x30f0c >> 2),
499         0x00000000,
500         (0x0600 << 16) | (0x9b7c >> 2),
501         0x00000000,
502         (0x0e00 << 16) | (0x8a14 >> 2),
503         0x00000000,
504         (0x0e00 << 16) | (0x8a18 >> 2),
505         0x00000000,
506         (0x0600 << 16) | (0x30a00 >> 2),
507         0x00000000,
508         (0x0e00 << 16) | (0x8bf0 >> 2),
509         0x00000000,
510         (0x0e00 << 16) | (0x8bcc >> 2),
511         0x00000000,
512         (0x0e00 << 16) | (0x8b24 >> 2),
513         0x00000000,
514         (0x0e00 << 16) | (0x30a04 >> 2),
515         0x00000000,
516         (0x0600 << 16) | (0x30a10 >> 2),
517         0x00000000,
518         (0x0600 << 16) | (0x30a14 >> 2),
519         0x00000000,
520         (0x0600 << 16) | (0x30a18 >> 2),
521         0x00000000,
522         (0x0600 << 16) | (0x30a2c >> 2),
523         0x00000000,
524         (0x0e00 << 16) | (0xc700 >> 2),
525         0x00000000,
526         (0x0e00 << 16) | (0xc704 >> 2),
527         0x00000000,
528         (0x0e00 << 16) | (0xc708 >> 2),
529         0x00000000,
530         (0x0e00 << 16) | (0xc768 >> 2),
531         0x00000000,
532         (0x0400 << 16) | (0xc770 >> 2),
533         0x00000000,
534         (0x0400 << 16) | (0xc774 >> 2),
535         0x00000000,
536         (0x0400 << 16) | (0xc778 >> 2),
537         0x00000000,
538         (0x0400 << 16) | (0xc77c >> 2),
539         0x00000000,
540         (0x0400 << 16) | (0xc780 >> 2),
541         0x00000000,
542         (0x0400 << 16) | (0xc784 >> 2),
543         0x00000000,
544         (0x0400 << 16) | (0xc788 >> 2),
545         0x00000000,
546         (0x0400 << 16) | (0xc78c >> 2),
547         0x00000000,
548         (0x0400 << 16) | (0xc798 >> 2),
549         0x00000000,
550         (0x0400 << 16) | (0xc79c >> 2),
551         0x00000000,
552         (0x0400 << 16) | (0xc7a0 >> 2),
553         0x00000000,
554         (0x0400 << 16) | (0xc7a4 >> 2),
555         0x00000000,
556         (0x0400 << 16) | (0xc7a8 >> 2),
557         0x00000000,
558         (0x0400 << 16) | (0xc7ac >> 2),
559         0x00000000,
560         (0x0400 << 16) | (0xc7b0 >> 2),
561         0x00000000,
562         (0x0400 << 16) | (0xc7b4 >> 2),
563         0x00000000,
564         (0x0e00 << 16) | (0x9100 >> 2),
565         0x00000000,
566         (0x0e00 << 16) | (0x3c010 >> 2),
567         0x00000000,
568         (0x0e00 << 16) | (0x92a8 >> 2),
569         0x00000000,
570         (0x0e00 << 16) | (0x92ac >> 2),
571         0x00000000,
572         (0x0e00 << 16) | (0x92b4 >> 2),
573         0x00000000,
574         (0x0e00 << 16) | (0x92b8 >> 2),
575         0x00000000,
576         (0x0e00 << 16) | (0x92bc >> 2),
577         0x00000000,
578         (0x0e00 << 16) | (0x92c0 >> 2),
579         0x00000000,
580         (0x0e00 << 16) | (0x92c4 >> 2),
581         0x00000000,
582         (0x0e00 << 16) | (0x92c8 >> 2),
583         0x00000000,
584         (0x0e00 << 16) | (0x92cc >> 2),
585         0x00000000,
586         (0x0e00 << 16) | (0x92d0 >> 2),
587         0x00000000,
588         (0x0e00 << 16) | (0x8c00 >> 2),
589         0x00000000,
590         (0x0e00 << 16) | (0x8c04 >> 2),
591         0x00000000,
592         (0x0e00 << 16) | (0x8c20 >> 2),
593         0x00000000,
594         (0x0e00 << 16) | (0x8c38 >> 2),
595         0x00000000,
596         (0x0e00 << 16) | (0x8c3c >> 2),
597         0x00000000,
598         (0x0e00 << 16) | (0xae00 >> 2),
599         0x00000000,
600         (0x0e00 << 16) | (0x9604 >> 2),
601         0x00000000,
602         (0x0e00 << 16) | (0xac08 >> 2),
603         0x00000000,
604         (0x0e00 << 16) | (0xac0c >> 2),
605         0x00000000,
606         (0x0e00 << 16) | (0xac10 >> 2),
607         0x00000000,
608         (0x0e00 << 16) | (0xac14 >> 2),
609         0x00000000,
610         (0x0e00 << 16) | (0xac58 >> 2),
611         0x00000000,
612         (0x0e00 << 16) | (0xac68 >> 2),
613         0x00000000,
614         (0x0e00 << 16) | (0xac6c >> 2),
615         0x00000000,
616         (0x0e00 << 16) | (0xac70 >> 2),
617         0x00000000,
618         (0x0e00 << 16) | (0xac74 >> 2),
619         0x00000000,
620         (0x0e00 << 16) | (0xac78 >> 2),
621         0x00000000,
622         (0x0e00 << 16) | (0xac7c >> 2),
623         0x00000000,
624         (0x0e00 << 16) | (0xac80 >> 2),
625         0x00000000,
626         (0x0e00 << 16) | (0xac84 >> 2),
627         0x00000000,
628         (0x0e00 << 16) | (0xac88 >> 2),
629         0x00000000,
630         (0x0e00 << 16) | (0xac8c >> 2),
631         0x00000000,
632         (0x0e00 << 16) | (0x970c >> 2),
633         0x00000000,
634         (0x0e00 << 16) | (0x9714 >> 2),
635         0x00000000,
636         (0x0e00 << 16) | (0x9718 >> 2),
637         0x00000000,
638         (0x0e00 << 16) | (0x971c >> 2),
639         0x00000000,
640         (0x0e00 << 16) | (0x31068 >> 2),
641         0x00000000,
642         (0x4e00 << 16) | (0x31068 >> 2),
643         0x00000000,
644         (0x5e00 << 16) | (0x31068 >> 2),
645         0x00000000,
646         (0x6e00 << 16) | (0x31068 >> 2),
647         0x00000000,
648         (0x7e00 << 16) | (0x31068 >> 2),
649         0x00000000,
650         (0x8e00 << 16) | (0x31068 >> 2),
651         0x00000000,
652         (0x9e00 << 16) | (0x31068 >> 2),
653         0x00000000,
654         (0xae00 << 16) | (0x31068 >> 2),
655         0x00000000,
656         (0xbe00 << 16) | (0x31068 >> 2),
657         0x00000000,
658         (0x0e00 << 16) | (0xcd10 >> 2),
659         0x00000000,
660         (0x0e00 << 16) | (0xcd14 >> 2),
661         0x00000000,
662         (0x0e00 << 16) | (0x88b0 >> 2),
663         0x00000000,
664         (0x0e00 << 16) | (0x88b4 >> 2),
665         0x00000000,
666         (0x0e00 << 16) | (0x88b8 >> 2),
667         0x00000000,
668         (0x0e00 << 16) | (0x88bc >> 2),
669         0x00000000,
670         (0x0400 << 16) | (0x89c0 >> 2),
671         0x00000000,
672         (0x0e00 << 16) | (0x88c4 >> 2),
673         0x00000000,
674         (0x0e00 << 16) | (0x88c8 >> 2),
675         0x00000000,
676         (0x0e00 << 16) | (0x88d0 >> 2),
677         0x00000000,
678         (0x0e00 << 16) | (0x88d4 >> 2),
679         0x00000000,
680         (0x0e00 << 16) | (0x88d8 >> 2),
681         0x00000000,
682         (0x0e00 << 16) | (0x8980 >> 2),
683         0x00000000,
684         (0x0e00 << 16) | (0x30938 >> 2),
685         0x00000000,
686         (0x0e00 << 16) | (0x3093c >> 2),
687         0x00000000,
688         (0x0e00 << 16) | (0x30940 >> 2),
689         0x00000000,
690         (0x0e00 << 16) | (0x89a0 >> 2),
691         0x00000000,
692         (0x0e00 << 16) | (0x30900 >> 2),
693         0x00000000,
694         (0x0e00 << 16) | (0x30904 >> 2),
695         0x00000000,
696         (0x0e00 << 16) | (0x89b4 >> 2),
697         0x00000000,
698         (0x0e00 << 16) | (0x3c210 >> 2),
699         0x00000000,
700         (0x0e00 << 16) | (0x3c214 >> 2),
701         0x00000000,
702         (0x0e00 << 16) | (0x3c218 >> 2),
703         0x00000000,
704         (0x0e00 << 16) | (0x8904 >> 2),
705         0x00000000,
706         0x5,
707         (0x0e00 << 16) | (0x8c28 >> 2),
708         (0x0e00 << 16) | (0x8c2c >> 2),
709         (0x0e00 << 16) | (0x8c30 >> 2),
710         (0x0e00 << 16) | (0x8c34 >> 2),
711         (0x0e00 << 16) | (0x9600 >> 2),
712 };
713
714 static const u32 kalindi_rlc_save_restore_register_list[] =
715 {
716         (0x0e00 << 16) | (0xc12c >> 2),
717         0x00000000,
718         (0x0e00 << 16) | (0xc140 >> 2),
719         0x00000000,
720         (0x0e00 << 16) | (0xc150 >> 2),
721         0x00000000,
722         (0x0e00 << 16) | (0xc15c >> 2),
723         0x00000000,
724         (0x0e00 << 16) | (0xc168 >> 2),
725         0x00000000,
726         (0x0e00 << 16) | (0xc170 >> 2),
727         0x00000000,
728         (0x0e00 << 16) | (0xc204 >> 2),
729         0x00000000,
730         (0x0e00 << 16) | (0xc2b4 >> 2),
731         0x00000000,
732         (0x0e00 << 16) | (0xc2b8 >> 2),
733         0x00000000,
734         (0x0e00 << 16) | (0xc2bc >> 2),
735         0x00000000,
736         (0x0e00 << 16) | (0xc2c0 >> 2),
737         0x00000000,
738         (0x0e00 << 16) | (0x8228 >> 2),
739         0x00000000,
740         (0x0e00 << 16) | (0x829c >> 2),
741         0x00000000,
742         (0x0e00 << 16) | (0x869c >> 2),
743         0x00000000,
744         (0x0600 << 16) | (0x98f4 >> 2),
745         0x00000000,
746         (0x0e00 << 16) | (0x98f8 >> 2),
747         0x00000000,
748         (0x0e00 << 16) | (0x9900 >> 2),
749         0x00000000,
750         (0x0e00 << 16) | (0xc260 >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0x90e8 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0x3c000 >> 2),
755         0x00000000,
756         (0x0e00 << 16) | (0x3c00c >> 2),
757         0x00000000,
758         (0x0e00 << 16) | (0x8c1c >> 2),
759         0x00000000,
760         (0x0e00 << 16) | (0x9700 >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0xcd20 >> 2),
763         0x00000000,
764         (0x4e00 << 16) | (0xcd20 >> 2),
765         0x00000000,
766         (0x5e00 << 16) | (0xcd20 >> 2),
767         0x00000000,
768         (0x6e00 << 16) | (0xcd20 >> 2),
769         0x00000000,
770         (0x7e00 << 16) | (0xcd20 >> 2),
771         0x00000000,
772         (0x0e00 << 16) | (0x89bc >> 2),
773         0x00000000,
774         (0x0e00 << 16) | (0x8900 >> 2),
775         0x00000000,
776         0x3,
777         (0x0e00 << 16) | (0xc130 >> 2),
778         0x00000000,
779         (0x0e00 << 16) | (0xc134 >> 2),
780         0x00000000,
781         (0x0e00 << 16) | (0xc1fc >> 2),
782         0x00000000,
783         (0x0e00 << 16) | (0xc208 >> 2),
784         0x00000000,
785         (0x0e00 << 16) | (0xc264 >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc268 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc26c >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc270 >> 2),
792         0x00000000,
793         (0x0e00 << 16) | (0xc274 >> 2),
794         0x00000000,
795         (0x0e00 << 16) | (0xc28c >> 2),
796         0x00000000,
797         (0x0e00 << 16) | (0xc290 >> 2),
798         0x00000000,
799         (0x0e00 << 16) | (0xc294 >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0xc298 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0xc2a0 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0xc2a4 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0xc2a8 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0xc2ac >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0x301d0 >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0x30238 >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0x30250 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0x30254 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0x30258 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0x3025c >> 2),
822         0x00000000,
823         (0x4e00 << 16) | (0xc900 >> 2),
824         0x00000000,
825         (0x5e00 << 16) | (0xc900 >> 2),
826         0x00000000,
827         (0x6e00 << 16) | (0xc900 >> 2),
828         0x00000000,
829         (0x7e00 << 16) | (0xc900 >> 2),
830         0x00000000,
831         (0x4e00 << 16) | (0xc904 >> 2),
832         0x00000000,
833         (0x5e00 << 16) | (0xc904 >> 2),
834         0x00000000,
835         (0x6e00 << 16) | (0xc904 >> 2),
836         0x00000000,
837         (0x7e00 << 16) | (0xc904 >> 2),
838         0x00000000,
839         (0x4e00 << 16) | (0xc908 >> 2),
840         0x00000000,
841         (0x5e00 << 16) | (0xc908 >> 2),
842         0x00000000,
843         (0x6e00 << 16) | (0xc908 >> 2),
844         0x00000000,
845         (0x7e00 << 16) | (0xc908 >> 2),
846         0x00000000,
847         (0x4e00 << 16) | (0xc90c >> 2),
848         0x00000000,
849         (0x5e00 << 16) | (0xc90c >> 2),
850         0x00000000,
851         (0x6e00 << 16) | (0xc90c >> 2),
852         0x00000000,
853         (0x7e00 << 16) | (0xc90c >> 2),
854         0x00000000,
855         (0x4e00 << 16) | (0xc910 >> 2),
856         0x00000000,
857         (0x5e00 << 16) | (0xc910 >> 2),
858         0x00000000,
859         (0x6e00 << 16) | (0xc910 >> 2),
860         0x00000000,
861         (0x7e00 << 16) | (0xc910 >> 2),
862         0x00000000,
863         (0x0e00 << 16) | (0xc99c >> 2),
864         0x00000000,
865         (0x0e00 << 16) | (0x9834 >> 2),
866         0x00000000,
867         (0x0000 << 16) | (0x30f00 >> 2),
868         0x00000000,
869         (0x0000 << 16) | (0x30f04 >> 2),
870         0x00000000,
871         (0x0000 << 16) | (0x30f08 >> 2),
872         0x00000000,
873         (0x0000 << 16) | (0x30f0c >> 2),
874         0x00000000,
875         (0x0600 << 16) | (0x9b7c >> 2),
876         0x00000000,
877         (0x0e00 << 16) | (0x8a14 >> 2),
878         0x00000000,
879         (0x0e00 << 16) | (0x8a18 >> 2),
880         0x00000000,
881         (0x0600 << 16) | (0x30a00 >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0x8bf0 >> 2),
884         0x00000000,
885         (0x0e00 << 16) | (0x8bcc >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x8b24 >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x30a04 >> 2),
890         0x00000000,
891         (0x0600 << 16) | (0x30a10 >> 2),
892         0x00000000,
893         (0x0600 << 16) | (0x30a14 >> 2),
894         0x00000000,
895         (0x0600 << 16) | (0x30a18 >> 2),
896         0x00000000,
897         (0x0600 << 16) | (0x30a2c >> 2),
898         0x00000000,
899         (0x0e00 << 16) | (0xc700 >> 2),
900         0x00000000,
901         (0x0e00 << 16) | (0xc704 >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0xc708 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0xc768 >> 2),
906         0x00000000,
907         (0x0400 << 16) | (0xc770 >> 2),
908         0x00000000,
909         (0x0400 << 16) | (0xc774 >> 2),
910         0x00000000,
911         (0x0400 << 16) | (0xc798 >> 2),
912         0x00000000,
913         (0x0400 << 16) | (0xc79c >> 2),
914         0x00000000,
915         (0x0e00 << 16) | (0x9100 >> 2),
916         0x00000000,
917         (0x0e00 << 16) | (0x3c010 >> 2),
918         0x00000000,
919         (0x0e00 << 16) | (0x8c00 >> 2),
920         0x00000000,
921         (0x0e00 << 16) | (0x8c04 >> 2),
922         0x00000000,
923         (0x0e00 << 16) | (0x8c20 >> 2),
924         0x00000000,
925         (0x0e00 << 16) | (0x8c38 >> 2),
926         0x00000000,
927         (0x0e00 << 16) | (0x8c3c >> 2),
928         0x00000000,
929         (0x0e00 << 16) | (0xae00 >> 2),
930         0x00000000,
931         (0x0e00 << 16) | (0x9604 >> 2),
932         0x00000000,
933         (0x0e00 << 16) | (0xac08 >> 2),
934         0x00000000,
935         (0x0e00 << 16) | (0xac0c >> 2),
936         0x00000000,
937         (0x0e00 << 16) | (0xac10 >> 2),
938         0x00000000,
939         (0x0e00 << 16) | (0xac14 >> 2),
940         0x00000000,
941         (0x0e00 << 16) | (0xac58 >> 2),
942         0x00000000,
943         (0x0e00 << 16) | (0xac68 >> 2),
944         0x00000000,
945         (0x0e00 << 16) | (0xac6c >> 2),
946         0x00000000,
947         (0x0e00 << 16) | (0xac70 >> 2),
948         0x00000000,
949         (0x0e00 << 16) | (0xac74 >> 2),
950         0x00000000,
951         (0x0e00 << 16) | (0xac78 >> 2),
952         0x00000000,
953         (0x0e00 << 16) | (0xac7c >> 2),
954         0x00000000,
955         (0x0e00 << 16) | (0xac80 >> 2),
956         0x00000000,
957         (0x0e00 << 16) | (0xac84 >> 2),
958         0x00000000,
959         (0x0e00 << 16) | (0xac88 >> 2),
960         0x00000000,
961         (0x0e00 << 16) | (0xac8c >> 2),
962         0x00000000,
963         (0x0e00 << 16) | (0x970c >> 2),
964         0x00000000,
965         (0x0e00 << 16) | (0x9714 >> 2),
966         0x00000000,
967         (0x0e00 << 16) | (0x9718 >> 2),
968         0x00000000,
969         (0x0e00 << 16) | (0x971c >> 2),
970         0x00000000,
971         (0x0e00 << 16) | (0x31068 >> 2),
972         0x00000000,
973         (0x4e00 << 16) | (0x31068 >> 2),
974         0x00000000,
975         (0x5e00 << 16) | (0x31068 >> 2),
976         0x00000000,
977         (0x6e00 << 16) | (0x31068 >> 2),
978         0x00000000,
979         (0x7e00 << 16) | (0x31068 >> 2),
980         0x00000000,
981         (0x0e00 << 16) | (0xcd10 >> 2),
982         0x00000000,
983         (0x0e00 << 16) | (0xcd14 >> 2),
984         0x00000000,
985         (0x0e00 << 16) | (0x88b0 >> 2),
986         0x00000000,
987         (0x0e00 << 16) | (0x88b4 >> 2),
988         0x00000000,
989         (0x0e00 << 16) | (0x88b8 >> 2),
990         0x00000000,
991         (0x0e00 << 16) | (0x88bc >> 2),
992         0x00000000,
993         (0x0400 << 16) | (0x89c0 >> 2),
994         0x00000000,
995         (0x0e00 << 16) | (0x88c4 >> 2),
996         0x00000000,
997         (0x0e00 << 16) | (0x88c8 >> 2),
998         0x00000000,
999         (0x0e00 << 16) | (0x88d0 >> 2),
1000         0x00000000,
1001         (0x0e00 << 16) | (0x88d4 >> 2),
1002         0x00000000,
1003         (0x0e00 << 16) | (0x88d8 >> 2),
1004         0x00000000,
1005         (0x0e00 << 16) | (0x8980 >> 2),
1006         0x00000000,
1007         (0x0e00 << 16) | (0x30938 >> 2),
1008         0x00000000,
1009         (0x0e00 << 16) | (0x3093c >> 2),
1010         0x00000000,
1011         (0x0e00 << 16) | (0x30940 >> 2),
1012         0x00000000,
1013         (0x0e00 << 16) | (0x89a0 >> 2),
1014         0x00000000,
1015         (0x0e00 << 16) | (0x30900 >> 2),
1016         0x00000000,
1017         (0x0e00 << 16) | (0x30904 >> 2),
1018         0x00000000,
1019         (0x0e00 << 16) | (0x89b4 >> 2),
1020         0x00000000,
1021         (0x0e00 << 16) | (0x3e1fc >> 2),
1022         0x00000000,
1023         (0x0e00 << 16) | (0x3c210 >> 2),
1024         0x00000000,
1025         (0x0e00 << 16) | (0x3c214 >> 2),
1026         0x00000000,
1027         (0x0e00 << 16) | (0x3c218 >> 2),
1028         0x00000000,
1029         (0x0e00 << 16) | (0x8904 >> 2),
1030         0x00000000,
1031         0x5,
1032         (0x0e00 << 16) | (0x8c28 >> 2),
1033         (0x0e00 << 16) | (0x8c2c >> 2),
1034         (0x0e00 << 16) | (0x8c30 >> 2),
1035         (0x0e00 << 16) | (0x8c34 >> 2),
1036         (0x0e00 << 16) | (0x9600 >> 2),
1037 };
1038
1039 static const u32 bonaire_golden_spm_registers[] =
1040 {
1041         0x30800, 0xe0ffffff, 0xe0000000
1042 };
1043
1044 static const u32 bonaire_golden_common_registers[] =
1045 {
1046         0xc770, 0xffffffff, 0x00000800,
1047         0xc774, 0xffffffff, 0x00000800,
1048         0xc798, 0xffffffff, 0x00007fbf,
1049         0xc79c, 0xffffffff, 0x00007faf
1050 };
1051
1052 static const u32 bonaire_golden_registers[] =
1053 {
1054         0x3354, 0x00000333, 0x00000333,
1055         0x3350, 0x000c0fc0, 0x00040200,
1056         0x9a10, 0x00010000, 0x00058208,
1057         0x3c000, 0xffff1fff, 0x00140000,
1058         0x3c200, 0xfdfc0fff, 0x00000100,
1059         0x3c234, 0x40000000, 0x40000200,
1060         0x9830, 0xffffffff, 0x00000000,
1061         0x9834, 0xf00fffff, 0x00000400,
1062         0x9838, 0x0002021c, 0x00020200,
1063         0xc78, 0x00000080, 0x00000000,
1064         0x5bb0, 0x000000f0, 0x00000070,
1065         0x5bc0, 0xf0311fff, 0x80300000,
1066         0x98f8, 0x73773777, 0x12010001,
1067         0x350c, 0x00810000, 0x408af000,
1068         0x7030, 0x31000111, 0x00000011,
1069         0x2f48, 0x73773777, 0x12010001,
1070         0x220c, 0x00007fb6, 0x0021a1b1,
1071         0x2210, 0x00007fb6, 0x002021b1,
1072         0x2180, 0x00007fb6, 0x00002191,
1073         0x2218, 0x00007fb6, 0x002121b1,
1074         0x221c, 0x00007fb6, 0x002021b1,
1075         0x21dc, 0x00007fb6, 0x00002191,
1076         0x21e0, 0x00007fb6, 0x00002191,
1077         0x3628, 0x0000003f, 0x0000000a,
1078         0x362c, 0x0000003f, 0x0000000a,
1079         0x2ae4, 0x00073ffe, 0x000022a2,
1080         0x240c, 0x000007ff, 0x00000000,
1081         0x8a14, 0xf000003f, 0x00000007,
1082         0x8bf0, 0x00002001, 0x00000001,
1083         0x8b24, 0xffffffff, 0x00ffffff,
1084         0x30a04, 0x0000ff0f, 0x00000000,
1085         0x28a4c, 0x07ffffff, 0x06000000,
1086         0x4d8, 0x00000fff, 0x00000100,
1087         0x3e78, 0x00000001, 0x00000002,
1088         0x9100, 0x03000000, 0x0362c688,
1089         0x8c00, 0x000000ff, 0x00000001,
1090         0xe40, 0x00001fff, 0x00001fff,
1091         0x9060, 0x0000007f, 0x00000020,
1092         0x9508, 0x00010000, 0x00010000,
1093         0xac14, 0x000003ff, 0x000000f3,
1094         0xac0c, 0xffffffff, 0x00001032
1095 };
1096
1097 static const u32 bonaire_mgcg_cgcg_init[] =
1098 {
1099         0xc420, 0xffffffff, 0xfffffffc,
1100         0x30800, 0xffffffff, 0xe0000000,
1101         0x3c2a0, 0xffffffff, 0x00000100,
1102         0x3c208, 0xffffffff, 0x00000100,
1103         0x3c2c0, 0xffffffff, 0xc0000100,
1104         0x3c2c8, 0xffffffff, 0xc0000100,
1105         0x3c2c4, 0xffffffff, 0xc0000100,
1106         0x55e4, 0xffffffff, 0x00600100,
1107         0x3c280, 0xffffffff, 0x00000100,
1108         0x3c214, 0xffffffff, 0x06000100,
1109         0x3c220, 0xffffffff, 0x00000100,
1110         0x3c218, 0xffffffff, 0x06000100,
1111         0x3c204, 0xffffffff, 0x00000100,
1112         0x3c2e0, 0xffffffff, 0x00000100,
1113         0x3c224, 0xffffffff, 0x00000100,
1114         0x3c200, 0xffffffff, 0x00000100,
1115         0x3c230, 0xffffffff, 0x00000100,
1116         0x3c234, 0xffffffff, 0x00000100,
1117         0x3c250, 0xffffffff, 0x00000100,
1118         0x3c254, 0xffffffff, 0x00000100,
1119         0x3c258, 0xffffffff, 0x00000100,
1120         0x3c25c, 0xffffffff, 0x00000100,
1121         0x3c260, 0xffffffff, 0x00000100,
1122         0x3c27c, 0xffffffff, 0x00000100,
1123         0x3c278, 0xffffffff, 0x00000100,
1124         0x3c210, 0xffffffff, 0x06000100,
1125         0x3c290, 0xffffffff, 0x00000100,
1126         0x3c274, 0xffffffff, 0x00000100,
1127         0x3c2b4, 0xffffffff, 0x00000100,
1128         0x3c2b0, 0xffffffff, 0x00000100,
1129         0x3c270, 0xffffffff, 0x00000100,
1130         0x30800, 0xffffffff, 0xe0000000,
1131         0x3c020, 0xffffffff, 0x00010000,
1132         0x3c024, 0xffffffff, 0x00030002,
1133         0x3c028, 0xffffffff, 0x00040007,
1134         0x3c02c, 0xffffffff, 0x00060005,
1135         0x3c030, 0xffffffff, 0x00090008,
1136         0x3c034, 0xffffffff, 0x00010000,
1137         0x3c038, 0xffffffff, 0x00030002,
1138         0x3c03c, 0xffffffff, 0x00040007,
1139         0x3c040, 0xffffffff, 0x00060005,
1140         0x3c044, 0xffffffff, 0x00090008,
1141         0x3c048, 0xffffffff, 0x00010000,
1142         0x3c04c, 0xffffffff, 0x00030002,
1143         0x3c050, 0xffffffff, 0x00040007,
1144         0x3c054, 0xffffffff, 0x00060005,
1145         0x3c058, 0xffffffff, 0x00090008,
1146         0x3c05c, 0xffffffff, 0x00010000,
1147         0x3c060, 0xffffffff, 0x00030002,
1148         0x3c064, 0xffffffff, 0x00040007,
1149         0x3c068, 0xffffffff, 0x00060005,
1150         0x3c06c, 0xffffffff, 0x00090008,
1151         0x3c070, 0xffffffff, 0x00010000,
1152         0x3c074, 0xffffffff, 0x00030002,
1153         0x3c078, 0xffffffff, 0x00040007,
1154         0x3c07c, 0xffffffff, 0x00060005,
1155         0x3c080, 0xffffffff, 0x00090008,
1156         0x3c084, 0xffffffff, 0x00010000,
1157         0x3c088, 0xffffffff, 0x00030002,
1158         0x3c08c, 0xffffffff, 0x00040007,
1159         0x3c090, 0xffffffff, 0x00060005,
1160         0x3c094, 0xffffffff, 0x00090008,
1161         0x3c098, 0xffffffff, 0x00010000,
1162         0x3c09c, 0xffffffff, 0x00030002,
1163         0x3c0a0, 0xffffffff, 0x00040007,
1164         0x3c0a4, 0xffffffff, 0x00060005,
1165         0x3c0a8, 0xffffffff, 0x00090008,
1166         0x3c000, 0xffffffff, 0x96e00200,
1167         0x8708, 0xffffffff, 0x00900100,
1168         0xc424, 0xffffffff, 0x0020003f,
1169         0x38, 0xffffffff, 0x0140001c,
1170         0x3c, 0x000f0000, 0x000f0000,
1171         0x220, 0xffffffff, 0xC060000C,
1172         0x224, 0xc0000fff, 0x00000100,
1173         0xf90, 0xffffffff, 0x00000100,
1174         0xf98, 0x00000101, 0x00000000,
1175         0x20a8, 0xffffffff, 0x00000104,
1176         0x55e4, 0xff000fff, 0x00000100,
1177         0x30cc, 0xc0000fff, 0x00000104,
1178         0xc1e4, 0x00000001, 0x00000001,
1179         0xd00c, 0xff000ff0, 0x00000100,
1180         0xd80c, 0xff000ff0, 0x00000100
1181 };
1182
1183 static const u32 spectre_golden_spm_registers[] =
1184 {
1185         0x30800, 0xe0ffffff, 0xe0000000
1186 };
1187
1188 static const u32 spectre_golden_common_registers[] =
1189 {
1190         0xc770, 0xffffffff, 0x00000800,
1191         0xc774, 0xffffffff, 0x00000800,
1192         0xc798, 0xffffffff, 0x00007fbf,
1193         0xc79c, 0xffffffff, 0x00007faf
1194 };
1195
1196 static const u32 spectre_golden_registers[] =
1197 {
1198         0x3c000, 0xffff1fff, 0x96940200,
1199         0x3c00c, 0xffff0001, 0xff000000,
1200         0x3c200, 0xfffc0fff, 0x00000100,
1201         0x6ed8, 0x00010101, 0x00010000,
1202         0x9834, 0xf00fffff, 0x00000400,
1203         0x9838, 0xfffffffc, 0x00020200,
1204         0x5bb0, 0x000000f0, 0x00000070,
1205         0x5bc0, 0xf0311fff, 0x80300000,
1206         0x98f8, 0x73773777, 0x12010001,
1207         0x9b7c, 0x00ff0000, 0x00fc0000,
1208         0x2f48, 0x73773777, 0x12010001,
1209         0x8a14, 0xf000003f, 0x00000007,
1210         0x8b24, 0xffffffff, 0x00ffffff,
1211         0x28350, 0x3f3f3fff, 0x00000082,
1212         0x28354, 0x0000003f, 0x00000000,
1213         0x3e78, 0x00000001, 0x00000002,
1214         0x913c, 0xffff03df, 0x00000004,
1215         0xc768, 0x00000008, 0x00000008,
1216         0x8c00, 0x000008ff, 0x00000800,
1217         0x9508, 0x00010000, 0x00010000,
1218         0xac0c, 0xffffffff, 0x54763210,
1219         0x214f8, 0x01ff01ff, 0x00000002,
1220         0x21498, 0x007ff800, 0x00200000,
1221         0x2015c, 0xffffffff, 0x00000f40,
1222         0x30934, 0xffffffff, 0x00000001
1223 };
1224
1225 static const u32 spectre_mgcg_cgcg_init[] =
1226 {
1227         0xc420, 0xffffffff, 0xfffffffc,
1228         0x30800, 0xffffffff, 0xe0000000,
1229         0x3c2a0, 0xffffffff, 0x00000100,
1230         0x3c208, 0xffffffff, 0x00000100,
1231         0x3c2c0, 0xffffffff, 0x00000100,
1232         0x3c2c8, 0xffffffff, 0x00000100,
1233         0x3c2c4, 0xffffffff, 0x00000100,
1234         0x55e4, 0xffffffff, 0x00600100,
1235         0x3c280, 0xffffffff, 0x00000100,
1236         0x3c214, 0xffffffff, 0x06000100,
1237         0x3c220, 0xffffffff, 0x00000100,
1238         0x3c218, 0xffffffff, 0x06000100,
1239         0x3c204, 0xffffffff, 0x00000100,
1240         0x3c2e0, 0xffffffff, 0x00000100,
1241         0x3c224, 0xffffffff, 0x00000100,
1242         0x3c200, 0xffffffff, 0x00000100,
1243         0x3c230, 0xffffffff, 0x00000100,
1244         0x3c234, 0xffffffff, 0x00000100,
1245         0x3c250, 0xffffffff, 0x00000100,
1246         0x3c254, 0xffffffff, 0x00000100,
1247         0x3c258, 0xffffffff, 0x00000100,
1248         0x3c25c, 0xffffffff, 0x00000100,
1249         0x3c260, 0xffffffff, 0x00000100,
1250         0x3c27c, 0xffffffff, 0x00000100,
1251         0x3c278, 0xffffffff, 0x00000100,
1252         0x3c210, 0xffffffff, 0x06000100,
1253         0x3c290, 0xffffffff, 0x00000100,
1254         0x3c274, 0xffffffff, 0x00000100,
1255         0x3c2b4, 0xffffffff, 0x00000100,
1256         0x3c2b0, 0xffffffff, 0x00000100,
1257         0x3c270, 0xffffffff, 0x00000100,
1258         0x30800, 0xffffffff, 0xe0000000,
1259         0x3c020, 0xffffffff, 0x00010000,
1260         0x3c024, 0xffffffff, 0x00030002,
1261         0x3c028, 0xffffffff, 0x00040007,
1262         0x3c02c, 0xffffffff, 0x00060005,
1263         0x3c030, 0xffffffff, 0x00090008,
1264         0x3c034, 0xffffffff, 0x00010000,
1265         0x3c038, 0xffffffff, 0x00030002,
1266         0x3c03c, 0xffffffff, 0x00040007,
1267         0x3c040, 0xffffffff, 0x00060005,
1268         0x3c044, 0xffffffff, 0x00090008,
1269         0x3c048, 0xffffffff, 0x00010000,
1270         0x3c04c, 0xffffffff, 0x00030002,
1271         0x3c050, 0xffffffff, 0x00040007,
1272         0x3c054, 0xffffffff, 0x00060005,
1273         0x3c058, 0xffffffff, 0x00090008,
1274         0x3c05c, 0xffffffff, 0x00010000,
1275         0x3c060, 0xffffffff, 0x00030002,
1276         0x3c064, 0xffffffff, 0x00040007,
1277         0x3c068, 0xffffffff, 0x00060005,
1278         0x3c06c, 0xffffffff, 0x00090008,
1279         0x3c070, 0xffffffff, 0x00010000,
1280         0x3c074, 0xffffffff, 0x00030002,
1281         0x3c078, 0xffffffff, 0x00040007,
1282         0x3c07c, 0xffffffff, 0x00060005,
1283         0x3c080, 0xffffffff, 0x00090008,
1284         0x3c084, 0xffffffff, 0x00010000,
1285         0x3c088, 0xffffffff, 0x00030002,
1286         0x3c08c, 0xffffffff, 0x00040007,
1287         0x3c090, 0xffffffff, 0x00060005,
1288         0x3c094, 0xffffffff, 0x00090008,
1289         0x3c098, 0xffffffff, 0x00010000,
1290         0x3c09c, 0xffffffff, 0x00030002,
1291         0x3c0a0, 0xffffffff, 0x00040007,
1292         0x3c0a4, 0xffffffff, 0x00060005,
1293         0x3c0a8, 0xffffffff, 0x00090008,
1294         0x3c0ac, 0xffffffff, 0x00010000,
1295         0x3c0b0, 0xffffffff, 0x00030002,
1296         0x3c0b4, 0xffffffff, 0x00040007,
1297         0x3c0b8, 0xffffffff, 0x00060005,
1298         0x3c0bc, 0xffffffff, 0x00090008,
1299         0x3c000, 0xffffffff, 0x96e00200,
1300         0x8708, 0xffffffff, 0x00900100,
1301         0xc424, 0xffffffff, 0x0020003f,
1302         0x38, 0xffffffff, 0x0140001c,
1303         0x3c, 0x000f0000, 0x000f0000,
1304         0x220, 0xffffffff, 0xC060000C,
1305         0x224, 0xc0000fff, 0x00000100,
1306         0xf90, 0xffffffff, 0x00000100,
1307         0xf98, 0x00000101, 0x00000000,
1308         0x20a8, 0xffffffff, 0x00000104,
1309         0x55e4, 0xff000fff, 0x00000100,
1310         0x30cc, 0xc0000fff, 0x00000104,
1311         0xc1e4, 0x00000001, 0x00000001,
1312         0xd00c, 0xff000ff0, 0x00000100,
1313         0xd80c, 0xff000ff0, 0x00000100
1314 };
1315
1316 static const u32 kalindi_golden_spm_registers[] =
1317 {
1318         0x30800, 0xe0ffffff, 0xe0000000
1319 };
1320
1321 static const u32 kalindi_golden_common_registers[] =
1322 {
1323         0xc770, 0xffffffff, 0x00000800,
1324         0xc774, 0xffffffff, 0x00000800,
1325         0xc798, 0xffffffff, 0x00007fbf,
1326         0xc79c, 0xffffffff, 0x00007faf
1327 };
1328
1329 static const u32 kalindi_golden_registers[] =
1330 {
1331         0x3c000, 0xffffdfff, 0x6e944040,
1332         0x55e4, 0xff607fff, 0xfc000100,
1333         0x3c220, 0xff000fff, 0x00000100,
1334         0x3c224, 0xff000fff, 0x00000100,
1335         0x3c200, 0xfffc0fff, 0x00000100,
1336         0x6ed8, 0x00010101, 0x00010000,
1337         0x9830, 0xffffffff, 0x00000000,
1338         0x9834, 0xf00fffff, 0x00000400,
1339         0x5bb0, 0x000000f0, 0x00000070,
1340         0x5bc0, 0xf0311fff, 0x80300000,
1341         0x98f8, 0x73773777, 0x12010001,
1342         0x98fc, 0xffffffff, 0x00000010,
1343         0x9b7c, 0x00ff0000, 0x00fc0000,
1344         0x8030, 0x00001f0f, 0x0000100a,
1345         0x2f48, 0x73773777, 0x12010001,
1346         0x2408, 0x000fffff, 0x000c007f,
1347         0x8a14, 0xf000003f, 0x00000007,
1348         0x8b24, 0x3fff3fff, 0x00ffcfff,
1349         0x30a04, 0x0000ff0f, 0x00000000,
1350         0x28a4c, 0x07ffffff, 0x06000000,
1351         0x4d8, 0x00000fff, 0x00000100,
1352         0x3e78, 0x00000001, 0x00000002,
1353         0xc768, 0x00000008, 0x00000008,
1354         0x8c00, 0x000000ff, 0x00000003,
1355         0x214f8, 0x01ff01ff, 0x00000002,
1356         0x21498, 0x007ff800, 0x00200000,
1357         0x2015c, 0xffffffff, 0x00000f40,
1358         0x88c4, 0x001f3ae3, 0x00000082,
1359         0x88d4, 0x0000001f, 0x00000010,
1360         0x30934, 0xffffffff, 0x00000000
1361 };
1362
1363 static const u32 kalindi_mgcg_cgcg_init[] =
1364 {
1365         0xc420, 0xffffffff, 0xfffffffc,
1366         0x30800, 0xffffffff, 0xe0000000,
1367         0x3c2a0, 0xffffffff, 0x00000100,
1368         0x3c208, 0xffffffff, 0x00000100,
1369         0x3c2c0, 0xffffffff, 0x00000100,
1370         0x3c2c8, 0xffffffff, 0x00000100,
1371         0x3c2c4, 0xffffffff, 0x00000100,
1372         0x55e4, 0xffffffff, 0x00600100,
1373         0x3c280, 0xffffffff, 0x00000100,
1374         0x3c214, 0xffffffff, 0x06000100,
1375         0x3c220, 0xffffffff, 0x00000100,
1376         0x3c218, 0xffffffff, 0x06000100,
1377         0x3c204, 0xffffffff, 0x00000100,
1378         0x3c2e0, 0xffffffff, 0x00000100,
1379         0x3c224, 0xffffffff, 0x00000100,
1380         0x3c200, 0xffffffff, 0x00000100,
1381         0x3c230, 0xffffffff, 0x00000100,
1382         0x3c234, 0xffffffff, 0x00000100,
1383         0x3c250, 0xffffffff, 0x00000100,
1384         0x3c254, 0xffffffff, 0x00000100,
1385         0x3c258, 0xffffffff, 0x00000100,
1386         0x3c25c, 0xffffffff, 0x00000100,
1387         0x3c260, 0xffffffff, 0x00000100,
1388         0x3c27c, 0xffffffff, 0x00000100,
1389         0x3c278, 0xffffffff, 0x00000100,
1390         0x3c210, 0xffffffff, 0x06000100,
1391         0x3c290, 0xffffffff, 0x00000100,
1392         0x3c274, 0xffffffff, 0x00000100,
1393         0x3c2b4, 0xffffffff, 0x00000100,
1394         0x3c2b0, 0xffffffff, 0x00000100,
1395         0x3c270, 0xffffffff, 0x00000100,
1396         0x30800, 0xffffffff, 0xe0000000,
1397         0x3c020, 0xffffffff, 0x00010000,
1398         0x3c024, 0xffffffff, 0x00030002,
1399         0x3c028, 0xffffffff, 0x00040007,
1400         0x3c02c, 0xffffffff, 0x00060005,
1401         0x3c030, 0xffffffff, 0x00090008,
1402         0x3c034, 0xffffffff, 0x00010000,
1403         0x3c038, 0xffffffff, 0x00030002,
1404         0x3c03c, 0xffffffff, 0x00040007,
1405         0x3c040, 0xffffffff, 0x00060005,
1406         0x3c044, 0xffffffff, 0x00090008,
1407         0x3c000, 0xffffffff, 0x96e00200,
1408         0x8708, 0xffffffff, 0x00900100,
1409         0xc424, 0xffffffff, 0x0020003f,
1410         0x38, 0xffffffff, 0x0140001c,
1411         0x3c, 0x000f0000, 0x000f0000,
1412         0x220, 0xffffffff, 0xC060000C,
1413         0x224, 0xc0000fff, 0x00000100,
1414         0x20a8, 0xffffffff, 0x00000104,
1415         0x55e4, 0xff000fff, 0x00000100,
1416         0x30cc, 0xc0000fff, 0x00000104,
1417         0xc1e4, 0x00000001, 0x00000001,
1418         0xd00c, 0xff000ff0, 0x00000100,
1419         0xd80c, 0xff000ff0, 0x00000100
1420 };
1421
1422 static const u32 hawaii_golden_spm_registers[] =
1423 {
1424         0x30800, 0xe0ffffff, 0xe0000000
1425 };
1426
1427 static const u32 hawaii_golden_common_registers[] =
1428 {
1429         0x30800, 0xffffffff, 0xe0000000,
1430         0x28350, 0xffffffff, 0x3a00161a,
1431         0x28354, 0xffffffff, 0x0000002e,
1432         0x9a10, 0xffffffff, 0x00018208,
1433         0x98f8, 0xffffffff, 0x12011003
1434 };
1435
1436 static const u32 hawaii_golden_registers[] =
1437 {
1438         0x3354, 0x00000333, 0x00000333,
1439         0x9a10, 0x00010000, 0x00058208,
1440         0x9830, 0xffffffff, 0x00000000,
1441         0x9834, 0xf00fffff, 0x00000400,
1442         0x9838, 0x0002021c, 0x00020200,
1443         0xc78, 0x00000080, 0x00000000,
1444         0x5bb0, 0x000000f0, 0x00000070,
1445         0x5bc0, 0xf0311fff, 0x80300000,
1446         0x350c, 0x00810000, 0x408af000,
1447         0x7030, 0x31000111, 0x00000011,
1448         0x2f48, 0x73773777, 0x12010001,
1449         0x2120, 0x0000007f, 0x0000001b,
1450         0x21dc, 0x00007fb6, 0x00002191,
1451         0x3628, 0x0000003f, 0x0000000a,
1452         0x362c, 0x0000003f, 0x0000000a,
1453         0x2ae4, 0x00073ffe, 0x000022a2,
1454         0x240c, 0x000007ff, 0x00000000,
1455         0x8bf0, 0x00002001, 0x00000001,
1456         0x8b24, 0xffffffff, 0x00ffffff,
1457         0x30a04, 0x0000ff0f, 0x00000000,
1458         0x28a4c, 0x07ffffff, 0x06000000,
1459         0x3e78, 0x00000001, 0x00000002,
1460         0xc768, 0x00000008, 0x00000008,
1461         0xc770, 0x00000f00, 0x00000800,
1462         0xc774, 0x00000f00, 0x00000800,
1463         0xc798, 0x00ffffff, 0x00ff7fbf,
1464         0xc79c, 0x00ffffff, 0x00ff7faf,
1465         0x8c00, 0x000000ff, 0x00000800,
1466         0xe40, 0x00001fff, 0x00001fff,
1467         0x9060, 0x0000007f, 0x00000020,
1468         0x9508, 0x00010000, 0x00010000,
1469         0xae00, 0x00100000, 0x000ff07c,
1470         0xac14, 0x000003ff, 0x0000000f,
1471         0xac10, 0xffffffff, 0x7564fdec,
1472         0xac0c, 0xffffffff, 0x3120b9a8,
1473         0xac08, 0x20000000, 0x0f9c0000
1474 };
1475
1476 static const u32 hawaii_mgcg_cgcg_init[] =
1477 {
1478         0xc420, 0xffffffff, 0xfffffffd,
1479         0x30800, 0xffffffff, 0xe0000000,
1480         0x3c2a0, 0xffffffff, 0x00000100,
1481         0x3c208, 0xffffffff, 0x00000100,
1482         0x3c2c0, 0xffffffff, 0x00000100,
1483         0x3c2c8, 0xffffffff, 0x00000100,
1484         0x3c2c4, 0xffffffff, 0x00000100,
1485         0x55e4, 0xffffffff, 0x00200100,
1486         0x3c280, 0xffffffff, 0x00000100,
1487         0x3c214, 0xffffffff, 0x06000100,
1488         0x3c220, 0xffffffff, 0x00000100,
1489         0x3c218, 0xffffffff, 0x06000100,
1490         0x3c204, 0xffffffff, 0x00000100,
1491         0x3c2e0, 0xffffffff, 0x00000100,
1492         0x3c224, 0xffffffff, 0x00000100,
1493         0x3c200, 0xffffffff, 0x00000100,
1494         0x3c230, 0xffffffff, 0x00000100,
1495         0x3c234, 0xffffffff, 0x00000100,
1496         0x3c250, 0xffffffff, 0x00000100,
1497         0x3c254, 0xffffffff, 0x00000100,
1498         0x3c258, 0xffffffff, 0x00000100,
1499         0x3c25c, 0xffffffff, 0x00000100,
1500         0x3c260, 0xffffffff, 0x00000100,
1501         0x3c27c, 0xffffffff, 0x00000100,
1502         0x3c278, 0xffffffff, 0x00000100,
1503         0x3c210, 0xffffffff, 0x06000100,
1504         0x3c290, 0xffffffff, 0x00000100,
1505         0x3c274, 0xffffffff, 0x00000100,
1506         0x3c2b4, 0xffffffff, 0x00000100,
1507         0x3c2b0, 0xffffffff, 0x00000100,
1508         0x3c270, 0xffffffff, 0x00000100,
1509         0x30800, 0xffffffff, 0xe0000000,
1510         0x3c020, 0xffffffff, 0x00010000,
1511         0x3c024, 0xffffffff, 0x00030002,
1512         0x3c028, 0xffffffff, 0x00040007,
1513         0x3c02c, 0xffffffff, 0x00060005,
1514         0x3c030, 0xffffffff, 0x00090008,
1515         0x3c034, 0xffffffff, 0x00010000,
1516         0x3c038, 0xffffffff, 0x00030002,
1517         0x3c03c, 0xffffffff, 0x00040007,
1518         0x3c040, 0xffffffff, 0x00060005,
1519         0x3c044, 0xffffffff, 0x00090008,
1520         0x3c048, 0xffffffff, 0x00010000,
1521         0x3c04c, 0xffffffff, 0x00030002,
1522         0x3c050, 0xffffffff, 0x00040007,
1523         0x3c054, 0xffffffff, 0x00060005,
1524         0x3c058, 0xffffffff, 0x00090008,
1525         0x3c05c, 0xffffffff, 0x00010000,
1526         0x3c060, 0xffffffff, 0x00030002,
1527         0x3c064, 0xffffffff, 0x00040007,
1528         0x3c068, 0xffffffff, 0x00060005,
1529         0x3c06c, 0xffffffff, 0x00090008,
1530         0x3c070, 0xffffffff, 0x00010000,
1531         0x3c074, 0xffffffff, 0x00030002,
1532         0x3c078, 0xffffffff, 0x00040007,
1533         0x3c07c, 0xffffffff, 0x00060005,
1534         0x3c080, 0xffffffff, 0x00090008,
1535         0x3c084, 0xffffffff, 0x00010000,
1536         0x3c088, 0xffffffff, 0x00030002,
1537         0x3c08c, 0xffffffff, 0x00040007,
1538         0x3c090, 0xffffffff, 0x00060005,
1539         0x3c094, 0xffffffff, 0x00090008,
1540         0x3c098, 0xffffffff, 0x00010000,
1541         0x3c09c, 0xffffffff, 0x00030002,
1542         0x3c0a0, 0xffffffff, 0x00040007,
1543         0x3c0a4, 0xffffffff, 0x00060005,
1544         0x3c0a8, 0xffffffff, 0x00090008,
1545         0x3c0ac, 0xffffffff, 0x00010000,
1546         0x3c0b0, 0xffffffff, 0x00030002,
1547         0x3c0b4, 0xffffffff, 0x00040007,
1548         0x3c0b8, 0xffffffff, 0x00060005,
1549         0x3c0bc, 0xffffffff, 0x00090008,
1550         0x3c0c0, 0xffffffff, 0x00010000,
1551         0x3c0c4, 0xffffffff, 0x00030002,
1552         0x3c0c8, 0xffffffff, 0x00040007,
1553         0x3c0cc, 0xffffffff, 0x00060005,
1554         0x3c0d0, 0xffffffff, 0x00090008,
1555         0x3c0d4, 0xffffffff, 0x00010000,
1556         0x3c0d8, 0xffffffff, 0x00030002,
1557         0x3c0dc, 0xffffffff, 0x00040007,
1558         0x3c0e0, 0xffffffff, 0x00060005,
1559         0x3c0e4, 0xffffffff, 0x00090008,
1560         0x3c0e8, 0xffffffff, 0x00010000,
1561         0x3c0ec, 0xffffffff, 0x00030002,
1562         0x3c0f0, 0xffffffff, 0x00040007,
1563         0x3c0f4, 0xffffffff, 0x00060005,
1564         0x3c0f8, 0xffffffff, 0x00090008,
1565         0xc318, 0xffffffff, 0x00020200,
1566         0x3350, 0xffffffff, 0x00000200,
1567         0x15c0, 0xffffffff, 0x00000400,
1568         0x55e8, 0xffffffff, 0x00000000,
1569         0x2f50, 0xffffffff, 0x00000902,
1570         0x3c000, 0xffffffff, 0x96940200,
1571         0x8708, 0xffffffff, 0x00900100,
1572         0xc424, 0xffffffff, 0x0020003f,
1573         0x38, 0xffffffff, 0x0140001c,
1574         0x3c, 0x000f0000, 0x000f0000,
1575         0x220, 0xffffffff, 0xc060000c,
1576         0x224, 0xc0000fff, 0x00000100,
1577         0xf90, 0xffffffff, 0x00000100,
1578         0xf98, 0x00000101, 0x00000000,
1579         0x20a8, 0xffffffff, 0x00000104,
1580         0x55e4, 0xff000fff, 0x00000100,
1581         0x30cc, 0xc0000fff, 0x00000104,
1582         0xc1e4, 0x00000001, 0x00000001,
1583         0xd00c, 0xff000ff0, 0x00000100,
1584         0xd80c, 0xff000ff0, 0x00000100
1585 };
1586
1587 static const u32 godavari_golden_registers[] =
1588 {
1589         0x55e4, 0xff607fff, 0xfc000100,
1590         0x6ed8, 0x00010101, 0x00010000,
1591         0x9830, 0xffffffff, 0x00000000,
1592         0x98302, 0xf00fffff, 0x00000400,
1593         0x6130, 0xffffffff, 0x00010000,
1594         0x5bb0, 0x000000f0, 0x00000070,
1595         0x5bc0, 0xf0311fff, 0x80300000,
1596         0x98f8, 0x73773777, 0x12010001,
1597         0x98fc, 0xffffffff, 0x00000010,
1598         0x8030, 0x00001f0f, 0x0000100a,
1599         0x2f48, 0x73773777, 0x12010001,
1600         0x2408, 0x000fffff, 0x000c007f,
1601         0x8a14, 0xf000003f, 0x00000007,
1602         0x8b24, 0xffffffff, 0x00ff0fff,
1603         0x30a04, 0x0000ff0f, 0x00000000,
1604         0x28a4c, 0x07ffffff, 0x06000000,
1605         0x4d8, 0x00000fff, 0x00000100,
1606         0xd014, 0x00010000, 0x00810001,
1607         0xd814, 0x00010000, 0x00810001,
1608         0x3e78, 0x00000001, 0x00000002,
1609         0xc768, 0x00000008, 0x00000008,
1610         0xc770, 0x00000f00, 0x00000800,
1611         0xc774, 0x00000f00, 0x00000800,
1612         0xc798, 0x00ffffff, 0x00ff7fbf,
1613         0xc79c, 0x00ffffff, 0x00ff7faf,
1614         0x8c00, 0x000000ff, 0x00000001,
1615         0x214f8, 0x01ff01ff, 0x00000002,
1616         0x21498, 0x007ff800, 0x00200000,
1617         0x2015c, 0xffffffff, 0x00000f40,
1618         0x88c4, 0x001f3ae3, 0x00000082,
1619         0x88d4, 0x0000001f, 0x00000010,
1620         0x30934, 0xffffffff, 0x00000000
1621 };
1622
1623
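/**
 * cik_init_golden_registers - program the "golden" register settings
 *
 * @rdev: radeon_device pointer
 *
 * Programs the per-asic golden register tables above.  Each table is a
 * flat array of {offset, mask, value} triplets that is handed, together
 * with its ARRAY_SIZE, to radeon_program_register_sequence() (CIK).
 */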
1624 static void cik_init_golden_registers(struct radeon_device *rdev)
1625 {
1626         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1627         mutex_lock(&rdev->grbm_idx_mutex);
1628         switch (rdev->family) {
1629         case CHIP_BONAIRE:
1630                 radeon_program_register_sequence(rdev,
1631                                                  bonaire_mgcg_cgcg_init,
1632                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1633                 radeon_program_register_sequence(rdev,
1634                                                  bonaire_golden_registers,
1635                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1636                 radeon_program_register_sequence(rdev,
1637                                                  bonaire_golden_common_registers,
1638                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1639                 radeon_program_register_sequence(rdev,
1640                                                  bonaire_golden_spm_registers,
1641                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1642                 break;
1643         case CHIP_KABINI:
1644                 radeon_program_register_sequence(rdev,
1645                                                  kalindi_mgcg_cgcg_init,
1646                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1647                 radeon_program_register_sequence(rdev,
1648                                                  kalindi_golden_registers,
1649                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1650                 radeon_program_register_sequence(rdev,
1651                                                  kalindi_golden_common_registers,
1652                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1653                 radeon_program_register_sequence(rdev,
1654                                                  kalindi_golden_spm_registers,
1655                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1656                 break;
1657         case CHIP_MULLINS:
1658                 radeon_program_register_sequence(rdev,
1659                                                  kalindi_mgcg_cgcg_init,
1660                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1661                 radeon_program_register_sequence(rdev,
1662                                                  godavari_golden_registers,
1663                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1664                 radeon_program_register_sequence(rdev,
1665                                                  kalindi_golden_common_registers,
1666                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1667                 radeon_program_register_sequence(rdev,
1668                                                  kalindi_golden_spm_registers,
1669                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1670                 break;
1671         case CHIP_KAVERI:
1672                 radeon_program_register_sequence(rdev,
1673                                                  spectre_mgcg_cgcg_init,
1674                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1675                 radeon_program_register_sequence(rdev,
1676                                                  spectre_golden_registers,
1677                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1678                 radeon_program_register_sequence(rdev,
1679                                                  spectre_golden_common_registers,
1680                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1681                 radeon_program_register_sequence(rdev,
1682                                                  spectre_golden_spm_registers,
1683                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1684                 break;
1685         case CHIP_HAWAII:
1686                 radeon_program_register_sequence(rdev,
1687                                                  hawaii_mgcg_cgcg_init,
1688                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1689                 radeon_program_register_sequence(rdev,
1690                                                  hawaii_golden_registers,
1691                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1692                 radeon_program_register_sequence(rdev,
1693                                                  hawaii_golden_common_registers,
1694                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1695                 radeon_program_register_sequence(rdev,
1696                                                  hawaii_golden_spm_registers,
1697                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1698                 break;
1699         default:
1700                 break;
1701         }
1702         mutex_unlock(&rdev->grbm_idx_mutex);
1703 }
1704
1705 /**
1706  * cik_get_xclk - get the xclk
1707  *
1708  * @rdev: radeon_device pointer
1709  *
1710  * Returns the reference clock used by the gfx engine
1711  * (CIK).
1712  */
1713 u32 cik_get_xclk(struct radeon_device *rdev)
1714 {
1715         u32 reference_clock = rdev->clock.spll.reference_freq;
1716
1717         if (rdev->flags & RADEON_IS_IGP) {
1718                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1719                         return reference_clock / 2;
1720         } else {
1721                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1722                         return reference_clock / 4;
1723         }
1724         return reference_clock;
1725 }
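
/* Worked example for cik_get_xclk() above: with a (hypothetical) 100 MHz
 * SPLL reference, an IGP with GPU_COUNTER_CLK set yields 50 MHz, a dGPU
 * with XTALIN_DIVIDE set yields 25 MHz, and 100 MHz is returned otherwise.
 */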
1726
1727 /**
1728  * cik_mm_rdoorbell - read a doorbell dword
1729  *
1730  * @rdev: radeon_device pointer
1731  * @index: doorbell index
1732  *
1733  * Returns the value in the doorbell aperture at the
1734  * requested doorbell index (CIK).
1735  */
1736 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1737 {
1738         if (index < rdev->doorbell.num_doorbells) {
1739                 return readl(rdev->doorbell.ptr + index);
1740         } else {
1741                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1742                 return 0;
1743         }
1744 }
1745
1746 /**
1747  * cik_mm_wdoorbell - write a doorbell dword
1748  *
1749  * @rdev: radeon_device pointer
1750  * @index: doorbell index
1751  * @v: value to write
1752  *
1753  * Writes @v to the doorbell aperture at the
1754  * requested doorbell index (CIK).
1755  */
1756 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1757 {
1758         if (index < rdev->doorbell.num_doorbells) {
1759                 writel(v, rdev->doorbell.ptr + index);
1760         } else {
1761                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1762         }
1763 }
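
/* Callers normally go through the RDOORBELL32()/WDOORBELL32() wrappers in
 * radeon.h rather than calling these directly; the CIK compute rings, for
 * example, publish their write pointer to the hardware via the doorbell
 * aperture instead of an MMIO ring register.
 */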
1764
1765 #define BONAIRE_IO_MC_REGS_SIZE 36
1766
1767 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1768 {
1769         {0x00000070, 0x04400000},
1770         {0x00000071, 0x80c01803},
1771         {0x00000072, 0x00004004},
1772         {0x00000073, 0x00000100},
1773         {0x00000074, 0x00ff0000},
1774         {0x00000075, 0x34000000},
1775         {0x00000076, 0x08000014},
1776         {0x00000077, 0x00cc08ec},
1777         {0x00000078, 0x00000400},
1778         {0x00000079, 0x00000000},
1779         {0x0000007a, 0x04090000},
1780         {0x0000007c, 0x00000000},
1781         {0x0000007e, 0x4408a8e8},
1782         {0x0000007f, 0x00000304},
1783         {0x00000080, 0x00000000},
1784         {0x00000082, 0x00000001},
1785         {0x00000083, 0x00000002},
1786         {0x00000084, 0xf3e4f400},
1787         {0x00000085, 0x052024e3},
1788         {0x00000087, 0x00000000},
1789         {0x00000088, 0x01000000},
1790         {0x0000008a, 0x1c0a0000},
1791         {0x0000008b, 0xff010000},
1792         {0x0000008d, 0xffffefff},
1793         {0x0000008e, 0xfff3efff},
1794         {0x0000008f, 0xfff3efbf},
1795         {0x00000092, 0xf7ffffff},
1796         {0x00000093, 0xffffff7f},
1797         {0x00000095, 0x00101101},
1798         {0x00000096, 0x00000fff},
1799         {0x00000097, 0x00116fff},
1800         {0x00000098, 0x60010000},
1801         {0x00000099, 0x10010000},
1802         {0x0000009a, 0x00006000},
1803         {0x0000009b, 0x00001000},
1804         {0x0000009f, 0x00b48000}
1805 };
1806
1807 #define HAWAII_IO_MC_REGS_SIZE 22
1808
1809 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1810 {
1811         {0x0000007d, 0x40000000},
1812         {0x0000007e, 0x40180304},
1813         {0x0000007f, 0x0000ff00},
1814         {0x00000081, 0x00000000},
1815         {0x00000083, 0x00000800},
1816         {0x00000086, 0x00000000},
1817         {0x00000087, 0x00000100},
1818         {0x00000088, 0x00020100},
1819         {0x00000089, 0x00000000},
1820         {0x0000008b, 0x00040000},
1821         {0x0000008c, 0x00000100},
1822         {0x0000008e, 0xff010000},
1823         {0x00000090, 0xffffefff},
1824         {0x00000091, 0xfff3efff},
1825         {0x00000092, 0xfff3efbf},
1826         {0x00000093, 0xf7ffffff},
1827         {0x00000094, 0xffffff7f},
1828         {0x00000095, 0x00000fff},
1829         {0x00000096, 0x00116fff},
1830         {0x00000097, 0x60010000},
1831         {0x00000098, 0x10010000},
1832         {0x0000009f, 0x00c79000}
1833 };
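
/* The IO MC register tables above are {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} pairs.  ci_mc_load_microcode() below only programs
 * them for the legacy firmware images; new-style images carry their own
 * io_debug array in the firmware header.
 */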
1834
1835
1836 /**
1837  * cik_srbm_select - select specific register instances
1838  *
1839  * @rdev: radeon_device pointer
1840  * @me: selected ME (micro engine)
1841  * @pipe: pipe
1842  * @queue: queue
1843  * @vmid: VMID
1844  *
1845  * Switches the currently active register instances.  Some
1846  * registers are instanced per VMID, others are instanced per
1847  * me/pipe/queue combination.
1848  */
1849 static void cik_srbm_select(struct radeon_device *rdev,
1850                             u32 me, u32 pipe, u32 queue, u32 vmid)
1851 {
1852         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1853                              MEID(me & 0x3) |
1854                              VMID(vmid & 0xf) |
1855                              QUEUEID(queue & 0x7));
1856         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1857 }
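
/* A sketch of the pattern used elsewhere in this file when touching
 * instanced registers (not a new helper):
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, vmid);
 *	... program the per-VMID / per-queue registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */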
1858
1859 /* ucode loading */
1860 /**
1861  * ci_mc_load_microcode - load MC ucode into the hw
1862  *
1863  * @rdev: radeon_device pointer
1864  *
1865  * Load the GDDR MC ucode into the hw (CIK).
1866  * Returns 0 on success, error on failure.
1867  */
1868 int ci_mc_load_microcode(struct radeon_device *rdev)
1869 {
1870         const __be32 *fw_data = NULL;
1871         const __le32 *new_fw_data = NULL;
1872         u32 running, blackout = 0, tmp;
1873         u32 *io_mc_regs = NULL;
1874         const __le32 *new_io_mc_regs = NULL;
1875         int i, regs_size, ucode_size;
1876
1877         if (!rdev->mc_fw)
1878                 return -EINVAL;
1879
1880         if (rdev->new_fw) {
1881                 const struct mc_firmware_header_v1_0 *hdr =
1882                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1883
1884                 radeon_ucode_print_mc_hdr(&hdr->header);
1885
1886                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1887                 new_io_mc_regs = (const __le32 *)
1888                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1889                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1890                 new_fw_data = (const __le32 *)
1891                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1892         } else {
1893                 ucode_size = rdev->mc_fw->size / 4;
1894
1895                 switch (rdev->family) {
1896                 case CHIP_BONAIRE:
1897                         io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1898                         regs_size = BONAIRE_IO_MC_REGS_SIZE;
1899                         break;
1900                 case CHIP_HAWAII:
1901                         io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1902                         regs_size = HAWAII_IO_MC_REGS_SIZE;
1903                         break;
1904                 default:
1905                         return -EINVAL;
1906                 }
1907                 fw_data = (const __be32 *)rdev->mc_fw->data;
1908         }
1909
1910         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1911
1912         if (running == 0) {
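                /* running is known to be zero in this branch, so the
                 * blackout save below (and the matching restore at the
                 * end of the block) never actually executes.
                 */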
1913                 if (running) {
1914                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1915                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1916                 }
1917
1918                 /* reset the engine and set to writable */
1919                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921
1922                 /* load mc io regs */
1923                 for (i = 0; i < regs_size; i++) {
1924                         if (rdev->new_fw) {
1925                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927                         } else {
1928                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930                         }
1931                 }
1932
1933                 tmp = RREG32(MC_SEQ_MISC0);
1934                 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939                 }
1940
1941                 /* load the MC ucode */
1942                 for (i = 0; i < ucode_size; i++) {
1943                         if (rdev->new_fw)
1944                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945                         else
1946                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1947                 }
1948
1949                 /* put the engine back into the active state */
1950                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953
1954                 /* wait for training to complete */
1955                 for (i = 0; i < rdev->usec_timeout; i++) {
1956                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957                                 break;
1958                         udelay(1);
1959                 }
1960                 for (i = 0; i < rdev->usec_timeout; i++) {
1961                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962                                 break;
1963                         udelay(1);
1964                 }
1965
1966                 if (running)
1967                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1968         }
1969
1970         return 0;
1971 }
1972
1973 /**
1974  * cik_init_microcode - load ucode images from disk
1975  *
1976  * @rdev: radeon_device pointer
1977  *
1978  * Use the firmware interface to load the ucode images into the
1979  * driver (not loaded into hw); new-style names are tried first,
1980  * then the legacy ones.  Returns 0 on success, error on failure.
1981  */
1982 static int cik_init_microcode(struct radeon_device *rdev)
1983 {
1984         const char *chip_name;
1985         const char *new_chip_name;
1986         size_t pfp_req_size, me_req_size, ce_req_size,
1987                 mec_req_size, rlc_req_size, mc_req_size = 0,
1988                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1989         char fw_name[30];
1990         int new_fw = 0;
1991         int err;
1992         int num_fw;
1993
1994         DRM_DEBUG("\n");
1995
1996         switch (rdev->family) {
1997         case CHIP_BONAIRE:
1998                 chip_name = "BONAIRE";
1999                 new_chip_name = "bonaire";
2000                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2001                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2002                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2003                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2004                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2005                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2006                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2007                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2008                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2009                 num_fw = 8;
2010                 break;
2011         case CHIP_HAWAII:
2012                 chip_name = "HAWAII";
2013                 new_chip_name = "hawaii";
2014                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2016                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2019                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2020                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2021                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2022                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2023                 num_fw = 8;
2024                 break;
2025         case CHIP_KAVERI:
2026                 chip_name = "KAVERI";
2027                 new_chip_name = "kaveri";
2028                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2030                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2033                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034                 num_fw = 7;
2035                 break;
2036         case CHIP_KABINI:
2037                 chip_name = "KABINI";
2038                 new_chip_name = "kabini";
2039                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2041                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2044                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045                 num_fw = 6;
2046                 break;
2047         case CHIP_MULLINS:
2048                 chip_name = "MULLINS";
2049                 new_chip_name = "mullins";
2050                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2051                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2052                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2053                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2054                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2055                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2056                 num_fw = 6;
2057                 break;
2058         default: BUG();
2059         }
2060
2061         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2062
2063         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2064         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2065         if (err) {
2066                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2067                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068                 if (err)
2069                         goto out;
2070                 if (rdev->pfp_fw->size != pfp_req_size) {
2071                         printk(KERN_ERR
2072                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2073                                rdev->pfp_fw->size, fw_name);
2074                         err = -EINVAL;
2075                         goto out;
2076                 }
2077         } else {
2078                 err = radeon_ucode_validate(rdev->pfp_fw);
2079                 if (err) {
2080                         printk(KERN_ERR
2081                                "cik_fw: validation failed for firmware \"%s\"\n",
2082                                fw_name);
2083                         goto out;
2084                 } else {
2085                         new_fw++;
2086                 }
2087         }
2088
2089         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2090         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2091         if (err) {
2092                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2093                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2094                 if (err)
2095                         goto out;
2096                 if (rdev->me_fw->size != me_req_size) {
2097                         printk(KERN_ERR
2098                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099                                rdev->me_fw->size, fw_name);
2100                         err = -EINVAL;
2101                 }
2102         } else {
2103                 err = radeon_ucode_validate(rdev->me_fw);
2104                 if (err) {
2105                         printk(KERN_ERR
2106                                "cik_fw: validation failed for firmware \"%s\"\n",
2107                                fw_name);
2108                         goto out;
2109                 } else {
2110                         new_fw++;
2111                 }
2112         }
2113
2114         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116         if (err) {
2117                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119                 if (err)
2120                         goto out;
2121                 if (rdev->ce_fw->size != ce_req_size) {
2122                         printk(KERN_ERR
2123                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124                                rdev->ce_fw->size, fw_name);
2125                         err = -EINVAL;
2126                 }
2127         } else {
2128                 err = radeon_ucode_validate(rdev->ce_fw);
2129                 if (err) {
2130                         printk(KERN_ERR
2131                                "cik_fw: validation failed for firmware \"%s\"\n",
2132                                fw_name);
2133                         goto out;
2134                 } else {
2135                         new_fw++;
2136                 }
2137         }
2138
2139         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2140         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2141         if (err) {
2142                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2143                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2144                 if (err)
2145                         goto out;
2146                 if (rdev->mec_fw->size != mec_req_size) {
2147                         printk(KERN_ERR
2148                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2149                                rdev->mec_fw->size, fw_name);
2150                         err = -EINVAL;
2151                 }
2152         } else {
2153                 err = radeon_ucode_validate(rdev->mec_fw);
2154                 if (err) {
2155                         printk(KERN_ERR
2156                                "cik_fw: validation failed for firmware \"%s\"\n",
2157                                fw_name);
2158                         goto out;
2159                 } else {
2160                         new_fw++;
2161                 }
2162         }
2163
2164         if (rdev->family == CHIP_KAVERI) {
2165                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2166                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2167                 if (err) {
2168                         goto out;
2169                 } else {
2170                         err = radeon_ucode_validate(rdev->mec2_fw);
2171                         if (err) {
2172                                 goto out;
2173                         } else {
2174                                 new_fw++;
2175                         }
2176                 }
2177         }
2178
2179         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2180         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2181         if (err) {
2182                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2183                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2184                 if (err)
2185                         goto out;
2186                 if (rdev->rlc_fw->size != rlc_req_size) {
2187                         printk(KERN_ERR
2188                                "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2189                                rdev->rlc_fw->size, fw_name);
2190                         err = -EINVAL;
2191                 }
2192         } else {
2193                 err = radeon_ucode_validate(rdev->rlc_fw);
2194                 if (err) {
2195                         printk(KERN_ERR
2196                                "cik_fw: validation failed for firmware \"%s\"\n",
2197                                fw_name);
2198                         goto out;
2199                 } else {
2200                         new_fw++;
2201                 }
2202         }
2203
2204         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2205         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2206         if (err) {
2207                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2208                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2209                 if (err)
2210                         goto out;
2211                 if (rdev->sdma_fw->size != sdma_req_size) {
2212                         printk(KERN_ERR
2213                                "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2214                                rdev->sdma_fw->size, fw_name);
2215                         err = -EINVAL;
2216                 }
2217         } else {
2218                 err = radeon_ucode_validate(rdev->sdma_fw);
2219                 if (err) {
2220                         printk(KERN_ERR
2221                                "cik_fw: validation failed for firmware \"%s\"\n",
2222                                fw_name);
2223                         goto out;
2224                 } else {
2225                         new_fw++;
2226                 }
2227         }
2228
2229         /* No SMC, MC ucode on APUs */
2230         if (!(rdev->flags & RADEON_IS_IGP)) {
2231                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2232                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2233                 if (err) {
2234                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2235                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2236                         if (err) {
2237                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2238                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2239                                 if (err)
2240                                         goto out;
2241                         }
2242                         if ((rdev->mc_fw->size != mc_req_size) &&
2243                             (rdev->mc_fw->size != mc2_req_size)) {
2244                                 printk(KERN_ERR
2245                                        "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2246                                        rdev->mc_fw->size, fw_name);
2247                                 err = -EINVAL;
2248                         }
2249                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2250                 } else {
2251                         err = radeon_ucode_validate(rdev->mc_fw);
2252                         if (err) {
2253                                 printk(KERN_ERR
2254                                        "cik_fw: validation failed for firmware \"%s\"\n",
2255                                        fw_name);
2256                                 goto out;
2257                         } else {
2258                                 new_fw++;
2259                         }
2260                 }
2261
2262                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2263                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2264                 if (err) {
2265                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2266                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2267                         if (err) {
2268                                 printk(KERN_ERR
2269                                        "smc: error loading firmware \"%s\"\n",
2270                                        fw_name);
2271                                 release_firmware(rdev->smc_fw);
2272                                 rdev->smc_fw = NULL;
2273                                 err = 0;
2274                         } else if (rdev->smc_fw->size != smc_req_size) {
2275                                 printk(KERN_ERR
2276                                        "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2277                                        rdev->smc_fw->size, fw_name);
2278                                 err = -EINVAL;
2279                         }
2280                 } else {
2281                         err = radeon_ucode_validate(rdev->smc_fw);
2282                         if (err) {
2283                                 printk(KERN_ERR
2284                                        "cik_fw: validation failed for firmware \"%s\"\n",
2285                                        fw_name);
2286                                 goto out;
2287                         } else {
2288                                 new_fw++;
2289                         }
2290                 }
2291         }
2292
2293         if (new_fw == 0) {
2294                 rdev->new_fw = false;
2295         } else if (new_fw < num_fw) {
2296                 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2297                 err = -EINVAL;
2298         } else {
2299                 rdev->new_fw = true;
2300         }
2301
2302 out:
2303         if (err) {
2304                 if (err != -EINVAL)
2305                         printk(KERN_ERR
2306                                "cik_cp: Failed to load firmware \"%s\"\n",
2307                                fw_name);
2308                 release_firmware(rdev->pfp_fw);
2309                 rdev->pfp_fw = NULL;
2310                 release_firmware(rdev->me_fw);
2311                 rdev->me_fw = NULL;
2312                 release_firmware(rdev->ce_fw);
2313                 rdev->ce_fw = NULL;
2314                 release_firmware(rdev->mec_fw);
2315                 rdev->mec_fw = NULL;
2316                 release_firmware(rdev->mec2_fw);
2317                 rdev->mec2_fw = NULL;
2318                 release_firmware(rdev->rlc_fw);
2319                 rdev->rlc_fw = NULL;
2320                 release_firmware(rdev->sdma_fw);
2321                 rdev->sdma_fw = NULL;
2322                 release_firmware(rdev->mc_fw);
2323                 rdev->mc_fw = NULL;
2324                 release_firmware(rdev->smc_fw);
2325                 rdev->smc_fw = NULL;
2326         }
2327         return err;
2328 }
2329
2330 /*
2331  * Core functions
2332  */
2333 /**
2334  * cik_tiling_mode_table_init - init the hw tiling table
2335  *
2336  * @rdev: radeon_device pointer
2337  *
2338  * Starting with SI, the tiling setup is done globally in a
2339  * set of 32 tiling modes.  Rather than selecting each set of
2340  * parameters per surface as on older asics, we just select
2341  * which index in the tiling table we want to use, and the
2342  * surface uses those parameters (CIK).
2343  */
2344 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2345 {
2346         const u32 num_tile_mode_states = 32;
2347         const u32 num_secondary_tile_mode_states = 16;
2348         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2349         u32 num_pipe_configs;
2350         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2351                 rdev->config.cik.max_shader_engines;
2352
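        /* Each GB_TILE_MODE entry below is built by OR'ing ARRAY_MODE,
         * PIPE_CONFIG, TILE_SPLIT or SAMPLE_SPLIT, and MICRO_TILE_MODE_NEW
         * fields; the macrotile entries use BANK_WIDTH, BANK_HEIGHT,
         * MACRO_TILE_ASPECT and NUM_BANKS.  Both tables are also cached in
         * rdev->config.cik.tile_mode_array / macrotile_mode_array.
         */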
2353         switch (rdev->config.cik.mem_row_size_in_kb) {
2354         case 1:
2355                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2356                 break;
2357         case 2:
2358         default:
2359                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2360                 break;
2361         case 4:
2362                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2363                 break;
2364         }
2365
2366         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2367         if (num_pipe_configs > 8)
2368                 num_pipe_configs = 16;
2369
2370         if (num_pipe_configs == 16) {
2371                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2372                         switch (reg_offset) {
2373                         case 0:
2374                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2378                                 break;
2379                         case 1:
2380                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2381                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2382                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2384                                 break;
2385                         case 2:
2386                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2390                                 break;
2391                         case 3:
2392                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2394                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2396                                 break;
2397                         case 4:
2398                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2399                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2400                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401                                                  TILE_SPLIT(split_equal_to_row_size));
2402                                 break;
2403                         case 5:
2404                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2405                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407                                 break;
2408                         case 6:
2409                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2410                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2411                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2413                                 break;
2414                         case 7:
2415                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2416                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2417                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418                                                  TILE_SPLIT(split_equal_to_row_size));
2419                                 break;
2420                         case 8:
2421                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2422                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2423                                 break;
2424                         case 9:
2425                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2428                                 break;
2429                         case 10:
2430                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434                                 break;
2435                         case 11:
2436                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2438                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                                 break;
2441                         case 12:
2442                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2443                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2444                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2445                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446                                 break;
2447                         case 13:
2448                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2451                                 break;
2452                         case 14:
2453                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2455                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457                                 break;
2458                         case 16:
2459                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2460                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2462                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463                                 break;
2464                         case 17:
2465                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2466                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2467                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2468                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2469                                 break;
2470                         case 27:
2471                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2472                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2473                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2474                                 break;
2475                         case 28:
2476                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2477                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2478                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2479                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2480                                 break;
2481                         case 29:
2482                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2483                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2484                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2485                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2486                                 break;
2487                         case 30:
2488                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2489                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2490                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2491                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492                                 break;
2493                         default:
2494                                 gb_tile_moden = 0;
2495                                 break;
2496                         }
2497                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2498                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2499                 }
2500                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2501                         switch (reg_offset) {
2502                         case 0:
2503                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2506                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2507                                 break;
2508                         case 1:
2509                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2511                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2512                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2513                                 break;
2514                         case 2:
2515                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2518                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2519                                 break;
2520                         case 3:
2521                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2525                                 break;
2526                         case 4:
2527                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2530                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2531                                 break;
2532                         case 5:
2533                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2535                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2536                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2537                                 break;
2538                         case 6:
2539                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2541                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2542                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2543                                 break;
2544                         case 8:
2545                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2547                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2549                                 break;
2550                         case 9:
2551                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2553                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2554                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2555                                 break;
2556                         case 10:
2557                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2560                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2561                                 break;
2562                         case 11:
2563                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2566                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2567                                 break;
2568                         case 12:
2569                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2571                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2572                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2573                                 break;
2574                         case 13:
2575                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2577                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2578                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2579                                 break;
2580                         case 14:
2581                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2583                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2584                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2585                                 break;
2586                         default:
2587                                 gb_tile_moden = 0;
2588                                 break;
2589                         }
2590                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2591                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2592                 }
2593         } else if (num_pipe_configs == 8) {
2594                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2595                         switch (reg_offset) {
2596                         case 0:
2597                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2598                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2599                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2601                                 break;
2602                         case 1:
2603                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2605                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2607                                 break;
2608                         case 2:
2609                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2611                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2613                                 break;
2614                         case 3:
2615                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2617                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2619                                 break;
2620                         case 4:
2621                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2623                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624                                                  TILE_SPLIT(split_equal_to_row_size));
2625                                 break;
2626                         case 5:
2627                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2628                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2630                                 break;
2631                         case 6:
2632                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2633                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2634                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2635                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2636                                 break;
2637                         case 7:
2638                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2639                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2640                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641                                                  TILE_SPLIT(split_equal_to_row_size));
2642                                 break;
2643                         case 8:
2644                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2645                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2646                                 break;
2647                         case 9:
2648                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2649                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2650                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2651                                 break;
2652                         case 10:
2653                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2655                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657                                 break;
2658                         case 11:
2659                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2660                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2661                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2662                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2663                                 break;
2664                         case 12:
2665                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2666                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2667                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2668                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2669                                 break;
2670                         case 13:
2671                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2672                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2673                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2674                                 break;
2675                         case 14:
2676                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2678                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2680                                 break;
2681                         case 16:
2682                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2683                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2684                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2685                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2686                                 break;
2687                         case 17:
2688                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2690                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692                                 break;
2693                         case 27:
2694                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2695                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2697                                 break;
2698                         case 28:
2699                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2701                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2702                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703                                 break;
2704                         case 29:
2705                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2706                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2707                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2708                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2709                                 break;
2710                         case 30:
2711                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2712                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2713                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2714                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715                                 break;
2716                         default:
2717                                 gb_tile_moden = 0;
2718                                 break;
2719                         }
2720                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2721                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2722                 }
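                     /* Macrotile (bank width/height/aspect) settings for the 8-pipe layout. */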
2723                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2724                         switch (reg_offset) {
2725                         case 0:
2726                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2729                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2730                                 break;
2731                         case 1:
2732                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2734                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2735                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2736                                 break;
2737                         case 2:
2738                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2741                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2742                                 break;
2743                         case 3:
2744                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2746                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2748                                 break;
2749                         case 4:
2750                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2752                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2753                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2754                                 break;
2755                         case 5:
2756                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2759                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2760                                 break;
2761                         case 6:
2762                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2765                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2766                                 break;
2767                         case 8:
2768                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2770                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2771                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2772                                 break;
2773                         case 9:
2774                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2776                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2777                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2778                                 break;
2779                         case 10:
2780                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2782                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2783                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2784                                 break;
2785                         case 11:
2786                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2788                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2789                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2790                                 break;
2791                         case 12:
2792                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2794                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2795                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2796                                 break;
2797                         case 13:
2798                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2800                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2801                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2802                                 break;
2803                         case 14:
2804                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2806                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2807                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2808                                 break;
2809                         default:
2810                                 gb_tile_moden = 0;
2811                                 break;
2812                         }
2813                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2814                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2815                 }
2816         } else if (num_pipe_configs == 4) {
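                             /*
                              * 4-pipe parts pick the pipe config from the number of
                              * render backends: 4 RBs use ADDR_SURF_P4_16x16, fewer
                              * RBs use ADDR_SURF_P4_8x16.
                              */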
2817                 if (num_rbs == 4) {
2818                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2819                                 switch (reg_offset) {
2820                                 case 0:
2821                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2823                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2825                                         break;
2826                                 case 1:
2827                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2828                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2829                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2830                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2831                                         break;
2832                                 case 2:
2833                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2835                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2836                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2837                                         break;
2838                                 case 3:
2839                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2840                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2841                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2842                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2843                                         break;
2844                                 case 4:
2845                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2847                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2848                                                          TILE_SPLIT(split_equal_to_row_size));
2849                                         break;
2850                                 case 5:
2851                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2852                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2854                                         break;
2855                                 case 6:
2856                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2857                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2858                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2860                                         break;
2861                                 case 7:
2862                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2863                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2864                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865                                                          TILE_SPLIT(split_equal_to_row_size));
2866                                         break;
2867                                 case 8:
2868                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2869                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2870                                         break;
2871                                 case 9:
2872                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2873                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2875                                         break;
2876                                 case 10:
2877                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2879                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2881                                         break;
2882                                 case 11:
2883                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2884                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2885                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2886                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2887                                         break;
2888                                 case 12:
2889                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2890                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2891                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2892                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2893                                         break;
2894                                 case 13:
2895                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2896                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2898                                         break;
2899                                 case 14:
2900                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2902                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2904                                         break;
2905                                 case 16:
2906                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2907                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2908                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2909                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2910                                         break;
2911                                 case 17:
2912                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2914                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2915                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916                                         break;
2917                                 case 27:
2918                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2919                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2920                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2921                                         break;
2922                                 case 28:
2923                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2924                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2925                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927                                         break;
2928                                 case 29:
2929                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2931                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2932                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933                                         break;
2934                                 case 30:
2935                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2936                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2937                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2938                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939                                         break;
2940                                 default:
2941                                         gb_tile_moden = 0;
2942                                         break;
2943                                 }
2944                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2945                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2946                         }
2947                 } else if (num_rbs < 4) {
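                                     /* Reduced-RB table: same modes, but with the P4_8x16 pipe config. */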
2948                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2949                                 switch (reg_offset) {
2950                                 case 0:
2951                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2952                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2953                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2954                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2955                                         break;
2956                                 case 1:
2957                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2958                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2959                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2960                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2961                                         break;
2962                                 case 2:
2963                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2964                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2965                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2966                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2967                                         break;
2968                                 case 3:
2969                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2970                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2971                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2972                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2973                                         break;
2974                                 case 4:
2975                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2976                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2977                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2978                                                          TILE_SPLIT(split_equal_to_row_size));
2979                                         break;
2980                                 case 5:
2981                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2982                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2983                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2984                                         break;
2985                                 case 6:
2986                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2987                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2988                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2989                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2990                                         break;
2991                                 case 7:
2992                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2993                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2994                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2995                                                          TILE_SPLIT(split_equal_to_row_size));
2996                                         break;
2997                                 case 8:
2998                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2999                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16));
3000                                         break;
3001                                 case 9:
3002                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3003                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3004                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
3005                                         break;
3006                                 case 10:
3007                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3008                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3009                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3010                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011                                         break;
3012                                 case 11:
3013                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3014                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3015                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3016                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017                                         break;
3018                                 case 12:
3019                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3020                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3021                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3022                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3023                                         break;
3024                                 case 13:
3025                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3026                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3027                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3028                                         break;
3029                                 case 14:
3030                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3031                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3032                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3033                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3034                                         break;
3035                                 case 16:
3036                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3037                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3038                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3039                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3040                                         break;
3041                                 case 17:
3042                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3043                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3044                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3045                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3046                                         break;
3047                                 case 27:
3048                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3049                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3050                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3051                                         break;
3052                                 case 28:
3053                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3054                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3055                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3056                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3057                                         break;
3058                                 case 29:
3059                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3060                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3061                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3062                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3063                                         break;
3064                                 case 30:
3065                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3066                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3067                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3068                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069                                         break;
3070                                 default:
3071                                         gb_tile_moden = 0;
3072                                         break;
3073                                 }
3074                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3075                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3076                         }
3077                 }
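                     /* The macrotile (bank) settings are shared by both 4-pipe RB variants. */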
3078                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3079                         switch (reg_offset) {
3080                         case 0:
3081                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3083                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3084                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3085                                 break;
3086                         case 1:
3087                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3089                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3090                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3091                                 break;
3092                         case 2:
3093                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3095                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3096                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3097                                 break;
3098                         case 3:
3099                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3100                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3101                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3102                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3103                                 break;
3104                         case 4:
3105                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3106                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3107                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3108                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3109                                 break;
3110                         case 5:
3111                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3112                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3113                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3114                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3115                                 break;
3116                         case 6:
3117                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3119                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3120                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3121                                 break;
3122                         case 8:
3123                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3124                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3125                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3126                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3127                                 break;
3128                         case 9:
3129                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3130                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3131                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3132                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3133                                 break;
3134                         case 10:
3135                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3136                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3137                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3138                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3139                                 break;
3140                         case 11:
3141                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3142                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3143                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3144                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3145                                 break;
3146                         case 12:
3147                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3151                                 break;
3152                         case 13:
3153                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3154                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3155                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3156                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3157                                 break;
3158                         case 14:
3159                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3162                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3163                                 break;
3164                         default:
3165                                 gb_tile_moden = 0;
3166                                 break;
3167                         }
3168                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3169                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3170                 }
3171         } else if (num_pipe_configs == 2) {
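                     /* 2-pipe layout: tile modes use the ADDR_SURF_P2 pipe config. */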
3172                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3173                         switch (reg_offset) {
3174                         case 0:
3175                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3176                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3177                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3178                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3179                                 break;
3180                         case 1:
3181                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3182                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3183                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3184                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3185                                 break;
3186                         case 2:
3187                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3188                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3189                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3190                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3191                                 break;
3192                         case 3:
3193                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3194                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3195                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3196                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3197                                 break;
3198                         case 4:
3199                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3201                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3202                                                  TILE_SPLIT(split_equal_to_row_size));
3203                                 break;
3204                         case 5:
3205                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3206                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3207                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3208                                 break;
3209                         case 6:
3210                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3211                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3212                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3213                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3214                                 break;
3215                         case 7:
3216                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3217                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3218                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3219                                                  TILE_SPLIT(split_equal_to_row_size));
3220                                 break;
3221                         case 8:
3222                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3223                                                  PIPE_CONFIG(ADDR_SURF_P2));
3224                                 break;
3225                         case 9:
3226                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3227                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3228                                                  PIPE_CONFIG(ADDR_SURF_P2));
3229                                 break;
3230                         case 10:
3231                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3232                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3233                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3234                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235                                 break;
3236                         case 11:
3237                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3239                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3240                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3241                                 break;
3242                         case 12:
3243                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3244                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3245                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3246                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247                                 break;
3248                         case 13:
3249                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3250                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3251                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3252                                 break;
3253                         case 14:
3254                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3256                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3257                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258                                 break;
3259                         case 16:
3260                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3261                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3262                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3263                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3264                                 break;
3265                         case 17:
3266                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3267                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3268                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3269                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3270                                 break;
3271                         case 27:
3272                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3273                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3274                                                  PIPE_CONFIG(ADDR_SURF_P2));
3275                                 break;
3276                         case 28:
3277                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3278                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3279                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3280                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3281                                 break;
3282                         case 29:
3283                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3284                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3285                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3286                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3287                                 break;
3288                         case 30:
3289                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3290                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3291                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3292                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3293                                 break;
3294                         default:
3295                                 gb_tile_moden = 0;
3296                                 break;
3297                         }
3298                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3299                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3300                 }
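                     /* Macrotile (bank) settings for the 2-pipe layout. */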
3301                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3302                         switch (reg_offset) {
3303                         case 0:
3304                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3305                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3306                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3308                                 break;
3309                         case 1:
3310                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3311                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3312                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3313                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3314                                 break;
3315                         case 2:
3316                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3318                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3320                                 break;
3321                         case 3:
3322                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3325                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3326                                 break;
3327                         case 4:
3328                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3329                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3330                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3331                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3332                                 break;
3333                         case 5:
3334                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3335                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3336                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3337                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3338                                 break;
3339                         case 6:
3340                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3342                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3343                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3344                                 break;
3345                         case 8:
3346                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3347                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3348                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3349                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3350                                 break;
3351                         case 9:
3352                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3353                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3354                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3356                                 break;
3357                         case 10:
3358                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3359                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3360                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3361                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3362                                 break;
3363                         case 11:
3364                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3365                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3366                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3368                                 break;
3369                         case 12:
3370                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3371                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3372                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3373                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3374                                 break;
3375                         case 13:
3376                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3379                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3380                                 break;
3381                         case 14:
3382                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3384                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3385                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3386                                 break;
3387                         default:
3388                                 gb_tile_moden = 0;
3389                                 break;
3390                         }
3391                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3392                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3393                 }
3394         } else
3395                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3396 }
3397
3398 /**
3399  * cik_select_se_sh - select which SE, SH to address
3400  *
3401  * @rdev: radeon_device pointer
3402  * @se_num: shader engine to address
3403  * @sh_num: sh block to address
3404  *
3405  * Select which SE, SH combinations to address. Certain
3406  * registers are instanced per SE or SH.  0xffffffff means
3407  * broadcast to all SEs or SHs (CIK).
3408  */
3409 static void cik_select_se_sh(struct radeon_device *rdev,
3410                              u32 se_num, u32 sh_num)
3411 {
3412         u32 data = INSTANCE_BROADCAST_WRITES;
3413
3414         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3415                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3416         else if (se_num == 0xffffffff)
3417                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3418         else if (sh_num == 0xffffffff)
3419                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3420         else
3421                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3422         WREG32(GRBM_GFX_INDEX, data);
3423 }
3424
3425 /**
3426  * cik_create_bitmask - create a bitmask
3427  *
3428  * @bit_width: length of the mask
3429  *
3430  * create a variable length bit mask (CIK).
3431  * Returns the bitmask.
3432  */
3433 static u32 cik_create_bitmask(u32 bit_width)
3434 {
3435         u32 i, mask = 0;
3436
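             /* Build a mask of bit_width consecutive set bits, i.e. the
              * equivalent of (1 << bit_width) - 1, without shifting by the
              * full word size when bit_width == 32.
              */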
3437         for (i = 0; i < bit_width; i++) {
3438                 mask <<= 1;
3439                 mask |= 1;
3440         }
3441         return mask;
3442 }
3443
3444 /**
3445  * cik_get_rb_disabled - computes the mask of disabled RBs
3446  *
3447  * @rdev: radeon_device pointer
3448  * @max_rb_num_per_se: max RBs (render backends) per SE
3449  *                     for the asic
3450  * @sh_per_se: number of SH blocks per SE for the asic
3451  *
3452  * Calculates the bitmask of disabled RBs (CIK).
3453  * Returns the disabled RB bitmask.
3454  */
3455 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3456                               u32 max_rb_num_per_se,
3457                               u32 sh_per_se)
3458 {
3459         u32 data, mask;
3460
3461         data = RREG32(CC_RB_BACKEND_DISABLE);
3462         if (data & 1)
3463                 data &= BACKEND_DISABLE_MASK;
3464         else
3465                 data = 0;
3466         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3467
3468         data >>= BACKEND_DISABLE_SHIFT;
3469
3470         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3471
3472         return data & mask;
3473 }
3474
3475 /**
3476  * cik_setup_rb - setup the RBs on the asic
3477  *
3478  * @rdev: radeon_device pointer
3479  * @se_num: number of SEs (shader engines) for the asic
3480  * @sh_per_se: number of SH blocks per SE for the asic
3481  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3482  *
3483  * Configures per-SE/SH RB registers (CIK).
3484  */
3485 static void cik_setup_rb(struct radeon_device *rdev,
3486                          u32 se_num, u32 sh_per_se,
3487                          u32 max_rb_num_per_se)
3488 {
3489         int i, j;
3490         u32 data, mask;
3491         u32 disabled_rbs = 0;
3492         u32 enabled_rbs = 0;
3493
3494         mutex_lock(&rdev->grbm_idx_mutex);
3495         for (i = 0; i < se_num; i++) {
3496                 for (j = 0; j < sh_per_se; j++) {
3497                         cik_select_se_sh(rdev, i, j);
3498                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3499                         if (rdev->family == CHIP_HAWAII)
3500                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3501                         else
3502                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3503                 }
3504         }
3505         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3506         mutex_unlock(&rdev->grbm_idx_mutex);
3507
3508         mask = 1;
3509         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3510                 if (!(disabled_rbs & mask))
3511                         enabled_rbs |= mask;
3512                 mask <<= 1;
3513         }
3514
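             /* enabled_rbs now has one bit set for each RB that is present and not disabled */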
3515         rdev->config.cik.backend_enable_mask = enabled_rbs;
3516
3517         mutex_lock(&rdev->grbm_idx_mutex);
3518         for (i = 0; i < se_num; i++) {
3519                 cik_select_se_sh(rdev, i, 0xffffffff);
3520                 data = 0;
3521                 for (j = 0; j < sh_per_se; j++) {
3522                         switch (enabled_rbs & 3) {
3523                         case 0:
3524                                 if (j == 0)
3525                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3526                                 else
3527                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3528                                 break;
3529                         case 1:
3530                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3531                                 break;
3532                         case 2:
3533                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3534                                 break;
3535                         case 3:
3536                         default:
3537                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3538                                 break;
3539                         }
3540                         enabled_rbs >>= 2;
3541                 }
3542                 WREG32(PA_SC_RASTER_CONFIG, data);
3543         }
3544         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3545         mutex_unlock(&rdev->grbm_idx_mutex);
3546 }
3547
3548 /**
3549  * cik_gpu_init - setup the 3D engine
3550  *
3551  * @rdev: radeon_device pointer
3552  *
3553  * Configures the 3D engine and tiling configuration
3554  * registers so that the 3D engine is usable.
3555  */
3556 static void cik_gpu_init(struct radeon_device *rdev)
3557 {
3558         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3559         u32 mc_shared_chmap, mc_arb_ramcfg;
3560         u32 hdp_host_path_cntl;
3561         u32 tmp;
3562         int i, j;
3563
3564         switch (rdev->family) {
3565         case CHIP_BONAIRE:
3566                 rdev->config.cik.max_shader_engines = 2;
3567                 rdev->config.cik.max_tile_pipes = 4;
3568                 rdev->config.cik.max_cu_per_sh = 7;
3569                 rdev->config.cik.max_sh_per_se = 1;
3570                 rdev->config.cik.max_backends_per_se = 2;
3571                 rdev->config.cik.max_texture_channel_caches = 4;
3572                 rdev->config.cik.max_gprs = 256;
3573                 rdev->config.cik.max_gs_threads = 32;
3574                 rdev->config.cik.max_hw_contexts = 8;
3575
3576                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3577                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3578                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3579                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3580                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3581                 break;
3582         case CHIP_HAWAII:
3583                 rdev->config.cik.max_shader_engines = 4;
3584                 rdev->config.cik.max_tile_pipes = 16;
3585                 rdev->config.cik.max_cu_per_sh = 11;
3586                 rdev->config.cik.max_sh_per_se = 1;
3587                 rdev->config.cik.max_backends_per_se = 4;
3588                 rdev->config.cik.max_texture_channel_caches = 16;
3589                 rdev->config.cik.max_gprs = 256;
3590                 rdev->config.cik.max_gs_threads = 32;
3591                 rdev->config.cik.max_hw_contexts = 8;
3592
3593                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3594                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3595                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3596                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3597                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3598                 break;
3599         case CHIP_KAVERI:
3600                 rdev->config.cik.max_shader_engines = 1;
3601                 rdev->config.cik.max_tile_pipes = 4;
3602                 if ((rdev->pdev->device == 0x1304) ||
3603                     (rdev->pdev->device == 0x1305) ||
3604                     (rdev->pdev->device == 0x130C) ||
3605                     (rdev->pdev->device == 0x130F) ||
3606                     (rdev->pdev->device == 0x1310) ||
3607                     (rdev->pdev->device == 0x1311) ||
3608                     (rdev->pdev->device == 0x131C)) {
3609                         rdev->config.cik.max_cu_per_sh = 8;
3610                         rdev->config.cik.max_backends_per_se = 2;
3611                 } else if ((rdev->pdev->device == 0x1309) ||
3612                            (rdev->pdev->device == 0x130A) ||
3613                            (rdev->pdev->device == 0x130D) ||
3614                            (rdev->pdev->device == 0x1313) ||
3615                            (rdev->pdev->device == 0x131D)) {
3616                         rdev->config.cik.max_cu_per_sh = 6;
3617                         rdev->config.cik.max_backends_per_se = 2;
3618                 } else if ((rdev->pdev->device == 0x1306) ||
3619                            (rdev->pdev->device == 0x1307) ||
3620                            (rdev->pdev->device == 0x130B) ||
3621                            (rdev->pdev->device == 0x130E) ||
3622                            (rdev->pdev->device == 0x1315) ||
3623                            (rdev->pdev->device == 0x1318) ||
3624                            (rdev->pdev->device == 0x131B)) {
3625                         rdev->config.cik.max_cu_per_sh = 4;
3626                         rdev->config.cik.max_backends_per_se = 1;
3627                 } else {
3628                         rdev->config.cik.max_cu_per_sh = 3;
3629                         rdev->config.cik.max_backends_per_se = 1;
3630                 }
3631                 rdev->config.cik.max_sh_per_se = 1;
3632                 rdev->config.cik.max_texture_channel_caches = 4;
3633                 rdev->config.cik.max_gprs = 256;
3634                 rdev->config.cik.max_gs_threads = 16;
3635                 rdev->config.cik.max_hw_contexts = 8;
3636
3637                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3638                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3639                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3640                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3641                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3642                 break;
3643         case CHIP_KABINI:
3644         case CHIP_MULLINS:
3645         default:
3646                 rdev->config.cik.max_shader_engines = 1;
3647                 rdev->config.cik.max_tile_pipes = 2;
3648                 rdev->config.cik.max_cu_per_sh = 2;
3649                 rdev->config.cik.max_sh_per_se = 1;
3650                 rdev->config.cik.max_backends_per_se = 1;
3651                 rdev->config.cik.max_texture_channel_caches = 2;
3652                 rdev->config.cik.max_gprs = 256;
3653                 rdev->config.cik.max_gs_threads = 16;
3654                 rdev->config.cik.max_hw_contexts = 8;
3655
3656                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3657                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3658                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3659                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3660                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3661                 break;
3662         }
3663
3664         /* Initialize HDP */
3665         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3666                 WREG32((0x2c14 + j), 0x00000000);
3667                 WREG32((0x2c18 + j), 0x00000000);
3668                 WREG32((0x2c1c + j), 0x00000000);
3669                 WREG32((0x2c20 + j), 0x00000000);
3670                 WREG32((0x2c24 + j), 0x00000000);
3671         }
3672
3673         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3674         WREG32(SRBM_INT_CNTL, 0x1);
3675         WREG32(SRBM_INT_ACK, 0x1);
3676
3677         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3678
3679         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3680         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3681
3682         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3683         rdev->config.cik.mem_max_burst_length_bytes = 256;
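             /* row size = (4 * 2^(8 + NOOFCOLS)) bytes, converted to KB and capped at 4KB below */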
3684         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3685         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3686         if (rdev->config.cik.mem_row_size_in_kb > 4)
3687                 rdev->config.cik.mem_row_size_in_kb = 4;
3688         /* XXX use MC settings? */
3689         rdev->config.cik.shader_engine_tile_size = 32;
3690         rdev->config.cik.num_gpus = 1;
3691         rdev->config.cik.multi_gpu_tile_size = 64;
3692
3693         /* fix up row size */
3694         gb_addr_config &= ~ROW_SIZE_MASK;
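             /* ROW_SIZE field encoding as used here: 0 = 1KB, 1 = 2KB, 2 = 4KB rows */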
3695         switch (rdev->config.cik.mem_row_size_in_kb) {
3696         case 1:
3697         default:
3698                 gb_addr_config |= ROW_SIZE(0);
3699                 break;
3700         case 2:
3701                 gb_addr_config |= ROW_SIZE(1);
3702                 break;
3703         case 4:
3704                 gb_addr_config |= ROW_SIZE(2);
3705                 break;
3706         }
3707
3708         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3709          * not have bank info, so create a custom tiling dword.
3710          * bits 3:0   num_pipes
3711          * bits 7:4   num_banks
3712          * bits 11:8  group_size
3713          * bits 15:12 row_size
3714          */
3715         rdev->config.cik.tile_config = 0;
3716         switch (rdev->config.cik.num_tile_pipes) {
3717         case 1:
3718                 rdev->config.cik.tile_config |= (0 << 0);
3719                 break;
3720         case 2:
3721                 rdev->config.cik.tile_config |= (1 << 0);
3722                 break;
3723         case 4:
3724                 rdev->config.cik.tile_config |= (2 << 0);
3725                 break;
3726         case 8:
3727         default:
3728                 /* XXX what about 12? */
3729                 rdev->config.cik.tile_config |= (3 << 0);
3730                 break;
3731         }
3732         rdev->config.cik.tile_config |=
3733                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3734         rdev->config.cik.tile_config |=
3735                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3736         rdev->config.cik.tile_config |=
3737                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3738
3739         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3740         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3741         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3742         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3743         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3744         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3745         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3746         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3747
3748         cik_tiling_mode_table_init(rdev);
3749
3750         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3751                      rdev->config.cik.max_sh_per_se,
3752                      rdev->config.cik.max_backends_per_se);
3753
3754         rdev->config.cik.active_cus = 0;
3755         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3756                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3757                         rdev->config.cik.active_cus +=
3758                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3759                 }
3760         }
3761
3762         /* set HW defaults for 3D engine */
3763         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3764
3765         mutex_lock(&rdev->grbm_idx_mutex);
3766         /*
3767          * make sure that the following register writes are broadcast
3768          * to all the shaders
3769          */
3770         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3771         WREG32(SX_DEBUG_1, 0x20);
3772
3773         WREG32(TA_CNTL_AUX, 0x00010000);
3774
3775         tmp = RREG32(SPI_CONFIG_CNTL);
3776         tmp |= 0x03000000;
3777         WREG32(SPI_CONFIG_CNTL, tmp);
3778
3779         WREG32(SQ_CONFIG, 1);
3780
3781         WREG32(DB_DEBUG, 0);
3782
3783         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3784         tmp |= 0x00000400;
3785         WREG32(DB_DEBUG2, tmp);
3786
3787         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3788         tmp |= 0x00020200;
3789         WREG32(DB_DEBUG3, tmp);
3790
3791         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3792         tmp |= 0x00018208;
3793         WREG32(CB_HW_CONTROL, tmp);
3794
3795         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3796
3797         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3798                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3799                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3800                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3801
3802         WREG32(VGT_NUM_INSTANCES, 1);
3803
3804         WREG32(CP_PERFMON_CNTL, 0);
3805
3806         WREG32(SQ_CONFIG, 0);
3807
3808         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3809                                           FORCE_EOV_MAX_REZ_CNT(255)));
3810
3811         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3812                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3813
3814         WREG32(VGT_GS_VERTEX_REUSE, 16);
3815         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3816
3817         tmp = RREG32(HDP_MISC_CNTL);
3818         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3819         WREG32(HDP_MISC_CNTL, tmp);
3820
3821         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3822         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3823
3824         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3825         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3826         mutex_unlock(&rdev->grbm_idx_mutex);
3827
3828         udelay(50);
3829 }
3830
3831 /*
3832  * GPU scratch registers helpers function.
3833  */
3834 /**
3835  * cik_scratch_init - setup driver info for CP scratch regs
3836  *
3837  * @rdev: radeon_device pointer
3838  *
3839  * Set up the number and offset of the CP scratch registers.
3840  * NOTE: use of CP scratch registers is a legacy interface and
3841  * is not used by default on newer asics (r6xx+).  On newer asics,
3842  * memory buffers are used for fences rather than scratch regs.
3843  */
3844 static void cik_scratch_init(struct radeon_device *rdev)
3845 {
3846         int i;
3847
3848         rdev->scratch.num_reg = 7;
3849         rdev->scratch.reg_base = SCRATCH_REG0;
3850         for (i = 0; i < rdev->scratch.num_reg; i++) {
3851                 rdev->scratch.free[i] = true;
3852                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3853         }
3854 }
3855
3856 /**
3857  * cik_ring_test - basic gfx ring test
3858  *
3859  * @rdev: radeon_device pointer
3860  * @ring: radeon_ring structure holding ring information
3861  *
3862  * Allocate a scratch register and write to it using the gfx ring (CIK).
3863  * Provides a basic gfx ring test to verify that the ring is working.
3864  * Used by cik_cp_gfx_resume().
3865  * Returns 0 on success, error on failure.
3866  */
3867 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3868 {
3869         uint32_t scratch;
3870         uint32_t tmp = 0;
3871         unsigned i;
3872         int r;
3873
3874         r = radeon_scratch_get(rdev, &scratch);
3875         if (r) {
3876                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3877                 return r;
3878         }
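             /* Seed the scratch register, then have the CP overwrite it with
              * 0xDEADBEEF via a SET_UCONFIG_REG packet and poll for the new value.
              */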
3879         WREG32(scratch, 0xCAFEDEAD);
3880         r = radeon_ring_lock(rdev, ring, 3);
3881         if (r) {
3882                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3883                 radeon_scratch_free(rdev, scratch);
3884                 return r;
3885         }
3886         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3887         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3888         radeon_ring_write(ring, 0xDEADBEEF);
3889         radeon_ring_unlock_commit(rdev, ring, false);
3890
3891         for (i = 0; i < rdev->usec_timeout; i++) {
3892                 tmp = RREG32(scratch);
3893                 if (tmp == 0xDEADBEEF)
3894                         break;
3895                 DRM_UDELAY(1);
3896         }
3897         if (i < rdev->usec_timeout) {
3898                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3899         } else {
3900                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3901                           ring->idx, scratch, tmp);
3902                 r = -EINVAL;
3903         }
3904         radeon_scratch_free(rdev, scratch);
3905         return r;
3906 }
3907
3908 /**
3909  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3910  *
3911  * @rdev: radeon_device pointer
3912  * @ridx: radeon ring index
3913  *
3914  * Emits an hdp flush on the cp.
3915  */
3916 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3917                                        int ridx)
3918 {
3919         struct radeon_ring *ring = &rdev->ring[ridx];
3920         u32 ref_and_mask;
3921
3922         switch (ring->idx) {
3923         case CAYMAN_RING_TYPE_CP1_INDEX:
3924         case CAYMAN_RING_TYPE_CP2_INDEX:
3925         default:
3926                 switch (ring->me) {
3927                 case 0:
3928                         ref_and_mask = CP2 << ring->pipe;
3929                         break;
3930                 case 1:
3931                         ref_and_mask = CP6 << ring->pipe;
3932                         break;
3933                 default:
3934                         return;
3935                 }
3936                 break;
3937         case RADEON_RING_TYPE_GFX_INDEX:
3938                 ref_and_mask = CP0;
3939                 break;
3940         }
3941
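             /* write/wait/write: write ref_and_mask to GPU_HDP_FLUSH_REQ, then
              * poll GPU_HDP_FLUSH_DONE (every 0x20 clocks) until the masked
              * value matches, which completes the HDP flush.
              */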
3942         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3943         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3944                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3945                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3946         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3947         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3948         radeon_ring_write(ring, ref_and_mask);
3949         radeon_ring_write(ring, ref_and_mask);
3950         radeon_ring_write(ring, 0x20); /* poll interval */
3951 }
3952
3953 /**
3954  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3955  *
3956  * @rdev: radeon_device pointer
3957  * @fence: radeon fence object
3958  *
3959  * Emits a fence sequence number on the gfx ring and flushes
3960  * GPU caches.
3961  */
3962 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3963                              struct radeon_fence *fence)
3964 {
3965         struct radeon_ring *ring = &rdev->ring[fence->ring];
3966         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3967
3968         /* Workaround for cache flush problems. First send a dummy EOP
3969          * event down the pipe with a sequence number one below the real one.
3970          */
3971         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3972         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3973                                  EOP_TC_ACTION_EN |
3974                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3975                                  EVENT_INDEX(5)));
3976         radeon_ring_write(ring, addr & 0xfffffffc);
3977         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3978                                 DATA_SEL(1) | INT_SEL(0));
3979         radeon_ring_write(ring, fence->seq - 1);
3980         radeon_ring_write(ring, 0);
3981
3982         /* Then send the real EOP event down the pipe. */
3983         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3984         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3985                                  EOP_TC_ACTION_EN |
3986                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3987                                  EVENT_INDEX(5)));
3988         radeon_ring_write(ring, addr & 0xfffffffc);
3989         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3990         radeon_ring_write(ring, fence->seq);
3991         radeon_ring_write(ring, 0);
3992 }
3993
3994 /**
3995  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3996  *
3997  * @rdev: radeon_device pointer
3998  * @fence: radeon fence object
3999  *
4000  * Emits a fence sequence number on the compute ring and flushes
4001  * GPU caches.
4002  */
4003 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
4004                                  struct radeon_fence *fence)
4005 {
4006         struct radeon_ring *ring = &rdev->ring[fence->ring];
4007         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
4008
4009         /* RELEASE_MEM - flush caches, send int */
4010         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
4011         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
4012                                  EOP_TC_ACTION_EN |
4013                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4014                                  EVENT_INDEX(5)));
4015         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
4016         radeon_ring_write(ring, addr & 0xfffffffc);
4017         radeon_ring_write(ring, upper_32_bits(addr));
4018         radeon_ring_write(ring, fence->seq);
4019         radeon_ring_write(ring, 0);
4020 }
4021
4022 /**
4023  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
4024  *
4025  * @rdev: radeon_device pointer
4026  * @ring: radeon ring buffer object
4027  * @semaphore: radeon semaphore object
4028  * @emit_wait: Is this a semaphore wait?
4029  *
4030  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4031  * from running ahead of semaphore waits.
4032  */
4033 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
4034                              struct radeon_ring *ring,
4035                              struct radeon_semaphore *semaphore,
4036                              bool emit_wait)
4037 {
4038         uint64_t addr = semaphore->gpu_addr;
4039         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4040
4041         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
4042         radeon_ring_write(ring, lower_32_bits(addr));
4043         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
4044
4045         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
4046                 /* Prevent the PFP from running ahead of the semaphore wait */
4047                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4048                 radeon_ring_write(ring, 0x0);
4049         }
4050
4051         return true;
4052 }
4053
4054 /**
4055  * cik_copy_cpdma - copy pages using the CP DMA engine
4056  *
4057  * @rdev: radeon_device pointer
4058  * @src_offset: src GPU address
4059  * @dst_offset: dst GPU address
4060  * @num_gpu_pages: number of GPU pages to xfer
4061  * @resv: reservation object to sync to
4062  *
4063  * Copy GPU pages using the CP DMA engine (CIK+).
4064  * Used by the radeon ttm implementation to move pages if
4065  * registered as the asic copy callback.
4066  */
4067 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
4068                                     uint64_t src_offset, uint64_t dst_offset,
4069                                     unsigned num_gpu_pages,
4070                                     struct reservation_object *resv)
4071 {
4072         struct radeon_fence *fence;
4073         struct radeon_sync sync;
4074         int ring_index = rdev->asic->copy.blit_ring_index;
4075         struct radeon_ring *ring = &rdev->ring[ring_index];
4076         u32 size_in_bytes, cur_size_in_bytes, control;
4077         int i, num_loops;
4078         int r = 0;
4079
4080         radeon_sync_create(&sync);
4081
4082         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
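             /* each DMA_DATA packet can copy at most 0x1fffff bytes (~2 MiB - 1),
              * so larger copies are split across multiple packets
              */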
4083         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4084         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4085         if (r) {
4086                 DRM_ERROR("radeon: moving bo (%d).\n", r);
4087                 radeon_sync_free(rdev, &sync, NULL);
4088                 return ERR_PTR(r);
4089         }
4090
4091         radeon_sync_resv(rdev, &sync, resv, false);
4092         radeon_sync_rings(rdev, &sync, ring->idx);
4093
4094         for (i = 0; i < num_loops; i++) {
4095                 cur_size_in_bytes = size_in_bytes;
4096                 if (cur_size_in_bytes > 0x1fffff)
4097                         cur_size_in_bytes = 0x1fffff;
4098                 size_in_bytes -= cur_size_in_bytes;
4099                 control = 0;
4100                 if (size_in_bytes == 0)
4101                         control |= PACKET3_DMA_DATA_CP_SYNC;
4102                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4103                 radeon_ring_write(ring, control);
4104                 radeon_ring_write(ring, lower_32_bits(src_offset));
4105                 radeon_ring_write(ring, upper_32_bits(src_offset));
4106                 radeon_ring_write(ring, lower_32_bits(dst_offset));
4107                 radeon_ring_write(ring, upper_32_bits(dst_offset));
4108                 radeon_ring_write(ring, cur_size_in_bytes);
4109                 src_offset += cur_size_in_bytes;
4110                 dst_offset += cur_size_in_bytes;
4111         }
4112
4113         r = radeon_fence_emit(rdev, &fence, ring->idx);
4114         if (r) {
4115                 radeon_ring_unlock_undo(rdev, ring);
4116                 radeon_sync_free(rdev, &sync, NULL);
4117                 return ERR_PTR(r);
4118         }
4119
4120         radeon_ring_unlock_commit(rdev, ring, false);
4121         radeon_sync_free(rdev, &sync, fence);
4122
4123         return fence;
4124 }
4125
4126 /*
4127  * IB stuff
4128  */
4129 /**
4130  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4131  *
4132  * @rdev: radeon_device pointer
4133  * @ib: radeon indirect buffer object
4134  *
4135  * Emits a DE (drawing engine) or CE (constant engine) IB
4136  * on the gfx ring.  IBs are usually generated by userspace
4137  * acceleration drivers and submitted to the kernel for
4138  * scheduling on the ring.  This function schedules the IB
4139  * on the gfx ring for execution by the GPU.
4140  */
4141 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4142 {
4143         struct radeon_ring *ring = &rdev->ring[ib->ring];
4144         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
4145         u32 header, control = INDIRECT_BUFFER_VALID;
4146
4147         if (ib->is_const_ib) {
4148                 /* set switch buffer packet before const IB */
4149                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4150                 radeon_ring_write(ring, 0);
4151
4152                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4153         } else {
4154                 u32 next_rptr;
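                     /* next_rptr points past the 3- or 5-dword rptr update
                      * emitted here plus the 4-dword IB packet that follows
                      */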
4155                 if (ring->rptr_save_reg) {
4156                         next_rptr = ring->wptr + 3 + 4;
4157                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4158                         radeon_ring_write(ring, ((ring->rptr_save_reg -
4159                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
4160                         radeon_ring_write(ring, next_rptr);
4161                 } else if (rdev->wb.enabled) {
4162                         next_rptr = ring->wptr + 5 + 4;
4163                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4164                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4165                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4166                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4167                         radeon_ring_write(ring, next_rptr);
4168                 }
4169
4170                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4171         }
4172
4173         control |= ib->length_dw | (vm_id << 24);
4174
4175         radeon_ring_write(ring, header);
4176         radeon_ring_write(ring,
4177 #ifdef __BIG_ENDIAN
4178                           (2 << 0) |
4179 #endif
4180                           (ib->gpu_addr & 0xFFFFFFFC));
4181         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4182         radeon_ring_write(ring, control);
4183 }
4184
4185 /**
4186  * cik_ib_test - basic gfx ring IB test
4187  *
4188  * @rdev: radeon_device pointer
4189  * @ring: radeon_ring structure holding ring information
4190  *
4191  * Allocate an IB and execute it on the gfx ring (CIK).
4192  * Provides a basic gfx ring test to verify that IBs are working.
4193  * Returns 0 on success, error on failure.
4194  */
4195 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4196 {
4197         struct radeon_ib ib;
4198         uint32_t scratch;
4199         uint32_t tmp = 0;
4200         unsigned i;
4201         int r;
4202
4203         r = radeon_scratch_get(rdev, &scratch);
4204         if (r) {
4205                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4206                 return r;
4207         }
4208         WREG32(scratch, 0xCAFEDEAD);
4209         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4210         if (r) {
4211                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4212                 radeon_scratch_free(rdev, scratch);
4213                 return r;
4214         }
4215         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4216         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4217         ib.ptr[2] = 0xDEADBEEF;
4218         ib.length_dw = 3;
4219         r = radeon_ib_schedule(rdev, &ib, NULL, false);
4220         if (r) {
4221                 radeon_scratch_free(rdev, scratch);
4222                 radeon_ib_free(rdev, &ib);
4223                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4224                 return r;
4225         }
4226         r = radeon_fence_wait(ib.fence, false);
4227         if (r) {
4228                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4229                 radeon_scratch_free(rdev, scratch);
4230                 radeon_ib_free(rdev, &ib);
4231                 return r;
4232         }
4233         for (i = 0; i < rdev->usec_timeout; i++) {
4234                 tmp = RREG32(scratch);
4235                 if (tmp == 0xDEADBEEF)
4236                         break;
4237                 DRM_UDELAY(1);
4238         }
4239         if (i < rdev->usec_timeout) {
4240                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4241         } else {
4242                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4243                           scratch, tmp);
4244                 r = -EINVAL;
4245         }
4246         radeon_scratch_free(rdev, scratch);
4247         radeon_ib_free(rdev, &ib);
4248         return r;
4249 }
4250
4251 /*
4252  * CP.
4253  * On CIK, gfx and compute now have independent command processors.
4254  *
4255  * GFX
4256  * Gfx consists of a single ring and can process both gfx jobs and
4257  * compute jobs.  The gfx CP consists of three microengines (ME):
4258  * PFP - Pre-Fetch Parser
4259  * ME - Micro Engine
4260  * CE - Constant Engine
4261  * The PFP and ME make up what is considered the Drawing Engine (DE).
4262  * The CE is an asynchronous engine used for updating buffer descriptors
4263  * used by the DE so that they can be loaded into cache in parallel
4264  * while the DE is processing state update packets.
4265  *
4266  * Compute
4267  * The compute CP consists of two microengines (ME):
4268  * MEC1 - Compute MicroEngine 1
4269  * MEC2 - Compute MicroEngine 2
4270  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4271  * The queues are exposed to userspace and are programmed directly
4272  * by the compute runtime.
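      * With two MECs, four pipes per MEC and eight queues per pipe, the
      * asic exposes up to 64 compute queues in total.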
4273  */
4274 /**
4275  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4276  *
4277  * @rdev: radeon_device pointer
4278  * @enable: enable or disable the MEs
4279  *
4280  * Halts or unhalts the gfx MEs.
4281  */
4282 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4283 {
4284         if (enable)
4285                 WREG32(CP_ME_CNTL, 0);
4286         else {
4287                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4288                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4289                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4290                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4291         }
4292         udelay(50);
4293 }
4294
4295 /**
4296  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4297  *
4298  * @rdev: radeon_device pointer
4299  *
4300  * Loads the gfx PFP, ME, and CE ucode.
4301  * Returns 0 for success, -EINVAL if the ucode is not available.
4302  */
4303 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4304 {
4305         int i;
4306
4307         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4308                 return -EINVAL;
4309
4310         cik_cp_gfx_enable(rdev, false);
4311
4312         if (rdev->new_fw) {
4313                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
4314                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4315                 const struct gfx_firmware_header_v1_0 *ce_hdr =
4316                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4317                 const struct gfx_firmware_header_v1_0 *me_hdr =
4318                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4319                 const __le32 *fw_data;
4320                 u32 fw_size;
4321
4322                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4323                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4324                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
4325
4326                 /* PFP */
4327                 fw_data = (const __le32 *)
4328                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4329                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4330                 WREG32(CP_PFP_UCODE_ADDR, 0);
4331                 for (i = 0; i < fw_size; i++)
4332                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4333                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4334
4335                 /* CE */
4336                 fw_data = (const __le32 *)
4337                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4338                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4339                 WREG32(CP_CE_UCODE_ADDR, 0);
4340                 for (i = 0; i < fw_size; i++)
4341                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4342                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4343
4344                 /* ME */
4345                 fw_data = (const __le32 *)
4346                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4347                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4348                 WREG32(CP_ME_RAM_WADDR, 0);
4349                 for (i = 0; i < fw_size; i++)
4350                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4351                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4352                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4353         } else {
4354                 const __be32 *fw_data;
4355
4356                 /* PFP */
4357                 fw_data = (const __be32 *)rdev->pfp_fw->data;
4358                 WREG32(CP_PFP_UCODE_ADDR, 0);
4359                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4360                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4361                 WREG32(CP_PFP_UCODE_ADDR, 0);
4362
4363                 /* CE */
4364                 fw_data = (const __be32 *)rdev->ce_fw->data;
4365                 WREG32(CP_CE_UCODE_ADDR, 0);
4366                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4367                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4368                 WREG32(CP_CE_UCODE_ADDR, 0);
4369
4370                 /* ME */
4371                 fw_data = (const __be32 *)rdev->me_fw->data;
4372                 WREG32(CP_ME_RAM_WADDR, 0);
4373                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4374                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4375                 WREG32(CP_ME_RAM_WADDR, 0);
4376         }
4377
4378         return 0;
4379 }
4380
4381 /**
4382  * cik_cp_gfx_start - start the gfx ring
4383  *
4384  * @rdev: radeon_device pointer
4385  *
4386  * Enables the ring and loads the clear state context and other
4387  * packets required to init the ring.
4388  * Returns 0 for success, error for failure.
4389  */
4390 static int cik_cp_gfx_start(struct radeon_device *rdev)
4391 {
4392         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4393         int r, i;
4394
4395         /* init the CP */
4396         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4397         WREG32(CP_ENDIAN_SWAP, 0);
4398         WREG32(CP_DEVICE_ID, 1);
4399
4400         cik_cp_gfx_enable(rdev, true);
4401
4402         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4403         if (r) {
4404                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4405                 return r;
4406         }
4407
4408         /* init the CE partitions.  CE only used for gfx on CIK */
4409         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4410         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4411         radeon_ring_write(ring, 0x8000);
4412         radeon_ring_write(ring, 0x8000);
4413
4414         /* setup clear context state */
4415         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4416         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4417
4418         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4419         radeon_ring_write(ring, 0x80000000);
4420         radeon_ring_write(ring, 0x80000000);
4421
4422         for (i = 0; i < cik_default_size; i++)
4423                 radeon_ring_write(ring, cik_default_state[i]);
4424
4425         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4426         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4427
4428         /* set clear context state */
4429         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4430         radeon_ring_write(ring, 0);
4431
4432         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4433         radeon_ring_write(ring, 0x00000316);
4434         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4435         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4436
4437         radeon_ring_unlock_commit(rdev, ring, false);
4438
4439         return 0;
4440 }
4441
4442 /**
4443  * cik_cp_gfx_fini - stop the gfx ring
4444  *
4445  * @rdev: radeon_device pointer
4446  *
4447  * Stop the gfx ring and tear down the driver ring
4448  * info.
4449  */
4450 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4451 {
4452         cik_cp_gfx_enable(rdev, false);
4453         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4454 }
4455
4456 /**
4457  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4458  *
4459  * @rdev: radeon_device pointer
4460  *
4461  * Program the location and size of the gfx ring buffer
4462  * and test it to make sure it's working.
4463  * Returns 0 for success, error for failure.
4464  */
4465 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4466 {
4467         struct radeon_ring *ring;
4468         u32 tmp;
4469         u32 rb_bufsz;
4470         u64 rb_addr;
4471         int r;
4472
4473         WREG32(CP_SEM_WAIT_TIMER, 0x0);
4474         if (rdev->family != CHIP_HAWAII)
4475                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4476
4477         /* Set the write pointer delay */
4478         WREG32(CP_RB_WPTR_DELAY, 0);
4479
4480         /* set the RB to use vmid 0 */
4481         WREG32(CP_RB_VMID, 0);
4482
4483         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4484
4485         /* ring 0 - compute and gfx */
4486         /* Set ring buffer size */
4487         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4488         rb_bufsz = order_base_2(ring->ring_size / 8);
4489         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4490 #ifdef __BIG_ENDIAN
4491         tmp |= BUF_SWAP_32BIT;
4492 #endif
4493         WREG32(CP_RB0_CNTL, tmp);
4494
4495         /* Initialize the ring buffer's read and write pointers */
4496         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4497         ring->wptr = 0;
4498         WREG32(CP_RB0_WPTR, ring->wptr);
4499
4500         /* set the wb address whether it's enabled or not */
4501         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4502         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4503
4504         /* scratch register shadowing is no longer supported */
4505         WREG32(SCRATCH_UMSK, 0);
4506
4507         if (!rdev->wb.enabled)
4508                 tmp |= RB_NO_UPDATE;
4509
4510         mdelay(1);
4511         WREG32(CP_RB0_CNTL, tmp);
4512
4513         rb_addr = ring->gpu_addr >> 8;
4514         WREG32(CP_RB0_BASE, rb_addr);
4515         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4516
4517         /* start the ring */
4518         cik_cp_gfx_start(rdev);
4519         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4520         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4521         if (r) {
4522                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4523                 return r;
4524         }
4525
4526         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4527                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4528
4529         return 0;
4530 }
4531
4532 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4533                      struct radeon_ring *ring)
4534 {
4535         u32 rptr;
4536
4537         if (rdev->wb.enabled)
4538                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4539         else
4540                 rptr = RREG32(CP_RB0_RPTR);
4541
4542         return rptr;
4543 }
4544
4545 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4546                      struct radeon_ring *ring)
4547 {
4548         u32 wptr;
4549
4550         wptr = RREG32(CP_RB0_WPTR);
4551
4552         return wptr;
4553 }
4554
4555 void cik_gfx_set_wptr(struct radeon_device *rdev,
4556                       struct radeon_ring *ring)
4557 {
4558         WREG32(CP_RB0_WPTR, ring->wptr);
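             /* read back to make sure the write pointer update reaches the hardware */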
4559         (void)RREG32(CP_RB0_WPTR);
4560 }
4561
4562 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4563                          struct radeon_ring *ring)
4564 {
4565         u32 rptr;
4566
4567         if (rdev->wb.enabled) {
4568                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4569         } else {
4570                 mutex_lock(&rdev->srbm_mutex);
4571                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4572                 rptr = RREG32(CP_HQD_PQ_RPTR);
4573                 cik_srbm_select(rdev, 0, 0, 0, 0);
4574                 mutex_unlock(&rdev->srbm_mutex);
4575         }
4576
4577         return rptr;
4578 }
4579
4580 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4581                          struct radeon_ring *ring)
4582 {
4583         u32 wptr;
4584
4585         if (rdev->wb.enabled) {
4586                 /* XXX check if swapping is necessary on BE */
4587                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4588         } else {
4589                 mutex_lock(&rdev->srbm_mutex);
4590                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4591                 wptr = RREG32(CP_HQD_PQ_WPTR);
4592                 cik_srbm_select(rdev, 0, 0, 0, 0);
4593                 mutex_unlock(&rdev->srbm_mutex);
4594         }
4595
4596         return wptr;
4597 }
4598
4599 void cik_compute_set_wptr(struct radeon_device *rdev,
4600                           struct radeon_ring *ring)
4601 {
4602         /* XXX check if swapping is necessary on BE */
4603         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4604         WDOORBELL32(ring->doorbell_index, ring->wptr);
4605 }
4606
4607 static void cik_compute_stop(struct radeon_device *rdev,
4608                              struct radeon_ring *ring)
4609 {
4610         u32 j, tmp;
4611
4612         cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4613         /* Disable wptr polling. */
4614         tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4615         tmp &= ~WPTR_POLL_EN;
4616         WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4617         /* Disable HQD. */
4618         if (RREG32(CP_HQD_ACTIVE) & 1) {
4619                 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4620                 for (j = 0; j < rdev->usec_timeout; j++) {
4621                         if (!(RREG32(CP_HQD_ACTIVE) & 1))
4622                                 break;
4623                         udelay(1);
4624                 }
4625                 WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4626                 WREG32(CP_HQD_PQ_RPTR, 0);
4627                 WREG32(CP_HQD_PQ_WPTR, 0);
4628         }
4629         cik_srbm_select(rdev, 0, 0, 0, 0);
4630 }
4631
4632 /**
4633  * cik_cp_compute_enable - enable/disable the compute CP MEs
4634  *
4635  * @rdev: radeon_device pointer
4636  * @enable: enable or disable the MEs
4637  *
4638  * Halts or unhalts the compute MEs.
4639  */
4640 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4641 {
4642         if (enable)
4643                 WREG32(CP_MEC_CNTL, 0);
4644         else {
4645                 /*
4646                  * To make hibernation reliable we need to clear compute ring
4647                  * configuration before halting the compute ring.
4648                  */
4649                 mutex_lock(&rdev->srbm_mutex);
4650                 cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4651                 cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4652                 mutex_unlock(&rdev->srbm_mutex);
4653
4654                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4655                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4656                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4657         }
4658         udelay(50);
4659 }
4660
4661 /**
4662  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4663  *
4664  * @rdev: radeon_device pointer
4665  *
4666  * Loads the compute MEC1&2 ucode.
4667  * Returns 0 for success, -EINVAL if the ucode is not available.
4668  */
4669 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4670 {
4671         int i;
4672
4673         if (!rdev->mec_fw)
4674                 return -EINVAL;
4675
4676         cik_cp_compute_enable(rdev, false);
4677
4678         if (rdev->new_fw) {
4679                 const struct gfx_firmware_header_v1_0 *mec_hdr =
4680                         (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4681                 const __le32 *fw_data;
4682                 u32 fw_size;
4683
4684                 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4685
4686                 /* MEC1 */
4687                 fw_data = (const __le32 *)
4688                         (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4689                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4690                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4691                 for (i = 0; i < fw_size; i++)
4692                         WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4693                 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4694
4695                 /* MEC2 */
4696                 if (rdev->family == CHIP_KAVERI) {
4697                         const struct gfx_firmware_header_v1_0 *mec2_hdr =
4698                                 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4699
4700                         fw_data = (const __le32 *)
4701                                 (rdev->mec2_fw->data +
4702                                  le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4703                         fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4704                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4705                         for (i = 0; i < fw_size; i++)
4706                                 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4707                         WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4708                 }
4709         } else {
4710                 const __be32 *fw_data;
4711
4712                 /* MEC1 */
4713                 fw_data = (const __be32 *)rdev->mec_fw->data;
4714                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4715                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4716                         WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4717                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4718
4719                 if (rdev->family == CHIP_KAVERI) {
4720                         /* MEC2 */
4721                         fw_data = (const __be32 *)rdev->mec_fw->data;
4722                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4723                         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4724                                 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4725                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4726                 }
4727         }
4728
4729         return 0;
4730 }
4731
4732 /**
4733  * cik_cp_compute_start - start the compute queues
4734  *
4735  * @rdev: radeon_device pointer
4736  *
4737  * Enable the compute queues.
4738  * Returns 0 for success, error for failure.
4739  */
4740 static int cik_cp_compute_start(struct radeon_device *rdev)
4741 {
4742         cik_cp_compute_enable(rdev, true);
4743
4744         return 0;
4745 }
4746
4747 /**
4748  * cik_cp_compute_fini - stop the compute queues
4749  *
4750  * @rdev: radeon_device pointer
4751  *
4752  * Stop the compute queues and tear down the driver queue
4753  * info.
4754  */
4755 static void cik_cp_compute_fini(struct radeon_device *rdev)
4756 {
4757         int i, idx, r;
4758
4759         cik_cp_compute_enable(rdev, false);
4760
4761         for (i = 0; i < 2; i++) {
4762                 if (i == 0)
4763                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4764                 else
4765                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4766
4767                 if (rdev->ring[idx].mqd_obj) {
4768                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4769                         if (unlikely(r != 0))
4770                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4771
4772                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4773                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4774
4775                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4776                         rdev->ring[idx].mqd_obj = NULL;
4777                 }
4778         }
4779 }
4780
4781 static void cik_mec_fini(struct radeon_device *rdev)
4782 {
4783         int r;
4784
4785         if (rdev->mec.hpd_eop_obj) {
4786                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4787                 if (unlikely(r != 0))
4788                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4789                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4790                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4791
4792                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4793                 rdev->mec.hpd_eop_obj = NULL;
4794         }
4795 }
4796
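/*
 * Bytes reserved per pipe for the HPD/EOP buffer: cik_mec_init() allocates
 * num_mec * num_pipe * MEC_HPD_SIZE * 2 of GTT for it and
 * cik_cp_compute_resume() programs its size into CP_HPD_EOP_CONTROL.
 */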
4797 #define MEC_HPD_SIZE 2048
4798
4799 static int cik_mec_init(struct radeon_device *rdev)
4800 {
4801         int r;
4802         u32 *hpd;
4803
4804         /*
4805          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4806          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4807          * Nonetheless, we assign only 1 pipe because all other pipes will
4808          * be handled by KFD
4809          */
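        /*
         * With num_mec = 1 and num_pipe = 1 below this works out to
         * 1 * 1 * 8 = 8 queues, of which only the two exposed as the
         * CP1/CP2 compute rings are actually brought up by this driver
         * (see cik_cp_compute_resume()).
         */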
4810         rdev->mec.num_mec = 1;
4811         rdev->mec.num_pipe = 1;
4812         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4813
4814         if (rdev->mec.hpd_eop_obj == NULL) {
4815                 r = radeon_bo_create(rdev,
4816                                      rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4817                                      PAGE_SIZE, true,
4818                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4819                                      &rdev->mec.hpd_eop_obj);
4820                 if (r) {
4821                         dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4822                         return r;
4823                 }
4824         }
4825
4826         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4827         if (unlikely(r != 0)) {
4828                 cik_mec_fini(rdev);
4829                 return r;
4830         }
4831         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4832                           &rdev->mec.hpd_eop_gpu_addr);
4833         if (r) {
4834                 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4835                 cik_mec_fini(rdev);
4836                 return r;
4837         }
4838         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4839         if (r) {
4840                 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4841                 cik_mec_fini(rdev);
4842                 return r;
4843         }
4844
4845         /* clear memory; not sure if this is required */
4846         memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4847
4848         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4849         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4850
4851         return 0;
4852 }
4853
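/*
 * Shadow copy of the CP_HQD_ and CP_MQD_ registers for one hardware queue.
 * It is embedded in struct bonaire_mqd (the memory queue descriptor) below,
 * which lives in a GTT buffer object pointed at by CP_MQD_BASE_ADDR in
 * cik_cp_compute_resume().
 */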
4854 struct hqd_registers {
4856         u32 cp_mqd_base_addr;
4857         u32 cp_mqd_base_addr_hi;
4858         u32 cp_hqd_active;
4859         u32 cp_hqd_vmid;
4860         u32 cp_hqd_persistent_state;
4861         u32 cp_hqd_pipe_priority;
4862         u32 cp_hqd_queue_priority;
4863         u32 cp_hqd_quantum;
4864         u32 cp_hqd_pq_base;
4865         u32 cp_hqd_pq_base_hi;
4866         u32 cp_hqd_pq_rptr;
4867         u32 cp_hqd_pq_rptr_report_addr;
4868         u32 cp_hqd_pq_rptr_report_addr_hi;
4869         u32 cp_hqd_pq_wptr_poll_addr;
4870         u32 cp_hqd_pq_wptr_poll_addr_hi;
4871         u32 cp_hqd_pq_doorbell_control;
4872         u32 cp_hqd_pq_wptr;
4873         u32 cp_hqd_pq_control;
4874         u32 cp_hqd_ib_base_addr;
4875         u32 cp_hqd_ib_base_addr_hi;
4876         u32 cp_hqd_ib_rptr;
4877         u32 cp_hqd_ib_control;
4878         u32 cp_hqd_iq_timer;
4879         u32 cp_hqd_iq_rptr;
4880         u32 cp_hqd_dequeue_request;
4881         u32 cp_hqd_dma_offload;
4882         u32 cp_hqd_sema_cmd;
4883         u32 cp_hqd_msg_type;
4884         u32 cp_hqd_atomic0_preop_lo;
4885         u32 cp_hqd_atomic0_preop_hi;
4886         u32 cp_hqd_atomic1_preop_lo;
4887         u32 cp_hqd_atomic1_preop_hi;
4888         u32 cp_hqd_hq_scheduler0;
4889         u32 cp_hqd_hq_scheduler1;
4890         u32 cp_mqd_control;
4891 };
4892
4893 struct bonaire_mqd {
4895         u32 header;
4896         u32 dispatch_initiator;
4897         u32 dimensions[3];
4898         u32 start_idx[3];
4899         u32 num_threads[3];
4900         u32 pipeline_stat_enable;
4901         u32 perf_counter_enable;
4902         u32 pgm[2];
4903         u32 tba[2];
4904         u32 tma[2];
4905         u32 pgm_rsrc[2];
4906         u32 vmid;
4907         u32 resource_limits;
4908         u32 static_thread_mgmt01[2];
4909         u32 tmp_ring_size;
4910         u32 static_thread_mgmt23[2];
4911         u32 restart[3];
4912         u32 thread_trace_enable;
4913         u32 reserved1;
4914         u32 user_data[16];
4915         u32 vgtcs_invoke_count[2];
4916         struct hqd_registers queue_state;
4917         u32 dequeue_cntr;
4918         u32 interrupt_queue[64];
4919 };
4920
4921 /**
4922  * cik_cp_compute_resume - setup the compute queue registers
4923  *
4924  * @rdev: radeon_device pointer
4925  *
4926  * Program the compute queues and test them to make sure they
4927  * are working.
4928  * Returns 0 for success, error for failure.
4929  */
4930 static int cik_cp_compute_resume(struct radeon_device *rdev)
4931 {
4932         int r, i, j, idx;
4933         u32 tmp;
4934         bool use_doorbell = true;
4935         u64 hqd_gpu_addr;
4936         u64 mqd_gpu_addr;
4937         u64 eop_gpu_addr;
4938         u64 wb_gpu_addr;
4939         u32 *buf;
4940         struct bonaire_mqd *mqd;
4941
4942         r = cik_cp_compute_start(rdev);
4943         if (r)
4944                 return r;
4945
4946         /* fix up chicken bits */
4947         tmp = RREG32(CP_CPF_DEBUG);
4948         tmp |= (1 << 23);
4949         WREG32(CP_CPF_DEBUG, tmp);
4950
4951         /* init the pipes */
4952         mutex_lock(&rdev->srbm_mutex);
4953
4954         eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4955
4956         cik_srbm_select(rdev, 0, 0, 0, 0);
4957
4958         /* write the EOP addr */
4959         WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4960         WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4961
4962         /* set the VMID assigned */
4963         WREG32(CP_HPD_EOP_VMID, 0);
4964
4965         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
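        /*
         * e.g. MEC_HPD_SIZE = 2048 bytes: 2048 / 8 = 256,
         * order_base_2(256) = 8, and 2^(8+1) = 512 dwords = 2048 bytes
         */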
4966         tmp = RREG32(CP_HPD_EOP_CONTROL);
4967         tmp &= ~EOP_SIZE_MASK;
4968         tmp |= order_base_2(MEC_HPD_SIZE / 8);
4969         WREG32(CP_HPD_EOP_CONTROL, tmp);
4970
4971         mutex_unlock(&rdev->srbm_mutex);
4972
4973         /* init the queues.  Just two for now. */
4974         for (i = 0; i < 2; i++) {
4975                 if (i == 0)
4976                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4977                 else
4978                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4979
4980                 if (rdev->ring[idx].mqd_obj == NULL) {
4981                         r = radeon_bo_create(rdev,
4982                                              sizeof(struct bonaire_mqd),
4983                                              PAGE_SIZE, true,
4984                                              RADEON_GEM_DOMAIN_GTT, 0, NULL,
4985                                              NULL, &rdev->ring[idx].mqd_obj);
4986                         if (r) {
4987                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4988                                 return r;
4989                         }
4990                 }
4991
4992                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4993                 if (unlikely(r != 0)) {
4994                         cik_cp_compute_fini(rdev);
4995                         return r;
4996                 }
4997                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4998                                   &mqd_gpu_addr);
4999                 if (r) {
5000                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
5001                         cik_cp_compute_fini(rdev);
5002                         return r;
5003                 }
5004                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
5005                 if (r) {
5006                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
5007                         cik_cp_compute_fini(rdev);
5008                         return r;
5009                 }
5010
5011                 /* init the mqd struct */
5012                 memset(buf, 0, sizeof(struct bonaire_mqd));
5013
5014                 mqd = (struct bonaire_mqd *)buf;
5015                 mqd->header = 0xC0310800;
5016                 mqd->static_thread_mgmt01[0] = 0xffffffff;
5017                 mqd->static_thread_mgmt01[1] = 0xffffffff;
5018                 mqd->static_thread_mgmt23[0] = 0xffffffff;
5019                 mqd->static_thread_mgmt23[1] = 0xffffffff;
5020
5021                 mutex_lock(&rdev->srbm_mutex);
5022                 cik_srbm_select(rdev, rdev->ring[idx].me,
5023                                 rdev->ring[idx].pipe,
5024                                 rdev->ring[idx].queue, 0);
5025
5026                 /* disable wptr polling */
5027                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
5028                 tmp &= ~WPTR_POLL_EN;
5029                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
5030
5031                 /* enable doorbell? */
5032                 mqd->queue_state.cp_hqd_pq_doorbell_control =
5033                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5034                 if (use_doorbell)
5035                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5036                 else
5037                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
5038                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5039                        mqd->queue_state.cp_hqd_pq_doorbell_control);
5040
5041                 /* disable the queue if it's active */
5042                 mqd->queue_state.cp_hqd_dequeue_request = 0;
5043                 mqd->queue_state.cp_hqd_pq_rptr = 0;
5044                 mqd->queue_state.cp_hqd_pq_wptr = 0;
5045                 if (RREG32(CP_HQD_ACTIVE) & 1) {
5046                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
5047                         for (j = 0; j < rdev->usec_timeout; j++) {
5048                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
5049                                         break;
5050                                 udelay(1);
5051                         }
5052                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
5053                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
5054                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5055                 }
5056
5057                 /* set the pointer to the MQD */
5058                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
5059                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
5060                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
5061                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
5062                 /* set MQD vmid to 0 */
5063                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
5064                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
5065                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
5066
5067                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
5068                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
5069                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
5070                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
5071                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
5072                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
5073
5074                 /* set up the HQD, this is similar to CP_RB0_CNTL */
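                /*
                 * order_base_2(ring_size / 8) encodes the queue size the same
                 * way CP_RB0_CNTL does (a field value of n means 2^(n+1)
                 * dwords); the << 8 term programs the rptr block size using
                 * the same encoding for RADEON_GPU_PAGE_SIZE,
                 * i.e. order_base_2(4096 / 8) = 9.
                 */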
5075                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
5076                 mqd->queue_state.cp_hqd_pq_control &=
5077                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
5078
5079                 mqd->queue_state.cp_hqd_pq_control |=
5080                         order_base_2(rdev->ring[idx].ring_size / 8);
5081                 mqd->queue_state.cp_hqd_pq_control |=
5082                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
5083 #ifdef __BIG_ENDIAN
5084                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
5085 #endif
5086                 mqd->queue_state.cp_hqd_pq_control &=
5087                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
5088                 mqd->queue_state.cp_hqd_pq_control |=
5089                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
5090                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
5091
5092                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
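                /*
                 * wptr polling is disabled above (WPTR_POLL_EN cleared), so
                 * these writeback addresses are programmed for completeness;
                 * with use_doorbell = true the doorbell set up below is what
                 * actually tells the CP about wptr updates.
                 */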
5093                 if (i == 0)
5094                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
5095                 else
5096                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
5097                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
5098                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5099                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
5100                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
5101                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
5102
5103                 /* set the wb address whether it's enabled or not */
5104                 if (i == 0)
5105                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5106                 else
5107                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5108                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5109                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5110                         upper_32_bits(wb_gpu_addr) & 0xffff;
5111                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5112                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5113                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5114                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5115
5116                 /* enable the doorbell if requested */
5117                 if (use_doorbell) {
5118                         mqd->queue_state.cp_hqd_pq_doorbell_control =
5119                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5120                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5121                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
5122                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5123                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5124                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
5125                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
5126
5127                 } else {
5128                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5129                 }
5130                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5131                        mqd->queue_state.cp_hqd_pq_doorbell_control);
5132
5133                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5134                 rdev->ring[idx].wptr = 0;
5135                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5136                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5137                 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5138
5139                 /* set the vmid for the queue */
5140                 mqd->queue_state.cp_hqd_vmid = 0;
5141                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5142
5143                 /* activate the queue */
5144                 mqd->queue_state.cp_hqd_active = 1;
5145                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5146
5147                 cik_srbm_select(rdev, 0, 0, 0, 0);
5148                 mutex_unlock(&rdev->srbm_mutex);
5149
5150                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5151                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5152
5153                 rdev->ring[idx].ready = true;
5154                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5155                 if (r)
5156                         rdev->ring[idx].ready = false;
5157         }
5158
5159         return 0;
5160 }
5161
5162 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5163 {
5164         cik_cp_gfx_enable(rdev, enable);
5165         cik_cp_compute_enable(rdev, enable);
5166 }
5167
5168 static int cik_cp_load_microcode(struct radeon_device *rdev)
5169 {
5170         int r;
5171
5172         r = cik_cp_gfx_load_microcode(rdev);
5173         if (r)
5174                 return r;
5175         r = cik_cp_compute_load_microcode(rdev);
5176         if (r)
5177                 return r;
5178
5179         return 0;
5180 }
5181
5182 static void cik_cp_fini(struct radeon_device *rdev)
5183 {
5184         cik_cp_gfx_fini(rdev);
5185         cik_cp_compute_fini(rdev);
5186 }
5187
5188 static int cik_cp_resume(struct radeon_device *rdev)
5189 {
5190         int r;
5191
5192         cik_enable_gui_idle_interrupt(rdev, false);
5193
5194         r = cik_cp_load_microcode(rdev);
5195         if (r)
5196                 return r;
5197
5198         r = cik_cp_gfx_resume(rdev);
5199         if (r)
5200                 return r;
5201         r = cik_cp_compute_resume(rdev);
5202         if (r)
5203                 return r;
5204
5205         cik_enable_gui_idle_interrupt(rdev, true);
5206
5207         return 0;
5208 }
5209
5210 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5211 {
5212         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5213                 RREG32(GRBM_STATUS));
5214         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5215                 RREG32(GRBM_STATUS2));
5216         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5217                 RREG32(GRBM_STATUS_SE0));
5218         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5219                 RREG32(GRBM_STATUS_SE1));
5220         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5221                 RREG32(GRBM_STATUS_SE2));
5222         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5223                 RREG32(GRBM_STATUS_SE3));
5224         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5225                 RREG32(SRBM_STATUS));
5226         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5227                 RREG32(SRBM_STATUS2));
5228         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5229                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5230         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5231                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5232         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5233         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5234                  RREG32(CP_STALLED_STAT1));
5235         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5236                  RREG32(CP_STALLED_STAT2));
5237         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5238                  RREG32(CP_STALLED_STAT3));
5239         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5240                  RREG32(CP_CPF_BUSY_STAT));
5241         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5242                  RREG32(CP_CPF_STALLED_STAT1));
5243         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5244         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5245         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5246                  RREG32(CP_CPC_STALLED_STAT1));
5247         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5248 }
5249
5250 /**
5251  * cik_gpu_check_soft_reset - check which blocks are busy
5252  *
5253  * @rdev: radeon_device pointer
5254  *
5255  * Check which blocks are busy and return the relevant reset
5256  * mask to be used by cik_gpu_soft_reset().
5257  * Returns a mask of the blocks to be reset.
5258  */
5259 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5260 {
5261         u32 reset_mask = 0;
5262         u32 tmp;
5263
5264         /* GRBM_STATUS */
5265         tmp = RREG32(GRBM_STATUS);
5266         if (tmp & (PA_BUSY | SC_BUSY |
5267                    BCI_BUSY | SX_BUSY |
5268                    TA_BUSY | VGT_BUSY |
5269                    DB_BUSY | CB_BUSY |
5270                    GDS_BUSY | SPI_BUSY |
5271                    IA_BUSY | IA_BUSY_NO_DMA))
5272                 reset_mask |= RADEON_RESET_GFX;
5273
5274         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5275                 reset_mask |= RADEON_RESET_CP;
5276
5277         /* GRBM_STATUS2 */
5278         tmp = RREG32(GRBM_STATUS2);
5279         if (tmp & RLC_BUSY)
5280                 reset_mask |= RADEON_RESET_RLC;
5281
5282         /* SDMA0_STATUS_REG */
5283         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5284         if (!(tmp & SDMA_IDLE))
5285                 reset_mask |= RADEON_RESET_DMA;
5286
5287         /* SDMA1_STATUS_REG */
5288         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5289         if (!(tmp & SDMA_IDLE))
5290                 reset_mask |= RADEON_RESET_DMA1;
5291
5292         /* SRBM_STATUS2 */
5293         tmp = RREG32(SRBM_STATUS2);
5294         if (tmp & SDMA_BUSY)
5295                 reset_mask |= RADEON_RESET_DMA;
5296
5297         if (tmp & SDMA1_BUSY)
5298                 reset_mask |= RADEON_RESET_DMA1;
5299
5300         /* SRBM_STATUS */
5301         tmp = RREG32(SRBM_STATUS);
5302
5303         if (tmp & IH_BUSY)
5304                 reset_mask |= RADEON_RESET_IH;
5305
5306         if (tmp & SEM_BUSY)
5307                 reset_mask |= RADEON_RESET_SEM;
5308
5309         if (tmp & GRBM_RQ_PENDING)
5310                 reset_mask |= RADEON_RESET_GRBM;
5311
5312         if (tmp & VMC_BUSY)
5313                 reset_mask |= RADEON_RESET_VMC;
5314
5315         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5316                    MCC_BUSY | MCD_BUSY))
5317                 reset_mask |= RADEON_RESET_MC;
5318
5319         if (evergreen_is_display_hung(rdev))
5320                 reset_mask |= RADEON_RESET_DISPLAY;
5321
5322         /* Skip MC reset as it's most likely not hung, just busy */
5323         if (reset_mask & RADEON_RESET_MC) {
5324                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5325                 reset_mask &= ~RADEON_RESET_MC;
5326         }
5327
5328         return reset_mask;
5329 }
5330
5331 /**
5332  * cik_gpu_soft_reset - soft reset GPU
5333  *
5334  * @rdev: radeon_device pointer
5335  * @reset_mask: mask of which blocks to reset
5336  *
5337  * Soft reset the blocks specified in @reset_mask.
5338  */
5339 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5340 {
5341         struct evergreen_mc_save save;
5342         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5343         u32 tmp;
5344
5345         if (reset_mask == 0)
5346                 return;
5347
5348         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5349
5350         cik_print_gpu_status_regs(rdev);
5351         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5352                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5353         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5354                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5355
5356         /* disable CG/PG */
5357         cik_fini_pg(rdev);
5358         cik_fini_cg(rdev);
5359
5360         /* stop the rlc */
5361         cik_rlc_stop(rdev);
5362
5363         /* Disable GFX parsing/prefetching */
5364         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5365
5366         /* Disable MEC parsing/prefetching */
5367         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5368
5369         if (reset_mask & RADEON_RESET_DMA) {
5370                 /* sdma0 */
5371                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5372                 tmp |= SDMA_HALT;
5373                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5374         }
5375         if (reset_mask & RADEON_RESET_DMA1) {
5376                 /* sdma1 */
5377                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5378                 tmp |= SDMA_HALT;
5379                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5380         }
5381
5382         evergreen_mc_stop(rdev, &save);
5383         if (evergreen_mc_wait_for_idle(rdev)) {
5384                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5385         }
5386
5387         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5388                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5389
5390         if (reset_mask & RADEON_RESET_CP) {
5391                 grbm_soft_reset |= SOFT_RESET_CP;
5392
5393                 srbm_soft_reset |= SOFT_RESET_GRBM;
5394         }
5395
5396         if (reset_mask & RADEON_RESET_DMA)
5397                 srbm_soft_reset |= SOFT_RESET_SDMA;
5398
5399         if (reset_mask & RADEON_RESET_DMA1)
5400                 srbm_soft_reset |= SOFT_RESET_SDMA1;
5401
5402         if (reset_mask & RADEON_RESET_DISPLAY)
5403                 srbm_soft_reset |= SOFT_RESET_DC;
5404
5405         if (reset_mask & RADEON_RESET_RLC)
5406                 grbm_soft_reset |= SOFT_RESET_RLC;
5407
5408         if (reset_mask & RADEON_RESET_SEM)
5409                 srbm_soft_reset |= SOFT_RESET_SEM;
5410
5411         if (reset_mask & RADEON_RESET_IH)
5412                 srbm_soft_reset |= SOFT_RESET_IH;
5413
5414         if (reset_mask & RADEON_RESET_GRBM)
5415                 srbm_soft_reset |= SOFT_RESET_GRBM;
5416
5417         if (reset_mask & RADEON_RESET_VMC)
5418                 srbm_soft_reset |= SOFT_RESET_VMC;
5419
5420         if (!(rdev->flags & RADEON_IS_IGP)) {
5421                 if (reset_mask & RADEON_RESET_MC)
5422                         srbm_soft_reset |= SOFT_RESET_MC;
5423         }
5424
5425         if (grbm_soft_reset) {
5426                 tmp = RREG32(GRBM_SOFT_RESET);
5427                 tmp |= grbm_soft_reset;
5428                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5429                 WREG32(GRBM_SOFT_RESET, tmp);
5430                 tmp = RREG32(GRBM_SOFT_RESET);
5431
5432                 udelay(50);
5433
5434                 tmp &= ~grbm_soft_reset;
5435                 WREG32(GRBM_SOFT_RESET, tmp);
5436                 tmp = RREG32(GRBM_SOFT_RESET);
5437         }
5438
5439         if (srbm_soft_reset) {
5440                 tmp = RREG32(SRBM_SOFT_RESET);
5441                 tmp |= srbm_soft_reset;
5442                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5443                 WREG32(SRBM_SOFT_RESET, tmp);
5444                 tmp = RREG32(SRBM_SOFT_RESET);
5445
5446                 udelay(50);
5447
5448                 tmp &= ~srbm_soft_reset;
5449                 WREG32(SRBM_SOFT_RESET, tmp);
5450                 tmp = RREG32(SRBM_SOFT_RESET);
5451         }
5452
5453         /* Wait a little for things to settle down */
5454         udelay(50);
5455
5456         evergreen_mc_resume(rdev, &save);
5457         udelay(50);
5458
5459         cik_print_gpu_status_regs(rdev);
5460 }
5461
5462 struct kv_reset_save_regs {
5463         u32 gmcon_reng_execute;
5464         u32 gmcon_misc;
5465         u32 gmcon_misc3;
5466 };
5467
5468 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5469                                    struct kv_reset_save_regs *save)
5470 {
5471         save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5472         save->gmcon_misc = RREG32(GMCON_MISC);
5473         save->gmcon_misc3 = RREG32(GMCON_MISC3);
5474
5475         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5476         WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5477                                                 STCTRL_STUTTER_EN));
5478 }
5479
5480 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5481                                       struct kv_reset_save_regs *save)
5482 {
5483         int i;
5484
5485         WREG32(GMCON_PGFSM_WRITE, 0);
5486         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5487
5488         for (i = 0; i < 5; i++)
5489                 WREG32(GMCON_PGFSM_WRITE, 0);
5490
5491         WREG32(GMCON_PGFSM_WRITE, 0);
5492         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5493
5494         for (i = 0; i < 5; i++)
5495                 WREG32(GMCON_PGFSM_WRITE, 0);
5496
5497         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5498         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5499
5500         for (i = 0; i < 5; i++)
5501                 WREG32(GMCON_PGFSM_WRITE, 0);
5502
5503         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5504         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5505
5506         for (i = 0; i < 5; i++)
5507                 WREG32(GMCON_PGFSM_WRITE, 0);
5508
5509         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5510         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5511
5512         for (i = 0; i < 5; i++)
5513                 WREG32(GMCON_PGFSM_WRITE, 0);
5514
5515         WREG32(GMCON_PGFSM_WRITE, 0);
5516         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5517
5518         for (i = 0; i < 5; i++)
5519                 WREG32(GMCON_PGFSM_WRITE, 0);
5520
5521         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5522         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5523
5524         for (i = 0; i < 5; i++)
5525                 WREG32(GMCON_PGFSM_WRITE, 0);
5526
5527         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5528         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5529
5530         for (i = 0; i < 5; i++)
5531                 WREG32(GMCON_PGFSM_WRITE, 0);
5532
5533         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5534         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5535
5536         for (i = 0; i < 5; i++)
5537                 WREG32(GMCON_PGFSM_WRITE, 0);
5538
5539         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5540         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5541
5542         for (i = 0; i < 5; i++)
5543                 WREG32(GMCON_PGFSM_WRITE, 0);
5544
5545         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5546         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5547
5548         WREG32(GMCON_MISC3, save->gmcon_misc3);
5549         WREG32(GMCON_MISC, save->gmcon_misc);
5550         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5551 }
5552
5553 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5554 {
5555         struct evergreen_mc_save save;
5556         struct kv_reset_save_regs kv_save = { 0 };
5557         u32 tmp, i;
5558
5559         dev_info(rdev->dev, "GPU pci config reset\n");
5560
5561         /* disable dpm? */
5562
5563         /* disable cg/pg */
5564         cik_fini_pg(rdev);
5565         cik_fini_cg(rdev);
5566
5567         /* Disable GFX parsing/prefetching */
5568         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5569
5570         /* Disable MEC parsing/prefetching */
5571         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5572
5573         /* sdma0 */
5574         tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5575         tmp |= SDMA_HALT;
5576         WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5577         /* sdma1 */
5578         tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5579         tmp |= SDMA_HALT;
5580         WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5581         /* XXX other engines? */
5582
5583         /* halt the rlc, disable cp internal ints */
5584         cik_rlc_stop(rdev);
5585
5586         udelay(50);
5587
5588         /* disable mem access */
5589         evergreen_mc_stop(rdev, &save);
5590         if (evergreen_mc_wait_for_idle(rdev)) {
5591                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5592         }
5593
5594         if (rdev->flags & RADEON_IS_IGP)
5595                 kv_save_regs_for_reset(rdev, &kv_save);
5596
5597         /* disable BM */
5598         pci_clear_master(rdev->pdev);
5599         /* reset */
5600         radeon_pci_config_reset(rdev);
5601
5602         udelay(100);
5603
5604         /* wait for asic to come out of reset */
5605         for (i = 0; i < rdev->usec_timeout; i++) {
5606                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5607                         break;
5608                 udelay(1);
5609         }
5610
5611         /* does asic init need to be run first??? */
5612         if (rdev->flags & RADEON_IS_IGP)
5613                 kv_restore_regs_for_reset(rdev, &kv_save);
5614 }
5615
5616 /**
5617  * cik_asic_reset - soft reset GPU
5618  *
5619  * @rdev: radeon_device pointer
5620  *
5621  * Look up which blocks are hung and attempt
5622  * to reset them.
5623  * Returns 0 for success.
5624  */
5625 int cik_asic_reset(struct radeon_device *rdev)
5626 {
5627         u32 reset_mask;
5628
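        /*
         * Escalating reset: try a soft reset of the hung blocks first and
         * fall back to a full PCI config reset if blocks are still busy
         * and radeon_hard_reset is enabled.
         */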
5629         reset_mask = cik_gpu_check_soft_reset(rdev);
5630
5631         if (reset_mask)
5632                 r600_set_bios_scratch_engine_hung(rdev, true);
5633
5634         /* try soft reset */
5635         cik_gpu_soft_reset(rdev, reset_mask);
5636
5637         reset_mask = cik_gpu_check_soft_reset(rdev);
5638
5639         /* try pci config reset */
5640         if (reset_mask && radeon_hard_reset)
5641                 cik_gpu_pci_config_reset(rdev);
5642
5643         reset_mask = cik_gpu_check_soft_reset(rdev);
5644
5645         if (!reset_mask)
5646                 r600_set_bios_scratch_engine_hung(rdev, false);
5647
5648         return 0;
5649 }
5650
5651 /**
5652  * cik_gfx_is_lockup - check if the 3D engine is locked up
5653  *
5654  * @rdev: radeon_device pointer
5655  * @ring: radeon_ring structure holding ring information
5656  *
5657  * Check if the 3D engine is locked up (CIK).
5658  * Returns true if the engine is locked, false if not.
5659  */
5660 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5661 {
5662         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5663
5664         if (!(reset_mask & (RADEON_RESET_GFX |
5665                             RADEON_RESET_COMPUTE |
5666                             RADEON_RESET_CP))) {
5667                 radeon_ring_lockup_update(rdev, ring);
5668                 return false;
5669         }
5670         return radeon_ring_test_lockup(rdev, ring);
5671 }
5672
5673 /* MC */
5674 /**
5675  * cik_mc_program - program the GPU memory controller
5676  *
5677  * @rdev: radeon_device pointer
5678  *
5679  * Set the location of vram, gart, and AGP in the GPU's
5680  * physical address space (CIK).
5681  */
5682 static void cik_mc_program(struct radeon_device *rdev)
5683 {
5684         struct evergreen_mc_save save;
5685         u32 tmp;
5686         int i, j;
5687
5688         /* Initialize HDP */
5689         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5690                 WREG32((0x2c14 + j), 0x00000000);
5691                 WREG32((0x2c18 + j), 0x00000000);
5692                 WREG32((0x2c1c + j), 0x00000000);
5693                 WREG32((0x2c20 + j), 0x00000000);
5694                 WREG32((0x2c24 + j), 0x00000000);
5695         }
5696         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5697
5698         evergreen_mc_stop(rdev, &save);
5699         if (radeon_mc_wait_for_idle(rdev)) {
5700                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5701         }
5702         /* Lockout access through VGA aperture*/
5703         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5704         /* Update configuration */
5705         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5706                rdev->mc.vram_start >> 12);
5707         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5708                rdev->mc.vram_end >> 12);
5709         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5710                rdev->vram_scratch.gpu_addr >> 12);
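        /*
         * MC_VM_FB_LOCATION takes the VRAM window in 16MB units (hence the
         * >> 24): start of VRAM in the low 16 bits, end in the high 16 bits.
         */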
5711         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5712         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5713         WREG32(MC_VM_FB_LOCATION, tmp);
5714         /* XXX double check these! */
5715         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5716         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5717         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5718         WREG32(MC_VM_AGP_BASE, 0);
5719         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5720         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5721         if (radeon_mc_wait_for_idle(rdev)) {
5722                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5723         }
5724         evergreen_mc_resume(rdev, &save);
5725         /* we need to own VRAM, so turn off the VGA renderer here
5726          * to stop it overwriting our objects */
5727         rv515_vga_render_disable(rdev);
5728 }
5729
5730 /**
5731  * cik_mc_init - initialize the memory controller driver params
5732  *
5733  * @rdev: radeon_device pointer
5734  *
5735  * Look up the amount of vram, vram width, and decide how to place
5736  * vram and gart within the GPU's physical address space (CIK).
5737  * Returns 0 for success.
5738  */
5739 static int cik_mc_init(struct radeon_device *rdev)
5740 {
5741         u32 tmp;
5742         int chansize, numchan;
5743
5744         /* Get VRAM information */
5745         rdev->mc.vram_is_ddr = true;
5746         tmp = RREG32(MC_ARB_RAMCFG);
5747         if (tmp & CHANSIZE_MASK) {
5748                 chansize = 64;
5749         } else {
5750                 chansize = 32;
5751         }
5752         tmp = RREG32(MC_SHARED_CHMAP);
5753         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5754         case 0:
5755         default:
5756                 numchan = 1;
5757                 break;
5758         case 1:
5759                 numchan = 2;
5760                 break;
5761         case 2:
5762                 numchan = 4;
5763                 break;
5764         case 3:
5765                 numchan = 8;
5766                 break;
5767         case 4:
5768                 numchan = 3;
5769                 break;
5770         case 5:
5771                 numchan = 6;
5772                 break;
5773         case 6:
5774                 numchan = 10;
5775                 break;
5776         case 7:
5777                 numchan = 12;
5778                 break;
5779         case 8:
5780                 numchan = 16;
5781                 break;
5782         }
5783         rdev->mc.vram_width = numchan * chansize;
5784         /* Could aper size report 0? */
5785         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5786         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5787         /* size in MB on CIK */
5788         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5789         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5790         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5791         si_vram_gtt_location(rdev, &rdev->mc);
5792         radeon_update_bandwidth_info(rdev);
5793
5794         return 0;
5795 }
5796
5797 /*
5798  * GART
5799  * VMID 0 is the physical GPU addresses as used by the kernel.
5800  * VMIDs 1-15 are used for userspace clients and are handled
5801  * by the radeon vm/hsa code.
5802  */
5803 /**
5804  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5805  *
5806  * @rdev: radeon_device pointer
5807  *
5808  * Flush the TLB for the VMID 0 page table (CIK).
5809  */
5810 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5811 {
5812         /* flush hdp cache */
5813         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5814
5815         /* bits 0-15 are the VM contexts0-15 */
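        /* 0x1 sets only bit 0, i.e. this flushes just the VMID 0 (GART) page table */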
5816         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5817 }
5818
5819 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5820 {
5821         int i;
5822         uint32_t sh_mem_bases, sh_mem_config;
5823
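        /*
         * SH_MEM_BASES packs the private and shared memory aperture bases
         * into its low and high 16 bits; 0x6000 for both presumably matches
         * the aperture layout amdkfd expects for the compute VMIDs 8-15
         * initialized below.
         */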
5824         sh_mem_bases = 0x6000 | 0x6000 << 16;
5825         sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5826         sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5827
5828         mutex_lock(&rdev->srbm_mutex);
5829         for (i = 8; i < 16; i++) {
5830                 cik_srbm_select(rdev, 0, 0, 0, i);
5831                 /* CP and shaders */
5832                 WREG32(SH_MEM_CONFIG, sh_mem_config);
5833                 WREG32(SH_MEM_APE1_BASE, 1);
5834                 WREG32(SH_MEM_APE1_LIMIT, 0);
5835                 WREG32(SH_MEM_BASES, sh_mem_bases);
5836         }
5837         cik_srbm_select(rdev, 0, 0, 0, 0);
5838         mutex_unlock(&rdev->srbm_mutex);
5839 }
5840
5841 /**
5842  * cik_pcie_gart_enable - gart enable
5843  *
5844  * @rdev: radeon_device pointer
5845  *
5846  * This sets up the TLBs, programs the page tables for VMID0,
5847  * sets up the hw for VMIDs 1-15 which are allocated on
5848  * demand, and sets up the global locations for the LDS, GDS,
5849  * and GPUVM for FSA64 clients (CIK).
5850  * Returns 0 for success, errors for failure.
5851  */
5852 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5853 {
5854         int r, i;
5855
5856         if (rdev->gart.robj == NULL) {
5857                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5858                 return -EINVAL;
5859         }
5860         r = radeon_gart_table_vram_pin(rdev);
5861         if (r)
5862                 return r;
5863         /* Setup TLB control */
5864         WREG32(MC_VM_MX_L1_TLB_CNTL,
5865                (0xA << 7) |
5866                ENABLE_L1_TLB |
5867                ENABLE_L1_FRAGMENT_PROCESSING |
5868                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5869                ENABLE_ADVANCED_DRIVER_MODEL |
5870                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5871         /* Setup L2 cache */
5872         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5873                ENABLE_L2_FRAGMENT_PROCESSING |
5874                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5875                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5876                EFFECTIVE_L2_QUEUE_SIZE(7) |
5877                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5878         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5879         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5880                BANK_SELECT(4) |
5881                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5882         /* setup context0 */
5883         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5884         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5885         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5886         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5887                         (u32)(rdev->dummy_page.addr >> 12));
5888         WREG32(VM_CONTEXT0_CNTL2, 0);
5889         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5890                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5891
5892         WREG32(0x15D4, 0);
5893         WREG32(0x15D8, 0);
5894         WREG32(0x15DC, 0);
5895
5896         /* restore context1-15 */
5897         /* set vm size, must be a multiple of 4 */
5898         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5899         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5900         for (i = 1; i < 16; i++) {
5901                 if (i < 8)
5902                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5903                                rdev->vm_manager.saved_table_addr[i]);
5904                 else
5905                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5906                                rdev->vm_manager.saved_table_addr[i]);
5907         }
5908
5909         /* enable context1-15 */
5910         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5911                (u32)(rdev->dummy_page.addr >> 12));
5912         WREG32(VM_CONTEXT1_CNTL2, 4);
5913         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5914                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5915                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5916                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5917                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5918                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5919                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5920                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5921                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5922                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5923                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5924                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5925                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5926                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5927
5928         if (rdev->family == CHIP_KAVERI) {
5929                 u32 tmp = RREG32(CHUB_CONTROL);
5930                 tmp &= ~BYPASS_VM;
5931                 WREG32(CHUB_CONTROL, tmp);
5932         }
5933
5934         /* XXX SH_MEM regs */
5935         /* where to put LDS, scratch, GPUVM in FSA64 space */
5936         mutex_lock(&rdev->srbm_mutex);
5937         for (i = 0; i < 16; i++) {
5938                 cik_srbm_select(rdev, 0, 0, 0, i);
5939                 /* CP and shaders */
5940                 WREG32(SH_MEM_CONFIG, 0);
5941                 WREG32(SH_MEM_APE1_BASE, 1);
5942                 WREG32(SH_MEM_APE1_LIMIT, 0);
5943                 WREG32(SH_MEM_BASES, 0);
5944                 /* SDMA GFX */
5945                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5946                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5947                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5948                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5949                 /* XXX SDMA RLC - todo */
5950         }
5951         cik_srbm_select(rdev, 0, 0, 0, 0);
5952         mutex_unlock(&rdev->srbm_mutex);
5953
5954         cik_pcie_init_compute_vmid(rdev);
5955
5956         cik_pcie_gart_tlb_flush(rdev);
5957         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5958                  (unsigned)(rdev->mc.gtt_size >> 20),
5959                  (unsigned long long)rdev->gart.table_addr);
5960         rdev->gart.ready = true;
5961         return 0;
5962 }
5963
5964 /**
5965  * cik_pcie_gart_disable - gart disable
5966  *
5967  * @rdev: radeon_device pointer
5968  *
5969  * This disables all VM page tables (CIK).
5970  */
5971 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5972 {
5973         unsigned i;
5974
5975         for (i = 1; i < 16; ++i) {
5976                 uint32_t reg;
5977                 if (i < 8)
5978                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5979                 else
5980                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5981                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5982         }
5983
5984         /* Disable all tables */
5985         WREG32(VM_CONTEXT0_CNTL, 0);
5986         WREG32(VM_CONTEXT1_CNTL, 0);
5987         /* Setup TLB control */
5988         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5989                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5990         /* Setup L2 cache */
5991         WREG32(VM_L2_CNTL,
5992                ENABLE_L2_FRAGMENT_PROCESSING |
5993                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5994                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5995                EFFECTIVE_L2_QUEUE_SIZE(7) |
5996                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5997         WREG32(VM_L2_CNTL2, 0);
5998         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5999                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
6000         radeon_gart_table_vram_unpin(rdev);
6001 }
6002
6003 /**
6004  * cik_pcie_gart_fini - vm fini callback
6005  *
6006  * @rdev: radeon_device pointer
6007  *
6008  * Tears down the driver GART/VM setup (CIK).
6009  */
6010 static void cik_pcie_gart_fini(struct radeon_device *rdev)
6011 {
6012         cik_pcie_gart_disable(rdev);
6013         radeon_gart_table_vram_free(rdev);
6014         radeon_gart_fini(rdev);
6015 }
6016
6017 /* vm parser */
6018 /**
6019  * cik_ib_parse - vm ib_parse callback
6020  *
6021  * @rdev: radeon_device pointer
6022  * @ib: indirect buffer pointer
6023  *
6024  * CIK uses hw IB checking so this is a nop (CIK).
6025  */
6026 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
6027 {
6028         return 0;
6029 }
6030
6031 /*
6032  * vm
6033  * VMID 0 is the physical GPU addresses as used by the kernel.
6034  * VMIDs 1-15 are used for userspace clients and are handled
6035  * by the radeon vm/hsa code.
6036  */
6037 /**
6038  * cik_vm_init - cik vm init callback
6039  *
6040  * @rdev: radeon_device pointer
6041  *
6042  * Inits cik specific vm parameters (number of VMs, base of vram for
6043  * VMIDs 1-15) (CIK).
6044  * Returns 0 for success.
6045  */
6046 int cik_vm_init(struct radeon_device *rdev)
6047 {
6048         /*
6049          * number of VMs
6050          * VMID 0 is reserved for System
6051          * radeon graphics/compute will use VMIDs 1-7
6052          * amdkfd will use VMIDs 8-15
6053          */
6054         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
6055         /* base offset of vram pages */
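        /*
         * On IGPs the MC_VM_FB_OFFSET value is converted to a byte offset
         * with << 22, i.e. the register is in 4MB units; discrete boards
         * keep a zero offset.
         */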
6056         if (rdev->flags & RADEON_IS_IGP) {
6057                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
6058                 tmp <<= 22;
6059                 rdev->vm_manager.vram_base_offset = tmp;
6060         } else {
6061                 rdev->vm_manager.vram_base_offset = 0;
        }
6062
6063         return 0;
6064 }
6065
6066 /**
6067  * cik_vm_fini - cik vm fini callback
6068  *
6069  * @rdev: radeon_device pointer
6070  *
6071  * Tear down any asic specific VM setup (CIK).
6072  */
6073 void cik_vm_fini(struct radeon_device *rdev)
6074 {
6075 }
6076
6077 /**
6078  * cik_vm_decode_fault - print human readable fault info
6079  *
6080  * @rdev: radeon_device pointer
6081  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
6082  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
6083  *
6084  * Print human readable fault information (CIK).
6085  */
6086 static void cik_vm_decode_fault(struct radeon_device *rdev,
6087                                 u32 status, u32 addr, u32 mc_client)
6088 {
6089         u32 mc_id;
6090         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
6091         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6092         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6093                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6094
6095         if (rdev->family == CHIP_HAWAII)
6096                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6097         else
6098                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6099
6100         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6101                protections, vmid, addr,
6102                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6103                block, mc_client, mc_id);
6104 }
6105
6106 /**
6107  * cik_vm_flush - cik vm flush using the CP
6108  *
6109  * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: VMID to flush
 * @pd_addr: address of the page directory
 *
6111  * Update the page table base and flush the VM TLB
6112  * using the CP (CIK).
6113  */
6114 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6115                   unsigned vm_id, uint64_t pd_addr)
6116 {
6117         int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6118
6119         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6120         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6121                                  WRITE_DATA_DST_SEL(0)));
6122         if (vm_id < 8) {
6123                 radeon_ring_write(ring,
6124                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6125         } else {
6126                 radeon_ring_write(ring,
6127                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6128         }
6129         radeon_ring_write(ring, 0);
6130         radeon_ring_write(ring, pd_addr >> 12);
6131
6132         /* update SH_MEM_* regs */
6133         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6134         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6135                                  WRITE_DATA_DST_SEL(0)));
6136         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6137         radeon_ring_write(ring, 0);
6138         radeon_ring_write(ring, VMID(vm_id));
6139
6140         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6141         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6142                                  WRITE_DATA_DST_SEL(0)));
6143         radeon_ring_write(ring, SH_MEM_BASES >> 2);
6144         radeon_ring_write(ring, 0);
6145
6146         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6147         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6148         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6149         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6150
6151         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6152         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6153                                  WRITE_DATA_DST_SEL(0)));
6154         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6155         radeon_ring_write(ring, 0);
6156         radeon_ring_write(ring, VMID(0));
6157
6158         /* HDP flush */
6159         cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6160
6161         /* bits 0-15 are the VM contexts0-15 */
6162         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6163         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6164                                  WRITE_DATA_DST_SEL(0)));
6165         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6166         radeon_ring_write(ring, 0);
6167         radeon_ring_write(ring, 1 << vm_id);
6168
6169         /* wait for the invalidate to complete */
6170         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6171         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6172                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6173                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6174         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6175         radeon_ring_write(ring, 0);
6176         radeon_ring_write(ring, 0); /* ref */
6177         radeon_ring_write(ring, 0); /* mask */
6178         radeon_ring_write(ring, 0x20); /* poll interval */
6179
6180         /* compute doesn't have PFP */
6181         if (usepfp) {
6182                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6183                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6184                 radeon_ring_write(ring, 0x0);
6185         }
6186 }
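     /* A note on the WRITE_DATA packets emitted above: each one is the
      * PACKET3 header, a control dword (engine and destination select), the
      * destination register dword offset, an upper address dword (0 for a
      * register write) and then the payload.  The count in the header is one
      * less than the number of dwords that follow it, which is why the
      * single-register writes above use a count of 3 and the four-register
      * SH_MEM_* update uses a count of 6.
      */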
6187
6188 /*
6189  * RLC
6190  * The RLC is a multi-purpose microengine that handles a
6191  * variety of functions, the most important of which is
6192  * the interrupt controller.
6193  */
6194 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6195                                           bool enable)
6196 {
6197         u32 tmp = RREG32(CP_INT_CNTL_RING0);
6198
6199         if (enable)
6200                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6201         else
6202                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6203         WREG32(CP_INT_CNTL_RING0, tmp);
6204 }
6205
6206 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6207 {
6208         u32 tmp;
6209
6210         tmp = RREG32(RLC_LB_CNTL);
6211         if (enable)
6212                 tmp |= LOAD_BALANCE_ENABLE;
6213         else
6214                 tmp &= ~LOAD_BALANCE_ENABLE;
6215         WREG32(RLC_LB_CNTL, tmp);
6216 }
6217
6218 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6219 {
6220         u32 i, j, k;
6221         u32 mask;
6222
6223         mutex_lock(&rdev->grbm_idx_mutex);
6224         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6225                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6226                         cik_select_se_sh(rdev, i, j);
6227                         for (k = 0; k < rdev->usec_timeout; k++) {
6228                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6229                                         break;
6230                                 udelay(1);
6231                         }
6232                 }
6233         }
6234         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6235         mutex_unlock(&rdev->grbm_idx_mutex);
6236
6237         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6238         for (k = 0; k < rdev->usec_timeout; k++) {
6239                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6240                         break;
6241                 udelay(1);
6242         }
6243 }
6244
6245 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6246 {
6247         u32 tmp;
6248
6249         tmp = RREG32(RLC_CNTL);
6250         if (tmp != rlc)
6251                 WREG32(RLC_CNTL, rlc);
6252 }
6253
6254 static u32 cik_halt_rlc(struct radeon_device *rdev)
6255 {
6256         u32 data, orig;
6257
6258         orig = data = RREG32(RLC_CNTL);
6259
6260         if (data & RLC_ENABLE) {
6261                 u32 i;
6262
6263                 data &= ~RLC_ENABLE;
6264                 WREG32(RLC_CNTL, data);
6265
6266                 for (i = 0; i < rdev->usec_timeout; i++) {
6267                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6268                                 break;
6269                         udelay(1);
6270                 }
6271
6272                 cik_wait_for_rlc_serdes(rdev);
6273         }
6274
6275         return orig;
6276 }
6277
6278 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6279 {
6280         u32 tmp, i, mask;
6281
6282         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6283         WREG32(RLC_GPR_REG2, tmp);
6284
6285         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6286         for (i = 0; i < rdev->usec_timeout; i++) {
6287                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6288                         break;
6289                 udelay(1);
6290         }
6291
6292         for (i = 0; i < rdev->usec_timeout; i++) {
6293                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6294                         break;
6295                 udelay(1);
6296         }
6297 }
6298
6299 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6300 {
6301         u32 tmp;
6302
6303         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6304         WREG32(RLC_GPR_REG2, tmp);
6305 }
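     /* The safe mode handshake above works by writing REQ plus the
      * enter/exit message to RLC_GPR_REG2; on entry the driver then polls
      * RLC_GPM_STAT for the GFX power/clock status bits and waits for the
      * RLC to clear REQ, indicating the request has been consumed.
      */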
6306
6307 /**
6308  * cik_rlc_stop - stop the RLC ME
6309  *
6310  * @rdev: radeon_device pointer
6311  *
6312  * Halt the RLC ME (MicroEngine) (CIK).
6313  */
6314 static void cik_rlc_stop(struct radeon_device *rdev)
6315 {
6316         WREG32(RLC_CNTL, 0);
6317
6318         cik_enable_gui_idle_interrupt(rdev, false);
6319
6320         cik_wait_for_rlc_serdes(rdev);
6321 }
6322
6323 /**
6324  * cik_rlc_start - start the RLC ME
6325  *
6326  * @rdev: radeon_device pointer
6327  *
6328  * Unhalt the RLC ME (MicroEngine) (CIK).
6329  */
6330 static void cik_rlc_start(struct radeon_device *rdev)
6331 {
6332         WREG32(RLC_CNTL, RLC_ENABLE);
6333
6334         cik_enable_gui_idle_interrupt(rdev, true);
6335
6336         udelay(50);
6337 }
6338
6339 /**
6340  * cik_rlc_resume - setup the RLC hw
6341  *
6342  * @rdev: radeon_device pointer
6343  *
6344  * Initialize the RLC registers, load the ucode,
6345  * and start the RLC (CIK).
6346  * Returns 0 for success, -EINVAL if the ucode is not available.
6347  */
6348 static int cik_rlc_resume(struct radeon_device *rdev)
6349 {
6350         u32 i, size, tmp;
6351
6352         if (!rdev->rlc_fw)
6353                 return -EINVAL;
6354
6355         cik_rlc_stop(rdev);
6356
6357         /* disable CG */
6358         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6359         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6360
6361         si_rlc_reset(rdev);
6362
6363         cik_init_pg(rdev);
6364
6365         cik_init_cg(rdev);
6366
6367         WREG32(RLC_LB_CNTR_INIT, 0);
6368         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6369
6370         mutex_lock(&rdev->grbm_idx_mutex);
6371         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6372         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6373         WREG32(RLC_LB_PARAMS, 0x00600408);
6374         WREG32(RLC_LB_CNTL, 0x80000004);
6375         mutex_unlock(&rdev->grbm_idx_mutex);
6376
6377         WREG32(RLC_MC_CNTL, 0);
6378         WREG32(RLC_UCODE_CNTL, 0);
6379
6380         if (rdev->new_fw) {
6381                 const struct rlc_firmware_header_v1_0 *hdr =
6382                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6383                 const __le32 *fw_data = (const __le32 *)
6384                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6385
6386                 radeon_ucode_print_rlc_hdr(&hdr->header);
6387
6388                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6389                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6390                 for (i = 0; i < size; i++)
6391                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6392                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6393         } else {
6394                 const __be32 *fw_data;
6395
6396                 switch (rdev->family) {
6397                 case CHIP_BONAIRE:
6398                 case CHIP_HAWAII:
6399                 default:
6400                         size = BONAIRE_RLC_UCODE_SIZE;
6401                         break;
6402                 case CHIP_KAVERI:
6403                         size = KV_RLC_UCODE_SIZE;
6404                         break;
6405                 case CHIP_KABINI:
6406                         size = KB_RLC_UCODE_SIZE;
6407                         break;
6408                 case CHIP_MULLINS:
6409                         size = ML_RLC_UCODE_SIZE;
6410                         break;
6411                 }
6412
6413                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6414                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6415                 for (i = 0; i < size; i++)
6416                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6417                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6418         }
6419
6420         /* XXX - find out what chips support lbpw */
6421         cik_enable_lbpw(rdev, false);
6422
6423         if (rdev->family == CHIP_BONAIRE)
6424                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6425
6426         cik_rlc_start(rdev);
6427
6428         return 0;
6429 }
6430
6431 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6432 {
6433         u32 data, orig, tmp, tmp2;
6434
6435         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6436
6437         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6438                 cik_enable_gui_idle_interrupt(rdev, true);
6439
6440                 tmp = cik_halt_rlc(rdev);
6441
6442                 mutex_lock(&rdev->grbm_idx_mutex);
6443                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6444                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6445                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6446                 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6447                 WREG32(RLC_SERDES_WR_CTRL, tmp2);
6448                 mutex_unlock(&rdev->grbm_idx_mutex);
6449
6450                 cik_update_rlc(rdev, tmp);
6451
6452                 data |= CGCG_EN | CGLS_EN;
6453         } else {
6454                 cik_enable_gui_idle_interrupt(rdev, false);
6455
6456                 RREG32(CB_CGTT_SCLK_CTRL);
6457                 RREG32(CB_CGTT_SCLK_CTRL);
6458                 RREG32(CB_CGTT_SCLK_CTRL);
6459                 RREG32(CB_CGTT_SCLK_CTRL);
6460
6461                 data &= ~(CGCG_EN | CGLS_EN);
6462         }
6463
6464         if (orig != data)
6465                 WREG32(RLC_CGCG_CGLS_CTRL, data);
6466
6467 }
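     /* Like most clock gating helpers in this file, the functions above and
      * below read a register into both orig and data, modify data, and only
      * write it back when the value actually changed, avoiding redundant
      * MMIO writes.
      */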
6468
6469 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6470 {
6471         u32 data, orig, tmp = 0;
6472
6473         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6474                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6475                         if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6476                                 orig = data = RREG32(CP_MEM_SLP_CNTL);
6477                                 data |= CP_MEM_LS_EN;
6478                                 if (orig != data)
6479                                         WREG32(CP_MEM_SLP_CNTL, data);
6480                         }
6481                 }
6482
6483                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6484                 data |= 0x00000001;
6485                 data &= 0xfffffffd;
6486                 if (orig != data)
6487                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6488
6489                 tmp = cik_halt_rlc(rdev);
6490
6491                 mutex_lock(&rdev->grbm_idx_mutex);
6492                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6493                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6494                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6495                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6496                 WREG32(RLC_SERDES_WR_CTRL, data);
6497                 mutex_unlock(&rdev->grbm_idx_mutex);
6498
6499                 cik_update_rlc(rdev, tmp);
6500
6501                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6502                         orig = data = RREG32(CGTS_SM_CTRL_REG);
6503                         data &= ~SM_MODE_MASK;
6504                         data |= SM_MODE(0x2);
6505                         data |= SM_MODE_ENABLE;
6506                         data &= ~CGTS_OVERRIDE;
6507                         if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6508                             (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6509                                 data &= ~CGTS_LS_OVERRIDE;
6510                         data &= ~ON_MONITOR_ADD_MASK;
6511                         data |= ON_MONITOR_ADD_EN;
6512                         data |= ON_MONITOR_ADD(0x96);
6513                         if (orig != data)
6514                                 WREG32(CGTS_SM_CTRL_REG, data);
6515                 }
6516         } else {
6517                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6518                 data |= 0x00000003;
6519                 if (orig != data)
6520                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6521
6522                 data = RREG32(RLC_MEM_SLP_CNTL);
6523                 if (data & RLC_MEM_LS_EN) {
6524                         data &= ~RLC_MEM_LS_EN;
6525                         WREG32(RLC_MEM_SLP_CNTL, data);
6526                 }
6527
6528                 data = RREG32(CP_MEM_SLP_CNTL);
6529                 if (data & CP_MEM_LS_EN) {
6530                         data &= ~CP_MEM_LS_EN;
6531                         WREG32(CP_MEM_SLP_CNTL, data);
6532                 }
6533
6534                 orig = data = RREG32(CGTS_SM_CTRL_REG);
6535                 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6536                 if (orig != data)
6537                         WREG32(CGTS_SM_CTRL_REG, data);
6538
6539                 tmp = cik_halt_rlc(rdev);
6540
6541                 mutex_lock(&rdev->grbm_idx_mutex);
6542                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6543                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6544                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6545                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6546                 WREG32(RLC_SERDES_WR_CTRL, data);
6547                 mutex_unlock(&rdev->grbm_idx_mutex);
6548
6549                 cik_update_rlc(rdev, tmp);
6550         }
6551 }
6552
6553 static const u32 mc_cg_registers[] =
6554 {
6555         MC_HUB_MISC_HUB_CG,
6556         MC_HUB_MISC_SIP_CG,
6557         MC_HUB_MISC_VM_CG,
6558         MC_XPB_CLK_GAT,
6559         ATC_MISC_CG,
6560         MC_CITF_MISC_WR_CG,
6561         MC_CITF_MISC_RD_CG,
6562         MC_CITF_MISC_VM_CG,
6563         VM_L2_CG,
6564 };
6565
6566 static void cik_enable_mc_ls(struct radeon_device *rdev,
6567                              bool enable)
6568 {
6569         int i;
6570         u32 orig, data;
6571
6572         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6573                 orig = data = RREG32(mc_cg_registers[i]);
6574                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6575                         data |= MC_LS_ENABLE;
6576                 else
6577                         data &= ~MC_LS_ENABLE;
6578                 if (data != orig)
6579                         WREG32(mc_cg_registers[i], data);
6580         }
6581 }
6582
6583 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6584                                bool enable)
6585 {
6586         int i;
6587         u32 orig, data;
6588
6589         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6590                 orig = data = RREG32(mc_cg_registers[i]);
6591                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6592                         data |= MC_CG_ENABLE;
6593                 else
6594                         data &= ~MC_CG_ENABLE;
6595                 if (data != orig)
6596                         WREG32(mc_cg_registers[i], data);
6597         }
6598 }
6599
6600 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6601                                  bool enable)
6602 {
6603         u32 orig, data;
6604
6605         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6606                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6607                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6608         } else {
6609                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6610                 data |= 0xff000000;
6611                 if (data != orig)
6612                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6613
6614                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6615                 data |= 0xff000000;
6616                 if (data != orig)
6617                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6618         }
6619 }
6620
6621 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6622                                  bool enable)
6623 {
6624         u32 orig, data;
6625
6626         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6627                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6628                 data |= 0x100;
6629                 if (orig != data)
6630                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6631
6632                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6633                 data |= 0x100;
6634                 if (orig != data)
6635                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6636         } else {
6637                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6638                 data &= ~0x100;
6639                 if (orig != data)
6640                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6641
6642                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6643                 data &= ~0x100;
6644                 if (orig != data)
6645                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6646         }
6647 }
6648
6649 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6650                                 bool enable)
6651 {
6652         u32 orig, data;
6653
6654         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6655                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6656                 data = 0xfff;
6657                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6658
6659                 orig = data = RREG32(UVD_CGC_CTRL);
6660                 data |= DCM;
6661                 if (orig != data)
6662                         WREG32(UVD_CGC_CTRL, data);
6663         } else {
6664                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6665                 data &= ~0xfff;
6666                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6667
6668                 orig = data = RREG32(UVD_CGC_CTRL);
6669                 data &= ~DCM;
6670                 if (orig != data)
6671                         WREG32(UVD_CGC_CTRL, data);
6672         }
6673 }
6674
6675 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6676                                bool enable)
6677 {
6678         u32 orig, data;
6679
6680         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6681
6682         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6683                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6684                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6685         else
6686                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6687                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6688
6689         if (orig != data)
6690                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6691 }
6692
6693 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6694                                 bool enable)
6695 {
6696         u32 orig, data;
6697
6698         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6699
6700         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6701                 data &= ~CLOCK_GATING_DIS;
6702         else
6703                 data |= CLOCK_GATING_DIS;
6704
6705         if (orig != data)
6706                 WREG32(HDP_HOST_PATH_CNTL, data);
6707 }
6708
6709 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6710                               bool enable)
6711 {
6712         u32 orig, data;
6713
6714         orig = data = RREG32(HDP_MEM_POWER_LS);
6715
6716         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6717                 data |= HDP_LS_ENABLE;
6718         else
6719                 data &= ~HDP_LS_ENABLE;
6720
6721         if (orig != data)
6722                 WREG32(HDP_MEM_POWER_LS, data);
6723 }
6724
6725 void cik_update_cg(struct radeon_device *rdev,
6726                    u32 block, bool enable)
6727 {
6728
6729         if (block & RADEON_CG_BLOCK_GFX) {
6730                 cik_enable_gui_idle_interrupt(rdev, false);
6731                 /* order matters! */
6732                 if (enable) {
6733                         cik_enable_mgcg(rdev, true);
6734                         cik_enable_cgcg(rdev, true);
6735                 } else {
6736                         cik_enable_cgcg(rdev, false);
6737                         cik_enable_mgcg(rdev, false);
6738                 }
6739                 cik_enable_gui_idle_interrupt(rdev, true);
6740         }
6741
6742         if (block & RADEON_CG_BLOCK_MC) {
6743                 if (!(rdev->flags & RADEON_IS_IGP)) {
6744                         cik_enable_mc_mgcg(rdev, enable);
6745                         cik_enable_mc_ls(rdev, enable);
6746                 }
6747         }
6748
6749         if (block & RADEON_CG_BLOCK_SDMA) {
6750                 cik_enable_sdma_mgcg(rdev, enable);
6751                 cik_enable_sdma_mgls(rdev, enable);
6752         }
6753
6754         if (block & RADEON_CG_BLOCK_BIF) {
6755                 cik_enable_bif_mgls(rdev, enable);
6756         }
6757
6758         if (block & RADEON_CG_BLOCK_UVD) {
6759                 if (rdev->has_uvd)
6760                         cik_enable_uvd_mgcg(rdev, enable);
6761         }
6762
6763         if (block & RADEON_CG_BLOCK_HDP) {
6764                 cik_enable_hdp_mgcg(rdev, enable);
6765                 cik_enable_hdp_ls(rdev, enable);
6766         }
6767
6768         if (block & RADEON_CG_BLOCK_VCE) {
6769                 vce_v2_0_enable_mgcg(rdev, enable);
6770         }
6771 }
6772
6773 static void cik_init_cg(struct radeon_device *rdev)
6774 {
6775
6776         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6777
6778         if (rdev->has_uvd)
6779                 si_init_uvd_internal_cg(rdev);
6780
6781         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6782                              RADEON_CG_BLOCK_SDMA |
6783                              RADEON_CG_BLOCK_BIF |
6784                              RADEON_CG_BLOCK_UVD |
6785                              RADEON_CG_BLOCK_HDP), true);
6786 }
6787
6788 static void cik_fini_cg(struct radeon_device *rdev)
6789 {
6790         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6791                              RADEON_CG_BLOCK_SDMA |
6792                              RADEON_CG_BLOCK_BIF |
6793                              RADEON_CG_BLOCK_UVD |
6794                              RADEON_CG_BLOCK_HDP), false);
6795
6796         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6797 }
6798
6799 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6800                                           bool enable)
6801 {
6802         u32 data, orig;
6803
6804         orig = data = RREG32(RLC_PG_CNTL);
6805         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6806                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6807         else
6808                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6809         if (orig != data)
6810                 WREG32(RLC_PG_CNTL, data);
6811 }
6812
6813 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6814                                           bool enable)
6815 {
6816         u32 data, orig;
6817
6818         orig = data = RREG32(RLC_PG_CNTL);
6819         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6820                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6821         else
6822                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6823         if (orig != data)
6824                 WREG32(RLC_PG_CNTL, data);
6825 }
6826
6827 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6828 {
6829         u32 data, orig;
6830
6831         orig = data = RREG32(RLC_PG_CNTL);
6832         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6833                 data &= ~DISABLE_CP_PG;
6834         else
6835                 data |= DISABLE_CP_PG;
6836         if (orig != data)
6837                 WREG32(RLC_PG_CNTL, data);
6838 }
6839
6840 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6841 {
6842         u32 data, orig;
6843
6844         orig = data = RREG32(RLC_PG_CNTL);
6845         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6846                 data &= ~DISABLE_GDS_PG;
6847         else
6848                 data |= DISABLE_GDS_PG;
6849         if (orig != data)
6850                 WREG32(RLC_PG_CNTL, data);
6851 }
6852
6853 #define CP_ME_TABLE_SIZE    96
6854 #define CP_ME_TABLE_OFFSET  2048
6855 #define CP_MEC_TABLE_OFFSET 4096
6856
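     /**
      * cik_init_cp_pg_table - populate the CP power gating jump table
      *
      * @rdev: radeon_device pointer
      *
      * Copy the jump tables for the CE, PFP, ME and MEC microcode (plus
      * MEC2 on KAVERI) into the RLC CP table buffer, which the RLC uses to
      * restore CP state when CP power gating is enabled (CIK).
      */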
6857 void cik_init_cp_pg_table(struct radeon_device *rdev)
6858 {
6859         volatile u32 *dst_ptr;
6860         int me, i, max_me = 4;
6861         u32 bo_offset = 0;
6862         u32 table_offset, table_size;
6863
6864         if (rdev->family == CHIP_KAVERI)
6865                 max_me = 5;
6866
6867         if (rdev->rlc.cp_table_ptr == NULL)
6868                 return;
6869
6870         /* write the cp table buffer */
6871         dst_ptr = rdev->rlc.cp_table_ptr;
6872         for (me = 0; me < max_me; me++) {
6873                 if (rdev->new_fw) {
6874                         const __le32 *fw_data;
6875                         const struct gfx_firmware_header_v1_0 *hdr;
6876
6877                         if (me == 0) {
6878                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6879                                 fw_data = (const __le32 *)
6880                                         (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6881                                 table_offset = le32_to_cpu(hdr->jt_offset);
6882                                 table_size = le32_to_cpu(hdr->jt_size);
6883                         } else if (me == 1) {
6884                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6885                                 fw_data = (const __le32 *)
6886                                         (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6887                                 table_offset = le32_to_cpu(hdr->jt_offset);
6888                                 table_size = le32_to_cpu(hdr->jt_size);
6889                         } else if (me == 2) {
6890                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6891                                 fw_data = (const __le32 *)
6892                                         (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6893                                 table_offset = le32_to_cpu(hdr->jt_offset);
6894                                 table_size = le32_to_cpu(hdr->jt_size);
6895                         } else if (me == 3) {
6896                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6897                                 fw_data = (const __le32 *)
6898                                         (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6899                                 table_offset = le32_to_cpu(hdr->jt_offset);
6900                                 table_size = le32_to_cpu(hdr->jt_size);
6901                         } else {
6902                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6903                                 fw_data = (const __le32 *)
6904                                         (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6905                                 table_offset = le32_to_cpu(hdr->jt_offset);
6906                                 table_size = le32_to_cpu(hdr->jt_size);
6907                         }
6908
6909                         for (i = 0; i < table_size; i ++) {
6910                                 dst_ptr[bo_offset + i] =
6911                                         cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6912                         }
6913                         bo_offset += table_size;
6914                 } else {
6915                         const __be32 *fw_data;
6916                         table_size = CP_ME_TABLE_SIZE;
6917
6918                         if (me == 0) {
6919                                 fw_data = (const __be32 *)rdev->ce_fw->data;
6920                                 table_offset = CP_ME_TABLE_OFFSET;
6921                         } else if (me == 1) {
6922                                 fw_data = (const __be32 *)rdev->pfp_fw->data;
6923                                 table_offset = CP_ME_TABLE_OFFSET;
6924                         } else if (me == 2) {
6925                                 fw_data = (const __be32 *)rdev->me_fw->data;
6926                                 table_offset = CP_ME_TABLE_OFFSET;
6927                         } else {
6928                                 fw_data = (const __be32 *)rdev->mec_fw->data;
6929                                 table_offset = CP_MEC_TABLE_OFFSET;
6930                         }
6931
6932                         for (i = 0; i < table_size; i ++) {
6933                                 dst_ptr[bo_offset + i] =
6934                                         cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6935                         }
6936                         bo_offset += table_size;
6937                 }
6938         }
6939 }
6940
6941 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6942                                 bool enable)
6943 {
6944         u32 data, orig;
6945
6946         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6947                 orig = data = RREG32(RLC_PG_CNTL);
6948                 data |= GFX_PG_ENABLE;
6949                 if (orig != data)
6950                         WREG32(RLC_PG_CNTL, data);
6951
6952                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6953                 data |= AUTO_PG_EN;
6954                 if (orig != data)
6955                         WREG32(RLC_AUTO_PG_CTRL, data);
6956         } else {
6957                 orig = data = RREG32(RLC_PG_CNTL);
6958                 data &= ~GFX_PG_ENABLE;
6959                 if (orig != data)
6960                         WREG32(RLC_PG_CNTL, data);
6961
6962                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6963                 data &= ~AUTO_PG_EN;
6964                 if (orig != data)
6965                         WREG32(RLC_AUTO_PG_CTRL, data);
6966
6967                 data = RREG32(DB_RENDER_CONTROL);
6968         }
6969 }
6970
6971 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6972 {
6973         u32 mask = 0, tmp, tmp1;
6974         int i;
6975
6976         mutex_lock(&rdev->grbm_idx_mutex);
6977         cik_select_se_sh(rdev, se, sh);
6978         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6979         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6980         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6981         mutex_unlock(&rdev->grbm_idx_mutex);
6982
6983         tmp &= 0xffff0000;
6984
6985         tmp |= tmp1;
6986         tmp >>= 16;
6987
6988         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6989                 mask <<= 1;
6990                 mask |= 1;
6991         }
6992
6993         return (~tmp) & mask;
6994 }
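     /* The loop above builds a mask of max_cu_per_sh low bits; the
      * inactive-CU bits gathered in tmp are then inverted and masked.  For
      * example, with 8 CUs per SH and tmp ending up as 0x3, the returned
      * active bitmap is 0xfc.
      */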
6995
6996 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6997 {
6998         u32 i, j, k, active_cu_number = 0;
6999         u32 mask, counter, cu_bitmap;
7000         u32 tmp = 0;
7001
7002         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
7003                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
7004                         mask = 1;
7005                         cu_bitmap = 0;
7006                         counter = 0;
7007                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
7008                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
7009                                         if (counter < 2)
7010                                                 cu_bitmap |= mask;
7011                                         counter ++;
7012                                 }
7013                                 mask <<= 1;
7014                         }
7015
7016                         active_cu_number += counter;
7017                         tmp |= (cu_bitmap << (i * 16 + j * 8));
7018                 }
7019         }
7020
7021         WREG32(RLC_PG_AO_CU_MASK, tmp);
7022
7023         tmp = RREG32(RLC_MAX_PG_CU);
7024         tmp &= ~MAX_PU_CU_MASK;
7025         tmp |= MAX_PU_CU(active_cu_number);
7026         WREG32(RLC_MAX_PG_CU, tmp);
7027 }
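     /* Each SE/SH pair contributes at most two "always on" CUs; its bitmap
      * is packed into RLC_PG_AO_CU_MASK at bit offset (se * 16 + sh * 8),
      * so e.g. the bitmap for SE1/SH0 lands in bits 16-23.
      */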
7028
7029 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
7030                                        bool enable)
7031 {
7032         u32 data, orig;
7033
7034         orig = data = RREG32(RLC_PG_CNTL);
7035         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
7036                 data |= STATIC_PER_CU_PG_ENABLE;
7037         else
7038                 data &= ~STATIC_PER_CU_PG_ENABLE;
7039         if (orig != data)
7040                 WREG32(RLC_PG_CNTL, data);
7041 }
7042
7043 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
7044                                         bool enable)
7045 {
7046         u32 data, orig;
7047
7048         orig = data = RREG32(RLC_PG_CNTL);
7049         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
7050                 data |= DYN_PER_CU_PG_ENABLE;
7051         else
7052                 data &= ~DYN_PER_CU_PG_ENABLE;
7053         if (orig != data)
7054                 WREG32(RLC_PG_CNTL, data);
7055 }
7056
7057 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
7058 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
7059
7060 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
7061 {
7062         u32 data, orig;
7063         u32 i;
7064
7065         if (rdev->rlc.cs_data) {
7066                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7067                 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
7068                 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
7069                 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
7070         } else {
7071                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7072                 for (i = 0; i < 3; i++)
7073                         WREG32(RLC_GPM_SCRATCH_DATA, 0);
7074         }
7075         if (rdev->rlc.reg_list) {
7076                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
7077                 for (i = 0; i < rdev->rlc.reg_list_size; i++)
7078                         WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
7079         }
7080
7081         orig = data = RREG32(RLC_PG_CNTL);
7082         data |= GFX_PG_SRC;
7083         if (orig != data)
7084                 WREG32(RLC_PG_CNTL, data);
7085
7086         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
7087         WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
7088
7089         data = RREG32(CP_RB_WPTR_POLL_CNTL);
7090         data &= ~IDLE_POLL_COUNT_MASK;
7091         data |= IDLE_POLL_COUNT(0x60);
7092         WREG32(CP_RB_WPTR_POLL_CNTL, data);
7093
7094         data = 0x10101010;
7095         WREG32(RLC_PG_DELAY, data);
7096
7097         data = RREG32(RLC_PG_DELAY_2);
7098         data &= ~0xff;
7099         data |= 0x3;
7100         WREG32(RLC_PG_DELAY_2, data);
7101
7102         data = RREG32(RLC_AUTO_PG_CTRL);
7103         data &= ~GRBM_REG_SGIT_MASK;
7104         data |= GRBM_REG_SGIT(0x700);
7105         WREG32(RLC_AUTO_PG_CTRL, data);
7106
7107 }
7108
7109 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
7110 {
7111         cik_enable_gfx_cgpg(rdev, enable);
7112         cik_enable_gfx_static_mgpg(rdev, enable);
7113         cik_enable_gfx_dynamic_mgpg(rdev, enable);
7114 }
7115
7116 u32 cik_get_csb_size(struct radeon_device *rdev)
7117 {
7118         u32 count = 0;
7119         const struct cs_section_def *sect = NULL;
7120         const struct cs_extent_def *ext = NULL;
7121
7122         if (rdev->rlc.cs_data == NULL)
7123                 return 0;
7124
7125         /* begin clear state */
7126         count += 2;
7127         /* context control state */
7128         count += 3;
7129
7130         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7131                 for (ext = sect->section; ext->extent != NULL; ++ext) {
7132                         if (sect->id == SECT_CONTEXT)
7133                                 count += 2 + ext->reg_count;
7134                         else
7135                                 return 0;
7136                 }
7137         }
7138         /* pa_sc_raster_config/pa_sc_raster_config1 */
7139         count += 4;
7140         /* end clear state */
7141         count += 2;
7142         /* clear state */
7143         count += 2;
7144
7145         return count;
7146 }
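     /* The dword count computed here must match what cik_get_csb_buffer()
      * below emits: 2 for the clear state preamble, 3 for context control,
      * 2 + reg_count per SECT_CONTEXT extent, 4 for the raster config pair,
      * 2 to end the preamble and 2 for the final CLEAR_STATE packet.
      */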
7147
7148 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7149 {
7150         u32 count = 0, i;
7151         const struct cs_section_def *sect = NULL;
7152         const struct cs_extent_def *ext = NULL;
7153
7154         if (rdev->rlc.cs_data == NULL)
7155                 return;
7156         if (buffer == NULL)
7157                 return;
7158
7159         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7160         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7161
7162         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7163         buffer[count++] = cpu_to_le32(0x80000000);
7164         buffer[count++] = cpu_to_le32(0x80000000);
7165
7166         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7167                 for (ext = sect->section; ext->extent != NULL; ++ext) {
7168                         if (sect->id == SECT_CONTEXT) {
7169                                 buffer[count++] =
7170                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7171                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7172                                 for (i = 0; i < ext->reg_count; i++)
7173                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
7174                         } else {
7175                                 return;
7176                         }
7177                 }
7178         }
7179
7180         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7181         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7182         switch (rdev->family) {
7183         case CHIP_BONAIRE:
7184                 buffer[count++] = cpu_to_le32(0x16000012);
7185                 buffer[count++] = cpu_to_le32(0x00000000);
7186                 break;
7187         case CHIP_KAVERI:
7188                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7189                 buffer[count++] = cpu_to_le32(0x00000000);
7190                 break;
7191         case CHIP_KABINI:
7192         case CHIP_MULLINS:
7193                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7194                 buffer[count++] = cpu_to_le32(0x00000000);
7195                 break;
7196         case CHIP_HAWAII:
7197                 buffer[count++] = cpu_to_le32(0x3a00161a);
7198                 buffer[count++] = cpu_to_le32(0x0000002e);
7199                 break;
7200         default:
7201                 buffer[count++] = cpu_to_le32(0x00000000);
7202                 buffer[count++] = cpu_to_le32(0x00000000);
7203                 break;
7204         }
7205
7206         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7207         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7208
7209         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7210         buffer[count++] = cpu_to_le32(0);
7211 }
7212
7213 static void cik_init_pg(struct radeon_device *rdev)
7214 {
7215         if (rdev->pg_flags) {
7216                 cik_enable_sck_slowdown_on_pu(rdev, true);
7217                 cik_enable_sck_slowdown_on_pd(rdev, true);
7218                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7219                         cik_init_gfx_cgpg(rdev);
7220                         cik_enable_cp_pg(rdev, true);
7221                         cik_enable_gds_pg(rdev, true);
7222                 }
7223                 cik_init_ao_cu_mask(rdev);
7224                 cik_update_gfx_pg(rdev, true);
7225         }
7226 }
7227
7228 static void cik_fini_pg(struct radeon_device *rdev)
7229 {
7230         if (rdev->pg_flags) {
7231                 cik_update_gfx_pg(rdev, false);
7232                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7233                         cik_enable_cp_pg(rdev, false);
7234                         cik_enable_gds_pg(rdev, false);
7235                 }
7236         }
7237 }
7238
7239 /*
7240  * Interrupts
7241  * Starting with r6xx, interrupts are handled via a ring buffer.
7242  * Ring buffers are areas of GPU accessible memory that the GPU
7243  * writes interrupt vectors into and the host reads vectors out of.
7244  * There is a rptr (read pointer) that determines where the
7245  * host is currently reading, and a wptr (write pointer)
7246  * which determines where the GPU has written.  When the
7247  * pointers are equal, the ring is idle.  When the GPU
7248  * writes vectors to the ring buffer, it increments the
7249  * wptr.  When there is an interrupt, the host then starts
7250  * fetching vectors and processing them until the pointers are
7251  * equal again at which point it updates the rptr.
7252  */
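     /* An illustrative outline of that scheme (pseudo code, not driver code;
      * the real handler also deals with wptr writeback and ring overflow):
      *
      *     wptr = read_ih_wptr();
      *     while (rptr != wptr) {
      *             vector = ih_ring[rptr / 4];
      *             handle(vector);
      *             rptr += 16;              (ring entries are 16 bytes)
      *             rptr &= ptr_mask;        (wrap at the end of the ring)
      *     }
      *     write_ih_rptr(rptr);
      */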
7253
7254 /**
7255  * cik_enable_interrupts - Enable the interrupt ring buffer
7256  *
7257  * @rdev: radeon_device pointer
7258  *
7259  * Enable the interrupt ring buffer (CIK).
7260  */
7261 static void cik_enable_interrupts(struct radeon_device *rdev)
7262 {
7263         u32 ih_cntl = RREG32(IH_CNTL);
7264         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7265
7266         ih_cntl |= ENABLE_INTR;
7267         ih_rb_cntl |= IH_RB_ENABLE;
7268         WREG32(IH_CNTL, ih_cntl);
7269         WREG32(IH_RB_CNTL, ih_rb_cntl);
7270         rdev->ih.enabled = true;
7271 }
7272
7273 /**
7274  * cik_disable_interrupts - Disable the interrupt ring buffer
7275  *
7276  * @rdev: radeon_device pointer
7277  *
7278  * Disable the interrupt ring buffer (CIK).
7279  */
7280 static void cik_disable_interrupts(struct radeon_device *rdev)
7281 {
7282         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7283         u32 ih_cntl = RREG32(IH_CNTL);
7284
7285         ih_rb_cntl &= ~IH_RB_ENABLE;
7286         ih_cntl &= ~ENABLE_INTR;
7287         WREG32(IH_RB_CNTL, ih_rb_cntl);
7288         WREG32(IH_CNTL, ih_cntl);
7289         /* set rptr, wptr to 0 */
7290         WREG32(IH_RB_RPTR, 0);
7291         WREG32(IH_RB_WPTR, 0);
7292         rdev->ih.enabled = false;
7293         rdev->ih.rptr = 0;
7294 }
7295
7296 /**
7297  * cik_disable_interrupt_state - Disable all interrupt sources
7298  *
7299  * @rdev: radeon_device pointer
7300  *
7301  * Clear all interrupt enable bits used by the driver (CIK).
7302  */
7303 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7304 {
7305         u32 tmp;
7306
7307         /* gfx ring */
7308         tmp = RREG32(CP_INT_CNTL_RING0) &
7309                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7310         WREG32(CP_INT_CNTL_RING0, tmp);
7311         /* sdma */
7312         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7313         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7314         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7315         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7316         /* compute queues */
7317         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7318         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7319         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7320         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7321         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7322         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7323         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7324         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7325         /* grbm */
7326         WREG32(GRBM_INT_CNTL, 0);
7327         /* SRBM */
7328         WREG32(SRBM_INT_CNTL, 0);
7329         /* vline/vblank, etc. */
7330         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7331         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7332         if (rdev->num_crtc >= 4) {
7333                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7334                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7335         }
7336         if (rdev->num_crtc >= 6) {
7337                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7338                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7339         }
7340         /* pflip */
7341         if (rdev->num_crtc >= 2) {
7342                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7343                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7344         }
7345         if (rdev->num_crtc >= 4) {
7346                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7347                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7348         }
7349         if (rdev->num_crtc >= 6) {
7350                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7351                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7352         }
7353
7354         /* dac hotplug */
7355         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7356
7357         /* digital hotplug */
7358         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7359         WREG32(DC_HPD1_INT_CONTROL, tmp);
7360         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7361         WREG32(DC_HPD2_INT_CONTROL, tmp);
7362         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7363         WREG32(DC_HPD3_INT_CONTROL, tmp);
7364         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7365         WREG32(DC_HPD4_INT_CONTROL, tmp);
7366         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7367         WREG32(DC_HPD5_INT_CONTROL, tmp);
7368         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7369         WREG32(DC_HPD6_INT_CONTROL, tmp);
7370
7371 }
7372
7373 /**
7374  * cik_irq_init - init and enable the interrupt ring
7375  *
7376  * @rdev: radeon_device pointer
7377  *
7378  * Allocate a ring buffer for the interrupt controller,
7379  * enable the RLC, disable interrupts, set up the IH
7380  * ring buffer and enable it (CIK).
7381  * Called at device load and resume.
7382  * Returns 0 for success, errors for failure.
7383  */
7384 static int cik_irq_init(struct radeon_device *rdev)
7385 {
7386         int ret = 0;
7387         int rb_bufsz;
7388         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7389
7390         /* allocate ring */
7391         ret = r600_ih_ring_alloc(rdev);
7392         if (ret)
7393                 return ret;
7394
7395         /* disable irqs */
7396         cik_disable_interrupts(rdev);
7397
7398         /* init rlc */
7399         ret = cik_rlc_resume(rdev);
7400         if (ret) {
7401                 r600_ih_ring_fini(rdev);
7402                 return ret;
7403         }
7404
7405         /* setup interrupt control */
7406         /* XXX this should actually be a bus address, not an MC address. same on older asics */
7407         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7408         interrupt_cntl = RREG32(INTERRUPT_CNTL);
7409         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7410          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7411          */
7412         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7413         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7414         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7415         WREG32(INTERRUPT_CNTL, interrupt_cntl);
7416
7417         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7418         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7419
7420         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7421                       IH_WPTR_OVERFLOW_CLEAR |
7422                       (rb_bufsz << 1));
7423
7424         if (rdev->wb.enabled)
7425                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7426
7427         /* set the writeback address whether it's enabled or not */
7428         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7429         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7430
7431         WREG32(IH_RB_CNTL, ih_rb_cntl);
7432
7433         /* set rptr, wptr to 0 */
7434         WREG32(IH_RB_RPTR, 0);
7435         WREG32(IH_RB_WPTR, 0);
7436
7437         /* Default settings for IH_CNTL (disabled at first) */
7438         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7439         /* RPTR_REARM only works if msi's are enabled */
7440         if (rdev->msi_enabled)
7441                 ih_cntl |= RPTR_REARM;
7442         WREG32(IH_CNTL, ih_cntl);
7443
7444         /* force the active interrupt state to all disabled */
7445         cik_disable_interrupt_state(rdev);
7446
7447         pci_set_master(rdev->pdev);
7448
7449         /* enable irqs */
7450         cik_enable_interrupts(rdev);
7451
7452         return ret;
7453 }
7454
7455 /**
7456  * cik_irq_set - enable/disable interrupt sources
7457  *
7458  * @rdev: radeon_device pointer
7459  *
7460  * Enable interrupt sources on the GPU (vblanks, hpd,
7461  * etc.) (CIK).
7462  * Returns 0 for success, errors for failure.
7463  */
7464 int cik_irq_set(struct radeon_device *rdev)
7465 {
7466         u32 cp_int_cntl;
7467         u32 cp_m1p0;
7468         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7469         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7470         u32 grbm_int_cntl = 0;
7471         u32 dma_cntl, dma_cntl1;
7472
7473         if (!rdev->irq.installed) {
7474                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7475                 return -EINVAL;
7476         }
7477         /* don't enable anything if the ih is disabled */
7478         if (!rdev->ih.enabled) {
7479                 cik_disable_interrupts(rdev);
7480                 /* force the active interrupt state to all disabled */
7481                 cik_disable_interrupt_state(rdev);
7482                 return 0;
7483         }
7484
7485         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7486                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7487         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7488
7489         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7490         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7491         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7492         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7493         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7494         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7495
7496         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7497         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7498
7499         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7500
7501         /* enable CP interrupts on all rings */
7502         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7503                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7504                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7505         }
7506         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7507                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7508                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7509                 if (ring->me == 1) {
7510                         switch (ring->pipe) {
7511                         case 0:
7512                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7513                                 break;
7514                         default:
7515                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7516                                 break;
7517                         }
7518                 } else {
7519                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7520                 }
7521         }
7522         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7523                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7524                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7525                 if (ring->me == 1) {
7526                         switch (ring->pipe) {
7527                         case 0:
7528                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7529                                 break;
7530                         default:
7531                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7532                                 break;
7533                         }
7534                 } else {
7535                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7536                 }
7537         }
7538
7539         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7540                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7541                 dma_cntl |= TRAP_ENABLE;
7542         }
7543
7544         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7545                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7546                 dma_cntl1 |= TRAP_ENABLE;
7547         }
7548
7549         if (rdev->irq.crtc_vblank_int[0] ||
7550             atomic_read(&rdev->irq.pflip[0])) {
7551                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7552                 crtc1 |= VBLANK_INTERRUPT_MASK;
7553         }
7554         if (rdev->irq.crtc_vblank_int[1] ||
7555             atomic_read(&rdev->irq.pflip[1])) {
7556                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7557                 crtc2 |= VBLANK_INTERRUPT_MASK;
7558         }
7559         if (rdev->irq.crtc_vblank_int[2] ||
7560             atomic_read(&rdev->irq.pflip[2])) {
7561                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7562                 crtc3 |= VBLANK_INTERRUPT_MASK;
7563         }
7564         if (rdev->irq.crtc_vblank_int[3] ||
7565             atomic_read(&rdev->irq.pflip[3])) {
7566                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7567                 crtc4 |= VBLANK_INTERRUPT_MASK;
7568         }
7569         if (rdev->irq.crtc_vblank_int[4] ||
7570             atomic_read(&rdev->irq.pflip[4])) {
7571                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7572                 crtc5 |= VBLANK_INTERRUPT_MASK;
7573         }
7574         if (rdev->irq.crtc_vblank_int[5] ||
7575             atomic_read(&rdev->irq.pflip[5])) {
7576                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7577                 crtc6 |= VBLANK_INTERRUPT_MASK;
7578         }
7579         if (rdev->irq.hpd[0]) {
7580                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7581                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7582         }
7583         if (rdev->irq.hpd[1]) {
7584                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7585                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7586         }
7587         if (rdev->irq.hpd[2]) {
7588                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7589                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7590         }
7591         if (rdev->irq.hpd[3]) {
7592                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7593                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7594         }
7595         if (rdev->irq.hpd[4]) {
7596                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7597                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7598         }
7599         if (rdev->irq.hpd[5]) {
7600                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7601                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7602         }
7603
7604         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7605
7606         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7607         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7608
7609         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7610
7611         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7612
7613         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7614         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7615         if (rdev->num_crtc >= 4) {
7616                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7617                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7618         }
7619         if (rdev->num_crtc >= 6) {
7620                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7621                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7622         }
7623
7624         if (rdev->num_crtc >= 2) {
7625                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7626                        GRPH_PFLIP_INT_MASK);
7627                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7628                        GRPH_PFLIP_INT_MASK);
7629         }
7630         if (rdev->num_crtc >= 4) {
7631                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7632                        GRPH_PFLIP_INT_MASK);
7633                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7634                        GRPH_PFLIP_INT_MASK);
7635         }
7636         if (rdev->num_crtc >= 6) {
7637                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7638                        GRPH_PFLIP_INT_MASK);
7639                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7640                        GRPH_PFLIP_INT_MASK);
7641         }
7642
7643         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7644         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7645         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7646         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7647         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7648         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7649
7650         /* posting read */
7651         RREG32(SRBM_STATUS);
7652
7653         return 0;
7654 }
7655
7656 /**
7657  * cik_irq_ack - ack interrupt sources
7658  *
7659  * @rdev: radeon_device pointer
7660  *
7661  * Ack interrupt sources on the GPU (vblanks, hpd,
7662  * etc.) (CIK).  Certain interrupt sources are sw
7663  * generated and do not require an explicit ack.
7664  */
7665 static inline void cik_irq_ack(struct radeon_device *rdev)
7666 {
7667         u32 tmp;
7668
7669         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7670         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7671         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7672         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7673         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7674         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7675         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7676
7677         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7678                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7679         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7680                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7681         if (rdev->num_crtc >= 4) {
7682                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7683                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7684                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7685                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7686         }
7687         if (rdev->num_crtc >= 6) {
7688                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7689                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7690                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7691                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7692         }
7693
7694         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7695                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7696                        GRPH_PFLIP_INT_CLEAR);
7697         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7698                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7699                        GRPH_PFLIP_INT_CLEAR);
7700         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7701                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7702         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7703                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7704         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7705                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7706         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7707                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7708
7709         if (rdev->num_crtc >= 4) {
7710                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7711                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7712                                GRPH_PFLIP_INT_CLEAR);
7713                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7714                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7715                                GRPH_PFLIP_INT_CLEAR);
7716                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7717                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7718                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7719                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7720                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7721                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7722                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7723                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7724         }
7725
7726         if (rdev->num_crtc >= 6) {
7727                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7728                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7729                                GRPH_PFLIP_INT_CLEAR);
7730                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7731                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7732                                GRPH_PFLIP_INT_CLEAR);
7733                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7734                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7735                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7736                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7737                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7738                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7739                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7740                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7741         }
7742
7743         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7744                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7745                 tmp |= DC_HPDx_INT_ACK;
7746                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7747         }
7748         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7749                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7750                 tmp |= DC_HPDx_INT_ACK;
7751                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7752         }
7753         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7754                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7755                 tmp |= DC_HPDx_INT_ACK;
7756                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7757         }
7758         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7759                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7760                 tmp |= DC_HPDx_INT_ACK;
7761                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7762         }
7763         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7764                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7765                 tmp |= DC_HPDx_INT_ACK;
7766                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7767         }
7768         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7769                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7770                 tmp |= DC_HPDx_INT_ACK;
7771                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7772         }
7773         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7774                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7775                 tmp |= DC_HPDx_RX_INT_ACK;
7776                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7777         }
7778         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7779                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7780                 tmp |= DC_HPDx_RX_INT_ACK;
7781                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7782         }
7783         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7784                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7785                 tmp |= DC_HPDx_RX_INT_ACK;
7786                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7787         }
7788         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7789                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7790                 tmp |= DC_HPDx_RX_INT_ACK;
7791                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7792         }
7793         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7794                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7795                 tmp |= DC_HPDx_RX_INT_ACK;
7796                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7797         }
7798         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7799                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7800                 tmp |= DC_HPDx_RX_INT_ACK;
7801                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7802         }
7803 }
7804
7805 /**
7806  * cik_irq_disable - disable interrupts
7807  *
7808  * @rdev: radeon_device pointer
7809  *
7810  * Disable interrupts on the hw (CIK).
7811  */
7812 static void cik_irq_disable(struct radeon_device *rdev)
7813 {
7814         cik_disable_interrupts(rdev);
7815         /* Wait and acknowledge irq */
7816         mdelay(1);
7817         cik_irq_ack(rdev);
7818         cik_disable_interrupt_state(rdev);
7819 }
7820
7821 /**
7822  * cik_irq_suspend - disable interrupts for suspend
7823  *
7824  * @rdev: radeon_device pointer
7825  *
7826  * Disable interrupts and stop the RLC (CIK).
7827  * Used for suspend.
7828  */
7829 static void cik_irq_suspend(struct radeon_device *rdev)
7830 {
7831         cik_irq_disable(rdev);
7832         cik_rlc_stop(rdev);
7833 }
7834
7835 /**
7836  * cik_irq_fini - tear down interrupt support
7837  *
7838  * @rdev: radeon_device pointer
7839  *
7840  * Disable interrupts on the hw and free the IH ring
7841  * buffer (CIK).
7842  * Used for driver unload.
7843  */
7844 static void cik_irq_fini(struct radeon_device *rdev)
7845 {
7846         cik_irq_suspend(rdev);
7847         r600_ih_ring_fini(rdev);
7848 }
7849
7850 /**
7851  * cik_get_ih_wptr - get the IH ring buffer wptr
7852  *
7853  * @rdev: radeon_device pointer
7854  *
7855  * Get the IH ring buffer wptr from either the register
7856  * or the writeback memory buffer (CIK).  Also check for
7857  * ring buffer overflow and deal with it.
7858  * Used by cik_irq_process().
7859  * Returns the value of the wptr.
7860  */
7861 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7862 {
7863         u32 wptr, tmp;
7864
7865         if (rdev->wb.enabled)
7866                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7867         else
7868                 wptr = RREG32(IH_RB_WPTR);
7869
7870         if (wptr & RB_OVERFLOW) {
7871                 wptr &= ~RB_OVERFLOW;
7872                 /* When a ring buffer overflow happens, start parsing interrupts
7873                  * from the last vector that was not overwritten (wptr + 16).
7874                  * Hopefully this should allow us to catch up.
7875                  */
7876                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7877                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7878                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7879                 tmp = RREG32(IH_RB_CNTL);
7880                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7881                 WREG32(IH_RB_CNTL, tmp);
7882         }
7883         return (wptr & rdev->ih.ptr_mask);
7884 }
7885
7886 /*        CIK IV Ring
7887  * Each IV ring entry is 128 bits:
7888  * [7:0]    - interrupt source id
7889  * [31:8]   - reserved
7890  * [59:32]  - interrupt source data
7891  * [63:60]  - reserved
7892  * [71:64]  - RINGID
7893  *            CP:
7894  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7895  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7896  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7897  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7898  *            PIPE_ID - ME0 0=3D
7899  *                    - ME1&2 compute dispatcher (4 pipes each)
7900  *            SDMA:
7901  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7902  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7903  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7904  * [79:72]  - VMID
7905  * [95:80]  - PASID
7906  * [127:96] - reserved
7907  */
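/* Illustrative sketch: a hypothetical helper, not used anywhere in this
 * driver, showing how the IV entry layout documented above maps onto the
 * four little-endian dwords of one 128-bit entry.  cik_irq_process() below
 * performs the real decoding for the fields it actually consumes.
 */
static inline void cik_iv_entry_decode_sketch(const volatile u32 *iv,
                                              u32 *src_id, u32 *src_data,
                                              u32 *me_id, u32 *pipe_id,
                                              u32 *queue_id, u32 *vm_id,
                                              u32 *pasid)
{
        u32 dw0 = le32_to_cpu(iv[0]);   /* bits [31:0] */
        u32 dw1 = le32_to_cpu(iv[1]);   /* bits [63:32] */
        u32 dw2 = le32_to_cpu(iv[2]);   /* bits [95:64]; [127:96] is reserved */

        *src_id   = dw0 & 0xff;         /* [7:0]   interrupt source id */
        *src_data = dw1 & 0xfffffff;    /* [59:32] interrupt source data */
        /* [71:64] RINGID; for CP: ME_ID[1:0], PIPE_ID[1:0], QUEUE_ID[2:0] */
        *me_id    = (dw2 >> 5) & 0x3;
        *pipe_id  = (dw2 >> 3) & 0x3;
        *queue_id = dw2 & 0x7;
        *vm_id    = (dw2 >> 8) & 0xff;  /* [79:72] VMID */
        *pasid    = (dw2 >> 16) & 0xffff; /* [95:80] PASID */
}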
7908 /**
7909  * cik_irq_process - interrupt handler
7910  *
7911  * @rdev: radeon_device pointer
7912  *
7913  * Interrupt handler (CIK).  Walk the IH ring,
7914  * ack interrupts and schedule work to handle
7915  * interrupt events.
7916  * Returns irq process return code.
7917  */
7918 int cik_irq_process(struct radeon_device *rdev)
7919 {
7920         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7921         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7922         u32 wptr;
7923         u32 rptr;
7924         u32 src_id, src_data, ring_id;
7925         u8 me_id, pipe_id, queue_id;
7926         u32 ring_index;
7927         bool queue_hotplug = false;
7928         bool queue_dp = false;
7929         bool queue_reset = false;
7930         u32 addr, status, mc_client;
7931         bool queue_thermal = false;
7932
7933         if (!rdev->ih.enabled || rdev->shutdown)
7934                 return IRQ_NONE;
7935
7936         wptr = cik_get_ih_wptr(rdev);
7937
7938 restart_ih:
7939         /* is somebody else already processing irqs? */
7940         if (atomic_xchg(&rdev->ih.lock, 1))
7941                 return IRQ_NONE;
7942
7943         rptr = rdev->ih.rptr;
7944         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7945
7946         /* Order reading of wptr vs. reading of IH ring data */
7947         rmb();
7948
7949         /* display interrupts */
7950         cik_irq_ack(rdev);
7951
7952         while (rptr != wptr) {
7953                 /* wptr/rptr are in bytes! */
7954                 ring_index = rptr / 4;
7955
7956                 radeon_kfd_interrupt(rdev,
7957                                 (const void *) &rdev->ih.ring[ring_index]);
7958
7959                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7960                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7961                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7962
7963                 switch (src_id) {
7964                 case 1: /* D1 vblank/vline */
7965                         switch (src_data) {
7966                         case 0: /* D1 vblank */
7967                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7968                                         if (rdev->irq.crtc_vblank_int[0]) {
7969                                                 drm_handle_vblank(rdev->ddev, 0);
7970                                                 rdev->pm.vblank_sync = true;
7971                                                 wake_up(&rdev->irq.vblank_queue);
7972                                         }
7973                                         if (atomic_read(&rdev->irq.pflip[0]))
7974                                                 radeon_crtc_handle_vblank(rdev, 0);
7975                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7976                                         DRM_DEBUG("IH: D1 vblank\n");
7977                                 }
7978                                 break;
7979                         case 1: /* D1 vline */
7980                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7981                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7982                                         DRM_DEBUG("IH: D1 vline\n");
7983                                 }
7984                                 break;
7985                         default:
7986                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7987                                 break;
7988                         }
7989                         break;
7990                 case 2: /* D2 vblank/vline */
7991                         switch (src_data) {
7992                         case 0: /* D2 vblank */
7993                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7994                                         if (rdev->irq.crtc_vblank_int[1]) {
7995                                                 drm_handle_vblank(rdev->ddev, 1);
7996                                                 rdev->pm.vblank_sync = true;
7997                                                 wake_up(&rdev->irq.vblank_queue);
7998                                         }
7999                                         if (atomic_read(&rdev->irq.pflip[1]))
8000                                                 radeon_crtc_handle_vblank(rdev, 1);
8001                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
8002                                         DRM_DEBUG("IH: D2 vblank\n");
8003                                 }
8004                                 break;
8005                         case 1: /* D2 vline */
8006                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
8007                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
8008                                         DRM_DEBUG("IH: D2 vline\n");
8009                                 }
8010                                 break;
8011                         default:
8012                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8013                                 break;
8014                         }
8015                         break;
8016                 case 3: /* D3 vblank/vline */
8017                         switch (src_data) {
8018                         case 0: /* D3 vblank */
8019                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
8020                                         if (rdev->irq.crtc_vblank_int[2]) {
8021                                                 drm_handle_vblank(rdev->ddev, 2);
8022                                                 rdev->pm.vblank_sync = true;
8023                                                 wake_up(&rdev->irq.vblank_queue);
8024                                         }
8025                                         if (atomic_read(&rdev->irq.pflip[2]))
8026                                                 radeon_crtc_handle_vblank(rdev, 2);
8027                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
8028                                         DRM_DEBUG("IH: D3 vblank\n");
8029                                 }
8030                                 break;
8031                         case 1: /* D3 vline */
8032                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
8033                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
8034                                         DRM_DEBUG("IH: D3 vline\n");
8035                                 }
8036                                 break;
8037                         default:
8038                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8039                                 break;
8040                         }
8041                         break;
8042                 case 4: /* D4 vblank/vline */
8043                         switch (src_data) {
8044                         case 0: /* D4 vblank */
8045                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
8046                                         if (rdev->irq.crtc_vblank_int[3]) {
8047                                                 drm_handle_vblank(rdev->ddev, 3);
8048                                                 rdev->pm.vblank_sync = true;
8049                                                 wake_up(&rdev->irq.vblank_queue);
8050                                         }
8051                                         if (atomic_read(&rdev->irq.pflip[3]))
8052                                                 radeon_crtc_handle_vblank(rdev, 3);
8053                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
8054                                         DRM_DEBUG("IH: D4 vblank\n");
8055                                 }
8056                                 break;
8057                         case 1: /* D4 vline */
8058                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
8059                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
8060                                         DRM_DEBUG("IH: D4 vline\n");
8061                                 }
8062                                 break;
8063                         default:
8064                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8065                                 break;
8066                         }
8067                         break;
8068                 case 5: /* D5 vblank/vline */
8069                         switch (src_data) {
8070                         case 0: /* D5 vblank */
8071                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
8072                                         if (rdev->irq.crtc_vblank_int[4]) {
8073                                                 drm_handle_vblank(rdev->ddev, 4);
8074                                                 rdev->pm.vblank_sync = true;
8075                                                 wake_up(&rdev->irq.vblank_queue);
8076                                         }
8077                                         if (atomic_read(&rdev->irq.pflip[4]))
8078                                                 radeon_crtc_handle_vblank(rdev, 4);
8079                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
8080                                         DRM_DEBUG("IH: D5 vblank\n");
8081                                 }
8082                                 break;
8083                         case 1: /* D5 vline */
8084                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
8085                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
8086                                         DRM_DEBUG("IH: D5 vline\n");
8087                                 }
8088                                 break;
8089                         default:
8090                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8091                                 break;
8092                         }
8093                         break;
8094                 case 6: /* D6 vblank/vline */
8095                         switch (src_data) {
8096                         case 0: /* D6 vblank */
8097                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
8098                                         if (rdev->irq.crtc_vblank_int[5]) {
8099                                                 drm_handle_vblank(rdev->ddev, 5);
8100                                                 rdev->pm.vblank_sync = true;
8101                                                 wake_up(&rdev->irq.vblank_queue);
8102                                         }
8103                                         if (atomic_read(&rdev->irq.pflip[5]))
8104                                                 radeon_crtc_handle_vblank(rdev, 5);
8105                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8106                                         DRM_DEBUG("IH: D6 vblank\n");
8107                                 }
8108                                 break;
8109                         case 1: /* D6 vline */
8110                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
8111                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8112                                         DRM_DEBUG("IH: D6 vline\n");
8113                                 }
8114                                 break;
8115                         default:
8116                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8117                                 break;
8118                         }
8119                         break;
8120                 case 8: /* D1 page flip */
8121                 case 10: /* D2 page flip */
8122                 case 12: /* D3 page flip */
8123                 case 14: /* D4 page flip */
8124                 case 16: /* D5 page flip */
8125                 case 18: /* D6 page flip */
8126                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8127                         if (radeon_use_pflipirq > 0)
8128                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8129                         break;
8130                 case 42: /* HPD hotplug */
8131                         switch (src_data) {
8132                         case 0:
8133                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
8134                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8135                                         queue_hotplug = true;
8136                                         DRM_DEBUG("IH: HPD1\n");
8137                                 }
8138                                 break;
8139                         case 1:
8140                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
8141                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8142                                         queue_hotplug = true;
8143                                         DRM_DEBUG("IH: HPD2\n");
8144                                 }
8145                                 break;
8146                         case 2:
8147                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8148                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8149                                         queue_hotplug = true;
8150                                         DRM_DEBUG("IH: HPD3\n");
8151                                 }
8152                                 break;
8153                         case 3:
8154                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8155                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8156                                         queue_hotplug = true;
8157                                         DRM_DEBUG("IH: HPD4\n");
8158                                 }
8159                                 break;
8160                         case 4:
8161                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8162                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8163                                         queue_hotplug = true;
8164                                         DRM_DEBUG("IH: HPD5\n");
8165                                 }
8166                                 break;
8167                         case 5:
8168                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8169                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8170                                         queue_hotplug = true;
8171                                         DRM_DEBUG("IH: HPD6\n");
8172                                 }
8173                                 break;
8174                         case 6:
8175                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
8176                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8177                                         queue_dp = true;
8178                                         DRM_DEBUG("IH: HPD_RX 1\n");
8179                                 }
8180                                 break;
8181                         case 7:
8182                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
8183                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8184                                         queue_dp = true;
8185                                         DRM_DEBUG("IH: HPD_RX 2\n");
8186                                 }
8187                                 break;
8188                         case 8:
8189                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
8190                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8191                                         queue_dp = true;
8192                                         DRM_DEBUG("IH: HPD_RX 3\n");
8193                                 }
8194                                 break;
8195                         case 9:
8196                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
8197                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8198                                         queue_dp = true;
8199                                         DRM_DEBUG("IH: HPD_RX 4\n");
8200                                 }
8201                                 break;
8202                         case 10:
8203                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
8204                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8205                                         queue_dp = true;
8206                                         DRM_DEBUG("IH: HPD_RX 5\n");
8207                                 }
8208                                 break;
8209                         case 11:
8210                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
8211                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8212                                         queue_dp = true;
8213                                         DRM_DEBUG("IH: HPD_RX 6\n");
8214                                 }
8215                                 break;
8216                         default:
8217                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8218                                 break;
8219                         }
8220                         break;
8221                 case 96:
8222                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8223                         WREG32(SRBM_INT_ACK, 0x1);
8224                         break;
8225                 case 124: /* UVD */
8226                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8227                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8228                         break;
8229                 case 146:
8230                 case 147:
8231                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8232                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8233                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8234                         /* reset addr and status */
8235                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8236                         if (addr == 0x0 && status == 0x0)
8237                                 break;
8238                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8239                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8240                                 addr);
8241                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8242                                 status);
8243                         cik_vm_decode_fault(rdev, status, addr, mc_client);
8244                         break;
8245                 case 167: /* VCE */
8246                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8247                         switch (src_data) {
8248                         case 0:
8249                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8250                                 break;
8251                         case 1:
8252                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8253                                 break;
8254                         default:
8255                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8256                                 break;
8257                         }
8258                         break;
8259                 case 176: /* GFX RB CP_INT */
8260                 case 177: /* GFX IB CP_INT */
8261                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8262                         break;
8263                 case 181: /* CP EOP event */
8264                         DRM_DEBUG("IH: CP EOP\n");
8265                         /* XXX check the bitfield order! */
8266                         me_id = (ring_id & 0x60) >> 5;
8267                         pipe_id = (ring_id & 0x18) >> 3;
8268                         queue_id = (ring_id & 0x7) >> 0;
8269                         switch (me_id) {
8270                         case 0:
8271                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8272                                 break;
8273                         case 1:
8274                         case 2:
8275                                 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8276                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8277                                 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8278                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8279                                 break;
8280                         }
8281                         break;
8282                 case 184: /* CP Privileged reg access */
8283                         DRM_ERROR("Illegal register access in command stream\n");
8284                         /* XXX check the bitfield order! */
8285                         me_id = (ring_id & 0x60) >> 5;
8286                         pipe_id = (ring_id & 0x18) >> 3;
8287                         queue_id = (ring_id & 0x7) >> 0;
8288                         switch (me_id) {
8289                         case 0:
8290                                 /* This results in a full GPU reset, but all we need to do is soft
8291                                  * reset the CP for gfx
8292                                  */
8293                                 queue_reset = true;
8294                                 break;
8295                         case 1:
8296                                 /* XXX compute */
8297                                 queue_reset = true;
8298                                 break;
8299                         case 2:
8300                                 /* XXX compute */
8301                                 queue_reset = true;
8302                                 break;
8303                         }
8304                         break;
8305                 case 185: /* CP Privileged inst */
8306                         DRM_ERROR("Illegal instruction in command stream\n");
8307                         /* XXX check the bitfield order! */
8308                         me_id = (ring_id & 0x60) >> 5;
8309                         pipe_id = (ring_id & 0x18) >> 3;
8310                         queue_id = (ring_id & 0x7) >> 0;
8311                         switch (me_id) {
8312                         case 0:
8313                                 /* This results in a full GPU reset, but all we need to do is soft
8314                                  * reset the CP for gfx
8315                                  */
8316                                 queue_reset = true;
8317                                 break;
8318                         case 1:
8319                                 /* XXX compute */
8320                                 queue_reset = true;
8321                                 break;
8322                         case 2:
8323                                 /* XXX compute */
8324                                 queue_reset = true;
8325                                 break;
8326                         }
8327                         break;
8328                 case 224: /* SDMA trap event */
8329                         /* XXX check the bitfield order! */
8330                         me_id = (ring_id & 0x3) >> 0;
8331                         queue_id = (ring_id & 0xc) >> 2;
8332                         DRM_DEBUG("IH: SDMA trap\n");
8333                         switch (me_id) {
8334                         case 0:
8335                                 switch (queue_id) {
8336                                 case 0:
8337                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8338                                         break;
8339                                 case 1:
8340                                         /* XXX compute */
8341                                         break;
8342                                 case 2:
8343                                         /* XXX compute */
8344                                         break;
8345                                 }
8346                                 break;
8347                         case 1:
8348                                 switch (queue_id) {
8349                                 case 0:
8350                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8351                                         break;
8352                                 case 1:
8353                                         /* XXX compute */
8354                                         break;
8355                                 case 2:
8356                                         /* XXX compute */
8357                                         break;
8358                                 }
8359                                 break;
8360                         }
8361                         break;
8362                 case 230: /* thermal low to high */
8363                         DRM_DEBUG("IH: thermal low to high\n");
8364                         rdev->pm.dpm.thermal.high_to_low = false;
8365                         queue_thermal = true;
8366                         break;
8367                 case 231: /* thermal high to low */
8368                         DRM_DEBUG("IH: thermal high to low\n");
8369                         rdev->pm.dpm.thermal.high_to_low = true;
8370                         queue_thermal = true;
8371                         break;
8372                 case 233: /* GUI IDLE */
8373                         DRM_DEBUG("IH: GUI idle\n");
8374                         break;
8375                 case 241: /* SDMA Privileged inst */
8376                 case 247: /* SDMA Privileged inst */
8377                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8378                         /* XXX check the bitfield order! */
8379                         me_id = (ring_id & 0x3) >> 0;
8380                         queue_id = (ring_id & 0xc) >> 2;
8381                         switch (me_id) {
8382                         case 0:
8383                                 switch (queue_id) {
8384                                 case 0:
8385                                         queue_reset = true;
8386                                         break;
8387                                 case 1:
8388                                         /* XXX compute */
8389                                         queue_reset = true;
8390                                         break;
8391                                 case 2:
8392                                         /* XXX compute */
8393                                         queue_reset = true;
8394                                         break;
8395                                 }
8396                                 break;
8397                         case 1:
8398                                 switch (queue_id) {
8399                                 case 0:
8400                                         queue_reset = true;
8401                                         break;
8402                                 case 1:
8403                                         /* XXX compute */
8404                                         queue_reset = true;
8405                                         break;
8406                                 case 2:
8407                                         /* XXX compute */
8408                                         queue_reset = true;
8409                                         break;
8410                                 }
8411                                 break;
8412                         }
8413                         break;
8414                 default:
8415                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8416                         break;
8417                 }
8418
8419                 /* wptr/rptr are in bytes! */
8420                 rptr += 16;
8421                 rptr &= rdev->ih.ptr_mask;
8422                 WREG32(IH_RB_RPTR, rptr);
8423         }
8424         if (queue_dp)
8425                 schedule_work(&rdev->dp_work);
8426         if (queue_hotplug)
8427                 schedule_work(&rdev->hotplug_work);
8428         if (queue_reset) {
8429                 rdev->needs_reset = true;
8430                 wake_up_all(&rdev->fence_queue);
8431         }
8432         if (queue_thermal)
8433                 schedule_work(&rdev->pm.dpm.thermal.work);
8434         rdev->ih.rptr = rptr;
8435         atomic_set(&rdev->ih.lock, 0);
8436
8437         /* make sure wptr hasn't changed while processing */
8438         wptr = cik_get_ih_wptr(rdev);
8439         if (wptr != rptr)
8440                 goto restart_ih;
8441
8442         return IRQ_HANDLED;
8443 }
8444
8445 /*
8446  * startup/shutdown callbacks
8447  */
8448 /**
8449  * cik_startup - program the asic to a functional state
8450  *
8451  * @rdev: radeon_device pointer
8452  *
8453  * Programs the asic to a functional state (CIK).
8454  * Called by cik_init() and cik_resume().
8455  * Returns 0 for success, error for failure.
8456  */
8457 static int cik_startup(struct radeon_device *rdev)
8458 {
8459         struct radeon_ring *ring;
8460         u32 nop;
8461         int r;
8462
8463         /* enable pcie gen2/3 link */
8464         cik_pcie_gen3_enable(rdev);
8465         /* enable aspm */
8466         cik_program_aspm(rdev);
8467
8468         /* scratch needs to be initialized before MC */
8469         r = r600_vram_scratch_init(rdev);
8470         if (r)
8471                 return r;
8472
8473         cik_mc_program(rdev);
8474
8475         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8476                 r = ci_mc_load_microcode(rdev);
8477                 if (r) {
8478                         DRM_ERROR("Failed to load MC firmware!\n");
8479                         return r;
8480                 }
8481         }
8482
8483         r = cik_pcie_gart_enable(rdev);
8484         if (r)
8485                 return r;
8486         cik_gpu_init(rdev);
8487
8488         /* allocate rlc buffers */
8489         if (rdev->flags & RADEON_IS_IGP) {
8490                 if (rdev->family == CHIP_KAVERI) {
8491                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8492                         rdev->rlc.reg_list_size =
8493                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8494                 } else {
8495                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8496                         rdev->rlc.reg_list_size =
8497                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8498                 }
8499         }
8500         rdev->rlc.cs_data = ci_cs_data;
8501         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8502         r = sumo_rlc_init(rdev);
8503         if (r) {
8504                 DRM_ERROR("Failed to init rlc BOs!\n");
8505                 return r;
8506         }
8507
8508         /* allocate wb buffer */
8509         r = radeon_wb_init(rdev);
8510         if (r)
8511                 return r;
8512
8513         /* allocate mec buffers */
8514         r = cik_mec_init(rdev);
8515         if (r) {
8516                 DRM_ERROR("Failed to init MEC BOs!\n");
8517                 return r;
8518         }
8519
8520         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8521         if (r) {
8522                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8523                 return r;
8524         }
8525
8526         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8527         if (r) {
8528                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8529                 return r;
8530         }
8531
8532         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8533         if (r) {
8534                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8535                 return r;
8536         }
8537
8538         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8539         if (r) {
8540                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8541                 return r;
8542         }
8543
8544         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8545         if (r) {
8546                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8547                 return r;
8548         }
8549
8550         r = radeon_uvd_resume(rdev);
8551         if (!r) {
8552                 r = uvd_v4_2_resume(rdev);
8553                 if (!r) {
8554                         r = radeon_fence_driver_start_ring(rdev,
8555                                                            R600_RING_TYPE_UVD_INDEX);
8556                         if (r)
8557                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8558                 }
8559         }
8560         if (r)
8561                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8562
8563         r = radeon_vce_resume(rdev);
8564         if (!r) {
8565                 r = vce_v2_0_resume(rdev);
8566                 if (!r)
8567                         r = radeon_fence_driver_start_ring(rdev,
8568                                                            TN_RING_TYPE_VCE1_INDEX);
8569                 if (!r)
8570                         r = radeon_fence_driver_start_ring(rdev,
8571                                                            TN_RING_TYPE_VCE2_INDEX);
8572         }
8573         if (r) {
8574                 dev_err(rdev->dev, "VCE init error (%d).\n", r);
8575                 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8576                 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8577         }
8578
8579         /* Enable IRQ */
8580         if (!rdev->irq.installed) {
8581                 r = radeon_irq_kms_init(rdev);
8582                 if (r)
8583                         return r;
8584         }
8585
8586         r = cik_irq_init(rdev);
8587         if (r) {
8588                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8589                 radeon_irq_kms_fini(rdev);
8590                 return r;
8591         }
8592         cik_irq_set(rdev);
8593
8594         if (rdev->family == CHIP_HAWAII) {
8595                 if (rdev->new_fw)
8596                         nop = PACKET3(PACKET3_NOP, 0x3FFF);
8597                 else
8598                         nop = RADEON_CP_PACKET2;
8599         } else {
8600                 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8601         }
8602
8603         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8604         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8605                              nop);
8606         if (r)
8607                 return r;
8608
8609         /* set up the compute queues */
8610         /* type-2 packets are deprecated on MEC, use type-3 instead */
8611         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8612         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8613                              nop);
8614         if (r)
8615                 return r;
8616         ring->me = 1; /* first MEC */
8617         ring->pipe = 0; /* first pipe */
8618         ring->queue = 0; /* first queue */
8619         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8620
8621         /* type-2 packets are deprecated on MEC, use type-3 instead */
8622         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8623         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8624                              nop);
8625         if (r)
8626                 return r;
8627         /* dGPUs only have 1 MEC */
8628         ring->me = 1; /* first MEC */
8629         ring->pipe = 0; /* first pipe */
8630         ring->queue = 1; /* second queue */
8631         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8632
8633         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8634         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8635                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8636         if (r)
8637                 return r;
8638
8639         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8640         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8641                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8642         if (r)
8643                 return r;
8644
8645         r = cik_cp_resume(rdev);
8646         if (r)
8647                 return r;
8648
8649         r = cik_sdma_resume(rdev);
8650         if (r)
8651                 return r;
8652
8653         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8654         if (ring->ring_size) {
8655                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8656                                      RADEON_CP_PACKET2);
8657                 if (!r)
8658                         r = uvd_v1_0_init(rdev);
8659                 if (r)
8660                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8661         }
8662
8663         r = -ENOENT;
8664
8665         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8666         if (ring->ring_size)
8667                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8668                                      VCE_CMD_NO_OP);
8669
8670         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8671         if (ring->ring_size)
8672                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8673                                      VCE_CMD_NO_OP);
8674
8675         if (!r)
8676                 r = vce_v1_0_init(rdev);
8677         else if (r != -ENOENT)
8678                 DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8679
8680         r = radeon_ib_pool_init(rdev);
8681         if (r) {
8682                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8683                 return r;
8684         }
8685
8686         r = radeon_vm_manager_init(rdev);
8687         if (r) {
8688                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8689                 return r;
8690         }
8691
8692         r = radeon_audio_init(rdev);
8693         if (r)
8694                 return r;
8695
8696         r = radeon_kfd_resume(rdev);
8697         if (r)
8698                 return r;
8699
8700         return 0;
8701 }
8702
8703 /**
8704  * cik_resume - resume the asic to a functional state
8705  *
8706  * @rdev: radeon_device pointer
8707  *
8708  * Programs the asic to a functional state (CIK).
8709  * Called at resume.
8710  * Returns 0 for success, error for failure.
8711  */
8712 int cik_resume(struct radeon_device *rdev)
8713 {
8714         int r;
8715
8716         /* post card */
8717         atom_asic_init(rdev->mode_info.atom_context);
8718
8719         /* init golden registers */
8720         cik_init_golden_registers(rdev);
8721
8722         if (rdev->pm.pm_method == PM_METHOD_DPM)
8723                 radeon_pm_resume(rdev);
8724
8725         rdev->accel_working = true;
8726         r = cik_startup(rdev);
8727         if (r) {
8728                 DRM_ERROR("cik startup failed on resume\n");
8729                 rdev->accel_working = false;
8730                 return r;
8731         }
8732
8733         return r;
8734
8735 }
8736
8737 /**
8738  * cik_suspend - suspend the asic
8739  *
8740  * @rdev: radeon_device pointer
8741  *
8742  * Bring the chip into a state suitable for suspend (CIK).
8743  * Called at suspend.
8744  * Returns 0 for success.
8745  */
8746 int cik_suspend(struct radeon_device *rdev)
8747 {
8748         radeon_kfd_suspend(rdev);
8749         radeon_pm_suspend(rdev);
8750         radeon_audio_fini(rdev);
8751         radeon_vm_manager_fini(rdev);
8752         cik_cp_enable(rdev, false);
8753         cik_sdma_enable(rdev, false);
8754         uvd_v1_0_fini(rdev);
8755         radeon_uvd_suspend(rdev);
8756         radeon_vce_suspend(rdev);
8757         cik_fini_pg(rdev);
8758         cik_fini_cg(rdev);
8759         cik_irq_suspend(rdev);
8760         radeon_wb_disable(rdev);
8761         cik_pcie_gart_disable(rdev);
8762         return 0;
8763 }
8764
8765 /* Plan is to move initialization into that function and use
8766  * helper functions so that radeon_device_init does pretty much
8767  * nothing more than call asic specific functions. This
8768  * should also allow us to remove a bunch of callback functions
8769  * like vram_info.
8770  */
8771 /**
8772  * cik_init - asic specific driver and hw init
8773  *
8774  * @rdev: radeon_device pointer
8775  *
8776  * Setup asic specific driver variables and program the hw
8777  * to a functional state (CIK).
8778  * Called at driver startup.
8779  * Returns 0 for success, errors for failure.
8780  */
8781 int cik_init(struct radeon_device *rdev)
8782 {
8783         struct radeon_ring *ring;
8784         int r;
8785
8786         /* Read BIOS */
8787         if (!radeon_get_bios(rdev)) {
8788                 if (ASIC_IS_AVIVO(rdev))
8789                         return -EINVAL;
8790         }
8791         /* Must be an ATOMBIOS */
8792         if (!rdev->is_atom_bios) {
8793                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8794                 return -EINVAL;
8795         }
8796         r = radeon_atombios_init(rdev);
8797         if (r)
8798                 return r;
8799
8800         /* Post card if necessary */
8801         if (!radeon_card_posted(rdev)) {
8802                 if (!rdev->bios) {
8803                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8804                         return -EINVAL;
8805                 }
8806                 DRM_INFO("GPU not posted. posting now...\n");
8807                 atom_asic_init(rdev->mode_info.atom_context);
8808         }
8809         /* init golden registers */
8810         cik_init_golden_registers(rdev);
8811         /* Initialize scratch registers */
8812         cik_scratch_init(rdev);
8813         /* Initialize surface registers */
8814         radeon_surface_init(rdev);
8815         /* Initialize clocks */
8816         radeon_get_clock_info(rdev->ddev);
8817
8818         /* Fence driver */
8819         r = radeon_fence_driver_init(rdev);
8820         if (r)
8821                 return r;
8822
8823         /* initialize memory controller */
8824         r = cik_mc_init(rdev);
8825         if (r)
8826                 return r;
8827         /* Memory manager */
8828         r = radeon_bo_init(rdev);
8829         if (r)
8830                 return r;
8831
8832         if (rdev->flags & RADEON_IS_IGP) {
8833                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8834                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8835                         r = cik_init_microcode(rdev);
8836                         if (r) {
8837                                 DRM_ERROR("Failed to load firmware!\n");
8838                                 return r;
8839                         }
8840                 }
8841         } else {
8842                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8843                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8844                     !rdev->mc_fw) {
8845                         r = cik_init_microcode(rdev);
8846                         if (r) {
8847                                 DRM_ERROR("Failed to load firmware!\n");
8848                                 return r;
8849                         }
8850                 }
8851         }
8852
8853         /* Initialize power management */
8854         radeon_pm_init(rdev);
8855
8856         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8857         ring->ring_obj = NULL;
8858         r600_ring_init(rdev, ring, 1024 * 1024);
8859
8860         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8861         ring->ring_obj = NULL;
8862         r600_ring_init(rdev, ring, 1024 * 1024);
8863         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8864         if (r)
8865                 return r;
8866
8867         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8868         ring->ring_obj = NULL;
8869         r600_ring_init(rdev, ring, 1024 * 1024);
8870         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8871         if (r)
8872                 return r;
8873
8874         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8875         ring->ring_obj = NULL;
8876         r600_ring_init(rdev, ring, 256 * 1024);
8877
8878         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8879         ring->ring_obj = NULL;
8880         r600_ring_init(rdev, ring, 256 * 1024);
8881
8882         r = radeon_uvd_init(rdev);
8883         if (!r) {
8884                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8885                 ring->ring_obj = NULL;
8886                 r600_ring_init(rdev, ring, 4096);
8887         }
8888
8889         r = radeon_vce_init(rdev);
8890         if (!r) {
8891                 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8892                 ring->ring_obj = NULL;
8893                 r600_ring_init(rdev, ring, 4096);
8894
8895                 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8896                 ring->ring_obj = NULL;
8897                 r600_ring_init(rdev, ring, 4096);
8898         }
8899
8900         rdev->ih.ring_obj = NULL;
8901         r600_ih_ring_init(rdev, 64 * 1024);
8902
8903         r = r600_pcie_gart_init(rdev);
8904         if (r)
8905                 return r;
8906
8907         rdev->accel_working = true;
8908         r = cik_startup(rdev);
8909         if (r) {
8910                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8911                 cik_cp_fini(rdev);
8912                 cik_sdma_fini(rdev);
8913                 cik_irq_fini(rdev);
8914                 sumo_rlc_fini(rdev);
8915                 cik_mec_fini(rdev);
8916                 radeon_wb_fini(rdev);
8917                 radeon_ib_pool_fini(rdev);
8918                 radeon_vm_manager_fini(rdev);
8919                 radeon_irq_kms_fini(rdev);
8920                 cik_pcie_gart_fini(rdev);
8921                 rdev->accel_working = false;
8922         }
8923
8924         /* Don't start up if the MC ucode is missing.
8925          * The default clocks and voltages before the MC ucode
8926          * is loaded are not sufficient for advanced operations.
8927          */
8928         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8929                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8930                 return -EINVAL;
8931         }
8932
8933         return 0;
8934 }
8935
8936 /**
8937  * cik_fini - asic specific driver and hw fini
8938  *
8939  * @rdev: radeon_device pointer
8940  *
8941  * Tear down the asic specific driver variables and program the hw
8942  * to an idle state (CIK).
8943  * Called at driver unload.
8944  */
8945 void cik_fini(struct radeon_device *rdev)
8946 {
8947         radeon_pm_fini(rdev);
8948         cik_cp_fini(rdev);
8949         cik_sdma_fini(rdev);
8950         cik_fini_pg(rdev);
8951         cik_fini_cg(rdev);
8952         cik_irq_fini(rdev);
8953         sumo_rlc_fini(rdev);
8954         cik_mec_fini(rdev);
8955         radeon_wb_fini(rdev);
8956         radeon_vm_manager_fini(rdev);
8957         radeon_ib_pool_fini(rdev);
8958         radeon_irq_kms_fini(rdev);
8959         uvd_v1_0_fini(rdev);
8960         radeon_uvd_fini(rdev);
8961         radeon_vce_fini(rdev);
8962         cik_pcie_gart_fini(rdev);
8963         r600_vram_scratch_fini(rdev);
8964         radeon_gem_fini(rdev);
8965         radeon_fence_driver_fini(rdev);
8966         radeon_bo_fini(rdev);
8967         radeon_atombios_fini(rdev);
8968         kfree(rdev->bios);
8969         rdev->bios = NULL;
8970 }
8971
8972 void dce8_program_fmt(struct drm_encoder *encoder)
8973 {
8974         struct drm_device *dev = encoder->dev;
8975         struct radeon_device *rdev = dev->dev_private;
8976         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8977         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8978         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8979         int bpc = 0;
8980         u32 tmp = 0;
8981         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8982
8983         if (connector) {
8984                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8985                 bpc = radeon_get_monitor_bpc(connector);
8986                 dither = radeon_connector->dither;
8987         }
8988
8989         /* LVDS/eDP FMT is set up by atom */
8990         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8991                 return;
8992
8993         /* not needed for analog */
8994         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8995             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8996                 return;
8997
8998         if (bpc == 0)
8999                 return;
9000
9001         switch (bpc) {
9002         case 6:
9003                 if (dither == RADEON_FMT_DITHER_ENABLE)
9004                         /* XXX sort out optimal dither settings */
9005                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9006                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
9007                 else
9008                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
9009                 break;
9010         case 8:
9011                 if (dither == RADEON_FMT_DITHER_ENABLE)
9012                         /* XXX sort out optimal dither settings */
9013                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9014                                 FMT_RGB_RANDOM_ENABLE |
9015                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
9016                 else
9017                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
9018                 break;
9019         case 10:
9020                 if (dither == RADEON_FMT_DITHER_ENABLE)
9021                         /* XXX sort out optimal dither settings */
9022                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9023                                 FMT_RGB_RANDOM_ENABLE |
9024                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
9025                 else
9026                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
9027                 break;
9028         default:
9029                 /* not needed */
9030                 break;
9031         }
9032
9033         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
9034 }
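
/*
 * Illustrative sketch, not used by the driver: the bpc switch in
 * dce8_program_fmt() reduces to a small map from panel depth to the FMT
 * truncate/dither depth field (6 bpc -> 0, 8 bpc -> 1, 10 bpc -> 2);
 * any other depth leaves FMT disabled.  The helper name and the -1
 * "nothing to program" return value are hypothetical.
 */
static int __maybe_unused dce8_fmt_depth_example(int bpc)
{
	switch (bpc) {
	case 6:
		return 0;
	case 8:
		return 1;
	case 10:
		return 2;
	default:
		return -1; /* no FMT programming needed */
	}
}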
9035
9036 /* display watermark setup */
9037 /**
9038  * dce8_line_buffer_adjust - Set up the line buffer
9039  *
9040  * @rdev: radeon_device pointer
9041  * @radeon_crtc: the selected display controller
9042  * @mode: the current display mode on the selected display
9043  * controller
9044  *
9045  * Set up the line buffer allocation for
9046  * the selected display controller (CIK).
9047  * Returns the line buffer size in pixels.
9048  */
9049 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
9050                                    struct radeon_crtc *radeon_crtc,
9051                                    struct drm_display_mode *mode)
9052 {
9053         u32 tmp, buffer_alloc, i;
9054         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
9055         /*
9056          * Line Buffer Setup
9057          * There are 6 line buffers, one for each display controller.
9058          * There are 3 partitions per LB. Select the number of partitions
9059          * to enable based on the display width.  For display widths larger
9060          * than 4096, you need to use 2 display controllers and combine
9061          * them using the stereo blender.
9062          */
9063         if (radeon_crtc->base.enabled && mode) {
9064                 if (mode->crtc_hdisplay < 1920) {
9065                         tmp = 1;
9066                         buffer_alloc = 2;
9067                 } else if (mode->crtc_hdisplay < 2560) {
9068                         tmp = 2;
9069                         buffer_alloc = 2;
9070                 } else if (mode->crtc_hdisplay < 4096) {
9071                         tmp = 0;
9072                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9073                 } else {
9074                         DRM_DEBUG_KMS("Mode too big for LB!\n");
9075                         tmp = 0;
9076                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9077                 }
9078         } else {
9079                 tmp = 1;
9080                 buffer_alloc = 0;
9081         }
9082
9083         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9084                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9085
9086         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9087                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
9088         for (i = 0; i < rdev->usec_timeout; i++) {
9089                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9090                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
9091                         break;
9092                 udelay(1);
9093         }
9094
9095         if (radeon_crtc->base.enabled && mode) {
9096                 switch (tmp) {
9097                 case 0:
9098                 default:
9099                         return 4096 * 2;
9100                 case 1:
9101                         return 1920 * 2;
9102                 case 2:
9103                         return 2560 * 2;
9104                 }
9105         }
9106
9107         /* controller not enabled, so no lb used */
9108         return 0;
9109 }
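
/*
 * Illustrative sketch, not called by the driver: the LB_MEMORY_CONFIG
 * selection above boils down to a width -> line buffer size map.  A 1920
 * pixel wide mode selects config 2 and gets 2560 * 2 pixels of line
 * buffer, anything 2560 or wider falls back to config 0 with 4096 * 2
 * pixels.  The helper name is hypothetical.
 */
static u32 __maybe_unused dce8_lb_size_for_width_example(u32 crtc_hdisplay)
{
	if (crtc_hdisplay < 1920)
		return 1920 * 2; /* config 1 */
	else if (crtc_hdisplay < 2560)
		return 2560 * 2; /* config 2 */
	else
		return 4096 * 2; /* config 0 */
}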
9110
9111 /**
9112  * cik_get_number_of_dram_channels - get the number of dram channels
9113  *
9114  * @rdev: radeon_device pointer
9115  *
9116  * Look up the number of video ram channels (CIK).
9117  * Used for display watermark bandwidth calculations
9118  * Returns the number of dram channels
9119  */
9120 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9121 {
9122         u32 tmp = RREG32(MC_SHARED_CHMAP);
9123
9124         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9125         case 0:
9126         default:
9127                 return 1;
9128         case 1:
9129                 return 2;
9130         case 2:
9131                 return 4;
9132         case 3:
9133                 return 8;
9134         case 4:
9135                 return 3;
9136         case 5:
9137                 return 6;
9138         case 6:
9139                 return 10;
9140         case 7:
9141                 return 12;
9142         case 8:
9143                 return 16;
9144         }
9145 }
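
/*
 * Illustrative sketch, not used by the driver: the NOOFCHAN switch above
 * is equivalent to indexing a small table with the decoded field; values
 * outside the table fall back to 1 channel just like the default case.
 * The helper name is hypothetical.
 */
static u32 __maybe_unused cik_dram_channels_example(u32 noofchan)
{
	static const u32 channels[] = { 1, 2, 4, 8, 3, 6, 10, 12, 16 };

	if (noofchan >= ARRAY_SIZE(channels))
		return 1;
	return channels[noofchan];
}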
9146
9147 struct dce8_wm_params {
9148         u32 dram_channels; /* number of dram channels */
9149         u32 yclk;          /* bandwidth per dram data pin in kHz */
9150         u32 sclk;          /* engine clock in kHz */
9151         u32 disp_clk;      /* display clock in kHz */
9152         u32 src_width;     /* viewport width */
9153         u32 active_time;   /* active display time in ns */
9154         u32 blank_time;    /* blank time in ns */
9155         bool interlaced;    /* mode is interlaced */
9156         fixed20_12 vsc;    /* vertical scale ratio */
9157         u32 num_heads;     /* number of active crtcs */
9158         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
9159         u32 lb_size;       /* line buffer allocated to pipe */
9160         u32 vtaps;         /* vertical scaler taps */
9161 };
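
/*
 * Illustrative sketch, not used by the driver: dce8_wm_params is filled in
 * per display head by dce8_program_watermarks() below.  The snippet shows
 * how the 20.12 fixed point helpers are used for the vsc field; the 1.5
 * scale ratio, the other values and the helper name are arbitrary example
 * choices.
 */
static void __maybe_unused dce8_wm_params_example(struct dce8_wm_params *wm)
{
	fixed20_12 a, b;

	a.full = dfixed_const(3);
	b.full = dfixed_const(2);
	wm->vsc.full = dfixed_div(a, b); /* 1.5 in 20.12 fixed point */
	wm->vtaps = 2;
	wm->num_heads = 1;
}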
9162
9163 /**
9164  * dce8_dram_bandwidth - get the dram bandwidth
9165  *
9166  * @wm: watermark calculation data
9167  *
9168  * Calculate the raw dram bandwidth (CIK).
9169  * Used for display watermark bandwidth calculations
9170  * Returns the dram bandwidth in MBytes/s
9171  */
9172 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9173 {
9174         /* Calculate raw DRAM Bandwidth */
9175         fixed20_12 dram_efficiency; /* 0.7 */
9176         fixed20_12 yclk, dram_channels, bandwidth;
9177         fixed20_12 a;
9178
9179         a.full = dfixed_const(1000);
9180         yclk.full = dfixed_const(wm->yclk);
9181         yclk.full = dfixed_div(yclk, a);
9182         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9183         a.full = dfixed_const(10);
9184         dram_efficiency.full = dfixed_const(7);
9185         dram_efficiency.full = dfixed_div(dram_efficiency, a);
9186         bandwidth.full = dfixed_mul(dram_channels, yclk);
9187         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9188
9189         return dfixed_trunc(bandwidth);
9190 }
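
/*
 * Illustrative sketch, not used by the driver: stripped of the fixed point
 * plumbing, the calculation above is
 *
 *     bandwidth = (yclk / 1000) * (dram_channels * 4) * 0.7
 *
 * i.e. raw DRAM bandwidth in MB/s derated by the assumed 70% efficiency.
 * The helper name is hypothetical and the integer version only matches
 * the 20.12 fixed point result up to rounding.
 */
static u32 __maybe_unused dce8_dram_bandwidth_example(u32 yclk_khz,
						      u32 dram_channels)
{
	/* multiply before dividing to limit rounding error */
	return (yclk_khz * dram_channels * 4 * 7) / (1000 * 10);
}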
9191
9192 /**
9193  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9194  *
9195  * @wm: watermark calculation data
9196  *
9197  * Calculate the dram bandwidth used for display (CIK).
9198  * Used for display watermark bandwidth calculations
9199  * Returns the dram bandwidth for display in MBytes/s
9200  */
9201 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9202 {
9203         /* Calculate DRAM Bandwidth and the part allocated to display. */
9204         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9205         fixed20_12 yclk, dram_channels, bandwidth;
9206         fixed20_12 a;
9207
9208         a.full = dfixed_const(1000);
9209         yclk.full = dfixed_const(wm->yclk);
9210         yclk.full = dfixed_div(yclk, a);
9211         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9212         a.full = dfixed_const(10);
9213         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9214         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9215         bandwidth.full = dfixed_mul(dram_channels, yclk);
9216         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9217
9218         return dfixed_trunc(bandwidth);
9219 }
9220
9221 /**
9222  * dce8_data_return_bandwidth - get the data return bandwidth
9223  *
9224  * @wm: watermark calculation data
9225  *
9226  * Calculate the data return bandwidth used for display (CIK).
9227  * Used for display watermark bandwidth calculations
9228  * Returns the data return bandwidth in MBytes/s
9229  */
9230 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9231 {
9232         /* Calculate the display Data return Bandwidth */
9233         fixed20_12 return_efficiency; /* 0.8 */
9234         fixed20_12 sclk, bandwidth;
9235         fixed20_12 a;
9236
9237         a.full = dfixed_const(1000);
9238         sclk.full = dfixed_const(wm->sclk);
9239         sclk.full = dfixed_div(sclk, a);
9240         a.full = dfixed_const(10);
9241         return_efficiency.full = dfixed_const(8);
9242         return_efficiency.full = dfixed_div(return_efficiency, a);
9243         a.full = dfixed_const(32);
9244         bandwidth.full = dfixed_mul(a, sclk);
9245         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9246
9247         return dfixed_trunc(bandwidth);
9248 }
9249
9250 /**
9251  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9252  *
9253  * @wm: watermark calculation data
9254  *
9255  * Calculate the dmif bandwidth used for display (CIK).
9256  * Used for display watermark bandwidth calculations
9257  * Returns the dmif bandwidth in MBytes/s
9258  */
9259 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9260 {
9261         /* Calculate the DMIF Request Bandwidth */
9262         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9263         fixed20_12 disp_clk, bandwidth;
9264         fixed20_12 a, b;
9265
9266         a.full = dfixed_const(1000);
9267         disp_clk.full = dfixed_const(wm->disp_clk);
9268         disp_clk.full = dfixed_div(disp_clk, a);
9269         a.full = dfixed_const(32);
9270         b.full = dfixed_mul(a, disp_clk);
9271
9272         a.full = dfixed_const(10);
9273         disp_clk_request_efficiency.full = dfixed_const(8);
9274         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9275
9276         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9277
9278         return dfixed_trunc(bandwidth);
9279 }
9280
9281 /**
9282  * dce8_available_bandwidth - get the min available bandwidth
9283  *
9284  * @wm: watermark calculation data
9285  *
9286  * Calculate the min available bandwidth used for display (CIK).
9287  * Used for display watermark bandwidth calculations
9288  * Returns the min available bandwidth in MBytes/s
9289  */
9290 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9291 {
9292         /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9293         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9294         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9295         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9296
9297         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9298 }
9299
9300 /**
9301  * dce8_average_bandwidth - get the average available bandwidth
9302  *
9303  * @wm: watermark calculation data
9304  *
9305  * Calculate the average available bandwidth used for display (CIK).
9306  * Used for display watermark bandwidth calculations
9307  * Returns the average available bandwidth in MBytes/s
9308  */
9309 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9310 {
9311         /* Calculate the display mode Average Bandwidth
9312          * DisplayMode should contain the source and destination dimensions,
9313          * timing, etc.
9314          */
9315         fixed20_12 bpp;
9316         fixed20_12 line_time;
9317         fixed20_12 src_width;
9318         fixed20_12 bandwidth;
9319         fixed20_12 a;
9320
9321         a.full = dfixed_const(1000);
9322         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9323         line_time.full = dfixed_div(line_time, a);
9324         bpp.full = dfixed_const(wm->bytes_per_pixel);
9325         src_width.full = dfixed_const(wm->src_width);
9326         bandwidth.full = dfixed_mul(src_width, bpp);
9327         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9328         bandwidth.full = dfixed_div(bandwidth, line_time);
9329
9330         return dfixed_trunc(bandwidth);
9331 }
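
/*
 * Illustrative sketch, not used by the driver: with a vertical scale ratio
 * of 1, the average bandwidth above is simply the bytes per source line
 * divided by the line time.  For a 1920 pixel wide, 4 byte per pixel
 * surface and a ~14.8 us line time that is roughly 1920 * 4 / 14.8, about
 * 519 MB/s.  The helper name is hypothetical.
 */
static u32 __maybe_unused dce8_average_bandwidth_example(u32 src_width,
							 u32 bytes_per_pixel,
							 u32 line_time_ns)
{
	if (!line_time_ns)
		return 0;
	/* bytes per line divided by the line time, scaled to MB/s */
	return (src_width * bytes_per_pixel * 1000) / line_time_ns;
}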
9332
9333 /**
9334  * dce8_latency_watermark - get the latency watermark
9335  *
9336  * @wm: watermark calculation data
9337  *
9338  * Calculate the latency watermark (CIK).
9339  * Used for display watermark bandwidth calculations
9340  * Returns the latency watermark in ns
9341  */
9342 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9343 {
9344         /* First calculate the latency in ns */
9345         u32 mc_latency = 2000; /* 2000 ns. */
9346         u32 available_bandwidth = dce8_available_bandwidth(wm);
9347         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9348         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9349         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9350         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9351                 (wm->num_heads * cursor_line_pair_return_time);
9352         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9353         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9354         u32 tmp, dmif_size = 12288;
9355         fixed20_12 a, b, c;
9356
9357         if (wm->num_heads == 0)
9358                 return 0;
9359
9360         a.full = dfixed_const(2);
9361         b.full = dfixed_const(1);
9362         if ((wm->vsc.full > a.full) ||
9363             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9364             (wm->vtaps >= 5) ||
9365             ((wm->vsc.full >= a.full) && wm->interlaced))
9366                 max_src_lines_per_dst_line = 4;
9367         else
9368                 max_src_lines_per_dst_line = 2;
9369
9370         a.full = dfixed_const(available_bandwidth);
9371         b.full = dfixed_const(wm->num_heads);
9372         a.full = dfixed_div(a, b);
9373
9374         b.full = dfixed_const(mc_latency + 512);
9375         c.full = dfixed_const(wm->disp_clk);
9376         b.full = dfixed_div(b, c);
9377
9378         c.full = dfixed_const(dmif_size);
9379         b.full = dfixed_div(c, b);
9380
9381         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9382
9383         b.full = dfixed_const(1000);
9384         c.full = dfixed_const(wm->disp_clk);
9385         b.full = dfixed_div(c, b);
9386         c.full = dfixed_const(wm->bytes_per_pixel);
9387         b.full = dfixed_mul(b, c);
9388
9389         lb_fill_bw = min(tmp, dfixed_trunc(b));
9390
9391         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9392         b.full = dfixed_const(1000);
9393         c.full = dfixed_const(lb_fill_bw);
9394         b.full = dfixed_div(c, b);
9395         a.full = dfixed_div(a, b);
9396         line_fill_time = dfixed_trunc(a);
9397
9398         if (line_fill_time < wm->active_time)
9399                 return latency;
9400         else
9401                 return latency + (line_fill_time - wm->active_time);
9402
9403 }
9404
9405 /**
9406  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9407  * average and available dram bandwidth
9408  *
9409  * @wm: watermark calculation data
9410  *
9411  * Check if the display average bandwidth fits in the display
9412  * dram bandwidth (CIK).
9413  * Used for display watermark bandwidth calculations
9414  * Returns true if the display fits, false if not.
9415  */
9416 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9417 {
9418         if (dce8_average_bandwidth(wm) <=
9419             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9420                 return true;
9421         else
9422                 return false;
9423 }
9424
9425 /**
9426  * dce8_average_bandwidth_vs_available_bandwidth - check
9427  * average and available bandwidth
9428  *
9429  * @wm: watermark calculation data
9430  *
9431  * Check if the display average bandwidth fits in the display
9432  * available bandwidth (CIK).
9433  * Used for display watermark bandwidth calculations
9434  * Returns true if the display fits, false if not.
9435  */
9436 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9437 {
9438         if (dce8_average_bandwidth(wm) <=
9439             (dce8_available_bandwidth(wm) / wm->num_heads))
9440                 return true;
9441         else
9442                 return false;
9443 }
9444
9445 /**
9446  * dce8_check_latency_hiding - check latency hiding
9447  *
9448  * @wm: watermark calculation data
9449  *
9450  * Check latency hiding (CIK).
9451  * Used for display watermark bandwidth calculations
9452  * Returns true if the display fits, false if not.
9453  */
9454 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9455 {
9456         u32 lb_partitions = wm->lb_size / wm->src_width;
9457         u32 line_time = wm->active_time + wm->blank_time;
9458         u32 latency_tolerant_lines;
9459         u32 latency_hiding;
9460         fixed20_12 a;
9461
9462         a.full = dfixed_const(1);
9463         if (wm->vsc.full > a.full)
9464                 latency_tolerant_lines = 1;
9465         else {
9466                 if (lb_partitions <= (wm->vtaps + 1))
9467                         latency_tolerant_lines = 1;
9468                 else
9469                         latency_tolerant_lines = 2;
9470         }
9471
9472         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9473
9474         if (dce8_latency_watermark(wm) <= latency_hiding)
9475                 return true;
9476         else
9477                 return false;
9478 }
9479
9480 /**
9481  * dce8_program_watermarks - program display watermarks
9482  *
9483  * @rdev: radeon_device pointer
9484  * @radeon_crtc: the selected display controller
9485  * @lb_size: line buffer size
9486  * @num_heads: number of display controllers in use
9487  *
9488  * Calculate and program the display watermarks for the
9489  * selected display controller (CIK).
9490  */
9491 static void dce8_program_watermarks(struct radeon_device *rdev,
9492                                     struct radeon_crtc *radeon_crtc,
9493                                     u32 lb_size, u32 num_heads)
9494 {
9495         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9496         struct dce8_wm_params wm_low, wm_high;
9497         u32 pixel_period;
9498         u32 line_time = 0;
9499         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9500         u32 tmp, wm_mask;
9501
9502         if (radeon_crtc->base.enabled && num_heads && mode) {
9503                 pixel_period = 1000000 / (u32)mode->clock;
9504                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9505
9506                 /* watermark for high clocks */
9507                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9508                     rdev->pm.dpm_enabled) {
9509                         wm_high.yclk =
9510                                 radeon_dpm_get_mclk(rdev, false) * 10;
9511                         wm_high.sclk =
9512                                 radeon_dpm_get_sclk(rdev, false) * 10;
9513                 } else {
9514                         wm_high.yclk = rdev->pm.current_mclk * 10;
9515                         wm_high.sclk = rdev->pm.current_sclk * 10;
9516                 }
9517
9518                 wm_high.disp_clk = mode->clock;
9519                 wm_high.src_width = mode->crtc_hdisplay;
9520                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9521                 wm_high.blank_time = line_time - wm_high.active_time;
9522                 wm_high.interlaced = false;
9523                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9524                         wm_high.interlaced = true;
9525                 wm_high.vsc = radeon_crtc->vsc;
9526                 wm_high.vtaps = 1;
9527                 if (radeon_crtc->rmx_type != RMX_OFF)
9528                         wm_high.vtaps = 2;
9529                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9530                 wm_high.lb_size = lb_size;
9531                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9532                 wm_high.num_heads = num_heads;
9533
9534                 /* set for high clocks */
9535                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9536
9537                 /* possibly force display priority to high */
9538                 /* should really do this at mode validation time... */
9539                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9540                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9541                     !dce8_check_latency_hiding(&wm_high) ||
9542                     (rdev->disp_priority == 2)) {
9543                         DRM_DEBUG_KMS("force priority to high\n");
9544                 }
9545
9546                 /* watermark for low clocks */
9547                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9548                     rdev->pm.dpm_enabled) {
9549                         wm_low.yclk =
9550                                 radeon_dpm_get_mclk(rdev, true) * 10;
9551                         wm_low.sclk =
9552                                 radeon_dpm_get_sclk(rdev, true) * 10;
9553                 } else {
9554                         wm_low.yclk = rdev->pm.current_mclk * 10;
9555                         wm_low.sclk = rdev->pm.current_sclk * 10;
9556                 }
9557
9558                 wm_low.disp_clk = mode->clock;
9559                 wm_low.src_width = mode->crtc_hdisplay;
9560                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9561                 wm_low.blank_time = line_time - wm_low.active_time;
9562                 wm_low.interlaced = false;
9563                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9564                         wm_low.interlaced = true;
9565                 wm_low.vsc = radeon_crtc->vsc;
9566                 wm_low.vtaps = 1;
9567                 if (radeon_crtc->rmx_type != RMX_OFF)
9568                         wm_low.vtaps = 2;
9569                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9570                 wm_low.lb_size = lb_size;
9571                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9572                 wm_low.num_heads = num_heads;
9573
9574                 /* set for low clocks */
9575                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9576
9577                 /* possibly force display priority to high */
9578                 /* should really do this at mode validation time... */
9579                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9580                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9581                     !dce8_check_latency_hiding(&wm_low) ||
9582                     (rdev->disp_priority == 2)) {
9583                         DRM_DEBUG_KMS("force priority to high\n");
9584                 }
9585         }
9586
9587         /* select wm A */
9588         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9589         tmp = wm_mask;
9590         tmp &= ~LATENCY_WATERMARK_MASK(3);
9591         tmp |= LATENCY_WATERMARK_MASK(1);
9592         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9593         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9594                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9595                 LATENCY_HIGH_WATERMARK(line_time)));
9596         /* select wm B */
9597         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9598         tmp &= ~LATENCY_WATERMARK_MASK(3);
9599         tmp |= LATENCY_WATERMARK_MASK(2);
9600         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9601         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9602                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9603                 LATENCY_HIGH_WATERMARK(line_time)));
9604         /* restore original selection */
9605         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9606
9607         /* save values for DPM */
9608         radeon_crtc->line_time = line_time;
9609         radeon_crtc->wm_high = latency_watermark_a;
9610         radeon_crtc->wm_low = latency_watermark_b;
9611 }
9612
9613 /**
9614  * dce8_bandwidth_update - program display watermarks
9615  *
9616  * @rdev: radeon_device pointer
9617  *
9618  * Calculate and program the display watermarks and line
9619  * buffer allocation (CIK).
9620  */
9621 void dce8_bandwidth_update(struct radeon_device *rdev)
9622 {
9623         struct drm_display_mode *mode = NULL;
9624         u32 num_heads = 0, lb_size;
9625         int i;
9626
9627         if (!rdev->mode_info.mode_config_initialized)
9628                 return;
9629
9630         radeon_update_display_priority(rdev);
9631
9632         for (i = 0; i < rdev->num_crtc; i++) {
9633                 if (rdev->mode_info.crtcs[i]->base.enabled)
9634                         num_heads++;
9635         }
9636         for (i = 0; i < rdev->num_crtc; i++) {
9637                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9638                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9639                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9640         }
9641 }
9642
9643 /**
9644  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9645  *
9646  * @rdev: radeon_device pointer
9647  *
9648  * Fetches a GPU clock counter snapshot (CIK).
9649  * Returns the 64 bit clock counter snapshot.
9650  */
9651 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9652 {
9653         uint64_t clock;
9654
9655         mutex_lock(&rdev->gpu_clock_mutex);
9656         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9657         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9658                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9659         mutex_unlock(&rdev->gpu_clock_mutex);
9660         return clock;
9661 }
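
/*
 * Illustrative usage sketch, not part of the driver: two snapshots of the
 * counter taken some time apart give the number of GPU clock ticks elapsed
 * in between.  The helper name and the 10 ms sampling interval are
 * arbitrary choices for the example.
 */
static uint64_t __maybe_unused cik_gpu_clock_delta_example(struct radeon_device *rdev)
{
	uint64_t start, end;

	start = cik_get_gpu_clock_counter(rdev);
	msleep(10);
	end = cik_get_gpu_clock_counter(rdev);

	return end - start;
}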
9662
9663 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9664                               u32 cntl_reg, u32 status_reg)
9665 {
9666         int r, i;
9667         struct atom_clock_dividers dividers;
9668         uint32_t tmp;
9669
9670         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9671                                            clock, false, &dividers);
9672         if (r)
9673                 return r;
9674
9675         tmp = RREG32_SMC(cntl_reg);
9676         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9677         tmp |= dividers.post_divider;
9678         WREG32_SMC(cntl_reg, tmp);
9679
9680         for (i = 0; i < 100; i++) {
9681                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9682                         break;
9683                 mdelay(10);
9684         }
9685         if (i == 100)
9686                 return -ETIMEDOUT;
9687
9688         return 0;
9689 }
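
/*
 * Illustrative sketch, not used by the driver: the 100 x 10 ms loop above
 * is the usual "poll a status bit with a roughly one second timeout"
 * pattern; the same shape appears again in cik_set_vce_clocks() below.
 * The helper name is hypothetical.
 */
static int __maybe_unused cik_wait_smc_status_example(struct radeon_device *rdev,
						      u32 status_reg, u32 mask)
{
	int i;

	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & mask)
			return 0;
		mdelay(10);
	}

	return -ETIMEDOUT;
}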
9690
9691 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9692 {
9693         int r = 0;
9694
9695         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9696         if (r)
9697                 return r;
9698
9699         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9700         return r;
9701 }
9702
9703 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9704 {
9705         int r, i;
9706         struct atom_clock_dividers dividers;
9707         u32 tmp;
9708
9709         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9710                                            ecclk, false, &dividers);
9711         if (r)
9712                 return r;
9713
9714         for (i = 0; i < 100; i++) {
9715                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9716                         break;
9717                 mdelay(10);
9718         }
9719         if (i == 100)
9720                 return -ETIMEDOUT;
9721
9722         tmp = RREG32_SMC(CG_ECLK_CNTL);
9723         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9724         tmp |= dividers.post_divider;
9725         WREG32_SMC(CG_ECLK_CNTL, tmp);
9726
9727         for (i = 0; i < 100; i++) {
9728                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9729                         break;
9730                 mdelay(10);
9731         }
9732         if (i == 100)
9733                 return -ETIMEDOUT;
9734
9735         return 0;
9736 }
9737
9738 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9739 {
9740         struct pci_dev *root = rdev->pdev->bus->self;
9741         int bridge_pos, gpu_pos;
9742         u32 speed_cntl, mask, current_data_rate;
9743         int ret, i;
9744         u16 tmp16;
9745
9746         if (pci_is_root_bus(rdev->pdev->bus))
9747                 return;
9748
9749         if (radeon_pcie_gen2 == 0)
9750                 return;
9751
9752         if (rdev->flags & RADEON_IS_IGP)
9753                 return;
9754
9755         if (!(rdev->flags & RADEON_IS_PCIE))
9756                 return;
9757
9758         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9759         if (ret != 0)
9760                 return;
9761
9762         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9763                 return;
9764
9765         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9766         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9767                 LC_CURRENT_DATA_RATE_SHIFT;
9768         if (mask & DRM_PCIE_SPEED_80) {
9769                 if (current_data_rate == 2) {
9770                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9771                         return;
9772                 }
9773                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9774         } else if (mask & DRM_PCIE_SPEED_50) {
9775                 if (current_data_rate == 1) {
9776                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9777                         return;
9778                 }
9779                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9780         }
9781
9782         bridge_pos = pci_pcie_cap(root);
9783         if (!bridge_pos)
9784                 return;
9785
9786         gpu_pos = pci_pcie_cap(rdev->pdev);
9787         if (!gpu_pos)
9788                 return;
9789
9790         if (mask & DRM_PCIE_SPEED_80) {
9791                 /* re-try equalization if gen3 is not already enabled */
9792                 if (current_data_rate != 2) {
9793                         u16 bridge_cfg, gpu_cfg;
9794                         u16 bridge_cfg2, gpu_cfg2;
9795                         u32 max_lw, current_lw, tmp;
9796
9797                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9798                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9799
9800                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9801                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9802
9803                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9804                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9805
9806                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9807                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9808                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9809
9810                         if (current_lw < max_lw) {
9811                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9812                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9813                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9814                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9815                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9816                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9817                                 }
9818                         }
9819
9820                         for (i = 0; i < 10; i++) {
9821                                 /* check status */
9822                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9823                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9824                                         break;
9825
9826                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9827                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9828
9829                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9830                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9831
9832                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9833                                 tmp |= LC_SET_QUIESCE;
9834                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9835
9836                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9837                                 tmp |= LC_REDO_EQ;
9838                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9839
9840                                 mdelay(100);
9841
9842                                 /* linkctl */
9843                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9844                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9845                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9846                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9847
9848                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9849                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9850                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9851                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9852
9853                                 /* linkctl2 */
9854                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9855                                 tmp16 &= ~((1 << 4) | (7 << 9));
9856                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9857                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9858
9859                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9860                                 tmp16 &= ~((1 << 4) | (7 << 9));
9861                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9862                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9863
9864                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9865                                 tmp &= ~LC_SET_QUIESCE;
9866                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9867                         }
9868                 }
9869         }
9870
9871         /* set the link speed */
9872         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9873         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9874         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9875
9876         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9877         tmp16 &= ~0xf;
9878         if (mask & DRM_PCIE_SPEED_80)
9879                 tmp16 |= 3; /* gen3 */
9880         else if (mask & DRM_PCIE_SPEED_50)
9881                 tmp16 |= 2; /* gen2 */
9882         else
9883                 tmp16 |= 1; /* gen1 */
9884         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9885
9886         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9887         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9888         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9889
9890         for (i = 0; i < rdev->usec_timeout; i++) {
9891                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9892                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9893                         break;
9894                 udelay(1);
9895         }
9896 }
9897
9898 static void cik_program_aspm(struct radeon_device *rdev)
9899 {
9900         u32 data, orig;
9901         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9902         bool disable_clkreq = false;
9903
9904         if (radeon_aspm == 0)
9905                 return;
9906
9907         /* XXX double check IGPs */
9908         if (rdev->flags & RADEON_IS_IGP)
9909                 return;
9910
9911         if (!(rdev->flags & RADEON_IS_PCIE))
9912                 return;
9913
9914         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9915         data &= ~LC_XMIT_N_FTS_MASK;
9916         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9917         if (orig != data)
9918                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9919
9920         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9921         data |= LC_GO_TO_RECOVERY;
9922         if (orig != data)
9923                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9924
9925         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9926         data |= P_IGNORE_EDB_ERR;
9927         if (orig != data)
9928                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9929
9930         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9931         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9932         data |= LC_PMI_TO_L1_DIS;
9933         if (!disable_l0s)
9934                 data |= LC_L0S_INACTIVITY(7);
9935
9936         if (!disable_l1) {
9937                 data |= LC_L1_INACTIVITY(7);
9938                 data &= ~LC_PMI_TO_L1_DIS;
9939                 if (orig != data)
9940                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9941
9942                 if (!disable_plloff_in_l1) {
9943                         bool clk_req_support;
9944
9945                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9946                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9947                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9948                         if (orig != data)
9949                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9950
9951                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9952                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9953                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9954                         if (orig != data)
9955                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9956
9957                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9958                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9959                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9960                         if (orig != data)
9961                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9962
9963                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9964                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9965                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9966                         if (orig != data)
9967                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9968
9969                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9970                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9971                         data |= LC_DYN_LANES_PWR_STATE(3);
9972                         if (orig != data)
9973                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9974
9975                         if (!disable_clkreq &&
9976                             !pci_is_root_bus(rdev->pdev->bus)) {
9977                                 struct pci_dev *root = rdev->pdev->bus->self;
9978                                 u32 lnkcap;
9979
9980                                 clk_req_support = false;
9981                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9982                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9983                                         clk_req_support = true;
9984                         } else {
9985                                 clk_req_support = false;
9986                         }
9987
9988                         if (clk_req_support) {
9989                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9990                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9991                                 if (orig != data)
9992                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9993
9994                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
9995                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9996                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9997                                 if (orig != data)
9998                                         WREG32_SMC(THM_CLK_CNTL, data);
9999
10000                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
10001                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
10002                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
10003                                 if (orig != data)
10004                                         WREG32_SMC(MISC_CLK_CTRL, data);
10005
10006                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
10007                                 data &= ~BCLK_AS_XCLK;
10008                                 if (orig != data)
10009                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
10010
10011                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
10012                                 data &= ~FORCE_BIF_REFCLK_EN;
10013                                 if (orig != data)
10014                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
10015
10016                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
10017                                 data &= ~MPLL_CLKOUT_SEL_MASK;
10018                                 data |= MPLL_CLKOUT_SEL(4);
10019                                 if (orig != data)
10020                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
10021                         }
10022                 }
10023         } else {
10024                 if (orig != data)
10025                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
10026         }
10027
10028         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
10029         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
10030         if (orig != data)
10031                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
10032
10033         if (!disable_l0s) {
10034                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
10035                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
10036                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
10037                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
10038                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
10039                                 data &= ~LC_L0S_INACTIVITY_MASK;
10040                                 if (orig != data)
10041                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
10042                         }
10043                 }
10044         }
10045 }