3beac4ecf6a6457edaa88af59cbaff98f9de113e
[cascardo/linux.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37
/*
 * Firmware files declared for this module via MODULE_FIRMWARE().
 *
 * Every ASIC family appears twice: once with an upper-case file name prefix
 * and once with a lower-case one. The sets are not identical (e.g. only the
 * upper-case Bonaire/Hawaii sets list an "mc2" image, only the lower-case
 * ones list a "k_smc" image, and only lower-case Kaveri lists "mec2").
 * NOTE(review): presumably the two spellings correspond to an older and a
 * newer firmware packaging - confirm against the microcode loading code.
 */

/* Bonaire, upper-case names */
38 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
47
/* Bonaire, lower-case names */
48 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
49 MODULE_FIRMWARE("radeon/bonaire_me.bin");
50 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
51 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
52 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
53 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
54 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
55 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
56 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
57
/* Hawaii, upper-case names */
58 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
66 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
67
/* Hawaii, lower-case names */
68 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
69 MODULE_FIRMWARE("radeon/hawaii_me.bin");
70 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
71 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
72 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
73 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
74 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
75 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
76 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
77
/* Kaveri, upper-case names */
78 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
80 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
81 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
82 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
83 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
84
/* Kaveri, lower-case names (adds a second MEC image) */
85 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
86 MODULE_FIRMWARE("radeon/kaveri_me.bin");
87 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
88 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
89 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
90 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
91 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
92
/* Kabini, upper-case names */
93 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
94 MODULE_FIRMWARE("radeon/KABINI_me.bin");
95 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
96 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
97 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
98 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
99
/* Kabini, lower-case names */
100 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
101 MODULE_FIRMWARE("radeon/kabini_me.bin");
102 MODULE_FIRMWARE("radeon/kabini_ce.bin");
103 MODULE_FIRMWARE("radeon/kabini_mec.bin");
104 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
105 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
106
/* Mullins, upper-case names */
107 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
109 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
110 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
111 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
112 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
113
/* Mullins, lower-case names */
114 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
115 MODULE_FIRMWARE("radeon/mullins_me.bin");
116 MODULE_FIRMWARE("radeon/mullins_ce.bin");
117 MODULE_FIRMWARE("radeon/mullins_mec.bin");
118 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
119 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
120
/*
 * Prototypes needed before first use in this file.
 *
 * The "extern" entries are functions with external linkage that are declared
 * here directly; going by their prefixes they belong to the r600, evergreen,
 * sumo, si, cik_sdma and vce code.
 * NOTE(review): confirm where each one is defined and whether a shared
 * header already declares it.
 *
 * The "static" entries are forward declarations of file-local functions.
 */
121 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
122 extern void r600_ih_ring_fini(struct radeon_device *rdev);
123 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
124 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
125 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
126 extern void sumo_rlc_fini(struct radeon_device *rdev);
127 extern int sumo_rlc_init(struct radeon_device *rdev);
128 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
129 extern void si_rlc_reset(struct radeon_device *rdev);
130 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
/* file-local; takes shader engine (se) and shader array (sh) indices - TODO confirm naming */
131 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
132 extern int cik_sdma_resume(struct radeon_device *rdev);
133 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
134 extern void cik_sdma_fini(struct radeon_device *rdev);
135 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
/* file-local helpers, forward-declared */
136 static void cik_rlc_stop(struct radeon_device *rdev);
137 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
138 static void cik_program_aspm(struct radeon_device *rdev);
139 static void cik_init_pg(struct radeon_device *rdev);
140 static void cik_init_cg(struct radeon_device *rdev);
141 static void cik_fini_pg(struct radeon_device *rdev);
142 static void cik_fini_cg(struct radeon_device *rdev);
143 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
144                                           bool enable);
145
146 /**
147  * cik_get_allowed_info_register - fetch the register for the info ioctl
148  *
149  * @rdev: radeon_device pointer
150  * @reg: register offset in bytes
151  * @val: register value
152  *
153  * Returns 0 for success or -EINVAL for an invalid register
154  *
155  */
156 int cik_get_allowed_info_register(struct radeon_device *rdev,
157                                   u32 reg, u32 *val)
158 {
159         switch (reg) {
160         case GRBM_STATUS:
161         case GRBM_STATUS2:
162         case GRBM_STATUS_SE0:
163         case GRBM_STATUS_SE1:
164         case GRBM_STATUS_SE2:
165         case GRBM_STATUS_SE3:
166         case SRBM_STATUS:
167         case SRBM_STATUS2:
168         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
169         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
170         case UVD_STATUS:
171         /* TODO VCE */
172                 *val = RREG32(reg);
173                 return 0;
174         default:
175                 return -EINVAL;
176         }
177 }
178
179 /*
180  * Indirect registers accessor
181  */
182 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
183 {
184         unsigned long flags;
185         u32 r;
186
187         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
188         WREG32(CIK_DIDT_IND_INDEX, (reg));
189         r = RREG32(CIK_DIDT_IND_DATA);
190         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
191         return r;
192 }
193
194 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
195 {
196         unsigned long flags;
197
198         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
199         WREG32(CIK_DIDT_IND_INDEX, (reg));
200         WREG32(CIK_DIDT_IND_DATA, (v));
201         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
202 }
203
204 /* get temperature in millidegrees */
205 int ci_get_temp(struct radeon_device *rdev)
206 {
207         u32 temp;
208         int actual_temp = 0;
209
210         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
211                 CTF_TEMP_SHIFT;
212
213         if (temp & 0x200)
214                 actual_temp = 255;
215         else
216                 actual_temp = temp & 0x1ff;
217
218         actual_temp = actual_temp * 1000;
219
220         return actual_temp;
221 }
222
223 /* get temperature in millidegrees */
224 int kv_get_temp(struct radeon_device *rdev)
225 {
226         u32 temp;
227         int actual_temp = 0;
228
229         temp = RREG32_SMC(0xC0300E0C);
230
231         if (temp)
232                 actual_temp = (temp / 8) - 49;
233         else
234                 actual_temp = 0;
235
236         actual_temp = actual_temp * 1000;
237
238         return actual_temp;
239 }
240
241 /*
242  * Indirect registers accessor
243  */
244 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
245 {
246         unsigned long flags;
247         u32 r;
248
249         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
250         WREG32(PCIE_INDEX, reg);
251         (void)RREG32(PCIE_INDEX);
252         r = RREG32(PCIE_DATA);
253         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
254         return r;
255 }
256
257 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
258 {
259         unsigned long flags;
260
261         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
262         WREG32(PCIE_INDEX, reg);
263         (void)RREG32(PCIE_INDEX);
264         WREG32(PCIE_DATA, v);
265         (void)RREG32(PCIE_DATA);
266         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
267 }
268
/*
 * RLC save/restore register list, "spectre" variant.
 *
 * Layout as written in the initializer:
 *  - a run of two-word entries, each (value << 16) | (offset >> 2)
 *    followed by a 0x00000000 word;
 *  - a lone 0x3 word, then a second run of two-word entries;
 *  - a lone 0x5 word, then five single-word entries without a trailing
 *    zero word.
 * NOTE(review): the upper 16 bits look like an instance/broadcast selector,
 * the zero words like slots for saved values, and the 0x3 / 0x5 words like
 * section markers - confirm against the RLC setup code that consumes this
 * table.
 */
269 static const u32 spectre_rlc_save_restore_register_list[] =
270 {
271         (0x0e00 << 16) | (0xc12c >> 2),
272         0x00000000,
273         (0x0e00 << 16) | (0xc140 >> 2),
274         0x00000000,
275         (0x0e00 << 16) | (0xc150 >> 2),
276         0x00000000,
277         (0x0e00 << 16) | (0xc15c >> 2),
278         0x00000000,
279         (0x0e00 << 16) | (0xc168 >> 2),
280         0x00000000,
281         (0x0e00 << 16) | (0xc170 >> 2),
282         0x00000000,
283         (0x0e00 << 16) | (0xc178 >> 2),
284         0x00000000,
285         (0x0e00 << 16) | (0xc204 >> 2),
286         0x00000000,
287         (0x0e00 << 16) | (0xc2b4 >> 2),
288         0x00000000,
289         (0x0e00 << 16) | (0xc2b8 >> 2),
290         0x00000000,
291         (0x0e00 << 16) | (0xc2bc >> 2),
292         0x00000000,
293         (0x0e00 << 16) | (0xc2c0 >> 2),
294         0x00000000,
295         (0x0e00 << 16) | (0x8228 >> 2),
296         0x00000000,
297         (0x0e00 << 16) | (0x829c >> 2),
298         0x00000000,
299         (0x0e00 << 16) | (0x869c >> 2),
300         0x00000000,
301         (0x0600 << 16) | (0x98f4 >> 2),
302         0x00000000,
303         (0x0e00 << 16) | (0x98f8 >> 2),
304         0x00000000,
305         (0x0e00 << 16) | (0x9900 >> 2),
306         0x00000000,
307         (0x0e00 << 16) | (0xc260 >> 2),
308         0x00000000,
309         (0x0e00 << 16) | (0x90e8 >> 2),
310         0x00000000,
311         (0x0e00 << 16) | (0x3c000 >> 2),
312         0x00000000,
313         (0x0e00 << 16) | (0x3c00c >> 2),
314         0x00000000,
315         (0x0e00 << 16) | (0x8c1c >> 2),
316         0x00000000,
317         (0x0e00 << 16) | (0x9700 >> 2),
318         0x00000000,
319         (0x0e00 << 16) | (0xcd20 >> 2),
320         0x00000000,
321         (0x4e00 << 16) | (0xcd20 >> 2),
322         0x00000000,
323         (0x5e00 << 16) | (0xcd20 >> 2),
324         0x00000000,
325         (0x6e00 << 16) | (0xcd20 >> 2),
326         0x00000000,
327         (0x7e00 << 16) | (0xcd20 >> 2),
328         0x00000000,
329         (0x8e00 << 16) | (0xcd20 >> 2),
330         0x00000000,
331         (0x9e00 << 16) | (0xcd20 >> 2),
332         0x00000000,
333         (0xae00 << 16) | (0xcd20 >> 2),
334         0x00000000,
335         (0xbe00 << 16) | (0xcd20 >> 2),
336         0x00000000,
337         (0x0e00 << 16) | (0x89bc >> 2),
338         0x00000000,
339         (0x0e00 << 16) | (0x8900 >> 2),
340         0x00000000,
        /* lone word separating the first and second run of entries */
341         0x3,
342         (0x0e00 << 16) | (0xc130 >> 2),
343         0x00000000,
344         (0x0e00 << 16) | (0xc134 >> 2),
345         0x00000000,
346         (0x0e00 << 16) | (0xc1fc >> 2),
347         0x00000000,
348         (0x0e00 << 16) | (0xc208 >> 2),
349         0x00000000,
350         (0x0e00 << 16) | (0xc264 >> 2),
351         0x00000000,
352         (0x0e00 << 16) | (0xc268 >> 2),
353         0x00000000,
354         (0x0e00 << 16) | (0xc26c >> 2),
355         0x00000000,
356         (0x0e00 << 16) | (0xc270 >> 2),
357         0x00000000,
358         (0x0e00 << 16) | (0xc274 >> 2),
359         0x00000000,
360         (0x0e00 << 16) | (0xc278 >> 2),
361         0x00000000,
362         (0x0e00 << 16) | (0xc27c >> 2),
363         0x00000000,
364         (0x0e00 << 16) | (0xc280 >> 2),
365         0x00000000,
366         (0x0e00 << 16) | (0xc284 >> 2),
367         0x00000000,
368         (0x0e00 << 16) | (0xc288 >> 2),
369         0x00000000,
370         (0x0e00 << 16) | (0xc28c >> 2),
371         0x00000000,
372         (0x0e00 << 16) | (0xc290 >> 2),
373         0x00000000,
374         (0x0e00 << 16) | (0xc294 >> 2),
375         0x00000000,
376         (0x0e00 << 16) | (0xc298 >> 2),
377         0x00000000,
378         (0x0e00 << 16) | (0xc29c >> 2),
379         0x00000000,
380         (0x0e00 << 16) | (0xc2a0 >> 2),
381         0x00000000,
382         (0x0e00 << 16) | (0xc2a4 >> 2),
383         0x00000000,
384         (0x0e00 << 16) | (0xc2a8 >> 2),
385         0x00000000,
386         (0x0e00 << 16) | (0xc2ac  >> 2),
387         0x00000000,
388         (0x0e00 << 16) | (0xc2b0 >> 2),
389         0x00000000,
390         (0x0e00 << 16) | (0x301d0 >> 2),
391         0x00000000,
392         (0x0e00 << 16) | (0x30238 >> 2),
393         0x00000000,
394         (0x0e00 << 16) | (0x30250 >> 2),
395         0x00000000,
396         (0x0e00 << 16) | (0x30254 >> 2),
397         0x00000000,
398         (0x0e00 << 16) | (0x30258 >> 2),
399         0x00000000,
400         (0x0e00 << 16) | (0x3025c >> 2),
401         0x00000000,
402         (0x4e00 << 16) | (0xc900 >> 2),
403         0x00000000,
404         (0x5e00 << 16) | (0xc900 >> 2),
405         0x00000000,
406         (0x6e00 << 16) | (0xc900 >> 2),
407         0x00000000,
408         (0x7e00 << 16) | (0xc900 >> 2),
409         0x00000000,
410         (0x8e00 << 16) | (0xc900 >> 2),
411         0x00000000,
412         (0x9e00 << 16) | (0xc900 >> 2),
413         0x00000000,
414         (0xae00 << 16) | (0xc900 >> 2),
415         0x00000000,
416         (0xbe00 << 16) | (0xc900 >> 2),
417         0x00000000,
418         (0x4e00 << 16) | (0xc904 >> 2),
419         0x00000000,
420         (0x5e00 << 16) | (0xc904 >> 2),
421         0x00000000,
422         (0x6e00 << 16) | (0xc904 >> 2),
423         0x00000000,
424         (0x7e00 << 16) | (0xc904 >> 2),
425         0x00000000,
426         (0x8e00 << 16) | (0xc904 >> 2),
427         0x00000000,
428         (0x9e00 << 16) | (0xc904 >> 2),
429         0x00000000,
430         (0xae00 << 16) | (0xc904 >> 2),
431         0x00000000,
432         (0xbe00 << 16) | (0xc904 >> 2),
433         0x00000000,
434         (0x4e00 << 16) | (0xc908 >> 2),
435         0x00000000,
436         (0x5e00 << 16) | (0xc908 >> 2),
437         0x00000000,
438         (0x6e00 << 16) | (0xc908 >> 2),
439         0x00000000,
440         (0x7e00 << 16) | (0xc908 >> 2),
441         0x00000000,
442         (0x8e00 << 16) | (0xc908 >> 2),
443         0x00000000,
444         (0x9e00 << 16) | (0xc908 >> 2),
445         0x00000000,
446         (0xae00 << 16) | (0xc908 >> 2),
447         0x00000000,
448         (0xbe00 << 16) | (0xc908 >> 2),
449         0x00000000,
450         (0x4e00 << 16) | (0xc90c >> 2),
451         0x00000000,
452         (0x5e00 << 16) | (0xc90c >> 2),
453         0x00000000,
454         (0x6e00 << 16) | (0xc90c >> 2),
455         0x00000000,
456         (0x7e00 << 16) | (0xc90c >> 2),
457         0x00000000,
458         (0x8e00 << 16) | (0xc90c >> 2),
459         0x00000000,
460         (0x9e00 << 16) | (0xc90c >> 2),
461         0x00000000,
462         (0xae00 << 16) | (0xc90c >> 2),
463         0x00000000,
464         (0xbe00 << 16) | (0xc90c >> 2),
465         0x00000000,
466         (0x4e00 << 16) | (0xc910 >> 2),
467         0x00000000,
468         (0x5e00 << 16) | (0xc910 >> 2),
469         0x00000000,
470         (0x6e00 << 16) | (0xc910 >> 2),
471         0x00000000,
472         (0x7e00 << 16) | (0xc910 >> 2),
473         0x00000000,
474         (0x8e00 << 16) | (0xc910 >> 2),
475         0x00000000,
476         (0x9e00 << 16) | (0xc910 >> 2),
477         0x00000000,
478         (0xae00 << 16) | (0xc910 >> 2),
479         0x00000000,
480         (0xbe00 << 16) | (0xc910 >> 2),
481         0x00000000,
482         (0x0e00 << 16) | (0xc99c >> 2),
483         0x00000000,
484         (0x0e00 << 16) | (0x9834 >> 2),
485         0x00000000,
486         (0x0000 << 16) | (0x30f00 >> 2),
487         0x00000000,
488         (0x0001 << 16) | (0x30f00 >> 2),
489         0x00000000,
490         (0x0000 << 16) | (0x30f04 >> 2),
491         0x00000000,
492         (0x0001 << 16) | (0x30f04 >> 2),
493         0x00000000,
494         (0x0000 << 16) | (0x30f08 >> 2),
495         0x00000000,
496         (0x0001 << 16) | (0x30f08 >> 2),
497         0x00000000,
498         (0x0000 << 16) | (0x30f0c >> 2),
499         0x00000000,
500         (0x0001 << 16) | (0x30f0c >> 2),
501         0x00000000,
502         (0x0600 << 16) | (0x9b7c >> 2),
503         0x00000000,
504         (0x0e00 << 16) | (0x8a14 >> 2),
505         0x00000000,
506         (0x0e00 << 16) | (0x8a18 >> 2),
507         0x00000000,
508         (0x0600 << 16) | (0x30a00 >> 2),
509         0x00000000,
510         (0x0e00 << 16) | (0x8bf0 >> 2),
511         0x00000000,
512         (0x0e00 << 16) | (0x8bcc >> 2),
513         0x00000000,
514         (0x0e00 << 16) | (0x8b24 >> 2),
515         0x00000000,
516         (0x0e00 << 16) | (0x30a04 >> 2),
517         0x00000000,
518         (0x0600 << 16) | (0x30a10 >> 2),
519         0x00000000,
520         (0x0600 << 16) | (0x30a14 >> 2),
521         0x00000000,
522         (0x0600 << 16) | (0x30a18 >> 2),
523         0x00000000,
524         (0x0600 << 16) | (0x30a2c >> 2),
525         0x00000000,
526         (0x0e00 << 16) | (0xc700 >> 2),
527         0x00000000,
528         (0x0e00 << 16) | (0xc704 >> 2),
529         0x00000000,
530         (0x0e00 << 16) | (0xc708 >> 2),
531         0x00000000,
532         (0x0e00 << 16) | (0xc768 >> 2),
533         0x00000000,
534         (0x0400 << 16) | (0xc770 >> 2),
535         0x00000000,
536         (0x0400 << 16) | (0xc774 >> 2),
537         0x00000000,
538         (0x0400 << 16) | (0xc778 >> 2),
539         0x00000000,
540         (0x0400 << 16) | (0xc77c >> 2),
541         0x00000000,
542         (0x0400 << 16) | (0xc780 >> 2),
543         0x00000000,
544         (0x0400 << 16) | (0xc784 >> 2),
545         0x00000000,
546         (0x0400 << 16) | (0xc788 >> 2),
547         0x00000000,
548         (0x0400 << 16) | (0xc78c >> 2),
549         0x00000000,
550         (0x0400 << 16) | (0xc798 >> 2),
551         0x00000000,
552         (0x0400 << 16) | (0xc79c >> 2),
553         0x00000000,
554         (0x0400 << 16) | (0xc7a0 >> 2),
555         0x00000000,
556         (0x0400 << 16) | (0xc7a4 >> 2),
557         0x00000000,
558         (0x0400 << 16) | (0xc7a8 >> 2),
559         0x00000000,
560         (0x0400 << 16) | (0xc7ac >> 2),
561         0x00000000,
562         (0x0400 << 16) | (0xc7b0 >> 2),
563         0x00000000,
564         (0x0400 << 16) | (0xc7b4 >> 2),
565         0x00000000,
566         (0x0e00 << 16) | (0x9100 >> 2),
567         0x00000000,
568         (0x0e00 << 16) | (0x3c010 >> 2),
569         0x00000000,
570         (0x0e00 << 16) | (0x92a8 >> 2),
571         0x00000000,
572         (0x0e00 << 16) | (0x92ac >> 2),
573         0x00000000,
574         (0x0e00 << 16) | (0x92b4 >> 2),
575         0x00000000,
576         (0x0e00 << 16) | (0x92b8 >> 2),
577         0x00000000,
578         (0x0e00 << 16) | (0x92bc >> 2),
579         0x00000000,
580         (0x0e00 << 16) | (0x92c0 >> 2),
581         0x00000000,
582         (0x0e00 << 16) | (0x92c4 >> 2),
583         0x00000000,
584         (0x0e00 << 16) | (0x92c8 >> 2),
585         0x00000000,
586         (0x0e00 << 16) | (0x92cc >> 2),
587         0x00000000,
588         (0x0e00 << 16) | (0x92d0 >> 2),
589         0x00000000,
590         (0x0e00 << 16) | (0x8c00 >> 2),
591         0x00000000,
592         (0x0e00 << 16) | (0x8c04 >> 2),
593         0x00000000,
594         (0x0e00 << 16) | (0x8c20 >> 2),
595         0x00000000,
596         (0x0e00 << 16) | (0x8c38 >> 2),
597         0x00000000,
598         (0x0e00 << 16) | (0x8c3c >> 2),
599         0x00000000,
600         (0x0e00 << 16) | (0xae00 >> 2),
601         0x00000000,
602         (0x0e00 << 16) | (0x9604 >> 2),
603         0x00000000,
604         (0x0e00 << 16) | (0xac08 >> 2),
605         0x00000000,
606         (0x0e00 << 16) | (0xac0c >> 2),
607         0x00000000,
608         (0x0e00 << 16) | (0xac10 >> 2),
609         0x00000000,
610         (0x0e00 << 16) | (0xac14 >> 2),
611         0x00000000,
612         (0x0e00 << 16) | (0xac58 >> 2),
613         0x00000000,
614         (0x0e00 << 16) | (0xac68 >> 2),
615         0x00000000,
616         (0x0e00 << 16) | (0xac6c >> 2),
617         0x00000000,
618         (0x0e00 << 16) | (0xac70 >> 2),
619         0x00000000,
620         (0x0e00 << 16) | (0xac74 >> 2),
621         0x00000000,
622         (0x0e00 << 16) | (0xac78 >> 2),
623         0x00000000,
624         (0x0e00 << 16) | (0xac7c >> 2),
625         0x00000000,
626         (0x0e00 << 16) | (0xac80 >> 2),
627         0x00000000,
628         (0x0e00 << 16) | (0xac84 >> 2),
629         0x00000000,
630         (0x0e00 << 16) | (0xac88 >> 2),
631         0x00000000,
632         (0x0e00 << 16) | (0xac8c >> 2),
633         0x00000000,
634         (0x0e00 << 16) | (0x970c >> 2),
635         0x00000000,
636         (0x0e00 << 16) | (0x9714 >> 2),
637         0x00000000,
638         (0x0e00 << 16) | (0x9718 >> 2),
639         0x00000000,
640         (0x0e00 << 16) | (0x971c >> 2),
641         0x00000000,
642         (0x0e00 << 16) | (0x31068 >> 2),
643         0x00000000,
644         (0x4e00 << 16) | (0x31068 >> 2),
645         0x00000000,
646         (0x5e00 << 16) | (0x31068 >> 2),
647         0x00000000,
648         (0x6e00 << 16) | (0x31068 >> 2),
649         0x00000000,
650         (0x7e00 << 16) | (0x31068 >> 2),
651         0x00000000,
652         (0x8e00 << 16) | (0x31068 >> 2),
653         0x00000000,
654         (0x9e00 << 16) | (0x31068 >> 2),
655         0x00000000,
656         (0xae00 << 16) | (0x31068 >> 2),
657         0x00000000,
658         (0xbe00 << 16) | (0x31068 >> 2),
659         0x00000000,
660         (0x0e00 << 16) | (0xcd10 >> 2),
661         0x00000000,
662         (0x0e00 << 16) | (0xcd14 >> 2),
663         0x00000000,
664         (0x0e00 << 16) | (0x88b0 >> 2),
665         0x00000000,
666         (0x0e00 << 16) | (0x88b4 >> 2),
667         0x00000000,
668         (0x0e00 << 16) | (0x88b8 >> 2),
669         0x00000000,
670         (0x0e00 << 16) | (0x88bc >> 2),
671         0x00000000,
672         (0x0400 << 16) | (0x89c0 >> 2),
673         0x00000000,
674         (0x0e00 << 16) | (0x88c4 >> 2),
675         0x00000000,
676         (0x0e00 << 16) | (0x88c8 >> 2),
677         0x00000000,
678         (0x0e00 << 16) | (0x88d0 >> 2),
679         0x00000000,
680         (0x0e00 << 16) | (0x88d4 >> 2),
681         0x00000000,
682         (0x0e00 << 16) | (0x88d8 >> 2),
683         0x00000000,
684         (0x0e00 << 16) | (0x8980 >> 2),
685         0x00000000,
686         (0x0e00 << 16) | (0x30938 >> 2),
687         0x00000000,
688         (0x0e00 << 16) | (0x3093c >> 2),
689         0x00000000,
690         (0x0e00 << 16) | (0x30940 >> 2),
691         0x00000000,
692         (0x0e00 << 16) | (0x89a0 >> 2),
693         0x00000000,
694         (0x0e00 << 16) | (0x30900 >> 2),
695         0x00000000,
696         (0x0e00 << 16) | (0x30904 >> 2),
697         0x00000000,
698         (0x0e00 << 16) | (0x89b4 >> 2),
699         0x00000000,
700         (0x0e00 << 16) | (0x3c210 >> 2),
701         0x00000000,
702         (0x0e00 << 16) | (0x3c214 >> 2),
703         0x00000000,
704         (0x0e00 << 16) | (0x3c218 >> 2),
705         0x00000000,
706         (0x0e00 << 16) | (0x8904 >> 2),
707         0x00000000,
        /* lone word; the five entries after it have no trailing zero word */
708         0x5,
709         (0x0e00 << 16) | (0x8c28 >> 2),
710         (0x0e00 << 16) | (0x8c2c >> 2),
711         (0x0e00 << 16) | (0x8c30 >> 2),
712         (0x0e00 << 16) | (0x8c34 >> 2),
713         (0x0e00 << 16) | (0x9600 >> 2),
714 };
715
716 static const u32 kalindi_rlc_save_restore_register_list[] =
717 {
718         (0x0e00 << 16) | (0xc12c >> 2),
719         0x00000000,
720         (0x0e00 << 16) | (0xc140 >> 2),
721         0x00000000,
722         (0x0e00 << 16) | (0xc150 >> 2),
723         0x00000000,
724         (0x0e00 << 16) | (0xc15c >> 2),
725         0x00000000,
726         (0x0e00 << 16) | (0xc168 >> 2),
727         0x00000000,
728         (0x0e00 << 16) | (0xc170 >> 2),
729         0x00000000,
730         (0x0e00 << 16) | (0xc204 >> 2),
731         0x00000000,
732         (0x0e00 << 16) | (0xc2b4 >> 2),
733         0x00000000,
734         (0x0e00 << 16) | (0xc2b8 >> 2),
735         0x00000000,
736         (0x0e00 << 16) | (0xc2bc >> 2),
737         0x00000000,
738         (0x0e00 << 16) | (0xc2c0 >> 2),
739         0x00000000,
740         (0x0e00 << 16) | (0x8228 >> 2),
741         0x00000000,
742         (0x0e00 << 16) | (0x829c >> 2),
743         0x00000000,
744         (0x0e00 << 16) | (0x869c >> 2),
745         0x00000000,
746         (0x0600 << 16) | (0x98f4 >> 2),
747         0x00000000,
748         (0x0e00 << 16) | (0x98f8 >> 2),
749         0x00000000,
750         (0x0e00 << 16) | (0x9900 >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0xc260 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0x90e8 >> 2),
755         0x00000000,
756         (0x0e00 << 16) | (0x3c000 >> 2),
757         0x00000000,
758         (0x0e00 << 16) | (0x3c00c >> 2),
759         0x00000000,
760         (0x0e00 << 16) | (0x8c1c >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0x9700 >> 2),
763         0x00000000,
764         (0x0e00 << 16) | (0xcd20 >> 2),
765         0x00000000,
766         (0x4e00 << 16) | (0xcd20 >> 2),
767         0x00000000,
768         (0x5e00 << 16) | (0xcd20 >> 2),
769         0x00000000,
770         (0x6e00 << 16) | (0xcd20 >> 2),
771         0x00000000,
772         (0x7e00 << 16) | (0xcd20 >> 2),
773         0x00000000,
774         (0x0e00 << 16) | (0x89bc >> 2),
775         0x00000000,
776         (0x0e00 << 16) | (0x8900 >> 2),
777         0x00000000,
778         0x3,
779         (0x0e00 << 16) | (0xc130 >> 2),
780         0x00000000,
781         (0x0e00 << 16) | (0xc134 >> 2),
782         0x00000000,
783         (0x0e00 << 16) | (0xc1fc >> 2),
784         0x00000000,
785         (0x0e00 << 16) | (0xc208 >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc264 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc268 >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc26c >> 2),
792         0x00000000,
793         (0x0e00 << 16) | (0xc270 >> 2),
794         0x00000000,
795         (0x0e00 << 16) | (0xc274 >> 2),
796         0x00000000,
797         (0x0e00 << 16) | (0xc28c >> 2),
798         0x00000000,
799         (0x0e00 << 16) | (0xc290 >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0xc294 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0xc298 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0xc2a0 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0xc2a4 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0xc2a8 >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0xc2ac >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0x301d0 >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0x30238 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0x30250 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0x30254 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0x30258 >> 2),
822         0x00000000,
823         (0x0e00 << 16) | (0x3025c >> 2),
824         0x00000000,
825         (0x4e00 << 16) | (0xc900 >> 2),
826         0x00000000,
827         (0x5e00 << 16) | (0xc900 >> 2),
828         0x00000000,
829         (0x6e00 << 16) | (0xc900 >> 2),
830         0x00000000,
831         (0x7e00 << 16) | (0xc900 >> 2),
832         0x00000000,
833         (0x4e00 << 16) | (0xc904 >> 2),
834         0x00000000,
835         (0x5e00 << 16) | (0xc904 >> 2),
836         0x00000000,
837         (0x6e00 << 16) | (0xc904 >> 2),
838         0x00000000,
839         (0x7e00 << 16) | (0xc904 >> 2),
840         0x00000000,
841         (0x4e00 << 16) | (0xc908 >> 2),
842         0x00000000,
843         (0x5e00 << 16) | (0xc908 >> 2),
844         0x00000000,
845         (0x6e00 << 16) | (0xc908 >> 2),
846         0x00000000,
847         (0x7e00 << 16) | (0xc908 >> 2),
848         0x00000000,
849         (0x4e00 << 16) | (0xc90c >> 2),
850         0x00000000,
851         (0x5e00 << 16) | (0xc90c >> 2),
852         0x00000000,
853         (0x6e00 << 16) | (0xc90c >> 2),
854         0x00000000,
855         (0x7e00 << 16) | (0xc90c >> 2),
856         0x00000000,
857         (0x4e00 << 16) | (0xc910 >> 2),
858         0x00000000,
859         (0x5e00 << 16) | (0xc910 >> 2),
860         0x00000000,
861         (0x6e00 << 16) | (0xc910 >> 2),
862         0x00000000,
863         (0x7e00 << 16) | (0xc910 >> 2),
864         0x00000000,
865         (0x0e00 << 16) | (0xc99c >> 2),
866         0x00000000,
867         (0x0e00 << 16) | (0x9834 >> 2),
868         0x00000000,
869         (0x0000 << 16) | (0x30f00 >> 2),
870         0x00000000,
871         (0x0000 << 16) | (0x30f04 >> 2),
872         0x00000000,
873         (0x0000 << 16) | (0x30f08 >> 2),
874         0x00000000,
875         (0x0000 << 16) | (0x30f0c >> 2),
876         0x00000000,
877         (0x0600 << 16) | (0x9b7c >> 2),
878         0x00000000,
879         (0x0e00 << 16) | (0x8a14 >> 2),
880         0x00000000,
881         (0x0e00 << 16) | (0x8a18 >> 2),
882         0x00000000,
883         (0x0600 << 16) | (0x30a00 >> 2),
884         0x00000000,
885         (0x0e00 << 16) | (0x8bf0 >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x8bcc >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x8b24 >> 2),
890         0x00000000,
891         (0x0e00 << 16) | (0x30a04 >> 2),
892         0x00000000,
893         (0x0600 << 16) | (0x30a10 >> 2),
894         0x00000000,
895         (0x0600 << 16) | (0x30a14 >> 2),
896         0x00000000,
897         (0x0600 << 16) | (0x30a18 >> 2),
898         0x00000000,
899         (0x0600 << 16) | (0x30a2c >> 2),
900         0x00000000,
901         (0x0e00 << 16) | (0xc700 >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0xc704 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0xc708 >> 2),
906         0x00000000,
907         (0x0e00 << 16) | (0xc768 >> 2),
908         0x00000000,
909         (0x0400 << 16) | (0xc770 >> 2),
910         0x00000000,
911         (0x0400 << 16) | (0xc774 >> 2),
912         0x00000000,
913         (0x0400 << 16) | (0xc798 >> 2),
914         0x00000000,
915         (0x0400 << 16) | (0xc79c >> 2),
916         0x00000000,
917         (0x0e00 << 16) | (0x9100 >> 2),
918         0x00000000,
919         (0x0e00 << 16) | (0x3c010 >> 2),
920         0x00000000,
921         (0x0e00 << 16) | (0x8c00 >> 2),
922         0x00000000,
923         (0x0e00 << 16) | (0x8c04 >> 2),
924         0x00000000,
925         (0x0e00 << 16) | (0x8c20 >> 2),
926         0x00000000,
927         (0x0e00 << 16) | (0x8c38 >> 2),
928         0x00000000,
929         (0x0e00 << 16) | (0x8c3c >> 2),
930         0x00000000,
931         (0x0e00 << 16) | (0xae00 >> 2),
932         0x00000000,
933         (0x0e00 << 16) | (0x9604 >> 2),
934         0x00000000,
935         (0x0e00 << 16) | (0xac08 >> 2),
936         0x00000000,
937         (0x0e00 << 16) | (0xac0c >> 2),
938         0x00000000,
939         (0x0e00 << 16) | (0xac10 >> 2),
940         0x00000000,
941         (0x0e00 << 16) | (0xac14 >> 2),
942         0x00000000,
943         (0x0e00 << 16) | (0xac58 >> 2),
944         0x00000000,
945         (0x0e00 << 16) | (0xac68 >> 2),
946         0x00000000,
947         (0x0e00 << 16) | (0xac6c >> 2),
948         0x00000000,
949         (0x0e00 << 16) | (0xac70 >> 2),
950         0x00000000,
951         (0x0e00 << 16) | (0xac74 >> 2),
952         0x00000000,
953         (0x0e00 << 16) | (0xac78 >> 2),
954         0x00000000,
955         (0x0e00 << 16) | (0xac7c >> 2),
956         0x00000000,
957         (0x0e00 << 16) | (0xac80 >> 2),
958         0x00000000,
959         (0x0e00 << 16) | (0xac84 >> 2),
960         0x00000000,
961         (0x0e00 << 16) | (0xac88 >> 2),
962         0x00000000,
963         (0x0e00 << 16) | (0xac8c >> 2),
964         0x00000000,
965         (0x0e00 << 16) | (0x970c >> 2),
966         0x00000000,
967         (0x0e00 << 16) | (0x9714 >> 2),
968         0x00000000,
969         (0x0e00 << 16) | (0x9718 >> 2),
970         0x00000000,
971         (0x0e00 << 16) | (0x971c >> 2),
972         0x00000000,
973         (0x0e00 << 16) | (0x31068 >> 2),
974         0x00000000,
975         (0x4e00 << 16) | (0x31068 >> 2),
976         0x00000000,
977         (0x5e00 << 16) | (0x31068 >> 2),
978         0x00000000,
979         (0x6e00 << 16) | (0x31068 >> 2),
980         0x00000000,
981         (0x7e00 << 16) | (0x31068 >> 2),
982         0x00000000,
983         (0x0e00 << 16) | (0xcd10 >> 2),
984         0x00000000,
985         (0x0e00 << 16) | (0xcd14 >> 2),
986         0x00000000,
987         (0x0e00 << 16) | (0x88b0 >> 2),
988         0x00000000,
989         (0x0e00 << 16) | (0x88b4 >> 2),
990         0x00000000,
991         (0x0e00 << 16) | (0x88b8 >> 2),
992         0x00000000,
993         (0x0e00 << 16) | (0x88bc >> 2),
994         0x00000000,
995         (0x0400 << 16) | (0x89c0 >> 2),
996         0x00000000,
997         (0x0e00 << 16) | (0x88c4 >> 2),
998         0x00000000,
999         (0x0e00 << 16) | (0x88c8 >> 2),
1000         0x00000000,
1001         (0x0e00 << 16) | (0x88d0 >> 2),
1002         0x00000000,
1003         (0x0e00 << 16) | (0x88d4 >> 2),
1004         0x00000000,
1005         (0x0e00 << 16) | (0x88d8 >> 2),
1006         0x00000000,
1007         (0x0e00 << 16) | (0x8980 >> 2),
1008         0x00000000,
1009         (0x0e00 << 16) | (0x30938 >> 2),
1010         0x00000000,
1011         (0x0e00 << 16) | (0x3093c >> 2),
1012         0x00000000,
1013         (0x0e00 << 16) | (0x30940 >> 2),
1014         0x00000000,
1015         (0x0e00 << 16) | (0x89a0 >> 2),
1016         0x00000000,
1017         (0x0e00 << 16) | (0x30900 >> 2),
1018         0x00000000,
1019         (0x0e00 << 16) | (0x30904 >> 2),
1020         0x00000000,
1021         (0x0e00 << 16) | (0x89b4 >> 2),
1022         0x00000000,
1023         (0x0e00 << 16) | (0x3e1fc >> 2),
1024         0x00000000,
1025         (0x0e00 << 16) | (0x3c210 >> 2),
1026         0x00000000,
1027         (0x0e00 << 16) | (0x3c214 >> 2),
1028         0x00000000,
1029         (0x0e00 << 16) | (0x3c218 >> 2),
1030         0x00000000,
1031         (0x0e00 << 16) | (0x8904 >> 2),
1032         0x00000000,
1033         0x5,
1034         (0x0e00 << 16) | (0x8c28 >> 2),
1035         (0x0e00 << 16) | (0x8c2c >> 2),
1036         (0x0e00 << 16) | (0x8c30 >> 2),
1037         (0x0e00 << 16) | (0x8c34 >> 2),
1038         (0x0e00 << 16) | (0x9600 >> 2),
1039 };
1040
/*
 * Bonaire SPM golden setting: a single three-word row.
 * cik_init_golden_registers() passes this table to
 * radeon_program_register_sequence() last in its CHIP_BONAIRE case.
 * NOTE(review): rows look like {register offset, mask, value} -- confirm
 * against radeon_program_register_sequence().
 */
1041 static const u32 bonaire_golden_spm_registers[] =
1042 {
1043         0x30800, 0xe0ffffff, 0xe0000000
1044 };
1045
/*
 * Bonaire "common" golden settings: four three-word rows covering
 * 0xc770, 0xc774, 0xc798 and 0xc79c.
 * cik_init_golden_registers() programs this table third in its
 * CHIP_BONAIRE case, after bonaire_golden_registers.
 * NOTE(review): rows look like {register offset, mask, value} -- confirm
 * against radeon_program_register_sequence().
 */
1046 static const u32 bonaire_golden_common_registers[] =
1047 {
1048         0xc770, 0xffffffff, 0x00000800,
1049         0xc774, 0xffffffff, 0x00000800,
1050         0xc798, 0xffffffff, 0x00007fbf,
1051         0xc79c, 0xffffffff, 0x00007faf
1052 };
1053
/*
 * Bonaire chip-specific golden settings, three words per row.
 * cik_init_golden_registers() programs this table second in its
 * CHIP_BONAIRE case (after bonaire_mgcg_cgcg_init, before the common and
 * SPM tables), so rows here may override values written by the earlier
 * table (e.g. 0x3c000, 0x3c200 and 0x3c234 appear in both).
 * NOTE(review): rows look like {register offset, mask, value} -- confirm
 * against radeon_program_register_sequence().  Several rows carry value
 * bits outside the mask (e.g. 0x3e78: mask 0x1, value 0x2); how those are
 * applied depends on that helper -- verify before editing.
 */
1054 static const u32 bonaire_golden_registers[] =
1055 {
1056         0x3354, 0x00000333, 0x00000333,
1057         0x3350, 0x000c0fc0, 0x00040200,
1058         0x9a10, 0x00010000, 0x00058208,
1059         0x3c000, 0xffff1fff, 0x00140000,
1060         0x3c200, 0xfdfc0fff, 0x00000100,
1061         0x3c234, 0x40000000, 0x40000200,
1062         0x9830, 0xffffffff, 0x00000000,
1063         0x9834, 0xf00fffff, 0x00000400,
1064         0x9838, 0x0002021c, 0x00020200,
1065         0xc78, 0x00000080, 0x00000000,
1066         0x5bb0, 0x000000f0, 0x00000070,
1067         0x5bc0, 0xf0311fff, 0x80300000,
1068         0x98f8, 0x73773777, 0x12010001,
1069         0x350c, 0x00810000, 0x408af000,
1070         0x7030, 0x31000111, 0x00000011,
1071         0x2f48, 0x73773777, 0x12010001,
1072         0x220c, 0x00007fb6, 0x0021a1b1,
1073         0x2210, 0x00007fb6, 0x002021b1,
1074         0x2180, 0x00007fb6, 0x00002191,
1075         0x2218, 0x00007fb6, 0x002121b1,
1076         0x221c, 0x00007fb6, 0x002021b1,
1077         0x21dc, 0x00007fb6, 0x00002191,
1078         0x21e0, 0x00007fb6, 0x00002191,
1079         0x3628, 0x0000003f, 0x0000000a,
1080         0x362c, 0x0000003f, 0x0000000a,
1081         0x2ae4, 0x00073ffe, 0x000022a2,
1082         0x240c, 0x000007ff, 0x00000000,
1083         0x8a14, 0xf000003f, 0x00000007,
1084         0x8bf0, 0x00002001, 0x00000001,
1085         0x8b24, 0xffffffff, 0x00ffffff,
1086         0x30a04, 0x0000ff0f, 0x00000000,
1087         0x28a4c, 0x07ffffff, 0x06000000,
1088         0x4d8, 0x00000fff, 0x00000100,
1089         0x3e78, 0x00000001, 0x00000002,
1090         0x9100, 0x03000000, 0x0362c688,
1091         0x8c00, 0x000000ff, 0x00000001,
1092         0xe40, 0x00001fff, 0x00001fff,
1093         0x9060, 0x0000007f, 0x00000020,
1094         0x9508, 0x00010000, 0x00010000,
1095         0xac14, 0x000003ff, 0x000000f3,
1096         0xac0c, 0xffffffff, 0x00001032
1097 };
1098
/*
 * Bonaire MGCG/CGCG init sequence, three words per row.
 * cik_init_golden_registers() programs this table first in its
 * CHIP_BONAIRE case, ahead of the golden register tables.
 * The 0x3c020..0x3c0a8 run repeats the same five values
 * (0x00010000, 0x00030002, 0x00040007, 0x00060005, 0x00090008) seven times.
 * 0x30800 and 0x55e4 each appear twice; the rows are kept in this order
 * because the table is consumed sequentially -- do not sort or dedupe.
 * NOTE(review): rows look like {register offset, mask, value}, and
 * MGCG/CGCG presumably refers to clock gating -- confirm both.
 */
1099 static const u32 bonaire_mgcg_cgcg_init[] =
1100 {
1101         0xc420, 0xffffffff, 0xfffffffc,
1102         0x30800, 0xffffffff, 0xe0000000,
1103         0x3c2a0, 0xffffffff, 0x00000100,
1104         0x3c208, 0xffffffff, 0x00000100,
1105         0x3c2c0, 0xffffffff, 0xc0000100,
1106         0x3c2c8, 0xffffffff, 0xc0000100,
1107         0x3c2c4, 0xffffffff, 0xc0000100,
1108         0x55e4, 0xffffffff, 0x00600100,
1109         0x3c280, 0xffffffff, 0x00000100,
1110         0x3c214, 0xffffffff, 0x06000100,
1111         0x3c220, 0xffffffff, 0x00000100,
1112         0x3c218, 0xffffffff, 0x06000100,
1113         0x3c204, 0xffffffff, 0x00000100,
1114         0x3c2e0, 0xffffffff, 0x00000100,
1115         0x3c224, 0xffffffff, 0x00000100,
1116         0x3c200, 0xffffffff, 0x00000100,
1117         0x3c230, 0xffffffff, 0x00000100,
1118         0x3c234, 0xffffffff, 0x00000100,
1119         0x3c250, 0xffffffff, 0x00000100,
1120         0x3c254, 0xffffffff, 0x00000100,
1121         0x3c258, 0xffffffff, 0x00000100,
1122         0x3c25c, 0xffffffff, 0x00000100,
1123         0x3c260, 0xffffffff, 0x00000100,
1124         0x3c27c, 0xffffffff, 0x00000100,
1125         0x3c278, 0xffffffff, 0x00000100,
1126         0x3c210, 0xffffffff, 0x06000100,
1127         0x3c290, 0xffffffff, 0x00000100,
1128         0x3c274, 0xffffffff, 0x00000100,
1129         0x3c2b4, 0xffffffff, 0x00000100,
1130         0x3c2b0, 0xffffffff, 0x00000100,
1131         0x3c270, 0xffffffff, 0x00000100,
1132         0x30800, 0xffffffff, 0xe0000000,
1133         0x3c020, 0xffffffff, 0x00010000,
1134         0x3c024, 0xffffffff, 0x00030002,
1135         0x3c028, 0xffffffff, 0x00040007,
1136         0x3c02c, 0xffffffff, 0x00060005,
1137         0x3c030, 0xffffffff, 0x00090008,
1138         0x3c034, 0xffffffff, 0x00010000,
1139         0x3c038, 0xffffffff, 0x00030002,
1140         0x3c03c, 0xffffffff, 0x00040007,
1141         0x3c040, 0xffffffff, 0x00060005,
1142         0x3c044, 0xffffffff, 0x00090008,
1143         0x3c048, 0xffffffff, 0x00010000,
1144         0x3c04c, 0xffffffff, 0x00030002,
1145         0x3c050, 0xffffffff, 0x00040007,
1146         0x3c054, 0xffffffff, 0x00060005,
1147         0x3c058, 0xffffffff, 0x00090008,
1148         0x3c05c, 0xffffffff, 0x00010000,
1149         0x3c060, 0xffffffff, 0x00030002,
1150         0x3c064, 0xffffffff, 0x00040007,
1151         0x3c068, 0xffffffff, 0x00060005,
1152         0x3c06c, 0xffffffff, 0x00090008,
1153         0x3c070, 0xffffffff, 0x00010000,
1154         0x3c074, 0xffffffff, 0x00030002,
1155         0x3c078, 0xffffffff, 0x00040007,
1156         0x3c07c, 0xffffffff, 0x00060005,
1157         0x3c080, 0xffffffff, 0x00090008,
1158         0x3c084, 0xffffffff, 0x00010000,
1159         0x3c088, 0xffffffff, 0x00030002,
1160         0x3c08c, 0xffffffff, 0x00040007,
1161         0x3c090, 0xffffffff, 0x00060005,
1162         0x3c094, 0xffffffff, 0x00090008,
1163         0x3c098, 0xffffffff, 0x00010000,
1164         0x3c09c, 0xffffffff, 0x00030002,
1165         0x3c0a0, 0xffffffff, 0x00040007,
1166         0x3c0a4, 0xffffffff, 0x00060005,
1167         0x3c0a8, 0xffffffff, 0x00090008,
1168         0x3c000, 0xffffffff, 0x96e00200,
1169         0x8708, 0xffffffff, 0x00900100,
1170         0xc424, 0xffffffff, 0x0020003f,
1171         0x38, 0xffffffff, 0x0140001c,
1172         0x3c, 0x000f0000, 0x000f0000,
1173         0x220, 0xffffffff, 0xC060000C,
1174         0x224, 0xc0000fff, 0x00000100,
1175         0xf90, 0xffffffff, 0x00000100,
1176         0xf98, 0x00000101, 0x00000000,
1177         0x20a8, 0xffffffff, 0x00000104,
1178         0x55e4, 0xff000fff, 0x00000100,
1179         0x30cc, 0xc0000fff, 0x00000104,
1180         0xc1e4, 0x00000001, 0x00000001,
1181         0xd00c, 0xff000ff0, 0x00000100,
1182         0xd80c, 0xff000ff0, 0x00000100
1183 };
1184
/*
 * Spectre SPM golden setting: a single three-word row.
 * cik_init_golden_registers() passes this table to
 * radeon_program_register_sequence() last in its CHIP_KAVERI case.
 * NOTE(review): rows look like {register offset, mask, value} -- confirm
 * against radeon_program_register_sequence().
 */
1185 static const u32 spectre_golden_spm_registers[] =
1186 {
1187         0x30800, 0xe0ffffff, 0xe0000000
1188 };
1189
/*
 * Spectre "common" golden settings: four three-word rows covering
 * 0xc770, 0xc774, 0xc798 and 0xc79c.
 * cik_init_golden_registers() programs this table third in its
 * CHIP_KAVERI case, after spectre_golden_registers.
 * NOTE(review): rows look like {register offset, mask, value} -- confirm
 * against radeon_program_register_sequence().
 */
1190 static const u32 spectre_golden_common_registers[] =
1191 {
1192         0xc770, 0xffffffff, 0x00000800,
1193         0xc774, 0xffffffff, 0x00000800,
1194         0xc798, 0xffffffff, 0x00007fbf,
1195         0xc79c, 0xffffffff, 0x00007faf
1196 };
1197
/*
 * Spectre chip-specific golden settings, three words per row.
 * cik_init_golden_registers() programs this table second in its
 * CHIP_KAVERI case (after spectre_mgcg_cgcg_init, before the common and
 * SPM tables), so rows here may override values written by the earlier
 * table (e.g. 0x3c000 and 0x3c200 appear in both).
 * NOTE(review): rows look like {register offset, mask, value} -- confirm
 * against radeon_program_register_sequence().  Some rows carry value bits
 * outside the mask (e.g. 0x3e78: mask 0x1, value 0x2); how those are
 * applied depends on that helper -- verify before editing.
 */
1198 static const u32 spectre_golden_registers[] =
1199 {
1200         0x3c000, 0xffff1fff, 0x96940200,
1201         0x3c00c, 0xffff0001, 0xff000000,
1202         0x3c200, 0xfffc0fff, 0x00000100,
1203         0x6ed8, 0x00010101, 0x00010000,
1204         0x9834, 0xf00fffff, 0x00000400,
1205         0x9838, 0xfffffffc, 0x00020200,
1206         0x5bb0, 0x000000f0, 0x00000070,
1207         0x5bc0, 0xf0311fff, 0x80300000,
1208         0x98f8, 0x73773777, 0x12010001,
1209         0x9b7c, 0x00ff0000, 0x00fc0000,
1210         0x2f48, 0x73773777, 0x12010001,
1211         0x8a14, 0xf000003f, 0x00000007,
1212         0x8b24, 0xffffffff, 0x00ffffff,
1213         0x28350, 0x3f3f3fff, 0x00000082,
1214         0x28354, 0x0000003f, 0x00000000,
1215         0x3e78, 0x00000001, 0x00000002,
1216         0x913c, 0xffff03df, 0x00000004,
1217         0xc768, 0x00000008, 0x00000008,
1218         0x8c00, 0x000008ff, 0x00000800,
1219         0x9508, 0x00010000, 0x00010000,
1220         0xac0c, 0xffffffff, 0x54763210,
1221         0x214f8, 0x01ff01ff, 0x00000002,
1222         0x21498, 0x007ff800, 0x00200000,
1223         0x2015c, 0xffffffff, 0x00000f40,
1224         0x30934, 0xffffffff, 0x00000001
1225 };
1226
/*
 * Spectre MGCG/CGCG init sequence, three words per row.
 * cik_init_golden_registers() programs this table first in its
 * CHIP_KAVERI case, ahead of the golden register tables.
 * The 0x3c020..0x3c0bc run repeats the same five values
 * (0x00010000, 0x00030002, 0x00040007, 0x00060005, 0x00090008) eight times.
 * 0x30800 and 0x55e4 each appear twice; the rows are kept in this order
 * because the table is consumed sequentially -- do not sort or dedupe.
 * NOTE(review): rows look like {register offset, mask, value}, and
 * MGCG/CGCG presumably refers to clock gating -- confirm both.
 */
1227 static const u32 spectre_mgcg_cgcg_init[] =
1228 {
1229         0xc420, 0xffffffff, 0xfffffffc,
1230         0x30800, 0xffffffff, 0xe0000000,
1231         0x3c2a0, 0xffffffff, 0x00000100,
1232         0x3c208, 0xffffffff, 0x00000100,
1233         0x3c2c0, 0xffffffff, 0x00000100,
1234         0x3c2c8, 0xffffffff, 0x00000100,
1235         0x3c2c4, 0xffffffff, 0x00000100,
1236         0x55e4, 0xffffffff, 0x00600100,
1237         0x3c280, 0xffffffff, 0x00000100,
1238         0x3c214, 0xffffffff, 0x06000100,
1239         0x3c220, 0xffffffff, 0x00000100,
1240         0x3c218, 0xffffffff, 0x06000100,
1241         0x3c204, 0xffffffff, 0x00000100,
1242         0x3c2e0, 0xffffffff, 0x00000100,
1243         0x3c224, 0xffffffff, 0x00000100,
1244         0x3c200, 0xffffffff, 0x00000100,
1245         0x3c230, 0xffffffff, 0x00000100,
1246         0x3c234, 0xffffffff, 0x00000100,
1247         0x3c250, 0xffffffff, 0x00000100,
1248         0x3c254, 0xffffffff, 0x00000100,
1249         0x3c258, 0xffffffff, 0x00000100,
1250         0x3c25c, 0xffffffff, 0x00000100,
1251         0x3c260, 0xffffffff, 0x00000100,
1252         0x3c27c, 0xffffffff, 0x00000100,
1253         0x3c278, 0xffffffff, 0x00000100,
1254         0x3c210, 0xffffffff, 0x06000100,
1255         0x3c290, 0xffffffff, 0x00000100,
1256         0x3c274, 0xffffffff, 0x00000100,
1257         0x3c2b4, 0xffffffff, 0x00000100,
1258         0x3c2b0, 0xffffffff, 0x00000100,
1259         0x3c270, 0xffffffff, 0x00000100,
1260         0x30800, 0xffffffff, 0xe0000000,
1261         0x3c020, 0xffffffff, 0x00010000,
1262         0x3c024, 0xffffffff, 0x00030002,
1263         0x3c028, 0xffffffff, 0x00040007,
1264         0x3c02c, 0xffffffff, 0x00060005,
1265         0x3c030, 0xffffffff, 0x00090008,
1266         0x3c034, 0xffffffff, 0x00010000,
1267         0x3c038, 0xffffffff, 0x00030002,
1268         0x3c03c, 0xffffffff, 0x00040007,
1269         0x3c040, 0xffffffff, 0x00060005,
1270         0x3c044, 0xffffffff, 0x00090008,
1271         0x3c048, 0xffffffff, 0x00010000,
1272         0x3c04c, 0xffffffff, 0x00030002,
1273         0x3c050, 0xffffffff, 0x00040007,
1274         0x3c054, 0xffffffff, 0x00060005,
1275         0x3c058, 0xffffffff, 0x00090008,
1276         0x3c05c, 0xffffffff, 0x00010000,
1277         0x3c060, 0xffffffff, 0x00030002,
1278         0x3c064, 0xffffffff, 0x00040007,
1279         0x3c068, 0xffffffff, 0x00060005,
1280         0x3c06c, 0xffffffff, 0x00090008,
1281         0x3c070, 0xffffffff, 0x00010000,
1282         0x3c074, 0xffffffff, 0x00030002,
1283         0x3c078, 0xffffffff, 0x00040007,
1284         0x3c07c, 0xffffffff, 0x00060005,
1285         0x3c080, 0xffffffff, 0x00090008,
1286         0x3c084, 0xffffffff, 0x00010000,
1287         0x3c088, 0xffffffff, 0x00030002,
1288         0x3c08c, 0xffffffff, 0x00040007,
1289         0x3c090, 0xffffffff, 0x00060005,
1290         0x3c094, 0xffffffff, 0x00090008,
1291         0x3c098, 0xffffffff, 0x00010000,
1292         0x3c09c, 0xffffffff, 0x00030002,
1293         0x3c0a0, 0xffffffff, 0x00040007,
1294         0x3c0a4, 0xffffffff, 0x00060005,
1295         0x3c0a8, 0xffffffff, 0x00090008,
1296         0x3c0ac, 0xffffffff, 0x00010000,
1297         0x3c0b0, 0xffffffff, 0x00030002,
1298         0x3c0b4, 0xffffffff, 0x00040007,
1299         0x3c0b8, 0xffffffff, 0x00060005,
1300         0x3c0bc, 0xffffffff, 0x00090008,
1301         0x3c000, 0xffffffff, 0x96e00200,
1302         0x8708, 0xffffffff, 0x00900100,
1303         0xc424, 0xffffffff, 0x0020003f,
1304         0x38, 0xffffffff, 0x0140001c,
1305         0x3c, 0x000f0000, 0x000f0000,
1306         0x220, 0xffffffff, 0xC060000C,
1307         0x224, 0xc0000fff, 0x00000100,
1308         0xf90, 0xffffffff, 0x00000100,
1309         0xf98, 0x00000101, 0x00000000,
1310         0x20a8, 0xffffffff, 0x00000104,
1311         0x55e4, 0xff000fff, 0x00000100,
1312         0x30cc, 0xc0000fff, 0x00000104,
1313         0xc1e4, 0x00000001, 0x00000001,
1314         0xd00c, 0xff000ff0, 0x00000100,
1315         0xd80c, 0xff000ff0, 0x00000100
1316 };
1317
/*
 * Kalindi SPM golden setting: a single three-word row.
 * cik_init_golden_registers() passes this table to
 * radeon_program_register_sequence() last in both its CHIP_KABINI and
 * CHIP_MULLINS cases.
 * NOTE(review): rows look like {register offset, mask, value} -- confirm
 * against radeon_program_register_sequence().
 */
1318 static const u32 kalindi_golden_spm_registers[] =
1319 {
1320         0x30800, 0xe0ffffff, 0xe0000000
1321 };
1322
/*
 * Kalindi "common" golden settings: four three-word rows covering
 * 0xc770, 0xc774, 0xc798 and 0xc79c.
 * cik_init_golden_registers() programs this table third in both its
 * CHIP_KABINI and CHIP_MULLINS cases, after the chip-specific golden
 * register table.
 * NOTE(review): rows look like {register offset, mask, value} -- confirm
 * against radeon_program_register_sequence().
 */
1323 static const u32 kalindi_golden_common_registers[] =
1324 {
1325         0xc770, 0xffffffff, 0x00000800,
1326         0xc774, 0xffffffff, 0x00000800,
1327         0xc798, 0xffffffff, 0x00007fbf,
1328         0xc79c, 0xffffffff, 0x00007faf
1329 };
1330
/*
 * Kalindi chip-specific golden settings, three words per row.
 * cik_init_golden_registers() programs this table second in its
 * CHIP_KABINI case (after kalindi_mgcg_cgcg_init, before the common and
 * SPM tables).  CHIP_MULLINS uses godavari_golden_registers in this slot
 * instead.
 * NOTE(review): rows look like {register offset, mask, value} -- confirm
 * against radeon_program_register_sequence().  Some rows carry value bits
 * outside the mask (e.g. 0x3e78: mask 0x1, value 0x2); how those are
 * applied depends on that helper -- verify before editing.
 */
1331 static const u32 kalindi_golden_registers[] =
1332 {
1333         0x3c000, 0xffffdfff, 0x6e944040,
1334         0x55e4, 0xff607fff, 0xfc000100,
1335         0x3c220, 0xff000fff, 0x00000100,
1336         0x3c224, 0xff000fff, 0x00000100,
1337         0x3c200, 0xfffc0fff, 0x00000100,
1338         0x6ed8, 0x00010101, 0x00010000,
1339         0x9830, 0xffffffff, 0x00000000,
1340         0x9834, 0xf00fffff, 0x00000400,
1341         0x5bb0, 0x000000f0, 0x00000070,
1342         0x5bc0, 0xf0311fff, 0x80300000,
1343         0x98f8, 0x73773777, 0x12010001,
1344         0x98fc, 0xffffffff, 0x00000010,
1345         0x9b7c, 0x00ff0000, 0x00fc0000,
1346         0x8030, 0x00001f0f, 0x0000100a,
1347         0x2f48, 0x73773777, 0x12010001,
1348         0x2408, 0x000fffff, 0x000c007f,
1349         0x8a14, 0xf000003f, 0x00000007,
1350         0x8b24, 0x3fff3fff, 0x00ffcfff,
1351         0x30a04, 0x0000ff0f, 0x00000000,
1352         0x28a4c, 0x07ffffff, 0x06000000,
1353         0x4d8, 0x00000fff, 0x00000100,
1354         0x3e78, 0x00000001, 0x00000002,
1355         0xc768, 0x00000008, 0x00000008,
1356         0x8c00, 0x000000ff, 0x00000003,
1357         0x214f8, 0x01ff01ff, 0x00000002,
1358         0x21498, 0x007ff800, 0x00200000,
1359         0x2015c, 0xffffffff, 0x00000f40,
1360         0x88c4, 0x001f3ae3, 0x00000082,
1361         0x88d4, 0x0000001f, 0x00000010,
1362         0x30934, 0xffffffff, 0x00000000
1363 };
1364
/*
 * Kalindi MGCG/CGCG init sequence, three words per row.
 * cik_init_golden_registers() programs this table first in both its
 * CHIP_KABINI and CHIP_MULLINS cases, ahead of the golden register tables.
 * The 0x3c020..0x3c044 run repeats the same five values
 * (0x00010000, 0x00030002, 0x00040007, 0x00060005, 0x00090008) twice.
 * Unlike the Bonaire, Spectre and Hawaii variants, this table has no
 * 0xf90 / 0xf98 rows.
 * 0x30800 and 0x55e4 each appear twice; the rows are kept in this order
 * because the table is consumed sequentially -- do not sort or dedupe.
 * NOTE(review): rows look like {register offset, mask, value}, and
 * MGCG/CGCG presumably refers to clock gating -- confirm both.
 */
1365 static const u32 kalindi_mgcg_cgcg_init[] =
1366 {
1367         0xc420, 0xffffffff, 0xfffffffc,
1368         0x30800, 0xffffffff, 0xe0000000,
1369         0x3c2a0, 0xffffffff, 0x00000100,
1370         0x3c208, 0xffffffff, 0x00000100,
1371         0x3c2c0, 0xffffffff, 0x00000100,
1372         0x3c2c8, 0xffffffff, 0x00000100,
1373         0x3c2c4, 0xffffffff, 0x00000100,
1374         0x55e4, 0xffffffff, 0x00600100,
1375         0x3c280, 0xffffffff, 0x00000100,
1376         0x3c214, 0xffffffff, 0x06000100,
1377         0x3c220, 0xffffffff, 0x00000100,
1378         0x3c218, 0xffffffff, 0x06000100,
1379         0x3c204, 0xffffffff, 0x00000100,
1380         0x3c2e0, 0xffffffff, 0x00000100,
1381         0x3c224, 0xffffffff, 0x00000100,
1382         0x3c200, 0xffffffff, 0x00000100,
1383         0x3c230, 0xffffffff, 0x00000100,
1384         0x3c234, 0xffffffff, 0x00000100,
1385         0x3c250, 0xffffffff, 0x00000100,
1386         0x3c254, 0xffffffff, 0x00000100,
1387         0x3c258, 0xffffffff, 0x00000100,
1388         0x3c25c, 0xffffffff, 0x00000100,
1389         0x3c260, 0xffffffff, 0x00000100,
1390         0x3c27c, 0xffffffff, 0x00000100,
1391         0x3c278, 0xffffffff, 0x00000100,
1392         0x3c210, 0xffffffff, 0x06000100,
1393         0x3c290, 0xffffffff, 0x00000100,
1394         0x3c274, 0xffffffff, 0x00000100,
1395         0x3c2b4, 0xffffffff, 0x00000100,
1396         0x3c2b0, 0xffffffff, 0x00000100,
1397         0x3c270, 0xffffffff, 0x00000100,
1398         0x30800, 0xffffffff, 0xe0000000,
1399         0x3c020, 0xffffffff, 0x00010000,
1400         0x3c024, 0xffffffff, 0x00030002,
1401         0x3c028, 0xffffffff, 0x00040007,
1402         0x3c02c, 0xffffffff, 0x00060005,
1403         0x3c030, 0xffffffff, 0x00090008,
1404         0x3c034, 0xffffffff, 0x00010000,
1405         0x3c038, 0xffffffff, 0x00030002,
1406         0x3c03c, 0xffffffff, 0x00040007,
1407         0x3c040, 0xffffffff, 0x00060005,
1408         0x3c044, 0xffffffff, 0x00090008,
1409         0x3c000, 0xffffffff, 0x96e00200,
1410         0x8708, 0xffffffff, 0x00900100,
1411         0xc424, 0xffffffff, 0x0020003f,
1412         0x38, 0xffffffff, 0x0140001c,
1413         0x3c, 0x000f0000, 0x000f0000,
1414         0x220, 0xffffffff, 0xC060000C,
1415         0x224, 0xc0000fff, 0x00000100,
1416         0x20a8, 0xffffffff, 0x00000104,
1417         0x55e4, 0xff000fff, 0x00000100,
1418         0x30cc, 0xc0000fff, 0x00000104,
1419         0xc1e4, 0x00000001, 0x00000001,
1420         0xd00c, 0xff000ff0, 0x00000100,
1421         0xd80c, 0xff000ff0, 0x00000100
1422 };
1423
/*
 * Hawaii SPM golden setting: a single three-word row, with the same
 * contents as the Bonaire, Spectre and Kalindi SPM tables.
 * NOTE(review): presumably programmed for CHIP_HAWAII by
 * cik_init_golden_registers() -- verify at the call site.
 * NOTE(review): rows look like {register offset, mask, value} -- confirm
 * against radeon_program_register_sequence().
 */
1424 static const u32 hawaii_golden_spm_registers[] =
1425 {
1426         0x30800, 0xe0ffffff, 0xe0000000
1427 };
1428
/*
 * Hawaii "common" golden settings: five three-word rows.  Unlike the
 * other chips' common tables (0xc770..0xc79c), this one covers 0x30800,
 * 0x28350, 0x28354, 0x9a10 and 0x98f8; the 0xc77x/0xc79x rows for Hawaii
 * live in hawaii_golden_registers instead.
 * NOTE(review): presumably programmed for CHIP_HAWAII by
 * cik_init_golden_registers() -- verify at the call site.
 * NOTE(review): rows look like {register offset, mask, value} -- confirm
 * against radeon_program_register_sequence().
 */
1429 static const u32 hawaii_golden_common_registers[] =
1430 {
1431         0x30800, 0xffffffff, 0xe0000000,
1432         0x28350, 0xffffffff, 0x3a00161a,
1433         0x28354, 0xffffffff, 0x0000002e,
1434         0x9a10, 0xffffffff, 0x00018208,
1435         0x98f8, 0xffffffff, 0x12011003
1436 };
1437
/*
 * Hawaii chip-specific golden settings, three words per row.
 * cik_init_golden_registers() programs this table in its CHIP_HAWAII
 * case, right after hawaii_mgcg_cgcg_init.
 * NOTE(review): rows look like {register offset, mask, value} -- confirm
 * against radeon_program_register_sequence().  Several rows carry value
 * bits outside the mask (e.g. 0x8c00: mask 0xff, value 0x800; 0xae00:
 * mask 0x00100000, value 0x000ff07c); how those are applied depends on
 * that helper -- verify before editing.
 */
1438 static const u32 hawaii_golden_registers[] =
1439 {
1440         0x3354, 0x00000333, 0x00000333,
1441         0x9a10, 0x00010000, 0x00058208,
1442         0x9830, 0xffffffff, 0x00000000,
1443         0x9834, 0xf00fffff, 0x00000400,
1444         0x9838, 0x0002021c, 0x00020200,
1445         0xc78, 0x00000080, 0x00000000,
1446         0x5bb0, 0x000000f0, 0x00000070,
1447         0x5bc0, 0xf0311fff, 0x80300000,
1448         0x350c, 0x00810000, 0x408af000,
1449         0x7030, 0x31000111, 0x00000011,
1450         0x2f48, 0x73773777, 0x12010001,
1451         0x2120, 0x0000007f, 0x0000001b,
1452         0x21dc, 0x00007fb6, 0x00002191,
1453         0x3628, 0x0000003f, 0x0000000a,
1454         0x362c, 0x0000003f, 0x0000000a,
1455         0x2ae4, 0x00073ffe, 0x000022a2,
1456         0x240c, 0x000007ff, 0x00000000,
1457         0x8bf0, 0x00002001, 0x00000001,
1458         0x8b24, 0xffffffff, 0x00ffffff,
1459         0x30a04, 0x0000ff0f, 0x00000000,
1460         0x28a4c, 0x07ffffff, 0x06000000,
1461         0x3e78, 0x00000001, 0x00000002,
1462         0xc768, 0x00000008, 0x00000008,
1463         0xc770, 0x00000f00, 0x00000800,
1464         0xc774, 0x00000f00, 0x00000800,
1465         0xc798, 0x00ffffff, 0x00ff7fbf,
1466         0xc79c, 0x00ffffff, 0x00ff7faf,
1467         0x8c00, 0x000000ff, 0x00000800,
1468         0xe40, 0x00001fff, 0x00001fff,
1469         0x9060, 0x0000007f, 0x00000020,
1470         0x9508, 0x00010000, 0x00010000,
1471         0xae00, 0x00100000, 0x000ff07c,
1472         0xac14, 0x000003ff, 0x0000000f,
1473         0xac10, 0xffffffff, 0x7564fdec,
1474         0xac0c, 0xffffffff, 0x3120b9a8,
1475         0xac08, 0x20000000, 0x0f9c0000
1476 };
1477
/*
 * Hawaii MGCG/CGCG init sequence, three words per row.
 * cik_init_golden_registers() programs this table first in its
 * CHIP_HAWAII case, ahead of hawaii_golden_registers.
 * The 0x3c020..0x3c0f8 run repeats the same five values
 * (0x00010000, 0x00030002, 0x00040007, 0x00060005, 0x00090008) eleven
 * times.  Compared with the Bonaire/Spectre/Kalindi variants this table
 * uses different values for 0xc420, the first 0x55e4 row and 0x3c000, and
 * adds rows for 0xc318, 0x3350, 0x15c0, 0x55e8 and 0x2f50.
 * 0x30800 and 0x55e4 each appear twice; the rows are kept in this order
 * because the table is consumed sequentially -- do not sort or dedupe.
 * NOTE(review): rows look like {register offset, mask, value}, and
 * MGCG/CGCG presumably refers to clock gating -- confirm both.
 */
1478 static const u32 hawaii_mgcg_cgcg_init[] =
1479 {
1480         0xc420, 0xffffffff, 0xfffffffd,
1481         0x30800, 0xffffffff, 0xe0000000,
1482         0x3c2a0, 0xffffffff, 0x00000100,
1483         0x3c208, 0xffffffff, 0x00000100,
1484         0x3c2c0, 0xffffffff, 0x00000100,
1485         0x3c2c8, 0xffffffff, 0x00000100,
1486         0x3c2c4, 0xffffffff, 0x00000100,
1487         0x55e4, 0xffffffff, 0x00200100,
1488         0x3c280, 0xffffffff, 0x00000100,
1489         0x3c214, 0xffffffff, 0x06000100,
1490         0x3c220, 0xffffffff, 0x00000100,
1491         0x3c218, 0xffffffff, 0x06000100,
1492         0x3c204, 0xffffffff, 0x00000100,
1493         0x3c2e0, 0xffffffff, 0x00000100,
1494         0x3c224, 0xffffffff, 0x00000100,
1495         0x3c200, 0xffffffff, 0x00000100,
1496         0x3c230, 0xffffffff, 0x00000100,
1497         0x3c234, 0xffffffff, 0x00000100,
1498         0x3c250, 0xffffffff, 0x00000100,
1499         0x3c254, 0xffffffff, 0x00000100,
1500         0x3c258, 0xffffffff, 0x00000100,
1501         0x3c25c, 0xffffffff, 0x00000100,
1502         0x3c260, 0xffffffff, 0x00000100,
1503         0x3c27c, 0xffffffff, 0x00000100,
1504         0x3c278, 0xffffffff, 0x00000100,
1505         0x3c210, 0xffffffff, 0x06000100,
1506         0x3c290, 0xffffffff, 0x00000100,
1507         0x3c274, 0xffffffff, 0x00000100,
1508         0x3c2b4, 0xffffffff, 0x00000100,
1509         0x3c2b0, 0xffffffff, 0x00000100,
1510         0x3c270, 0xffffffff, 0x00000100,
1511         0x30800, 0xffffffff, 0xe0000000,
1512         0x3c020, 0xffffffff, 0x00010000,
1513         0x3c024, 0xffffffff, 0x00030002,
1514         0x3c028, 0xffffffff, 0x00040007,
1515         0x3c02c, 0xffffffff, 0x00060005,
1516         0x3c030, 0xffffffff, 0x00090008,
1517         0x3c034, 0xffffffff, 0x00010000,
1518         0x3c038, 0xffffffff, 0x00030002,
1519         0x3c03c, 0xffffffff, 0x00040007,
1520         0x3c040, 0xffffffff, 0x00060005,
1521         0x3c044, 0xffffffff, 0x00090008,
1522         0x3c048, 0xffffffff, 0x00010000,
1523         0x3c04c, 0xffffffff, 0x00030002,
1524         0x3c050, 0xffffffff, 0x00040007,
1525         0x3c054, 0xffffffff, 0x00060005,
1526         0x3c058, 0xffffffff, 0x00090008,
1527         0x3c05c, 0xffffffff, 0x00010000,
1528         0x3c060, 0xffffffff, 0x00030002,
1529         0x3c064, 0xffffffff, 0x00040007,
1530         0x3c068, 0xffffffff, 0x00060005,
1531         0x3c06c, 0xffffffff, 0x00090008,
1532         0x3c070, 0xffffffff, 0x00010000,
1533         0x3c074, 0xffffffff, 0x00030002,
1534         0x3c078, 0xffffffff, 0x00040007,
1535         0x3c07c, 0xffffffff, 0x00060005,
1536         0x3c080, 0xffffffff, 0x00090008,
1537         0x3c084, 0xffffffff, 0x00010000,
1538         0x3c088, 0xffffffff, 0x00030002,
1539         0x3c08c, 0xffffffff, 0x00040007,
1540         0x3c090, 0xffffffff, 0x00060005,
1541         0x3c094, 0xffffffff, 0x00090008,
1542         0x3c098, 0xffffffff, 0x00010000,
1543         0x3c09c, 0xffffffff, 0x00030002,
1544         0x3c0a0, 0xffffffff, 0x00040007,
1545         0x3c0a4, 0xffffffff, 0x00060005,
1546         0x3c0a8, 0xffffffff, 0x00090008,
1547         0x3c0ac, 0xffffffff, 0x00010000,
1548         0x3c0b0, 0xffffffff, 0x00030002,
1549         0x3c0b4, 0xffffffff, 0x00040007,
1550         0x3c0b8, 0xffffffff, 0x00060005,
1551         0x3c0bc, 0xffffffff, 0x00090008,
1552         0x3c0c0, 0xffffffff, 0x00010000,
1553         0x3c0c4, 0xffffffff, 0x00030002,
1554         0x3c0c8, 0xffffffff, 0x00040007,
1555         0x3c0cc, 0xffffffff, 0x00060005,
1556         0x3c0d0, 0xffffffff, 0x00090008,
1557         0x3c0d4, 0xffffffff, 0x00010000,
1558         0x3c0d8, 0xffffffff, 0x00030002,
1559         0x3c0dc, 0xffffffff, 0x00040007,
1560         0x3c0e0, 0xffffffff, 0x00060005,
1561         0x3c0e4, 0xffffffff, 0x00090008,
1562         0x3c0e8, 0xffffffff, 0x00010000,
1563         0x3c0ec, 0xffffffff, 0x00030002,
1564         0x3c0f0, 0xffffffff, 0x00040007,
1565         0x3c0f4, 0xffffffff, 0x00060005,
1566         0x3c0f8, 0xffffffff, 0x00090008,
1567         0xc318, 0xffffffff, 0x00020200,
1568         0x3350, 0xffffffff, 0x00000200,
1569         0x15c0, 0xffffffff, 0x00000400,
1570         0x55e8, 0xffffffff, 0x00000000,
1571         0x2f50, 0xffffffff, 0x00000902,
1572         0x3c000, 0xffffffff, 0x96940200,
1573         0x8708, 0xffffffff, 0x00900100,
1574         0xc424, 0xffffffff, 0x0020003f,
1575         0x38, 0xffffffff, 0x0140001c,
1576         0x3c, 0x000f0000, 0x000f0000,
1577         0x220, 0xffffffff, 0xc060000c,
1578         0x224, 0xc0000fff, 0x00000100,
1579         0xf90, 0xffffffff, 0x00000100,
1580         0xf98, 0x00000101, 0x00000000,
1581         0x20a8, 0xffffffff, 0x00000104,
1582         0x55e4, 0xff000fff, 0x00000100,
1583         0x30cc, 0xc0000fff, 0x00000104,
1584         0xc1e4, 0x00000001, 0x00000001,
1585         0xd00c, 0xff000ff0, 0x00000100,
1586         0xd80c, 0xff000ff0, 0x00000100
1587 };
1588
/*
 * Godavari chip-specific golden settings, three words per row.
 * cik_init_golden_registers() programs this table second in its
 * CHIP_MULLINS case, between kalindi_mgcg_cgcg_init and the Kalindi
 * common/SPM tables.
 * The fourth row targets 0x9834, matching the 0x9830/0x9834 pair (same
 * mask and value) in the Kalindi, Bonaire and Hawaii tables; an earlier
 * spelling, 0x98302, was not 4-byte aligned and so could not name a
 * 32-bit register.
 * NOTE(review): rows look like {register offset, mask, value} -- confirm
 * against radeon_program_register_sequence().  Some rows carry value bits
 * outside the mask (e.g. 0xd014: mask 0x00010000, value 0x00810001); how
 * those are applied depends on that helper -- verify before editing.
 */
1589 static const u32 godavari_golden_registers[] =
1590 {
1591         0x55e4, 0xff607fff, 0xfc000100,
1592         0x6ed8, 0x00010101, 0x00010000,
1593         0x9830, 0xffffffff, 0x00000000,
1594         0x9834, 0xf00fffff, 0x00000400,
1595         0x6130, 0xffffffff, 0x00010000,
1596         0x5bb0, 0x000000f0, 0x00000070,
1597         0x5bc0, 0xf0311fff, 0x80300000,
1598         0x98f8, 0x73773777, 0x12010001,
1599         0x98fc, 0xffffffff, 0x00000010,
1600         0x8030, 0x00001f0f, 0x0000100a,
1601         0x2f48, 0x73773777, 0x12010001,
1602         0x2408, 0x000fffff, 0x000c007f,
1603         0x8a14, 0xf000003f, 0x00000007,
1604         0x8b24, 0xffffffff, 0x00ff0fff,
1605         0x30a04, 0x0000ff0f, 0x00000000,
1606         0x28a4c, 0x07ffffff, 0x06000000,
1607         0x4d8, 0x00000fff, 0x00000100,
1608         0xd014, 0x00010000, 0x00810001,
1609         0xd814, 0x00010000, 0x00810001,
1610         0x3e78, 0x00000001, 0x00000002,
1611         0xc768, 0x00000008, 0x00000008,
1612         0xc770, 0x00000f00, 0x00000800,
1613         0xc774, 0x00000f00, 0x00000800,
1614         0xc798, 0x00ffffff, 0x00ff7fbf,
1615         0xc79c, 0x00ffffff, 0x00ff7faf,
1616         0x8c00, 0x000000ff, 0x00000001,
1617         0x214f8, 0x01ff01ff, 0x00000002,
1618         0x21498, 0x007ff800, 0x00200000,
1619         0x2015c, 0xffffffff, 0x00000f40,
1620         0x88c4, 0x001f3ae3, 0x00000082,
1621         0x88d4, 0x0000001f, 0x00000010,
1622         0x30934, 0xffffffff, 0x00000000
1623 };
1624
1625
1626 static void cik_init_golden_registers(struct radeon_device *rdev)
1627 {
1628         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1629         mutex_lock(&rdev->grbm_idx_mutex);
1630         switch (rdev->family) {
1631         case CHIP_BONAIRE:
1632                 radeon_program_register_sequence(rdev,
1633                                                  bonaire_mgcg_cgcg_init,
1634                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1635                 radeon_program_register_sequence(rdev,
1636                                                  bonaire_golden_registers,
1637                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1638                 radeon_program_register_sequence(rdev,
1639                                                  bonaire_golden_common_registers,
1640                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1641                 radeon_program_register_sequence(rdev,
1642                                                  bonaire_golden_spm_registers,
1643                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1644                 break;
1645         case CHIP_KABINI:
1646                 radeon_program_register_sequence(rdev,
1647                                                  kalindi_mgcg_cgcg_init,
1648                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1649                 radeon_program_register_sequence(rdev,
1650                                                  kalindi_golden_registers,
1651                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1652                 radeon_program_register_sequence(rdev,
1653                                                  kalindi_golden_common_registers,
1654                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1655                 radeon_program_register_sequence(rdev,
1656                                                  kalindi_golden_spm_registers,
1657                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1658                 break;
1659         case CHIP_MULLINS:
1660                 radeon_program_register_sequence(rdev,
1661                                                  kalindi_mgcg_cgcg_init,
1662                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1663                 radeon_program_register_sequence(rdev,
1664                                                  godavari_golden_registers,
1665                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1666                 radeon_program_register_sequence(rdev,
1667                                                  kalindi_golden_common_registers,
1668                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1669                 radeon_program_register_sequence(rdev,
1670                                                  kalindi_golden_spm_registers,
1671                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1672                 break;
1673         case CHIP_KAVERI:
1674                 radeon_program_register_sequence(rdev,
1675                                                  spectre_mgcg_cgcg_init,
1676                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1677                 radeon_program_register_sequence(rdev,
1678                                                  spectre_golden_registers,
1679                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1680                 radeon_program_register_sequence(rdev,
1681                                                  spectre_golden_common_registers,
1682                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1683                 radeon_program_register_sequence(rdev,
1684                                                  spectre_golden_spm_registers,
1685                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1686                 break;
1687         case CHIP_HAWAII:
1688                 radeon_program_register_sequence(rdev,
1689                                                  hawaii_mgcg_cgcg_init,
1690                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1691                 radeon_program_register_sequence(rdev,
1692                                                  hawaii_golden_registers,
1693                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1694                 radeon_program_register_sequence(rdev,
1695                                                  hawaii_golden_common_registers,
1696                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1697                 radeon_program_register_sequence(rdev,
1698                                                  hawaii_golden_spm_registers,
1699                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1700                 break;
1701         default:
1702                 break;
1703         }
1704         mutex_unlock(&rdev->grbm_idx_mutex);
1705 }
1706
1707 /**
1708  * cik_get_xclk - get the xclk
1709  *
1710  * @rdev: radeon_device pointer
1711  *
1712  * Returns the reference clock used by the gfx engine
1713  * (CIK).
1714  */
1715 u32 cik_get_xclk(struct radeon_device *rdev)
1716 {
1717         u32 reference_clock = rdev->clock.spll.reference_freq;
1718
1719         if (rdev->flags & RADEON_IS_IGP) {
1720                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1721                         return reference_clock / 2;
1722         } else {
1723                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1724                         return reference_clock / 4;
1725         }
1726         return reference_clock;
1727 }
1728
1729 /**
1730  * cik_mm_rdoorbell - read a doorbell dword
1731  *
1732  * @rdev: radeon_device pointer
1733  * @index: doorbell index
1734  *
1735  * Returns the value in the doorbell aperture at the
1736  * requested doorbell index (CIK).
1737  */
1738 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1739 {
1740         if (index < rdev->doorbell.num_doorbells) {
1741                 return readl(rdev->doorbell.ptr + index);
1742         } else {
1743                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1744                 return 0;
1745         }
1746 }
1747
1748 /**
1749  * cik_mm_wdoorbell - write a doorbell dword
1750  *
1751  * @rdev: radeon_device pointer
1752  * @index: doorbell index
1753  * @v: value to write
1754  *
1755  * Writes @v to the doorbell aperture at the
1756  * requested doorbell index (CIK).
1757  */
1758 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1759 {
1760         if (index < rdev->doorbell.num_doorbells) {
1761                 writel(v, rdev->doorbell.ptr + index);
1762         } else {
1763                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1764         }
1765 }
1766
1767 #define BONAIRE_IO_MC_REGS_SIZE 36
1768
1769 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1770 {
1771         {0x00000070, 0x04400000},
1772         {0x00000071, 0x80c01803},
1773         {0x00000072, 0x00004004},
1774         {0x00000073, 0x00000100},
1775         {0x00000074, 0x00ff0000},
1776         {0x00000075, 0x34000000},
1777         {0x00000076, 0x08000014},
1778         {0x00000077, 0x00cc08ec},
1779         {0x00000078, 0x00000400},
1780         {0x00000079, 0x00000000},
1781         {0x0000007a, 0x04090000},
1782         {0x0000007c, 0x00000000},
1783         {0x0000007e, 0x4408a8e8},
1784         {0x0000007f, 0x00000304},
1785         {0x00000080, 0x00000000},
1786         {0x00000082, 0x00000001},
1787         {0x00000083, 0x00000002},
1788         {0x00000084, 0xf3e4f400},
1789         {0x00000085, 0x052024e3},
1790         {0x00000087, 0x00000000},
1791         {0x00000088, 0x01000000},
1792         {0x0000008a, 0x1c0a0000},
1793         {0x0000008b, 0xff010000},
1794         {0x0000008d, 0xffffefff},
1795         {0x0000008e, 0xfff3efff},
1796         {0x0000008f, 0xfff3efbf},
1797         {0x00000092, 0xf7ffffff},
1798         {0x00000093, 0xffffff7f},
1799         {0x00000095, 0x00101101},
1800         {0x00000096, 0x00000fff},
1801         {0x00000097, 0x00116fff},
1802         {0x00000098, 0x60010000},
1803         {0x00000099, 0x10010000},
1804         {0x0000009a, 0x00006000},
1805         {0x0000009b, 0x00001000},
1806         {0x0000009f, 0x00b48000}
1807 };
1808
1809 #define HAWAII_IO_MC_REGS_SIZE 22
1810
1811 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1812 {
1813         {0x0000007d, 0x40000000},
1814         {0x0000007e, 0x40180304},
1815         {0x0000007f, 0x0000ff00},
1816         {0x00000081, 0x00000000},
1817         {0x00000083, 0x00000800},
1818         {0x00000086, 0x00000000},
1819         {0x00000087, 0x00000100},
1820         {0x00000088, 0x00020100},
1821         {0x00000089, 0x00000000},
1822         {0x0000008b, 0x00040000},
1823         {0x0000008c, 0x00000100},
1824         {0x0000008e, 0xff010000},
1825         {0x00000090, 0xffffefff},
1826         {0x00000091, 0xfff3efff},
1827         {0x00000092, 0xfff3efbf},
1828         {0x00000093, 0xf7ffffff},
1829         {0x00000094, 0xffffff7f},
1830         {0x00000095, 0x00000fff},
1831         {0x00000096, 0x00116fff},
1832         {0x00000097, 0x60010000},
1833         {0x00000098, 0x10010000},
1834         {0x0000009f, 0x00c79000}
1835 };
1836
1837
1838 /**
1839  * cik_srbm_select - select specific register instances
1840  *
1841  * @rdev: radeon_device pointer
1842  * @me: selected ME (micro engine)
1843  * @pipe: pipe
1844  * @queue: queue
1845  * @vmid: VMID
1846  *
1847  * Switches the currently active registers instances.  Some
1848  * registers are instanced per VMID, others are instanced per
1849  * me/pipe/queue combination.
1850  */
1851 static void cik_srbm_select(struct radeon_device *rdev,
1852                             u32 me, u32 pipe, u32 queue, u32 vmid)
1853 {
1854         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1855                              MEID(me & 0x3) |
1856                              VMID(vmid & 0xf) |
1857                              QUEUEID(queue & 0x7));
1858         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1859 }
1860
1861 /* ucode loading */
1862 /**
1863  * ci_mc_load_microcode - load MC ucode into the hw
1864  *
1865  * @rdev: radeon_device pointer
1866  *
1867  * Load the GDDR MC ucode into the hw (CIK).
1868  * Returns 0 on success, error on failure.
1869  */
1870 int ci_mc_load_microcode(struct radeon_device *rdev)
1871 {
1872         const __be32 *fw_data = NULL;
1873         const __le32 *new_fw_data = NULL;
1874         u32 running, blackout = 0, tmp;
1875         u32 *io_mc_regs = NULL;
1876         const __le32 *new_io_mc_regs = NULL;
1877         int i, regs_size, ucode_size;
1878
1879         if (!rdev->mc_fw)
1880                 return -EINVAL;
1881
1882         if (rdev->new_fw) {
1883                 const struct mc_firmware_header_v1_0 *hdr =
1884                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1885
1886                 radeon_ucode_print_mc_hdr(&hdr->header);
1887
1888                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1889                 new_io_mc_regs = (const __le32 *)
1890                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1891                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1892                 new_fw_data = (const __le32 *)
1893                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1894         } else {
1895                 ucode_size = rdev->mc_fw->size / 4;
1896
1897                 switch (rdev->family) {
1898                 case CHIP_BONAIRE:
1899                         io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1900                         regs_size = BONAIRE_IO_MC_REGS_SIZE;
1901                         break;
1902                 case CHIP_HAWAII:
1903                         io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1904                         regs_size = HAWAII_IO_MC_REGS_SIZE;
1905                         break;
1906                 default:
1907                         return -EINVAL;
1908                 }
1909                 fw_data = (const __be32 *)rdev->mc_fw->data;
1910         }
1911
1912         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1913
1914         if (running == 0) {
1915                 if (running) {
1916                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1917                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1918                 }
1919
1920                 /* reset the engine and set to writable */
1921                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1922                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1923
1924                 /* load mc io regs */
1925                 for (i = 0; i < regs_size; i++) {
1926                         if (rdev->new_fw) {
1927                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1928                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1929                         } else {
1930                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1931                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1932                         }
1933                 }
1934
1935                 tmp = RREG32(MC_SEQ_MISC0);
1936                 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1937                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1938                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1939                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1940                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1941                 }
1942
1943                 /* load the MC ucode */
1944                 for (i = 0; i < ucode_size; i++) {
1945                         if (rdev->new_fw)
1946                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1947                         else
1948                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1949                 }
1950
1951                 /* put the engine back into the active state */
1952                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1953                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1954                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1955
1956                 /* wait for training to complete */
1957                 for (i = 0; i < rdev->usec_timeout; i++) {
1958                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1959                                 break;
1960                         udelay(1);
1961                 }
1962                 for (i = 0; i < rdev->usec_timeout; i++) {
1963                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1964                                 break;
1965                         udelay(1);
1966                 }
1967
1968                 if (running)
1969                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1970         }
1971
1972         return 0;
1973 }
1974
1975 /**
1976  * cik_init_microcode - load ucode images from disk
1977  *
1978  * @rdev: radeon_device pointer
1979  *
1980  * Use the firmware interface to load the ucode images into
1981  * the driver (not loaded into hw).
1982  * Returns 0 on success, error on failure.
1983  */
1984 static int cik_init_microcode(struct radeon_device *rdev)
1985 {
1986         const char *chip_name;
1987         const char *new_chip_name;
1988         size_t pfp_req_size, me_req_size, ce_req_size,
1989                 mec_req_size, rlc_req_size, mc_req_size = 0,
1990                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1991         char fw_name[30];
1992         int new_fw = 0;
1993         int err;
1994         int num_fw;
1995         bool new_smc = false;
1996
1997         DRM_DEBUG("\n");
1998
1999         switch (rdev->family) {
2000         case CHIP_BONAIRE:
2001                 chip_name = "BONAIRE";
2002                 if ((rdev->pdev->revision == 0x80) ||
2003                     (rdev->pdev->revision == 0x81) ||
2004                     (rdev->pdev->device == 0x665f))
2005                         new_smc = true;
2006                 new_chip_name = "bonaire";
2007                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2008                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2009                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2010                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2011                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2012                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2013                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2014                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2015                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2016                 num_fw = 8;
2017                 break;
2018         case CHIP_HAWAII:
2019                 chip_name = "HAWAII";
2020                 if (rdev->pdev->revision == 0x80)
2021                         new_smc = true;
2022                 new_chip_name = "hawaii";
2023                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2024                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2025                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2026                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2027                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2028                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2029                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2030                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2031                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2032                 num_fw = 8;
2033                 break;
2034         case CHIP_KAVERI:
2035                 chip_name = "KAVERI";
2036                 new_chip_name = "kaveri";
2037                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2038                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2039                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2040                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2041                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2042                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2043                 num_fw = 7;
2044                 break;
2045         case CHIP_KABINI:
2046                 chip_name = "KABINI";
2047                 new_chip_name = "kabini";
2048                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2049                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2050                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2051                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2052                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2053                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2054                 num_fw = 6;
2055                 break;
2056         case CHIP_MULLINS:
2057                 chip_name = "MULLINS";
2058                 new_chip_name = "mullins";
2059                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2060                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2061                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2062                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2063                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2064                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2065                 num_fw = 6;
2066                 break;
2067         default: BUG();
2068         }
2069
2070         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2071
2072         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2073         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2074         if (err) {
2075                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2076                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2077                 if (err)
2078                         goto out;
2079                 if (rdev->pfp_fw->size != pfp_req_size) {
2080                         printk(KERN_ERR
2081                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2082                                rdev->pfp_fw->size, fw_name);
2083                         err = -EINVAL;
2084                         goto out;
2085                 }
2086         } else {
2087                 err = radeon_ucode_validate(rdev->pfp_fw);
2088                 if (err) {
2089                         printk(KERN_ERR
2090                                "cik_fw: validation failed for firmware \"%s\"\n",
2091                                fw_name);
2092                         goto out;
2093                 } else {
2094                         new_fw++;
2095                 }
2096         }
2097
2098         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2099         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2100         if (err) {
2101                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2102                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2103                 if (err)
2104                         goto out;
2105                 if (rdev->me_fw->size != me_req_size) {
2106                         printk(KERN_ERR
2107                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2108                                rdev->me_fw->size, fw_name);
2109                         err = -EINVAL;
2110                 }
2111         } else {
2112                 err = radeon_ucode_validate(rdev->me_fw);
2113                 if (err) {
2114                         printk(KERN_ERR
2115                                "cik_fw: validation failed for firmware \"%s\"\n",
2116                                fw_name);
2117                         goto out;
2118                 } else {
2119                         new_fw++;
2120                 }
2121         }
2122
2123         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2124         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2125         if (err) {
2126                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2127                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2128                 if (err)
2129                         goto out;
2130                 if (rdev->ce_fw->size != ce_req_size) {
2131                         printk(KERN_ERR
2132                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2133                                rdev->ce_fw->size, fw_name);
2134                         err = -EINVAL;
2135                 }
2136         } else {
2137                 err = radeon_ucode_validate(rdev->ce_fw);
2138                 if (err) {
2139                         printk(KERN_ERR
2140                                "cik_fw: validation failed for firmware \"%s\"\n",
2141                                fw_name);
2142                         goto out;
2143                 } else {
2144                         new_fw++;
2145                 }
2146         }
2147
2148         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2149         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2150         if (err) {
2151                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2152                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2153                 if (err)
2154                         goto out;
2155                 if (rdev->mec_fw->size != mec_req_size) {
2156                         printk(KERN_ERR
2157                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2158                                rdev->mec_fw->size, fw_name);
2159                         err = -EINVAL;
2160                 }
2161         } else {
2162                 err = radeon_ucode_validate(rdev->mec_fw);
2163                 if (err) {
2164                         printk(KERN_ERR
2165                                "cik_fw: validation failed for firmware \"%s\"\n",
2166                                fw_name);
2167                         goto out;
2168                 } else {
2169                         new_fw++;
2170                 }
2171         }
2172
2173         if (rdev->family == CHIP_KAVERI) {
2174                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2175                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2176                 if (err) {
2177                         goto out;
2178                 } else {
2179                         err = radeon_ucode_validate(rdev->mec2_fw);
2180                         if (err) {
2181                                 goto out;
2182                         } else {
2183                                 new_fw++;
2184                         }
2185                 }
2186         }
2187
2188         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2189         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2190         if (err) {
2191                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2192                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2193                 if (err)
2194                         goto out;
2195                 if (rdev->rlc_fw->size != rlc_req_size) {
2196                         printk(KERN_ERR
2197                                "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2198                                rdev->rlc_fw->size, fw_name);
2199                         err = -EINVAL;
2200                 }
2201         } else {
2202                 err = radeon_ucode_validate(rdev->rlc_fw);
2203                 if (err) {
2204                         printk(KERN_ERR
2205                                "cik_fw: validation failed for firmware \"%s\"\n",
2206                                fw_name);
2207                         goto out;
2208                 } else {
2209                         new_fw++;
2210                 }
2211         }
2212
2213         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2214         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2215         if (err) {
2216                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2217                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2218                 if (err)
2219                         goto out;
2220                 if (rdev->sdma_fw->size != sdma_req_size) {
2221                         printk(KERN_ERR
2222                                "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2223                                rdev->sdma_fw->size, fw_name);
2224                         err = -EINVAL;
2225                 }
2226         } else {
2227                 err = radeon_ucode_validate(rdev->sdma_fw);
2228                 if (err) {
2229                         printk(KERN_ERR
2230                                "cik_fw: validation failed for firmware \"%s\"\n",
2231                                fw_name);
2232                         goto out;
2233                 } else {
2234                         new_fw++;
2235                 }
2236         }
2237
2238         /* No SMC, MC ucode on APUs */
2239         if (!(rdev->flags & RADEON_IS_IGP)) {
2240                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2241                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2242                 if (err) {
2243                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2244                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2245                         if (err) {
2246                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2247                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2248                                 if (err)
2249                                         goto out;
2250                         }
2251                         if ((rdev->mc_fw->size != mc_req_size) &&
2252                             (rdev->mc_fw->size != mc2_req_size)){
2253                                 printk(KERN_ERR
2254                                        "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2255                                        rdev->mc_fw->size, fw_name);
2256                                 err = -EINVAL;
2257                         }
2258                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2259                 } else {
2260                         err = radeon_ucode_validate(rdev->mc_fw);
2261                         if (err) {
2262                                 printk(KERN_ERR
2263                                        "cik_fw: validation failed for firmware \"%s\"\n",
2264                                        fw_name);
2265                                 goto out;
2266                         } else {
2267                                 new_fw++;
2268                         }
2269                 }
2270
2271                 if (new_smc)
2272                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2273                 else
2274                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2275                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2276                 if (err) {
2277                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2278                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2279                         if (err) {
2280                                 printk(KERN_ERR
2281                                        "smc: error loading firmware \"%s\"\n",
2282                                        fw_name);
2283                                 release_firmware(rdev->smc_fw);
2284                                 rdev->smc_fw = NULL;
2285                                 err = 0;
2286                         } else if (rdev->smc_fw->size != smc_req_size) {
2287                                 printk(KERN_ERR
2288                                        "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2289                                        rdev->smc_fw->size, fw_name);
2290                                 err = -EINVAL;
2291                         }
2292                 } else {
2293                         err = radeon_ucode_validate(rdev->smc_fw);
2294                         if (err) {
2295                                 printk(KERN_ERR
2296                                        "cik_fw: validation failed for firmware \"%s\"\n",
2297                                        fw_name);
2298                                 goto out;
2299                         } else {
2300                                 new_fw++;
2301                         }
2302                 }
2303         }
2304
2305         if (new_fw == 0) {
2306                 rdev->new_fw = false;
2307         } else if (new_fw < num_fw) {
2308                 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2309                 err = -EINVAL;
2310         } else {
2311                 rdev->new_fw = true;
2312         }
2313
2314 out:
2315         if (err) {
2316                 if (err != -EINVAL)
2317                         printk(KERN_ERR
2318                                "cik_cp: Failed to load firmware \"%s\"\n",
2319                                fw_name);
2320                 release_firmware(rdev->pfp_fw);
2321                 rdev->pfp_fw = NULL;
2322                 release_firmware(rdev->me_fw);
2323                 rdev->me_fw = NULL;
2324                 release_firmware(rdev->ce_fw);
2325                 rdev->ce_fw = NULL;
2326                 release_firmware(rdev->mec_fw);
2327                 rdev->mec_fw = NULL;
2328                 release_firmware(rdev->mec2_fw);
2329                 rdev->mec2_fw = NULL;
2330                 release_firmware(rdev->rlc_fw);
2331                 rdev->rlc_fw = NULL;
2332                 release_firmware(rdev->sdma_fw);
2333                 rdev->sdma_fw = NULL;
2334                 release_firmware(rdev->mc_fw);
2335                 rdev->mc_fw = NULL;
2336                 release_firmware(rdev->smc_fw);
2337                 rdev->smc_fw = NULL;
2338         }
2339         return err;
2340 }
2341
2342 /*
2343  * Core functions
2344  */
2345 /**
2346  * cik_tiling_mode_table_init - init the hw tiling table
2347  *
2348  * @rdev: radeon_device pointer
2349  *
2350  * Starting with SI, the tiling setup is done globally in a
2351  * set of 32 tiling modes.  Rather than selecting each set of
2352  * parameters per surface as on older asics, we just select
2353  * which index in the tiling table we want to use, and the
2354  * surface uses those parameters (CIK).
2355  */
2356 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2357 {
2358         u32 *tile = rdev->config.cik.tile_mode_array;
2359         u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2360         const u32 num_tile_mode_states =
2361                         ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2362         const u32 num_secondary_tile_mode_states =
2363                         ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2364         u32 reg_offset, split_equal_to_row_size;
2365         u32 num_pipe_configs;
2366         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2367                 rdev->config.cik.max_shader_engines;
2368
2369         switch (rdev->config.cik.mem_row_size_in_kb) {
2370         case 1:
2371                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2372                 break;
2373         case 2:
2374         default:
2375                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2376                 break;
2377         case 4:
2378                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2379                 break;
2380         }
2381
2382         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2383         if (num_pipe_configs > 8)
2384                 num_pipe_configs = 16;
2385
2386         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2387                 tile[reg_offset] = 0;
2388         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2389                 macrotile[reg_offset] = 0;
2390
2391         switch(num_pipe_configs) {
2392         case 16:
2393                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2397                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2401                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2403                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2405                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2407                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2409                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2410                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2411                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412                            TILE_SPLIT(split_equal_to_row_size));
2413                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2414                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2416                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2417                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2418                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2420                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2421                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2422                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423                            TILE_SPLIT(split_equal_to_row_size));
2424                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2425                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2426                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2427                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2429                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2431                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2434                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2435                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2436                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2438                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2439                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2440                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2441                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2442                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2444                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2446                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2449                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2450                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2451                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2452                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2453                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2454                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2455                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2456                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2457                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2458                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2459                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2460                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2461                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2462                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2464                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2465                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2466                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2467                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2468                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2469                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2470                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2471
2472                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2474                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2475                            NUM_BANKS(ADDR_SURF_16_BANK));
2476                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2478                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2479                            NUM_BANKS(ADDR_SURF_16_BANK));
2480                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483                            NUM_BANKS(ADDR_SURF_16_BANK));
2484                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487                            NUM_BANKS(ADDR_SURF_16_BANK));
2488                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491                            NUM_BANKS(ADDR_SURF_8_BANK));
2492                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2494                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2495                            NUM_BANKS(ADDR_SURF_4_BANK));
2496                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499                            NUM_BANKS(ADDR_SURF_2_BANK));
2500                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2502                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2503                            NUM_BANKS(ADDR_SURF_16_BANK));
2504                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2506                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2507                            NUM_BANKS(ADDR_SURF_16_BANK));
2508                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511                             NUM_BANKS(ADDR_SURF_16_BANK));
2512                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2514                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2515                             NUM_BANKS(ADDR_SURF_8_BANK));
2516                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2519                             NUM_BANKS(ADDR_SURF_4_BANK));
2520                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2522                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2523                             NUM_BANKS(ADDR_SURF_2_BANK));
2524                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2525                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2526                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2527                             NUM_BANKS(ADDR_SURF_2_BANK));
2528
2529                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2530                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2531                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2532                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2533                 break;
2534
2535         case 8:
2536                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2540                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2544                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2546                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2548                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2550                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2552                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2553                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2554                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555                            TILE_SPLIT(split_equal_to_row_size));
2556                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2557                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2559                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2560                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2561                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2563                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2564                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2565                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566                            TILE_SPLIT(split_equal_to_row_size));
2567                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2568                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2569                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2570                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2572                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2573                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2574                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2577                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2578                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2579                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2580                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2581                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2582                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2584                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2585                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2586                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2587                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2588                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2589                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2592                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2593                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2594                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2595                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2596                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2597                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2598                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2599                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2600                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2601                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2602                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2603                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2604                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2605                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2606                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2607                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2608                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2609                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2610                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2611                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2612                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2613                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2614
2615                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2617                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2618                                 NUM_BANKS(ADDR_SURF_16_BANK));
2619                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2621                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622                                 NUM_BANKS(ADDR_SURF_16_BANK));
2623                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2626                                 NUM_BANKS(ADDR_SURF_16_BANK));
2627                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2630                                 NUM_BANKS(ADDR_SURF_16_BANK));
2631                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634                                 NUM_BANKS(ADDR_SURF_8_BANK));
2635                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2637                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2638                                 NUM_BANKS(ADDR_SURF_4_BANK));
2639                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2642                                 NUM_BANKS(ADDR_SURF_2_BANK));
2643                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2645                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2646                                 NUM_BANKS(ADDR_SURF_16_BANK));
2647                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2649                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2650                                 NUM_BANKS(ADDR_SURF_16_BANK));
2651                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2653                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2654                                 NUM_BANKS(ADDR_SURF_16_BANK));
2655                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2658                                 NUM_BANKS(ADDR_SURF_16_BANK));
2659                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2661                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2662                                 NUM_BANKS(ADDR_SURF_8_BANK));
2663                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2665                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2666                                 NUM_BANKS(ADDR_SURF_4_BANK));
2667                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2669                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2670                                 NUM_BANKS(ADDR_SURF_2_BANK));
2671
2672                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2673                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2674                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2675                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2676                 break;
2677
2678         case 4:
2679                 if (num_rbs == 4) {
2680                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2684                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2688                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2690                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2692                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2693                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2694                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2696                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2697                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2698                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699                            TILE_SPLIT(split_equal_to_row_size));
2700                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2701                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2703                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2704                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2705                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2707                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2708                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2709                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2710                            TILE_SPLIT(split_equal_to_row_size));
2711                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2712                            PIPE_CONFIG(ADDR_SURF_P4_16x16));
2713                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2714                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2716                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2717                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2718                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2721                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2722                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2723                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2724                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2725                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2726                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2727                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2728                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2729                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2731                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2732                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2733                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2736                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2737                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2738                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2739                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2740                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2741                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2742                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2743                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2744                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2746                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2747                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2748                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2750                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2751                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2752                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2753                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2754                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2755                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2756                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2758
2759                 } else if (num_rbs < 4) {
2760                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2764                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2765                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2768                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2770                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2772                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2773                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2774                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2775                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2776                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2777                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2778                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2779                            TILE_SPLIT(split_equal_to_row_size));
2780                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2781                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2783                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2784                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2785                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2786                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2787                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2788                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2789                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2790                            TILE_SPLIT(split_equal_to_row_size));
2791                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2792                            PIPE_CONFIG(ADDR_SURF_P4_8x16));
2793                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2794                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2796                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2798                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2801                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2802                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2803                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2804                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2805                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2806                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2807                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2808                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2809                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2811                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2812                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2813                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2816                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2817                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2818                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2819                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2820                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2821                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2822                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2823                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2824                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2825                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2826                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2827                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2828                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2829                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2830                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2831                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2832                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2833                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2834                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2835                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2836                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2837                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2838                 }
2839
2840                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2842                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2843                                 NUM_BANKS(ADDR_SURF_16_BANK));
2844                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2846                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2847                                 NUM_BANKS(ADDR_SURF_16_BANK));
2848                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851                                 NUM_BANKS(ADDR_SURF_16_BANK));
2852                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855                                 NUM_BANKS(ADDR_SURF_16_BANK));
2856                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2859                                 NUM_BANKS(ADDR_SURF_16_BANK));
2860                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2861                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2862                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2863                                 NUM_BANKS(ADDR_SURF_8_BANK));
2864                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2865                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2866                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2867                                 NUM_BANKS(ADDR_SURF_4_BANK));
2868                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2869                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2870                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2871                                 NUM_BANKS(ADDR_SURF_16_BANK));
2872                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2873                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2874                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2875                                 NUM_BANKS(ADDR_SURF_16_BANK));
2876                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2878                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2879                                 NUM_BANKS(ADDR_SURF_16_BANK));
2880                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2882                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2883                                 NUM_BANKS(ADDR_SURF_16_BANK));
2884                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2885                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2886                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2887                                 NUM_BANKS(ADDR_SURF_16_BANK));
2888                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2889                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2890                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2891                                 NUM_BANKS(ADDR_SURF_8_BANK));
2892                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2893                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2894                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2895                                 NUM_BANKS(ADDR_SURF_4_BANK));
2896
2897                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2898                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2899                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2900                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2901                 break;
2902
2903         case 2:
2904                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906                            PIPE_CONFIG(ADDR_SURF_P2) |
2907                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2908                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910                            PIPE_CONFIG(ADDR_SURF_P2) |
2911                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2912                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2914                            PIPE_CONFIG(ADDR_SURF_P2) |
2915                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2916                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2918                            PIPE_CONFIG(ADDR_SURF_P2) |
2919                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2920                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2921                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2922                            PIPE_CONFIG(ADDR_SURF_P2) |
2923                            TILE_SPLIT(split_equal_to_row_size));
2924                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2925                            PIPE_CONFIG(ADDR_SURF_P2) |
2926                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2927                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2928                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2929                            PIPE_CONFIG(ADDR_SURF_P2) |
2930                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2931                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2932                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2933                            PIPE_CONFIG(ADDR_SURF_P2) |
2934                            TILE_SPLIT(split_equal_to_row_size));
2935                 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2936                            PIPE_CONFIG(ADDR_SURF_P2);
2937                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2938                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2939                            PIPE_CONFIG(ADDR_SURF_P2));
2940                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2941                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942                             PIPE_CONFIG(ADDR_SURF_P2) |
2943                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2945                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2946                             PIPE_CONFIG(ADDR_SURF_P2) |
2947                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2948                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2949                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2950                             PIPE_CONFIG(ADDR_SURF_P2) |
2951                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2952                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2953                             PIPE_CONFIG(ADDR_SURF_P2) |
2954                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2955                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2956                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957                             PIPE_CONFIG(ADDR_SURF_P2) |
2958                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2960                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2961                             PIPE_CONFIG(ADDR_SURF_P2) |
2962                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2963                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2964                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2965                             PIPE_CONFIG(ADDR_SURF_P2) |
2966                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2967                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2968                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2969                             PIPE_CONFIG(ADDR_SURF_P2));
2970                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2971                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2972                             PIPE_CONFIG(ADDR_SURF_P2) |
2973                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2974                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2975                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2976                             PIPE_CONFIG(ADDR_SURF_P2) |
2977                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2978                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2979                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2980                             PIPE_CONFIG(ADDR_SURF_P2) |
2981                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2982
2983                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2984                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2985                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986                                 NUM_BANKS(ADDR_SURF_16_BANK));
2987                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2988                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2989                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990                                 NUM_BANKS(ADDR_SURF_16_BANK));
2991                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2993                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994                                 NUM_BANKS(ADDR_SURF_16_BANK));
2995                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998                                 NUM_BANKS(ADDR_SURF_16_BANK));
2999                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3001                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002                                 NUM_BANKS(ADDR_SURF_16_BANK));
3003                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3004                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3005                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006                                 NUM_BANKS(ADDR_SURF_16_BANK));
3007                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3008                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3009                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3010                                 NUM_BANKS(ADDR_SURF_8_BANK));
3011                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3012                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3013                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3014                                 NUM_BANKS(ADDR_SURF_16_BANK));
3015                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3016                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3017                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018                                 NUM_BANKS(ADDR_SURF_16_BANK));
3019                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3020                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3021                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022                                 NUM_BANKS(ADDR_SURF_16_BANK));
3023                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3024                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3025                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026                                 NUM_BANKS(ADDR_SURF_16_BANK));
3027                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3029                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3030                                 NUM_BANKS(ADDR_SURF_16_BANK));
3031                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3032                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3033                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3034                                 NUM_BANKS(ADDR_SURF_16_BANK));
3035                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3037                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3038                                 NUM_BANKS(ADDR_SURF_8_BANK));
3039
3040                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3041                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3042                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3043                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3044                 break;
3045
3046         default:
3047                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3048         }
3049 }
3050
3051 /**
3052  * cik_select_se_sh - select which SE, SH to address
3053  *
3054  * @rdev: radeon_device pointer
3055  * @se_num: shader engine to address
3056  * @sh_num: sh block to address
3057  *
3058  * Select which SE, SH combinations to address. Certain
3059  * registers are instanced per SE or SH.  0xffffffff means
3060  * broadcast to all SEs or SHs (CIK).
3061  */
3062 static void cik_select_se_sh(struct radeon_device *rdev,
3063                              u32 se_num, u32 sh_num)
3064 {
3065         u32 data = INSTANCE_BROADCAST_WRITES;
3066
3067         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3068                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3069         else if (se_num == 0xffffffff)
3070                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3071         else if (sh_num == 0xffffffff)
3072                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3073         else
3074                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3075         WREG32(GRBM_GFX_INDEX, data);
3076 }
3077
/**
 * cik_create_bitmask - build a mask of consecutive low bits
 *
 * @bit_width: number of low-order bits to set
 *
 * Creates a variable length bit mask (CIK).  A width of 0 gives 0;
 * widths of 32 or more give an all-ones 32-bit mask.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
        /* shifting a 32-bit value by >= 32 is undefined, so saturate here */
        if (bit_width >= 32)
                return 0xffffffff;

        return ((u32)1 << bit_width) - 1;
}
3096
3097 /**
3098  * cik_get_rb_disabled - computes the mask of disabled RBs
3099  *
3100  * @rdev: radeon_device pointer
3101  * @max_rb_num: max RBs (render backends) for the asic
3102  * @se_num: number of SEs (shader engines) for the asic
3103  * @sh_per_se: number of SH blocks per SE for the asic
3104  *
3105  * Calculates the bitmask of disabled RBs (CIK).
3106  * Returns the disabled RB bitmask.
3107  */
3108 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3109                               u32 max_rb_num_per_se,
3110                               u32 sh_per_se)
3111 {
3112         u32 data, mask;
3113
3114         data = RREG32(CC_RB_BACKEND_DISABLE);
3115         if (data & 1)
3116                 data &= BACKEND_DISABLE_MASK;
3117         else
3118                 data = 0;
3119         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3120
3121         data >>= BACKEND_DISABLE_SHIFT;
3122
3123         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3124
3125         return data & mask;
3126 }
3127
3128 /**
3129  * cik_setup_rb - setup the RBs on the asic
3130  *
3131  * @rdev: radeon_device pointer
3132  * @se_num: number of SEs (shader engines) for the asic
3133  * @sh_per_se: number of SH blocks per SE for the asic
3134  * @max_rb_num: max RBs (render backends) for the asic
3135  *
3136  * Configures per-SE/SH RB registers (CIK).
3137  */
3138 static void cik_setup_rb(struct radeon_device *rdev,
3139                          u32 se_num, u32 sh_per_se,
3140                          u32 max_rb_num_per_se)
3141 {
3142         int i, j;
3143         u32 data, mask;
3144         u32 disabled_rbs = 0;
3145         u32 enabled_rbs = 0;
3146
3147         mutex_lock(&rdev->grbm_idx_mutex);
3148         for (i = 0; i < se_num; i++) {
3149                 for (j = 0; j < sh_per_se; j++) {
3150                         cik_select_se_sh(rdev, i, j);
3151                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3152                         if (rdev->family == CHIP_HAWAII)
3153                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3154                         else
3155                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3156                 }
3157         }
3158         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3159         mutex_unlock(&rdev->grbm_idx_mutex);
3160
3161         mask = 1;
3162         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3163                 if (!(disabled_rbs & mask))
3164                         enabled_rbs |= mask;
3165                 mask <<= 1;
3166         }
3167
3168         rdev->config.cik.backend_enable_mask = enabled_rbs;
3169
3170         mutex_lock(&rdev->grbm_idx_mutex);
3171         for (i = 0; i < se_num; i++) {
3172                 cik_select_se_sh(rdev, i, 0xffffffff);
3173                 data = 0;
3174                 for (j = 0; j < sh_per_se; j++) {
3175                         switch (enabled_rbs & 3) {
3176                         case 0:
3177                                 if (j == 0)
3178                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3179                                 else
3180                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3181                                 break;
3182                         case 1:
3183                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3184                                 break;
3185                         case 2:
3186                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3187                                 break;
3188                         case 3:
3189                         default:
3190                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3191                                 break;
3192                         }
3193                         enabled_rbs >>= 2;
3194                 }
3195                 WREG32(PA_SC_RASTER_CONFIG, data);
3196         }
3197         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3198         mutex_unlock(&rdev->grbm_idx_mutex);
3199 }
3200
3201 /**
3202  * cik_gpu_init - setup the 3D engine
3203  *
3204  * @rdev: radeon_device pointer
3205  *
3206  * Configures the 3D engine and tiling configuration
3207  * registers so that the 3D engine is usable.
3208  */
3209 static void cik_gpu_init(struct radeon_device *rdev)
3210 {
3211         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3212         u32 mc_shared_chmap, mc_arb_ramcfg;
3213         u32 hdp_host_path_cntl;
3214         u32 tmp;
3215         int i, j;
3216
3217         switch (rdev->family) {
3218         case CHIP_BONAIRE:
3219                 rdev->config.cik.max_shader_engines = 2;
3220                 rdev->config.cik.max_tile_pipes = 4;
3221                 rdev->config.cik.max_cu_per_sh = 7;
3222                 rdev->config.cik.max_sh_per_se = 1;
3223                 rdev->config.cik.max_backends_per_se = 2;
3224                 rdev->config.cik.max_texture_channel_caches = 4;
3225                 rdev->config.cik.max_gprs = 256;
3226                 rdev->config.cik.max_gs_threads = 32;
3227                 rdev->config.cik.max_hw_contexts = 8;
3228
3229                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3230                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3231                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3232                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3233                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3234                 break;
3235         case CHIP_HAWAII:
3236                 rdev->config.cik.max_shader_engines = 4;
3237                 rdev->config.cik.max_tile_pipes = 16;
3238                 rdev->config.cik.max_cu_per_sh = 11;
3239                 rdev->config.cik.max_sh_per_se = 1;
3240                 rdev->config.cik.max_backends_per_se = 4;
3241                 rdev->config.cik.max_texture_channel_caches = 16;
3242                 rdev->config.cik.max_gprs = 256;
3243                 rdev->config.cik.max_gs_threads = 32;
3244                 rdev->config.cik.max_hw_contexts = 8;
3245
3246                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3247                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3248                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3249                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3250                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3251                 break;
3252         case CHIP_KAVERI:
3253                 rdev->config.cik.max_shader_engines = 1;
3254                 rdev->config.cik.max_tile_pipes = 4;
3255                 if ((rdev->pdev->device == 0x1304) ||
3256                     (rdev->pdev->device == 0x1305) ||
3257                     (rdev->pdev->device == 0x130C) ||
3258                     (rdev->pdev->device == 0x130F) ||
3259                     (rdev->pdev->device == 0x1310) ||
3260                     (rdev->pdev->device == 0x1311) ||
3261                     (rdev->pdev->device == 0x131C)) {
3262                         rdev->config.cik.max_cu_per_sh = 8;
3263                         rdev->config.cik.max_backends_per_se = 2;
3264                 } else if ((rdev->pdev->device == 0x1309) ||
3265                            (rdev->pdev->device == 0x130A) ||
3266                            (rdev->pdev->device == 0x130D) ||
3267                            (rdev->pdev->device == 0x1313) ||
3268                            (rdev->pdev->device == 0x131D)) {
3269                         rdev->config.cik.max_cu_per_sh = 6;
3270                         rdev->config.cik.max_backends_per_se = 2;
3271                 } else if ((rdev->pdev->device == 0x1306) ||
3272                            (rdev->pdev->device == 0x1307) ||
3273                            (rdev->pdev->device == 0x130B) ||
3274                            (rdev->pdev->device == 0x130E) ||
3275                            (rdev->pdev->device == 0x1315) ||
3276                            (rdev->pdev->device == 0x1318) ||
3277                            (rdev->pdev->device == 0x131B)) {
3278                         rdev->config.cik.max_cu_per_sh = 4;
3279                         rdev->config.cik.max_backends_per_se = 1;
3280                 } else {
3281                         rdev->config.cik.max_cu_per_sh = 3;
3282                         rdev->config.cik.max_backends_per_se = 1;
3283                 }
3284                 rdev->config.cik.max_sh_per_se = 1;
3285                 rdev->config.cik.max_texture_channel_caches = 4;
3286                 rdev->config.cik.max_gprs = 256;
3287                 rdev->config.cik.max_gs_threads = 16;
3288                 rdev->config.cik.max_hw_contexts = 8;
3289
3290                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3291                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3292                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3293                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3294                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3295                 break;
3296         case CHIP_KABINI:
3297         case CHIP_MULLINS:
3298         default:
3299                 rdev->config.cik.max_shader_engines = 1;
3300                 rdev->config.cik.max_tile_pipes = 2;
3301                 rdev->config.cik.max_cu_per_sh = 2;
3302                 rdev->config.cik.max_sh_per_se = 1;
3303                 rdev->config.cik.max_backends_per_se = 1;
3304                 rdev->config.cik.max_texture_channel_caches = 2;
3305                 rdev->config.cik.max_gprs = 256;
3306                 rdev->config.cik.max_gs_threads = 16;
3307                 rdev->config.cik.max_hw_contexts = 8;
3308
3309                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3310                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3311                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3312                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3313                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3314                 break;
3315         }
3316
3317         /* Initialize HDP */
3318         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3319                 WREG32((0x2c14 + j), 0x00000000);
3320                 WREG32((0x2c18 + j), 0x00000000);
3321                 WREG32((0x2c1c + j), 0x00000000);
3322                 WREG32((0x2c20 + j), 0x00000000);
3323                 WREG32((0x2c24 + j), 0x00000000);
3324         }
3325
3326         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3327         WREG32(SRBM_INT_CNTL, 0x1);
3328         WREG32(SRBM_INT_ACK, 0x1);
3329
3330         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3331
3332         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3333         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3334
3335         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3336         rdev->config.cik.mem_max_burst_length_bytes = 256;
3337         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3338         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3339         if (rdev->config.cik.mem_row_size_in_kb > 4)
3340                 rdev->config.cik.mem_row_size_in_kb = 4;
3341         /* XXX use MC settings? */
3342         rdev->config.cik.shader_engine_tile_size = 32;
3343         rdev->config.cik.num_gpus = 1;
3344         rdev->config.cik.multi_gpu_tile_size = 64;
3345
3346         /* fix up row size */
3347         gb_addr_config &= ~ROW_SIZE_MASK;
3348         switch (rdev->config.cik.mem_row_size_in_kb) {
3349         case 1:
3350         default:
3351                 gb_addr_config |= ROW_SIZE(0);
3352                 break;
3353         case 2:
3354                 gb_addr_config |= ROW_SIZE(1);
3355                 break;
3356         case 4:
3357                 gb_addr_config |= ROW_SIZE(2);
3358                 break;
3359         }
3360
3361         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3362          * not have bank info, so create a custom tiling dword.
3363          * bits 3:0   num_pipes
3364          * bits 7:4   num_banks
3365          * bits 11:8  group_size
3366          * bits 15:12 row_size
3367          */
3368         rdev->config.cik.tile_config = 0;
3369         switch (rdev->config.cik.num_tile_pipes) {
3370         case 1:
3371                 rdev->config.cik.tile_config |= (0 << 0);
3372                 break;
3373         case 2:
3374                 rdev->config.cik.tile_config |= (1 << 0);
3375                 break;
3376         case 4:
3377                 rdev->config.cik.tile_config |= (2 << 0);
3378                 break;
3379         case 8:
3380         default:
3381                 /* XXX what about 12? */
3382                 rdev->config.cik.tile_config |= (3 << 0);
3383                 break;
3384         }
3385         rdev->config.cik.tile_config |=
3386                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3387         rdev->config.cik.tile_config |=
3388                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3389         rdev->config.cik.tile_config |=
3390                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3391
3392         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3393         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3394         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3395         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3396         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3397         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3398         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3399         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3400
3401         cik_tiling_mode_table_init(rdev);
3402
3403         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3404                      rdev->config.cik.max_sh_per_se,
3405                      rdev->config.cik.max_backends_per_se);
3406
3407         rdev->config.cik.active_cus = 0;
3408         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3409                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3410                         rdev->config.cik.active_cus +=
3411                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3412                 }
3413         }
3414
3415         /* set HW defaults for 3D engine */
3416         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3417
3418         mutex_lock(&rdev->grbm_idx_mutex);
3419         /*
3420          * making sure that the following register writes will be broadcasted
3421          * to all the shaders
3422          */
3423         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3424         WREG32(SX_DEBUG_1, 0x20);
3425
3426         WREG32(TA_CNTL_AUX, 0x00010000);
3427
3428         tmp = RREG32(SPI_CONFIG_CNTL);
3429         tmp |= 0x03000000;
3430         WREG32(SPI_CONFIG_CNTL, tmp);
3431
3432         WREG32(SQ_CONFIG, 1);
3433
3434         WREG32(DB_DEBUG, 0);
3435
3436         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3437         tmp |= 0x00000400;
3438         WREG32(DB_DEBUG2, tmp);
3439
3440         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3441         tmp |= 0x00020200;
3442         WREG32(DB_DEBUG3, tmp);
3443
3444         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3445         tmp |= 0x00018208;
3446         WREG32(CB_HW_CONTROL, tmp);
3447
3448         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3449
3450         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3451                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3452                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3453                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3454
3455         WREG32(VGT_NUM_INSTANCES, 1);
3456
3457         WREG32(CP_PERFMON_CNTL, 0);
3458
3459         WREG32(SQ_CONFIG, 0);
3460
3461         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3462                                           FORCE_EOV_MAX_REZ_CNT(255)));
3463
3464         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3465                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3466
3467         WREG32(VGT_GS_VERTEX_REUSE, 16);
3468         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3469
3470         tmp = RREG32(HDP_MISC_CNTL);
3471         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3472         WREG32(HDP_MISC_CNTL, tmp);
3473
3474         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3475         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3476
3477         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3478         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3479         mutex_unlock(&rdev->grbm_idx_mutex);
3480
3481         udelay(50);
3482 }
3483
3484 /*
3485  * GPU scratch register helper functions.
3486  */
3487 /**
3488  * cik_scratch_init - setup driver info for CP scratch regs
3489  *
3490  * @rdev: radeon_device pointer
3491  *
3492  * Set up the number and offset of the CP scratch registers.
3493  * NOTE: use of CP scratch registers is a legacy inferface and
3494  * is not used by default on newer asics (r6xx+).  On newer asics,
3495  * memory buffers are used for fences rather than scratch regs.
3496  */
3497 static void cik_scratch_init(struct radeon_device *rdev)
3498 {
3499         int i;
3500
3501         rdev->scratch.num_reg = 7;
3502         rdev->scratch.reg_base = SCRATCH_REG0;
3503         for (i = 0; i < rdev->scratch.num_reg; i++) {
3504                 rdev->scratch.free[i] = true;
3505                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3506         }
3507 }
3508
3509 /**
3510  * cik_ring_test - basic gfx ring test
3511  *
3512  * @rdev: radeon_device pointer
3513  * @ring: radeon_ring structure holding ring information
3514  *
3515  * Allocate a scratch register and write to it using the gfx ring (CIK).
3516  * Provides a basic gfx ring test to verify that the ring is working.
3517  * Used by cik_cp_gfx_resume();
3518  * Returns 0 on success, error on failure.
3519  */
3520 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3521 {
3522         uint32_t scratch;
3523         uint32_t tmp = 0;
3524         unsigned i;
3525         int r;
3526
3527         r = radeon_scratch_get(rdev, &scratch);
3528         if (r) {
3529                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3530                 return r;
3531         }
3532         WREG32(scratch, 0xCAFEDEAD);
3533         r = radeon_ring_lock(rdev, ring, 3);
3534         if (r) {
3535                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3536                 radeon_scratch_free(rdev, scratch);
3537                 return r;
3538         }
3539         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3540         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3541         radeon_ring_write(ring, 0xDEADBEEF);
3542         radeon_ring_unlock_commit(rdev, ring, false);
3543
3544         for (i = 0; i < rdev->usec_timeout; i++) {
3545                 tmp = RREG32(scratch);
3546                 if (tmp == 0xDEADBEEF)
3547                         break;
3548                 DRM_UDELAY(1);
3549         }
3550         if (i < rdev->usec_timeout) {
3551                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3552         } else {
3553                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3554                           ring->idx, scratch, tmp);
3555                 r = -EINVAL;
3556         }
3557         radeon_scratch_free(rdev, scratch);
3558         return r;
3559 }
3560
3561 /**
3562  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3563  *
3564  * @rdev: radeon_device pointer
3565  * @ridx: radeon ring index
3566  *
3567  * Emits an hdp flush on the cp.
3568  */
3569 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3570                                        int ridx)
3571 {
3572         struct radeon_ring *ring = &rdev->ring[ridx];
3573         u32 ref_and_mask;
3574
3575         switch (ring->idx) {
3576         case CAYMAN_RING_TYPE_CP1_INDEX:
3577         case CAYMAN_RING_TYPE_CP2_INDEX:
3578         default:
3579                 switch (ring->me) {
3580                 case 0:
3581                         ref_and_mask = CP2 << ring->pipe;
3582                         break;
3583                 case 1:
3584                         ref_and_mask = CP6 << ring->pipe;
3585                         break;
3586                 default:
3587                         return;
3588                 }
3589                 break;
3590         case RADEON_RING_TYPE_GFX_INDEX:
3591                 ref_and_mask = CP0;
3592                 break;
3593         }
3594
3595         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3596         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3597                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3598                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3599         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3600         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3601         radeon_ring_write(ring, ref_and_mask);
3602         radeon_ring_write(ring, ref_and_mask);
3603         radeon_ring_write(ring, 0x20); /* poll interval */
3604 }
3605
3606 /**
3607  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3608  *
3609  * @rdev: radeon_device pointer
3610  * @fence: radeon fence object
3611  *
3612  * Emits a fence sequnce number on the gfx ring and flushes
3613  * GPU caches.
3614  */
3615 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3616                              struct radeon_fence *fence)
3617 {
3618         struct radeon_ring *ring = &rdev->ring[fence->ring];
3619         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3620
3621         /* Workaround for cache flush problems. First send a dummy EOP
3622          * event down the pipe with seq one below.
3623          */
3624         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3625         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3626                                  EOP_TC_ACTION_EN |
3627                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3628                                  EVENT_INDEX(5)));
3629         radeon_ring_write(ring, addr & 0xfffffffc);
3630         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3631                                 DATA_SEL(1) | INT_SEL(0));
3632         radeon_ring_write(ring, fence->seq - 1);
3633         radeon_ring_write(ring, 0);
3634
3635         /* Then send the real EOP event down the pipe. */
3636         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3637         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3638                                  EOP_TC_ACTION_EN |
3639                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3640                                  EVENT_INDEX(5)));
3641         radeon_ring_write(ring, addr & 0xfffffffc);
3642         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3643         radeon_ring_write(ring, fence->seq);
3644         radeon_ring_write(ring, 0);
3645 }
3646
3647 /**
3648  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3649  *
3650  * @rdev: radeon_device pointer
3651  * @fence: radeon fence object
3652  *
3653  * Emits a fence sequnce number on the compute ring and flushes
3654  * GPU caches.
3655  */
3656 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3657                                  struct radeon_fence *fence)
3658 {
3659         struct radeon_ring *ring = &rdev->ring[fence->ring];
3660         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3661
3662         /* RELEASE_MEM - flush caches, send int */
3663         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3664         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3665                                  EOP_TC_ACTION_EN |
3666                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3667                                  EVENT_INDEX(5)));
3668         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3669         radeon_ring_write(ring, addr & 0xfffffffc);
3670         radeon_ring_write(ring, upper_32_bits(addr));
3671         radeon_ring_write(ring, fence->seq);
3672         radeon_ring_write(ring, 0);
3673 }
3674
3675 /**
3676  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3677  *
3678  * @rdev: radeon_device pointer
3679  * @ring: radeon ring buffer object
3680  * @semaphore: radeon semaphore object
3681  * @emit_wait: Is this a sempahore wait?
3682  *
3683  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3684  * from running ahead of semaphore waits.
3685  */
3686 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3687                              struct radeon_ring *ring,
3688                              struct radeon_semaphore *semaphore,
3689                              bool emit_wait)
3690 {
3691         uint64_t addr = semaphore->gpu_addr;
3692         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3693
3694         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3695         radeon_ring_write(ring, lower_32_bits(addr));
3696         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3697
3698         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3699                 /* Prevent the PFP from running ahead of the semaphore wait */
3700                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3701                 radeon_ring_write(ring, 0x0);
3702         }
3703
3704         return true;
3705 }
3706
3707 /**
3708  * cik_copy_cpdma - copy pages using the CP DMA engine
3709  *
3710  * @rdev: radeon_device pointer
3711  * @src_offset: src GPU address
3712  * @dst_offset: dst GPU address
3713  * @num_gpu_pages: number of GPU pages to xfer
3714  * @resv: reservation object to sync to
3715  *
3716  * Copy GPU paging using the CP DMA engine (CIK+).
3717  * Used by the radeon ttm implementation to move pages if
3718  * registered as the asic copy callback.
3719  */
3720 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3721                                     uint64_t src_offset, uint64_t dst_offset,
3722                                     unsigned num_gpu_pages,
3723                                     struct reservation_object *resv)
3724 {
3725         struct radeon_fence *fence;
3726         struct radeon_sync sync;
3727         int ring_index = rdev->asic->copy.blit_ring_index;
3728         struct radeon_ring *ring = &rdev->ring[ring_index];
3729         u32 size_in_bytes, cur_size_in_bytes, control;
3730         int i, num_loops;
3731         int r = 0;
3732
3733         radeon_sync_create(&sync);
3734
3735         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3736         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3737         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3738         if (r) {
3739                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3740                 radeon_sync_free(rdev, &sync, NULL);
3741                 return ERR_PTR(r);
3742         }
3743
3744         radeon_sync_resv(rdev, &sync, resv, false);
3745         radeon_sync_rings(rdev, &sync, ring->idx);
3746
3747         for (i = 0; i < num_loops; i++) {
3748                 cur_size_in_bytes = size_in_bytes;
3749                 if (cur_size_in_bytes > 0x1fffff)
3750                         cur_size_in_bytes = 0x1fffff;
3751                 size_in_bytes -= cur_size_in_bytes;
3752                 control = 0;
3753                 if (size_in_bytes == 0)
3754                         control |= PACKET3_DMA_DATA_CP_SYNC;
3755                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3756                 radeon_ring_write(ring, control);
3757                 radeon_ring_write(ring, lower_32_bits(src_offset));
3758                 radeon_ring_write(ring, upper_32_bits(src_offset));
3759                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3760                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3761                 radeon_ring_write(ring, cur_size_in_bytes);
3762                 src_offset += cur_size_in_bytes;
3763                 dst_offset += cur_size_in_bytes;
3764         }
3765
3766         r = radeon_fence_emit(rdev, &fence, ring->idx);
3767         if (r) {
3768                 radeon_ring_unlock_undo(rdev, ring);
3769                 radeon_sync_free(rdev, &sync, NULL);
3770                 return ERR_PTR(r);
3771         }
3772
3773         radeon_ring_unlock_commit(rdev, ring, false);
3774         radeon_sync_free(rdev, &sync, fence);
3775
3776         return fence;
3777 }
3778
3779 /*
3780  * IB stuff
3781  */
3782 /**
3783  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3784  *
3785  * @rdev: radeon_device pointer
3786  * @ib: radeon indirect buffer object
3787  *
3788  * Emits a DE (drawing engine) or CE (constant engine) IB
3789  * on the gfx ring.  IBs are usually generated by userspace
3790  * acceleration drivers and submitted to the kernel for
3791  * scheduling on the ring.  This function schedules the IB
3792  * on the gfx ring for execution by the GPU.
3793  */
3794 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3795 {
3796         struct radeon_ring *ring = &rdev->ring[ib->ring];
3797         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3798         u32 header, control = INDIRECT_BUFFER_VALID;
3799
3800         if (ib->is_const_ib) {
3801                 /* set switch buffer packet before const IB */
3802                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3803                 radeon_ring_write(ring, 0);
3804
3805                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3806         } else {
3807                 u32 next_rptr;
3808                 if (ring->rptr_save_reg) {
3809                         next_rptr = ring->wptr + 3 + 4;
3810                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3811                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3812                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
3813                         radeon_ring_write(ring, next_rptr);
3814                 } else if (rdev->wb.enabled) {
3815                         next_rptr = ring->wptr + 5 + 4;
3816                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3817                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3818                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3819                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3820                         radeon_ring_write(ring, next_rptr);
3821                 }
3822
3823                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3824         }
3825
3826         control |= ib->length_dw | (vm_id << 24);
3827
3828         radeon_ring_write(ring, header);
3829         radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3830         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3831         radeon_ring_write(ring, control);
3832 }
3833
3834 /**
3835  * cik_ib_test - basic gfx ring IB test
3836  *
3837  * @rdev: radeon_device pointer
3838  * @ring: radeon_ring structure holding ring information
3839  *
3840  * Allocate an IB and execute it on the gfx ring (CIK).
3841  * Provides a basic gfx ring test to verify that IBs are working.
3842  * Returns 0 on success, error on failure.
3843  */
3844 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3845 {
3846         struct radeon_ib ib;
3847         uint32_t scratch;
3848         uint32_t tmp = 0;
3849         unsigned i;
3850         int r;
3851
3852         r = radeon_scratch_get(rdev, &scratch);
3853         if (r) {
3854                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3855                 return r;
3856         }
3857         WREG32(scratch, 0xCAFEDEAD);
3858         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3859         if (r) {
3860                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3861                 radeon_scratch_free(rdev, scratch);
3862                 return r;
3863         }
3864         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3865         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3866         ib.ptr[2] = 0xDEADBEEF;
3867         ib.length_dw = 3;
3868         r = radeon_ib_schedule(rdev, &ib, NULL, false);
3869         if (r) {
3870                 radeon_scratch_free(rdev, scratch);
3871                 radeon_ib_free(rdev, &ib);
3872                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3873                 return r;
3874         }
3875         r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3876                 RADEON_USEC_IB_TEST_TIMEOUT));
3877         if (r < 0) {
3878                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3879                 radeon_scratch_free(rdev, scratch);
3880                 radeon_ib_free(rdev, &ib);
3881                 return r;
3882         } else if (r == 0) {
3883                 DRM_ERROR("radeon: fence wait timed out.\n");
3884                 radeon_scratch_free(rdev, scratch);
3885                 radeon_ib_free(rdev, &ib);
3886                 return -ETIMEDOUT;
3887         }
3888         r = 0;
3889         for (i = 0; i < rdev->usec_timeout; i++) {
3890                 tmp = RREG32(scratch);
3891                 if (tmp == 0xDEADBEEF)
3892                         break;
3893                 DRM_UDELAY(1);
3894         }
3895         if (i < rdev->usec_timeout) {
3896                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3897         } else {
3898                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3899                           scratch, tmp);
3900                 r = -EINVAL;
3901         }
3902         radeon_scratch_free(rdev, scratch);
3903         radeon_ib_free(rdev, &ib);
3904         return r;
3905 }
3906
3907 /*
3908  * CP.
3909  * On CIK, gfx and compute now have independent command processors.
3910  *
3911  * GFX
3912  * Gfx consists of a single ring and can process both gfx jobs and
3913  * compute jobs.  The gfx CP consists of three microengines (ME):
3914  * PFP - Pre-Fetch Parser
3915  * ME - Micro Engine
3916  * CE - Constant Engine
3917  * The PFP and ME make up what is considered the Drawing Engine (DE).
3918  * The CE is an asynchronous engine used for updating buffer descriptors
3919  * used by the DE so that they can be loaded into cache in parallel
3920  * while the DE is processing state update packets.
3921  *
3922  * Compute
3923  * The compute CP consists of two microengines (ME):
3924  * MEC1 - Compute MicroEngine 1
3925  * MEC2 - Compute MicroEngine 2
3926  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3927  * The queues are exposed to userspace and are programmed directly
3928  * by the compute runtime.
3929  */
3930 /**
3931  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3932  *
3933  * @rdev: radeon_device pointer
3934  * @enable: enable or disable the MEs
3935  *
3936  * Halts or unhalts the gfx MEs.
3937  */
3938 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3939 {
3940         if (enable)
3941                 WREG32(CP_ME_CNTL, 0);
3942         else {
3943                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3944                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3945                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3946                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3947         }
3948         udelay(50);
3949 }
3950
3951 /**
3952  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3953  *
3954  * @rdev: radeon_device pointer
3955  *
3956  * Loads the gfx PFP, ME, and CE ucode.
3957  * Returns 0 for success, -EINVAL if the ucode is not available.
3958  */
3959 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3960 {
3961         int i;
3962
3963         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3964                 return -EINVAL;
3965
3966         cik_cp_gfx_enable(rdev, false);
3967
3968         if (rdev->new_fw) {
3969                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3970                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3971                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3972                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3973                 const struct gfx_firmware_header_v1_0 *me_hdr =
3974                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3975                 const __le32 *fw_data;
3976                 u32 fw_size;
3977
3978                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3979                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3980                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3981
3982                 /* PFP */
3983                 fw_data = (const __le32 *)
3984                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3985                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3986                 WREG32(CP_PFP_UCODE_ADDR, 0);
3987                 for (i = 0; i < fw_size; i++)
3988                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3989                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3990
3991                 /* CE */
3992                 fw_data = (const __le32 *)
3993                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3994                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3995                 WREG32(CP_CE_UCODE_ADDR, 0);
3996                 for (i = 0; i < fw_size; i++)
3997                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3998                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3999
4000                 /* ME */
4001                 fw_data = (const __be32 *)
4002                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4003                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4004                 WREG32(CP_ME_RAM_WADDR, 0);
4005                 for (i = 0; i < fw_size; i++)
4006                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4007                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4008                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4009         } else {
4010                 const __be32 *fw_data;
4011
4012                 /* PFP */
4013                 fw_data = (const __be32 *)rdev->pfp_fw->data;
4014                 WREG32(CP_PFP_UCODE_ADDR, 0);
4015                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4016                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4017                 WREG32(CP_PFP_UCODE_ADDR, 0);
4018
4019                 /* CE */
4020                 fw_data = (const __be32 *)rdev->ce_fw->data;
4021                 WREG32(CP_CE_UCODE_ADDR, 0);
4022                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4023                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4024                 WREG32(CP_CE_UCODE_ADDR, 0);
4025
4026                 /* ME */
4027                 fw_data = (const __be32 *)rdev->me_fw->data;
4028                 WREG32(CP_ME_RAM_WADDR, 0);
4029                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4030                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4031                 WREG32(CP_ME_RAM_WADDR, 0);
4032         }
4033
4034         return 0;
4035 }
4036
4037 /**
4038  * cik_cp_gfx_start - start the gfx ring
4039  *
4040  * @rdev: radeon_device pointer
4041  *
4042  * Enables the ring and loads the clear state context and other
4043  * packets required to init the ring.
4044  * Returns 0 for success, error for failure.
4045  */
4046 static int cik_cp_gfx_start(struct radeon_device *rdev)
4047 {
4048         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4049         int r, i;
4050
4051         /* init the CP */
4052         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4053         WREG32(CP_ENDIAN_SWAP, 0);
4054         WREG32(CP_DEVICE_ID, 1);
4055
4056         cik_cp_gfx_enable(rdev, true);
4057
4058         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4059         if (r) {
4060                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4061                 return r;
4062         }
4063
4064         /* init the CE partitions.  CE only used for gfx on CIK */
4065         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4066         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4067         radeon_ring_write(ring, 0x8000);
4068         radeon_ring_write(ring, 0x8000);
4069
4070         /* setup clear context state */
4071         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4072         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4073
4074         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4075         radeon_ring_write(ring, 0x80000000);
4076         radeon_ring_write(ring, 0x80000000);
4077
4078         for (i = 0; i < cik_default_size; i++)
4079                 radeon_ring_write(ring, cik_default_state[i]);
4080
4081         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4082         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4083
4084         /* set clear context state */
4085         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4086         radeon_ring_write(ring, 0);
4087
4088         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4089         radeon_ring_write(ring, 0x00000316);
4090         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4091         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4092
4093         radeon_ring_unlock_commit(rdev, ring, false);
4094
4095         return 0;
4096 }
4097
4098 /**
4099  * cik_cp_gfx_fini - stop the gfx ring
4100  *
4101  * @rdev: radeon_device pointer
4102  *
4103  * Stop the gfx ring and tear down the driver ring
4104  * info.
4105  */
4106 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4107 {
4108         cik_cp_gfx_enable(rdev, false);
4109         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4110 }
4111
4112 /**
4113  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4114  *
4115  * @rdev: radeon_device pointer
4116  *
4117  * Program the location and size of the gfx ring buffer
4118  * and test it to make sure it's working.
4119  * Returns 0 for success, error for failure.
4120  */
4121 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4122 {
4123         struct radeon_ring *ring;
4124         u32 tmp;
4125         u32 rb_bufsz;
4126         u64 rb_addr;
4127         int r;
4128
4129         WREG32(CP_SEM_WAIT_TIMER, 0x0);
4130         if (rdev->family != CHIP_HAWAII)
4131                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4132
4133         /* Set the write pointer delay */
4134         WREG32(CP_RB_WPTR_DELAY, 0);
4135
4136         /* set the RB to use vmid 0 */
4137         WREG32(CP_RB_VMID, 0);
4138
4139         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4140
4141         /* ring 0 - compute and gfx */
4142         /* Set ring buffer size */
4143         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4144         rb_bufsz = order_base_2(ring->ring_size / 8);
4145         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4146 #ifdef __BIG_ENDIAN
4147         tmp |= BUF_SWAP_32BIT;
4148 #endif
4149         WREG32(CP_RB0_CNTL, tmp);
4150
4151         /* Initialize the ring buffer's read and write pointers */
4152         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4153         ring->wptr = 0;
4154         WREG32(CP_RB0_WPTR, ring->wptr);
4155
4156         /* set the wb address wether it's enabled or not */
4157         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4158         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4159
4160         /* scratch register shadowing is no longer supported */
4161         WREG32(SCRATCH_UMSK, 0);
4162
4163         if (!rdev->wb.enabled)
4164                 tmp |= RB_NO_UPDATE;
4165
4166         mdelay(1);
4167         WREG32(CP_RB0_CNTL, tmp);
4168
4169         rb_addr = ring->gpu_addr >> 8;
4170         WREG32(CP_RB0_BASE, rb_addr);
4171         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4172
4173         /* start the ring */
4174         cik_cp_gfx_start(rdev);
4175         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4176         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4177         if (r) {
4178                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4179                 return r;
4180         }
4181
4182         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4183                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4184
4185         return 0;
4186 }
4187
4188 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4189                      struct radeon_ring *ring)
4190 {
4191         u32 rptr;
4192
4193         if (rdev->wb.enabled)
4194                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4195         else
4196                 rptr = RREG32(CP_RB0_RPTR);
4197
4198         return rptr;
4199 }
4200
4201 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4202                      struct radeon_ring *ring)
4203 {
4204         u32 wptr;
4205
4206         wptr = RREG32(CP_RB0_WPTR);
4207
4208         return wptr;
4209 }
4210
4211 void cik_gfx_set_wptr(struct radeon_device *rdev,
4212                       struct radeon_ring *ring)
4213 {
4214         WREG32(CP_RB0_WPTR, ring->wptr);
4215         (void)RREG32(CP_RB0_WPTR);
4216 }
4217
4218 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4219                          struct radeon_ring *ring)
4220 {
4221         u32 rptr;
4222
4223         if (rdev->wb.enabled) {
4224                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4225         } else {
4226                 mutex_lock(&rdev->srbm_mutex);
4227                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4228                 rptr = RREG32(CP_HQD_PQ_RPTR);
4229                 cik_srbm_select(rdev, 0, 0, 0, 0);
4230                 mutex_unlock(&rdev->srbm_mutex);
4231         }
4232
4233         return rptr;
4234 }
4235
4236 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4237                          struct radeon_ring *ring)
4238 {
4239         u32 wptr;
4240
4241         if (rdev->wb.enabled) {
4242                 /* XXX check if swapping is necessary on BE */
4243                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4244         } else {
4245                 mutex_lock(&rdev->srbm_mutex);
4246                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4247                 wptr = RREG32(CP_HQD_PQ_WPTR);
4248                 cik_srbm_select(rdev, 0, 0, 0, 0);
4249                 mutex_unlock(&rdev->srbm_mutex);
4250         }
4251
4252         return wptr;
4253 }
4254
4255 void cik_compute_set_wptr(struct radeon_device *rdev,
4256                           struct radeon_ring *ring)
4257 {
4258         /* XXX check if swapping is necessary on BE */
4259         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4260         WDOORBELL32(ring->doorbell_index, ring->wptr);
4261 }
4262
4263 static void cik_compute_stop(struct radeon_device *rdev,
4264                              struct radeon_ring *ring)
4265 {
4266         u32 j, tmp;
4267
4268         cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4269         /* Disable wptr polling. */
4270         tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4271         tmp &= ~WPTR_POLL_EN;
4272         WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4273         /* Disable HQD. */
4274         if (RREG32(CP_HQD_ACTIVE) & 1) {
4275                 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4276                 for (j = 0; j < rdev->usec_timeout; j++) {
4277                         if (!(RREG32(CP_HQD_ACTIVE) & 1))
4278                                 break;
4279                         udelay(1);
4280                 }
4281                 WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4282                 WREG32(CP_HQD_PQ_RPTR, 0);
4283                 WREG32(CP_HQD_PQ_WPTR, 0);
4284         }
4285         cik_srbm_select(rdev, 0, 0, 0, 0);
4286 }
4287
4288 /**
4289  * cik_cp_compute_enable - enable/disable the compute CP MEs
4290  *
4291  * @rdev: radeon_device pointer
4292  * @enable: enable or disable the MEs
4293  *
4294  * Halts or unhalts the compute MEs.
4295  */
4296 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4297 {
4298         if (enable)
4299                 WREG32(CP_MEC_CNTL, 0);
4300         else {
4301                 /*
4302                  * To make hibernation reliable we need to clear compute ring
4303                  * configuration before halting the compute ring.
4304                  */
4305                 mutex_lock(&rdev->srbm_mutex);
4306                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4307                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4308                 mutex_unlock(&rdev->srbm_mutex);
4309
4310                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4311                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4312                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4313         }
4314         udelay(50);
4315 }
4316
4317 /**
4318  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4319  *
4320  * @rdev: radeon_device pointer
4321  *
4322  * Loads the compute MEC1&2 ucode.
4323  * Returns 0 for success, -EINVAL if the ucode is not available.
4324  */
4325 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4326 {
4327         int i;
4328
4329         if (!rdev->mec_fw)
4330                 return -EINVAL;
4331
4332         cik_cp_compute_enable(rdev, false);
4333
4334         if (rdev->new_fw) {
4335                 const struct gfx_firmware_header_v1_0 *mec_hdr =
4336                         (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4337                 const __le32 *fw_data;
4338                 u32 fw_size;
4339
4340                 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4341
4342                 /* MEC1 */
4343                 fw_data = (const __le32 *)
4344                         (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4345                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4346                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4347                 for (i = 0; i < fw_size; i++)
4348                         WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4349                 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4350
4351                 /* MEC2 */
4352                 if (rdev->family == CHIP_KAVERI) {
4353                         const struct gfx_firmware_header_v1_0 *mec2_hdr =
4354                                 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4355
4356                         fw_data = (const __le32 *)
4357                                 (rdev->mec2_fw->data +
4358                                  le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4359                         fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4360                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4361                         for (i = 0; i < fw_size; i++)
4362                                 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4363                         WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4364                 }
4365         } else {
4366                 const __be32 *fw_data;
4367
4368                 /* MEC1 */
4369                 fw_data = (const __be32 *)rdev->mec_fw->data;
4370                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4371                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4372                         WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4373                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4374
4375                 if (rdev->family == CHIP_KAVERI) {
4376                         /* MEC2 */
4377                         fw_data = (const __be32 *)rdev->mec_fw->data;
4378                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4379                         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4380                                 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4381                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4382                 }
4383         }
4384
4385         return 0;
4386 }
4387
4388 /**
4389  * cik_cp_compute_start - start the compute queues
4390  *
4391  * @rdev: radeon_device pointer
4392  *
4393  * Enable the compute queues.
4394  * Returns 0 for success, error for failure.
4395  */
4396 static int cik_cp_compute_start(struct radeon_device *rdev)
4397 {
4398         cik_cp_compute_enable(rdev, true);
4399
4400         return 0;
4401 }
4402
4403 /**
4404  * cik_cp_compute_fini - stop the compute queues
4405  *
4406  * @rdev: radeon_device pointer
4407  *
4408  * Stop the compute queues and tear down the driver queue
4409  * info.
4410  */
4411 static void cik_cp_compute_fini(struct radeon_device *rdev)
4412 {
4413         int i, idx, r;
4414
4415         cik_cp_compute_enable(rdev, false);
4416
4417         for (i = 0; i < 2; i++) {
4418                 if (i == 0)
4419                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4420                 else
4421                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4422
4423                 if (rdev->ring[idx].mqd_obj) {
4424                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4425                         if (unlikely(r != 0))
4426                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4427
4428                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4429                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4430
4431                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4432                         rdev->ring[idx].mqd_obj = NULL;
4433                 }
4434         }
4435 }
4436
4437 static void cik_mec_fini(struct radeon_device *rdev)
4438 {
4439         int r;
4440
4441         if (rdev->mec.hpd_eop_obj) {
4442                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4443                 if (unlikely(r != 0))
4444                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4445                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4446                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4447
4448                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4449                 rdev->mec.hpd_eop_obj = NULL;
4450         }
4451 }
4452
4453 #define MEC_HPD_SIZE 2048
4454
4455 static int cik_mec_init(struct radeon_device *rdev)
4456 {
4457         int r;
4458         u32 *hpd;
4459
4460         /*
4461          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4462          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4463          * Nonetheless, we assign only 1 pipe because all other pipes will
4464          * be handled by KFD
4465          */
4466         rdev->mec.num_mec = 1;
4467         rdev->mec.num_pipe = 1;
4468         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4469
4470         if (rdev->mec.hpd_eop_obj == NULL) {
4471                 r = radeon_bo_create(rdev,
4472                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4473                                      PAGE_SIZE, true,
4474                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4475                                      &rdev->mec.hpd_eop_obj);
4476                 if (r) {
4477                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4478                         return r;
4479                 }
4480         }
4481
4482         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4483         if (unlikely(r != 0)) {
4484                 cik_mec_fini(rdev);
4485                 return r;
4486         }
4487         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4488                           &rdev->mec.hpd_eop_gpu_addr);
4489         if (r) {
4490                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4491                 cik_mec_fini(rdev);
4492                 return r;
4493         }
4494         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4495         if (r) {
4496                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4497                 cik_mec_fini(rdev);
4498                 return r;
4499         }
4500
4501         /* clear memory.  Not sure if this is required or not */
4502         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4503
4504         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4505         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4506
4507         return 0;
4508 }
4509
/*
 * Per-queue register image kept inside the MQD (see struct bonaire_mqd,
 * member queue_state).  cik_cp_compute_resume() stores in these fields
 * the values it programs into the correspondingly named CP_MQD_* and
 * CP_HQD_* registers.
 *
 * NOTE(review): the field order presumably mirrors a layout expected
 * by the CP hardware/firmware - do not reorder or resize; confirm
 * against the hardware documentation.
 */
struct hqd_registers
{
	/* MQD location and queue activation state */
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	/* ring buffer base, pointers and their writeback/poll addresses */
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	/* remaining fields are left zero by cik_cp_compute_resume() */
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	/* written from CP_MQD_CONTROL with the VMID field cleared */
	u32 cp_mqd_control;
};
4548
/*
 * Memory queue descriptor (MQD) for one compute queue.
 *
 * cik_cp_compute_resume() allocates one GTT buffer object of
 * sizeof(struct bonaire_mqd) per driver compute ring, zeroes it, and
 * then fills in header, static_thread_mgmt01/23 and queue_state; the
 * GPU address of the buffer is programmed into CP_MQD_BASE_ADDR.
 *
 * NOTE(review): the layout is presumably consumed by the CP
 * hardware/firmware - do not reorder or resize; confirm against the
 * hardware documentation.
 */
struct bonaire_mqd
{
	u32 header;			/* set to 0xC0310800 at init */
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];	/* both set to 0xffffffff at init */
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];	/* both set to 0xffffffff at init */
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;	/* image of the HQD/MQD registers */
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4576
4577 /**
4578  * cik_cp_compute_resume - setup the compute queue registers
4579  *
4580  * @rdev: radeon_device pointer
4581  *
4582  * Program the compute queues and test them to make sure they
4583  * are working.
4584  * Returns 0 for success, error for failure.
4585  */
4586 static int cik_cp_compute_resume(struct radeon_device *rdev)
4587 {
4588         int r, i, j, idx;
4589         u32 tmp;
4590         bool use_doorbell = true;
4591         u64 hqd_gpu_addr;
4592         u64 mqd_gpu_addr;
4593         u64 eop_gpu_addr;
4594         u64 wb_gpu_addr;
4595         u32 *buf;
4596         struct bonaire_mqd *mqd;
4597
4598         r = cik_cp_compute_start(rdev);
4599         if (r)
4600                 return r;
4601
4602         /* fix up chicken bits */
4603         tmp = RREG32(CP_CPF_DEBUG);
4604         tmp |= (1 << 23);
4605         WREG32(CP_CPF_DEBUG, tmp);
4606
4607         /* init the pipes */
4608         mutex_lock(&rdev->srbm_mutex);
4609
4610         eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4611
4612         cik_srbm_select(rdev, 0, 0, 0, 0);
4613
4614         /* write the EOP addr */
4615         WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4616         WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4617
4618         /* set the VMID assigned */
4619         WREG32(CP_HPD_EOP_VMID, 0);
4620
4621         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4622         tmp = RREG32(CP_HPD_EOP_CONTROL);
4623         tmp &= ~EOP_SIZE_MASK;
4624         tmp |= order_base_2(MEC_HPD_SIZE / 8);
4625         WREG32(CP_HPD_EOP_CONTROL, tmp);
4626
4627         mutex_unlock(&rdev->srbm_mutex);
4628
4629         /* init the queues.  Just two for now. */
4630         for (i = 0; i < 2; i++) {
4631                 if (i == 0)
4632                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4633                 else
4634                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4635
4636                 if (rdev->ring[idx].mqd_obj == NULL) {
4637                         r = radeon_bo_create(rdev,
4638                                              sizeof(struct bonaire_mqd),
4639                                              PAGE_SIZE, true,
4640                                              RADEON_GEM_DOMAIN_GTT, 0, NULL,
4641                                              NULL, &rdev->ring[idx].mqd_obj);
4642                         if (r) {
4643                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4644                                 return r;
4645                         }
4646                 }
4647
4648                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4649                 if (unlikely(r != 0)) {
4650                         cik_cp_compute_fini(rdev);
4651                         return r;
4652                 }
4653                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4654                                   &mqd_gpu_addr);
4655                 if (r) {
4656                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4657                         cik_cp_compute_fini(rdev);
4658                         return r;
4659                 }
4660                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4661                 if (r) {
4662                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4663                         cik_cp_compute_fini(rdev);
4664                         return r;
4665                 }
4666
4667                 /* init the mqd struct */
4668                 memset(buf, 0, sizeof(struct bonaire_mqd));
4669
4670                 mqd = (struct bonaire_mqd *)buf;
4671                 mqd->header = 0xC0310800;
4672                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4673                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4674                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4675                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4676
4677                 mutex_lock(&rdev->srbm_mutex);
4678                 cik_srbm_select(rdev, rdev->ring[idx].me,
4679                                 rdev->ring[idx].pipe,
4680                                 rdev->ring[idx].queue, 0);
4681
4682                 /* disable wptr polling */
4683                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4684                 tmp &= ~WPTR_POLL_EN;
4685                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4686
4687                 /* enable doorbell? */
4688                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4689                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4690                 if (use_doorbell)
4691                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4692                 else
4693                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4694                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4695                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4696
4697                 /* disable the queue if it's active */
4698                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4699                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4700                 mqd->queue_state.cp_hqd_pq_wptr= 0;
4701                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4702                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4703                         for (j = 0; j < rdev->usec_timeout; j++) {
4704                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4705                                         break;
4706                                 udelay(1);
4707                         }
4708                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4709                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4710                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4711                 }
4712
4713                 /* set the pointer to the MQD */
4714                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4715                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4716                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4717                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4718                 /* set MQD vmid to 0 */
4719                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4720                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4721                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4722
4723                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4724                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4725                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4726                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4727                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4728                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4729
4730                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4731                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4732                 mqd->queue_state.cp_hqd_pq_control &=
4733                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4734
4735                 mqd->queue_state.cp_hqd_pq_control |=
4736                         order_base_2(rdev->ring[idx].ring_size / 8);
4737                 mqd->queue_state.cp_hqd_pq_control |=
4738                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4739 #ifdef __BIG_ENDIAN
4740                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4741 #endif
4742                 mqd->queue_state.cp_hqd_pq_control &=
4743                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4744                 mqd->queue_state.cp_hqd_pq_control |=
4745                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4746                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4747
4748                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4749                 if (i == 0)
4750                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4751                 else
4752                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4753                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4754                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4755                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4756                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4757                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4758
4759                 /* set the wb address wether it's enabled or not */
4760                 if (i == 0)
4761                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4762                 else
4763                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4764                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4765                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4766                         upper_32_bits(wb_gpu_addr) & 0xffff;
4767                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4768                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4769                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4770                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4771
4772                 /* enable the doorbell if requested */
4773                 if (use_doorbell) {
4774                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4775                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4776                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4777                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4778                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4779                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4780                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
4781                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4782
4783                 } else {
4784                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4785                 }
4786                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4787                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4788
4789                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4790                 rdev->ring[idx].wptr = 0;
4791                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4792                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4793                 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4794
4795                 /* set the vmid for the queue */
4796                 mqd->queue_state.cp_hqd_vmid = 0;
4797                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4798
4799                 /* activate the queue */
4800                 mqd->queue_state.cp_hqd_active = 1;
4801                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4802
4803                 cik_srbm_select(rdev, 0, 0, 0, 0);
4804                 mutex_unlock(&rdev->srbm_mutex);
4805
4806                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4807                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4808
4809                 rdev->ring[idx].ready = true;
4810                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4811                 if (r)
4812                         rdev->ring[idx].ready = false;
4813         }
4814
4815         return 0;
4816 }
4817
4818 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4819 {
4820         cik_cp_gfx_enable(rdev, enable);
4821         cik_cp_compute_enable(rdev, enable);
4822 }
4823
/**
 * cik_cp_load_microcode - load the gfx and compute CP ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx ucode and, if that succeeded, the compute ucode.
 * Returns 0 for success, or the error of the first loader that failed.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret = cik_cp_gfx_load_microcode(rdev);

	if (ret)
		return ret;

	return cik_cp_compute_load_microcode(rdev);
}
4837
/**
 * cik_cp_fini - tear down both the gfx and compute CP
 *
 * @rdev: radeon_device pointer
 *
 * Calls the gfx teardown first, then the compute teardown.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	/* gfx first, compute second */
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4843
4844 static int cik_cp_resume(struct radeon_device *rdev)
4845 {
4846         int r;
4847
4848         cik_enable_gui_idle_interrupt(rdev, false);
4849
4850         r = cik_cp_load_microcode(rdev);
4851         if (r)
4852                 return r;
4853
4854         r = cik_cp_gfx_resume(rdev);
4855         if (r)
4856                 return r;
4857         r = cik_cp_compute_resume(rdev);
4858         if (r)
4859                 return r;
4860
4861         cik_enable_gui_idle_interrupt(rdev, true);
4862
4863         return 0;
4864 }
4865
4866 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4867 {
4868         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4869                 RREG32(GRBM_STATUS));
4870         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4871                 RREG32(GRBM_STATUS2));
4872         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4873                 RREG32(GRBM_STATUS_SE0));
4874         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4875                 RREG32(GRBM_STATUS_SE1));
4876         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4877                 RREG32(GRBM_STATUS_SE2));
4878         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4879                 RREG32(GRBM_STATUS_SE3));
4880         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4881                 RREG32(SRBM_STATUS));
4882         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4883                 RREG32(SRBM_STATUS2));
4884         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4885                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4886         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4887                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4888         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4889         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4890                  RREG32(CP_STALLED_STAT1));
4891         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4892                  RREG32(CP_STALLED_STAT2));
4893         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4894                  RREG32(CP_STALLED_STAT3));
4895         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4896                  RREG32(CP_CPF_BUSY_STAT));
4897         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4898                  RREG32(CP_CPF_STALLED_STAT1));
4899         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4900         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4901         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4902                  RREG32(CP_CPC_STALLED_STAT1));
4903         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4904 }
4905
4906 /**
4907  * cik_gpu_check_soft_reset - check which blocks are busy
4908  *
4909  * @rdev: radeon_device pointer
4910  *
4911  * Check which blocks are busy and return the relevant reset
4912  * mask to be used by cik_gpu_soft_reset().
4913  * Returns a mask of the blocks to be reset.
4914  */
4915 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4916 {
4917         u32 reset_mask = 0;
4918         u32 tmp;
4919
4920         /* GRBM_STATUS */
4921         tmp = RREG32(GRBM_STATUS);
4922         if (tmp & (PA_BUSY | SC_BUSY |
4923                    BCI_BUSY | SX_BUSY |
4924                    TA_BUSY | VGT_BUSY |
4925                    DB_BUSY | CB_BUSY |
4926                    GDS_BUSY | SPI_BUSY |
4927                    IA_BUSY | IA_BUSY_NO_DMA))
4928                 reset_mask |= RADEON_RESET_GFX;
4929
4930         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4931                 reset_mask |= RADEON_RESET_CP;
4932
4933         /* GRBM_STATUS2 */
4934         tmp = RREG32(GRBM_STATUS2);
4935         if (tmp & RLC_BUSY)
4936                 reset_mask |= RADEON_RESET_RLC;
4937
4938         /* SDMA0_STATUS_REG */
4939         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4940         if (!(tmp & SDMA_IDLE))
4941                 reset_mask |= RADEON_RESET_DMA;
4942
4943         /* SDMA1_STATUS_REG */
4944         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4945         if (!(tmp & SDMA_IDLE))
4946                 reset_mask |= RADEON_RESET_DMA1;
4947
4948         /* SRBM_STATUS2 */
4949         tmp = RREG32(SRBM_STATUS2);
4950         if (tmp & SDMA_BUSY)
4951                 reset_mask |= RADEON_RESET_DMA;
4952
4953         if (tmp & SDMA1_BUSY)
4954                 reset_mask |= RADEON_RESET_DMA1;
4955
4956         /* SRBM_STATUS */
4957         tmp = RREG32(SRBM_STATUS);
4958
4959         if (tmp & IH_BUSY)
4960                 reset_mask |= RADEON_RESET_IH;
4961
4962         if (tmp & SEM_BUSY)
4963                 reset_mask |= RADEON_RESET_SEM;
4964
4965         if (tmp & GRBM_RQ_PENDING)
4966                 reset_mask |= RADEON_RESET_GRBM;
4967
4968         if (tmp & VMC_BUSY)
4969                 reset_mask |= RADEON_RESET_VMC;
4970
4971         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4972                    MCC_BUSY | MCD_BUSY))
4973                 reset_mask |= RADEON_RESET_MC;
4974
4975         if (evergreen_is_display_hung(rdev))
4976                 reset_mask |= RADEON_RESET_DISPLAY;
4977
4978         /* Skip MC reset as it's mostly likely not hung, just busy */
4979         if (reset_mask & RADEON_RESET_MC) {
4980                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4981                 reset_mask &= ~RADEON_RESET_MC;
4982         }
4983
4984         return reset_mask;
4985 }
4986
4987 /**
4988  * cik_gpu_soft_reset - soft reset GPU
4989  *
4990  * @rdev: radeon_device pointer
4991  * @reset_mask: mask of which blocks to reset
4992  *
4993  * Soft reset the blocks specified in @reset_mask.
4994  */
4995 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4996 {
4997         struct evergreen_mc_save save;
4998         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4999         u32 tmp;
5000
5001         if (reset_mask == 0)
5002                 return;
5003
5004         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5005
5006         cik_print_gpu_status_regs(rdev);
5007         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5008                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5009         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5010                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5011
5012         /* disable CG/PG */
5013         cik_fini_pg(rdev);
5014         cik_fini_cg(rdev);
5015
5016         /* stop the rlc */
5017         cik_rlc_stop(rdev);
5018
5019         /* Disable GFX parsing/prefetching */
5020         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5021
5022         /* Disable MEC parsing/prefetching */
5023         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5024
5025         if (reset_mask & RADEON_RESET_DMA) {
5026                 /* sdma0 */
5027                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5028                 tmp |= SDMA_HALT;
5029                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5030         }
5031         if (reset_mask & RADEON_RESET_DMA1) {
5032                 /* sdma1 */
5033                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5034                 tmp |= SDMA_HALT;
5035                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5036         }
5037
5038         evergreen_mc_stop(rdev, &save);
5039         if (evergreen_mc_wait_for_idle(rdev)) {
5040                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5041         }
5042
5043         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5044                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5045
5046         if (reset_mask & RADEON_RESET_CP) {
5047                 grbm_soft_reset |= SOFT_RESET_CP;
5048
5049                 srbm_soft_reset |= SOFT_RESET_GRBM;
5050         }
5051
5052         if (reset_mask & RADEON_RESET_DMA)
5053                 srbm_soft_reset |= SOFT_RESET_SDMA;
5054
5055         if (reset_mask & RADEON_RESET_DMA1)
5056                 srbm_soft_reset |= SOFT_RESET_SDMA1;
5057
5058         if (reset_mask & RADEON_RESET_DISPLAY)
5059                 srbm_soft_reset |= SOFT_RESET_DC;
5060
5061         if (reset_mask & RADEON_RESET_RLC)
5062                 grbm_soft_reset |= SOFT_RESET_RLC;
5063
5064         if (reset_mask & RADEON_RESET_SEM)
5065                 srbm_soft_reset |= SOFT_RESET_SEM;
5066
5067         if (reset_mask & RADEON_RESET_IH)
5068                 srbm_soft_reset |= SOFT_RESET_IH;
5069
5070         if (reset_mask & RADEON_RESET_GRBM)
5071                 srbm_soft_reset |= SOFT_RESET_GRBM;
5072
5073         if (reset_mask & RADEON_RESET_VMC)
5074                 srbm_soft_reset |= SOFT_RESET_VMC;
5075
5076         if (!(rdev->flags & RADEON_IS_IGP)) {
5077                 if (reset_mask & RADEON_RESET_MC)
5078                         srbm_soft_reset |= SOFT_RESET_MC;
5079         }
5080
5081         if (grbm_soft_reset) {
5082                 tmp = RREG32(GRBM_SOFT_RESET);
5083                 tmp |= grbm_soft_reset;
5084                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5085                 WREG32(GRBM_SOFT_RESET, tmp);
5086                 tmp = RREG32(GRBM_SOFT_RESET);
5087
5088                 udelay(50);
5089
5090                 tmp &= ~grbm_soft_reset;
5091                 WREG32(GRBM_SOFT_RESET, tmp);
5092                 tmp = RREG32(GRBM_SOFT_RESET);
5093         }
5094
5095         if (srbm_soft_reset) {
5096                 tmp = RREG32(SRBM_SOFT_RESET);
5097                 tmp |= srbm_soft_reset;
5098                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5099                 WREG32(SRBM_SOFT_RESET, tmp);
5100                 tmp = RREG32(SRBM_SOFT_RESET);
5101
5102                 udelay(50);
5103
5104                 tmp &= ~srbm_soft_reset;
5105                 WREG32(SRBM_SOFT_RESET, tmp);
5106                 tmp = RREG32(SRBM_SOFT_RESET);
5107         }
5108
5109         /* Wait a little for things to settle down */
5110         udelay(50);
5111
5112         evergreen_mc_resume(rdev, &save);
5113         udelay(50);
5114
5115         cik_print_gpu_status_regs(rdev);
5116 }
5117
5118 struct kv_reset_save_regs {
5119         u32 gmcon_reng_execute;
5120         u32 gmcon_misc;
5121         u32 gmcon_misc3;
5122 };
5123
5124 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5125                                    struct kv_reset_save_regs *save)
5126 {
5127         save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5128         save->gmcon_misc = RREG32(GMCON_MISC);
5129         save->gmcon_misc3 = RREG32(GMCON_MISC3);
5130
5131         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5132         WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5133                                                 STCTRL_STUTTER_EN));
5134 }
5135
5136 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5137                                       struct kv_reset_save_regs *save)
5138 {
5139         int i;
5140
5141         WREG32(GMCON_PGFSM_WRITE, 0);
5142         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5143
5144         for (i = 0; i < 5; i++)
5145                 WREG32(GMCON_PGFSM_WRITE, 0);
5146
5147         WREG32(GMCON_PGFSM_WRITE, 0);
5148         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5149
5150         for (i = 0; i < 5; i++)
5151                 WREG32(GMCON_PGFSM_WRITE, 0);
5152
5153         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5154         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5155
5156         for (i = 0; i < 5; i++)
5157                 WREG32(GMCON_PGFSM_WRITE, 0);
5158
5159         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5160         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5161
5162         for (i = 0; i < 5; i++)
5163                 WREG32(GMCON_PGFSM_WRITE, 0);
5164
5165         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5166         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5167
5168         for (i = 0; i < 5; i++)
5169                 WREG32(GMCON_PGFSM_WRITE, 0);
5170
5171         WREG32(GMCON_PGFSM_WRITE, 0);
5172         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5173
5174         for (i = 0; i < 5; i++)
5175                 WREG32(GMCON_PGFSM_WRITE, 0);
5176
5177         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5178         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5179
5180         for (i = 0; i < 5; i++)
5181                 WREG32(GMCON_PGFSM_WRITE, 0);
5182
5183         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5184         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5185
5186         for (i = 0; i < 5; i++)
5187                 WREG32(GMCON_PGFSM_WRITE, 0);
5188
5189         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5190         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5191
5192         for (i = 0; i < 5; i++)
5193                 WREG32(GMCON_PGFSM_WRITE, 0);
5194
5195         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5196         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5197
5198         for (i = 0; i < 5; i++)
5199                 WREG32(GMCON_PGFSM_WRITE, 0);
5200
5201         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5202         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5203
5204         WREG32(GMCON_MISC3, save->gmcon_misc3);
5205         WREG32(GMCON_MISC, save->gmcon_misc);
5206         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5207 }
5208
5209 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5210 {
5211         struct evergreen_mc_save save;
5212         struct kv_reset_save_regs kv_save = { 0 };
5213         u32 tmp, i;
5214
5215         dev_info(rdev->dev, "GPU pci config reset\n");
5216
5217         /* disable dpm? */
5218
5219         /* disable cg/pg */
5220         cik_fini_pg(rdev);
5221         cik_fini_cg(rdev);
5222
5223         /* Disable GFX parsing/prefetching */
5224         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5225
5226         /* Disable MEC parsing/prefetching */
5227         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5228
5229         /* sdma0 */
5230         tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5231         tmp |= SDMA_HALT;
5232         WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5233         /* sdma1 */
5234         tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5235         tmp |= SDMA_HALT;
5236         WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5237         /* XXX other engines? */
5238
5239         /* halt the rlc, disable cp internal ints */
5240         cik_rlc_stop(rdev);
5241
5242         udelay(50);
5243
5244         /* disable mem access */
5245         evergreen_mc_stop(rdev, &save);
5246         if (evergreen_mc_wait_for_idle(rdev)) {
5247                 dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
5248         }
5249
5250         if (rdev->flags & RADEON_IS_IGP)
5251                 kv_save_regs_for_reset(rdev, &kv_save);
5252
5253         /* disable BM */
5254         pci_clear_master(rdev->pdev);
5255         /* reset */
5256         radeon_pci_config_reset(rdev);
5257
5258         udelay(100);
5259
5260         /* wait for asic to come out of reset */
5261         for (i = 0; i < rdev->usec_timeout; i++) {
5262                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5263                         break;
5264                 udelay(1);
5265         }
5266
5267         /* does asic init need to be run first??? */
5268         if (rdev->flags & RADEON_IS_IGP)
5269                 kv_restore_regs_for_reset(rdev, &kv_save);
5270 }
5271
5272 /**
5273  * cik_asic_reset - soft reset GPU
5274  *
5275  * @rdev: radeon_device pointer
5276  * @hard: force hard reset
5277  *
5278  * Look up which blocks are hung and attempt
5279  * to reset them.
5280  * Returns 0 for success.
5281  */
5282 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5283 {
5284         u32 reset_mask;
5285
5286         if (hard) {
5287                 cik_gpu_pci_config_reset(rdev);
5288                 return 0;
5289         }
5290
5291         reset_mask = cik_gpu_check_soft_reset(rdev);
5292
5293         if (reset_mask)
5294                 r600_set_bios_scratch_engine_hung(rdev, true);
5295
5296         /* try soft reset */
5297         cik_gpu_soft_reset(rdev, reset_mask);
5298
5299         reset_mask = cik_gpu_check_soft_reset(rdev);
5300
5301         /* try pci config reset */
5302         if (reset_mask && radeon_hard_reset)
5303                 cik_gpu_pci_config_reset(rdev);
5304
5305         reset_mask = cik_gpu_check_soft_reset(rdev);
5306
5307         if (!reset_mask)
5308                 r600_set_bios_scratch_engine_hung(rdev, false);
5309
5310         return 0;
5311 }
5312
5313 /**
5314  * cik_gfx_is_lockup - check if the 3D engine is locked up
5315  *
5316  * @rdev: radeon_device pointer
5317  * @ring: radeon_ring structure holding ring information
5318  *
5319  * Check if the 3D engine is locked up (CIK).
5320  * Returns true if the engine is locked, false if not.
5321  */
5322 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5323 {
5324         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5325
5326         if (!(reset_mask & (RADEON_RESET_GFX |
5327                             RADEON_RESET_COMPUTE |
5328                             RADEON_RESET_CP))) {
5329                 radeon_ring_lockup_update(rdev, ring);
5330                 return false;
5331         }
5332         return radeon_ring_test_lockup(rdev, ring);
5333 }
5334
5335 /* MC */
5336 /**
5337  * cik_mc_program - program the GPU memory controller
5338  *
5339  * @rdev: radeon_device pointer
5340  *
5341  * Set the location of vram, gart, and AGP in the GPU's
5342  * physical address space (CIK).
5343  */
5344 static void cik_mc_program(struct radeon_device *rdev)
5345 {
5346         struct evergreen_mc_save save;
5347         u32 tmp;
5348         int i, j;
5349
5350         /* Initialize HDP */
5351         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5352                 WREG32((0x2c14 + j), 0x00000000);
5353                 WREG32((0x2c18 + j), 0x00000000);
5354                 WREG32((0x2c1c + j), 0x00000000);
5355                 WREG32((0x2c20 + j), 0x00000000);
5356                 WREG32((0x2c24 + j), 0x00000000);
5357         }
5358         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5359
5360         evergreen_mc_stop(rdev, &save);
5361         if (radeon_mc_wait_for_idle(rdev)) {
5362                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5363         }
5364         /* Lockout access through VGA aperture*/
5365         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5366         /* Update configuration */
5367         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5368                rdev->mc.vram_start >> 12);
5369         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5370                rdev->mc.vram_end >> 12);
5371         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5372                rdev->vram_scratch.gpu_addr >> 12);
5373         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5374         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5375         WREG32(MC_VM_FB_LOCATION, tmp);
5376         /* XXX double check these! */
5377         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5378         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5379         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5380         WREG32(MC_VM_AGP_BASE, 0);
5381         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5382         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5383         if (radeon_mc_wait_for_idle(rdev)) {
5384                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5385         }
5386         evergreen_mc_resume(rdev, &save);
5387         /* we need to own VRAM, so turn off the VGA renderer here
5388          * to stop it overwriting our objects */
5389         rv515_vga_render_disable(rdev);
5390 }
5391
5392 /**
5393  * cik_mc_init - initialize the memory controller driver params
5394  *
5395  * @rdev: radeon_device pointer
5396  *
5397  * Look up the amount of vram, vram width, and decide how to place
5398  * vram and gart within the GPU's physical address space (CIK).
5399  * Returns 0 for success.
5400  */
5401 static int cik_mc_init(struct radeon_device *rdev)
5402 {
5403         u32 tmp;
5404         int chansize, numchan;
5405
5406         /* Get VRAM informations */
5407         rdev->mc.vram_is_ddr = true;
5408         tmp = RREG32(MC_ARB_RAMCFG);
5409         if (tmp & CHANSIZE_MASK) {
5410                 chansize = 64;
5411         } else {
5412                 chansize = 32;
5413         }
5414         tmp = RREG32(MC_SHARED_CHMAP);
5415         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5416         case 0:
5417         default:
5418                 numchan = 1;
5419                 break;
5420         case 1:
5421                 numchan = 2;
5422                 break;
5423         case 2:
5424                 numchan = 4;
5425                 break;
5426         case 3:
5427                 numchan = 8;
5428                 break;
5429         case 4:
5430                 numchan = 3;
5431                 break;
5432         case 5:
5433                 numchan = 6;
5434                 break;
5435         case 6:
5436                 numchan = 10;
5437                 break;
5438         case 7:
5439                 numchan = 12;
5440                 break;
5441         case 8:
5442                 numchan = 16;
5443                 break;
5444         }
5445         rdev->mc.vram_width = numchan * chansize;
5446         /* Could aper size report 0 ? */
5447         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5448         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5449         /* size in MB on si */
5450         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5451         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5452         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5453         si_vram_gtt_location(rdev, &rdev->mc);
5454         radeon_update_bandwidth_info(rdev);
5455
5456         return 0;
5457 }
5458
5459 /*
5460  * GART
5461  * VMID 0 is the physical GPU addresses as used by the kernel.
5462  * VMIDs 1-15 are used for userspace clients and are handled
5463  * by the radeon vm/hsa code.
5464  */
5465 /**
5466  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5467  *
5468  * @rdev: radeon_device pointer
5469  *
5470  * Flush the TLB for the VMID 0 page table (CIK).
5471  */
5472 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5473 {
5474         /* flush hdp cache */
5475         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5476
5477         /* bits 0-15 are the VM contexts0-15 */
5478         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5479 }
5480
5481 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5482 {
5483         int i;
5484         uint32_t sh_mem_bases, sh_mem_config;
5485
5486         sh_mem_bases = 0x6000 | 0x6000 << 16;
5487         sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5488         sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5489
5490         mutex_lock(&rdev->srbm_mutex);
5491         for (i = 8; i < 16; i++) {
5492                 cik_srbm_select(rdev, 0, 0, 0, i);
5493                 /* CP and shaders */
5494                 WREG32(SH_MEM_CONFIG, sh_mem_config);
5495                 WREG32(SH_MEM_APE1_BASE, 1);
5496                 WREG32(SH_MEM_APE1_LIMIT, 0);
5497                 WREG32(SH_MEM_BASES, sh_mem_bases);
5498         }
5499         cik_srbm_select(rdev, 0, 0, 0, 0);
5500         mutex_unlock(&rdev->srbm_mutex);
5501 }
5502
5503 /**
5504  * cik_pcie_gart_enable - gart enable
5505  *
5506  * @rdev: radeon_device pointer
5507  *
5508  * This sets up the TLBs, programs the page tables for VMID0,
5509  * sets up the hw for VMIDs 1-15 which are allocated on
5510  * demand, and sets up the global locations for the LDS, GDS,
5511  * and GPUVM for FSA64 clients (CIK).
5512  * Returns 0 for success, errors for failure.
5513  */
5514 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5515 {
5516         int r, i;
5517
5518         if (rdev->gart.robj == NULL) {
5519                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5520                 return -EINVAL;
5521         }
5522         r = radeon_gart_table_vram_pin(rdev);
5523         if (r)
5524                 return r;
5525         /* Setup TLB control */
5526         WREG32(MC_VM_MX_L1_TLB_CNTL,
5527                (0xA << 7) |
5528                ENABLE_L1_TLB |
5529                ENABLE_L1_FRAGMENT_PROCESSING |
5530                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5531                ENABLE_ADVANCED_DRIVER_MODEL |
5532                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5533         /* Setup L2 cache */
5534         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5535                ENABLE_L2_FRAGMENT_PROCESSING |
5536                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5537                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5538                EFFECTIVE_L2_QUEUE_SIZE(7) |
5539                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5540         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5541         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5542                BANK_SELECT(4) |
5543                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5544         /* setup context0 */
5545         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5546         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5547         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5548         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5549                         (u32)(rdev->dummy_page.addr >> 12));
5550         WREG32(VM_CONTEXT0_CNTL2, 0);
5551         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5552                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5553
5554         WREG32(0x15D4, 0);
5555         WREG32(0x15D8, 0);
5556         WREG32(0x15DC, 0);
5557
5558         /* restore context1-15 */
5559         /* set vm size, must be a multiple of 4 */
5560         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5561         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5562         for (i = 1; i < 16; i++) {
5563                 if (i < 8)
5564                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5565                                rdev->vm_manager.saved_table_addr[i]);
5566                 else
5567                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5568                                rdev->vm_manager.saved_table_addr[i]);
5569         }
5570
5571         /* enable context1-15 */
5572         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5573                (u32)(rdev->dummy_page.addr >> 12));
5574         WREG32(VM_CONTEXT1_CNTL2, 4);
5575         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5576                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5577                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5578                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5579                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5580                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5581                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5582                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5583                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5584                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5585                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5586                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5587                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5588                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5589
5590         if (rdev->family == CHIP_KAVERI) {
5591                 u32 tmp = RREG32(CHUB_CONTROL);
5592                 tmp &= ~BYPASS_VM;
5593                 WREG32(CHUB_CONTROL, tmp);
5594         }
5595
5596         /* XXX SH_MEM regs */
5597         /* where to put LDS, scratch, GPUVM in FSA64 space */
5598         mutex_lock(&rdev->srbm_mutex);
5599         for (i = 0; i < 16; i++) {
5600                 cik_srbm_select(rdev, 0, 0, 0, i);
5601                 /* CP and shaders */
5602                 WREG32(SH_MEM_CONFIG, 0);
5603                 WREG32(SH_MEM_APE1_BASE, 1);
5604                 WREG32(SH_MEM_APE1_LIMIT, 0);
5605                 WREG32(SH_MEM_BASES, 0);
5606                 /* SDMA GFX */
5607                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5608                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5609                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5610                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5611                 /* XXX SDMA RLC - todo */
5612         }
5613         cik_srbm_select(rdev, 0, 0, 0, 0);
5614         mutex_unlock(&rdev->srbm_mutex);
5615
5616         cik_pcie_init_compute_vmid(rdev);
5617
5618         cik_pcie_gart_tlb_flush(rdev);
5619         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5620                  (unsigned)(rdev->mc.gtt_size >> 20),
5621                  (unsigned long long)rdev->gart.table_addr);
5622         rdev->gart.ready = true;
5623         return 0;
5624 }
5625
5626 /**
5627  * cik_pcie_gart_disable - gart disable
5628  *
5629  * @rdev: radeon_device pointer
5630  *
5631  * This disables all VM page table (CIK).
5632  */
5633 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5634 {
5635         unsigned i;
5636
5637         for (i = 1; i < 16; ++i) {
5638                 uint32_t reg;
5639                 if (i < 8)
5640                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5641                 else
5642                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5643                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5644         }
5645
5646         /* Disable all tables */
5647         WREG32(VM_CONTEXT0_CNTL, 0);
5648         WREG32(VM_CONTEXT1_CNTL, 0);
5649         /* Setup TLB control */
5650         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5651                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5652         /* Setup L2 cache */
5653         WREG32(VM_L2_CNTL,
5654                ENABLE_L2_FRAGMENT_PROCESSING |
5655                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5656                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5657                EFFECTIVE_L2_QUEUE_SIZE(7) |
5658                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5659         WREG32(VM_L2_CNTL2, 0);
5660         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5661                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5662         radeon_gart_table_vram_unpin(rdev);
5663 }
5664
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Disable the GART, free its VRAM table and tear down the common
 * radeon GART state, in that order (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
        /* hardware off first, then release the backing table */
        cik_pcie_gart_disable(rdev);
        radeon_gart_table_vram_free(rdev);
        radeon_gart_fini(rdev);
}
5678
5679 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * Nothing to do: CIK uses hw IB checking, so neither argument is
 * examined (CIK).
 * Returns 0 unconditionally.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
        /* hw checks the IB, no software validation needed */
        return 0;
}
5692
5693 /*
5694  * vm
5695  * VMID 0 is the physical GPU addresses as used by the kernel.
5696  * VMIDs 1-15 are used for userspace clients and are handled
5697  * by the radeon vm/hsa code.
5698  */
5699 /**
5700  * cik_vm_init - cik vm init callback
5701  *
5702  * @rdev: radeon_device pointer
5703  *
5704  * Inits cik specific vm parameters (number of VMs, base of vram for
5705  * VMIDs 1-15) (CIK).
5706  * Returns 0 for success.
5707  */
5708 int cik_vm_init(struct radeon_device *rdev)
5709 {
5710         /*
5711          * number of VMs
5712          * VMID 0 is reserved for System
5713          * radeon graphics/compute will use VMIDs 1-7
5714          * amdkfd will use VMIDs 8-15
5715          */
5716         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5717         /* base offset of vram pages */
5718         if (rdev->flags & RADEON_IS_IGP) {
5719                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5720                 tmp <<= 22;
5721                 rdev->vm_manager.vram_base_offset = tmp;
5722         } else
5723                 rdev->vm_manager.vram_base_offset = 0;
5724
5725         return 0;
5726 }
5727
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Counterpart of cik_vm_init(); there is currently no asic specific
 * VM state to tear down, so this is a nop (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
        /* intentionally empty */
}
5738
5739 /**
5740  * cik_vm_decode_fault - print human readable fault info
5741  *
5742  * @rdev: radeon_device pointer
5743  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5744  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5745  *
5746  * Print human readable fault information (CIK).
5747  */
5748 static void cik_vm_decode_fault(struct radeon_device *rdev,
5749                                 u32 status, u32 addr, u32 mc_client)
5750 {
5751         u32 mc_id;
5752         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5753         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5754         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5755                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5756
5757         if (rdev->family == CHIP_HAWAII)
5758                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5759         else
5760                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5761
5762         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5763                protections, vmid, addr,
5764                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5765                block, mc_client, mc_id);
5766 }
5767
5768 /**
5769  * cik_vm_flush - cik vm flush using the CP
5770  *
5771  * @rdev: radeon_device pointer
5772  *
5773  * Update the page table base and flush the VM TLB
5774  * using the CP (CIK).
5775  */
5776 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5777                   unsigned vm_id, uint64_t pd_addr)
5778 {
5779         int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5780
5781         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5782         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5783                                  WRITE_DATA_DST_SEL(0)));
5784         if (vm_id < 8) {
5785                 radeon_ring_write(ring,
5786                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5787         } else {
5788                 radeon_ring_write(ring,
5789                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5790         }
5791         radeon_ring_write(ring, 0);
5792         radeon_ring_write(ring, pd_addr >> 12);
5793
5794         /* update SH_MEM_* regs */
5795         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5796         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5797                                  WRITE_DATA_DST_SEL(0)));
5798         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5799         radeon_ring_write(ring, 0);
5800         radeon_ring_write(ring, VMID(vm_id));
5801
5802         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5803         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5804                                  WRITE_DATA_DST_SEL(0)));
5805         radeon_ring_write(ring, SH_MEM_BASES >> 2);
5806         radeon_ring_write(ring, 0);
5807
5808         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5809         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5810         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5811         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5812
5813         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5814         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5815                                  WRITE_DATA_DST_SEL(0)));
5816         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5817         radeon_ring_write(ring, 0);
5818         radeon_ring_write(ring, VMID(0));
5819
5820         /* HDP flush */
5821         cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5822
5823         /* bits 0-15 are the VM contexts0-15 */
5824         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5825         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5826                                  WRITE_DATA_DST_SEL(0)));
5827         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5828         radeon_ring_write(ring, 0);
5829         radeon_ring_write(ring, 1 << vm_id);
5830
5831         /* wait for the invalidate to complete */
5832         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5833         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5834                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
5835                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5836         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5837         radeon_ring_write(ring, 0);
5838         radeon_ring_write(ring, 0); /* ref */
5839         radeon_ring_write(ring, 0); /* mask */
5840         radeon_ring_write(ring, 0x20); /* poll interval */
5841
5842         /* compute doesn't have PFP */
5843         if (usepfp) {
5844                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5845                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5846                 radeon_ring_write(ring, 0x0);
5847         }
5848 }
5849
5850 /*
5851  * RLC
5852  * The RLC is a multi-purpose microengine that handles a
5853  * variety of functions, the most important of which is
5854  * the interrupt controller.
5855  */
5856 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5857                                           bool enable)
5858 {
5859         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5860
5861         if (enable)
5862                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5863         else
5864                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5865         WREG32(CP_INT_CNTL_RING0, tmp);
5866 }
5867
5868 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5869 {
5870         u32 tmp;
5871
5872         tmp = RREG32(RLC_LB_CNTL);
5873         if (enable)
5874                 tmp |= LOAD_BALANCE_ENABLE;
5875         else
5876                 tmp &= ~LOAD_BALANCE_ENABLE;
5877         WREG32(RLC_LB_CNTL, tmp);
5878 }
5879
5880 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5881 {
5882         u32 i, j, k;
5883         u32 mask;
5884
5885         mutex_lock(&rdev->grbm_idx_mutex);
5886         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5887                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5888                         cik_select_se_sh(rdev, i, j);
5889                         for (k = 0; k < rdev->usec_timeout; k++) {
5890                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5891                                         break;
5892                                 udelay(1);
5893                         }
5894                 }
5895         }
5896         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5897         mutex_unlock(&rdev->grbm_idx_mutex);
5898
5899         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5900         for (k = 0; k < rdev->usec_timeout; k++) {
5901                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5902                         break;
5903                 udelay(1);
5904         }
5905 }
5906
5907 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5908 {
5909         u32 tmp;
5910
5911         tmp = RREG32(RLC_CNTL);
5912         if (tmp != rlc)
5913                 WREG32(RLC_CNTL, rlc);
5914 }
5915
5916 static u32 cik_halt_rlc(struct radeon_device *rdev)
5917 {
5918         u32 data, orig;
5919
5920         orig = data = RREG32(RLC_CNTL);
5921
5922         if (data & RLC_ENABLE) {
5923                 u32 i;
5924
5925                 data &= ~RLC_ENABLE;
5926                 WREG32(RLC_CNTL, data);
5927
5928                 for (i = 0; i < rdev->usec_timeout; i++) {
5929                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5930                                 break;
5931                         udelay(1);
5932                 }
5933
5934                 cik_wait_for_rlc_serdes(rdev);
5935         }
5936
5937         return orig;
5938 }
5939
5940 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5941 {
5942         u32 tmp, i, mask;
5943
5944         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5945         WREG32(RLC_GPR_REG2, tmp);
5946
5947         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5948         for (i = 0; i < rdev->usec_timeout; i++) {
5949                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5950                         break;
5951                 udelay(1);
5952         }
5953
5954         for (i = 0; i < rdev->usec_timeout; i++) {
5955                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5956                         break;
5957                 udelay(1);
5958         }
5959 }
5960
5961 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5962 {
5963         u32 tmp;
5964
5965         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5966         WREG32(RLC_GPR_REG2, tmp);
5967 }
5968
5969 /**
5970  * cik_rlc_stop - stop the RLC ME
5971  *
5972  * @rdev: radeon_device pointer
5973  *
5974  * Halt the RLC ME (MicroEngine) (CIK).
5975  */
5976 static void cik_rlc_stop(struct radeon_device *rdev)
5977 {
5978         WREG32(RLC_CNTL, 0);
5979
5980         cik_enable_gui_idle_interrupt(rdev, false);
5981
5982         cik_wait_for_rlc_serdes(rdev);
5983 }
5984
5985 /**
5986  * cik_rlc_start - start the RLC ME
5987  *
5988  * @rdev: radeon_device pointer
5989  *
5990  * Unhalt the RLC ME (MicroEngine) (CIK).
5991  */
5992 static void cik_rlc_start(struct radeon_device *rdev)
5993 {
5994         WREG32(RLC_CNTL, RLC_ENABLE);
5995
5996         cik_enable_gui_idle_interrupt(rdev, true);
5997
5998         udelay(50);
5999 }
6000
6001 /**
6002  * cik_rlc_resume - setup the RLC hw
6003  *
6004  * @rdev: radeon_device pointer
6005  *
6006  * Initialize the RLC registers, load the ucode,
6007  * and start the RLC (CIK).
6008  * Returns 0 for success, -EINVAL if the ucode is not available.
6009  */
6010 static int cik_rlc_resume(struct radeon_device *rdev)
6011 {
6012         u32 i, size, tmp;
6013
6014         if (!rdev->rlc_fw)
6015                 return -EINVAL;
6016
6017         cik_rlc_stop(rdev);
6018
6019         /* disable CG */
6020         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6021         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6022
6023         si_rlc_reset(rdev);
6024
6025         cik_init_pg(rdev);
6026
6027         cik_init_cg(rdev);
6028
6029         WREG32(RLC_LB_CNTR_INIT, 0);
6030         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6031
6032         mutex_lock(&rdev->grbm_idx_mutex);
6033         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6034         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6035         WREG32(RLC_LB_PARAMS, 0x00600408);
6036         WREG32(RLC_LB_CNTL, 0x80000004);
6037         mutex_unlock(&rdev->grbm_idx_mutex);
6038
6039         WREG32(RLC_MC_CNTL, 0);
6040         WREG32(RLC_UCODE_CNTL, 0);
6041
6042         if (rdev->new_fw) {
6043                 const struct rlc_firmware_header_v1_0 *hdr =
6044                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6045                 const __le32 *fw_data = (const __le32 *)
6046                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6047
6048                 radeon_ucode_print_rlc_hdr(&hdr->header);
6049
6050                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6051                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6052                 for (i = 0; i < size; i++)
6053                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6054                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6055         } else {
6056                 const __be32 *fw_data;
6057
6058                 switch (rdev->family) {
6059                 case CHIP_BONAIRE:
6060                 case CHIP_HAWAII:
6061                 default:
6062                         size = BONAIRE_RLC_UCODE_SIZE;
6063                         break;
6064                 case CHIP_KAVERI:
6065                         size = KV_RLC_UCODE_SIZE;
6066                         break;
6067                 case CHIP_KABINI:
6068                         size = KB_RLC_UCODE_SIZE;
6069                         break;
6070                 case CHIP_MULLINS:
6071                         size = ML_RLC_UCODE_SIZE;
6072                         break;
6073                 }
6074
6075                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6076                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6077                 for (i = 0; i < size; i++)
6078                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6079                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6080         }
6081
6082         /* XXX - find out what chips support lbpw */
6083         cik_enable_lbpw(rdev, false);
6084
6085         if (rdev->family == CHIP_BONAIRE)
6086                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6087
6088         cik_rlc_start(rdev);
6089
6090         return 0;
6091 }
6092
6093 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6094 {
6095         u32 data, orig, tmp, tmp2;
6096
6097         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6098
6099         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6100                 cik_enable_gui_idle_interrupt(rdev, true);
6101
6102                 tmp = cik_halt_rlc(rdev);
6103
6104                 mutex_lock(&rdev->grbm_idx_mutex);
6105                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6106                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6107                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6108                 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6109                 WREG32(RLC_SERDES_WR_CTRL, tmp2);
6110                 mutex_unlock(&rdev->grbm_idx_mutex);
6111
6112                 cik_update_rlc(rdev, tmp);
6113
6114                 data |= CGCG_EN | CGLS_EN;
6115         } else {
6116                 cik_enable_gui_idle_interrupt(rdev, false);
6117
6118                 RREG32(CB_CGTT_SCLK_CTRL);
6119                 RREG32(CB_CGTT_SCLK_CTRL);
6120                 RREG32(CB_CGTT_SCLK_CTRL);
6121                 RREG32(CB_CGTT_SCLK_CTRL);
6122
6123                 data &= ~(CGCG_EN | CGLS_EN);
6124         }
6125
6126         if (orig != data)
6127                 WREG32(RLC_CGCG_CGLS_CTRL, data);
6128
6129 }
6130
6131 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6132 {
6133         u32 data, orig, tmp = 0;
6134
6135         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6136                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6137                         if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6138                                 orig = data = RREG32(CP_MEM_SLP_CNTL);
6139                                 data |= CP_MEM_LS_EN;
6140                                 if (orig != data)
6141                                         WREG32(CP_MEM_SLP_CNTL, data);
6142                         }
6143                 }
6144
6145                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6146                 data |= 0x00000001;
6147                 data &= 0xfffffffd;
6148                 if (orig != data)
6149                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6150
6151                 tmp = cik_halt_rlc(rdev);
6152
6153                 mutex_lock(&rdev->grbm_idx_mutex);
6154                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6155                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6156                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6157                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6158                 WREG32(RLC_SERDES_WR_CTRL, data);
6159                 mutex_unlock(&rdev->grbm_idx_mutex);
6160
6161                 cik_update_rlc(rdev, tmp);
6162
6163                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6164                         orig = data = RREG32(CGTS_SM_CTRL_REG);
6165                         data &= ~SM_MODE_MASK;
6166                         data |= SM_MODE(0x2);
6167                         data |= SM_MODE_ENABLE;
6168                         data &= ~CGTS_OVERRIDE;
6169                         if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6170                             (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6171                                 data &= ~CGTS_LS_OVERRIDE;
6172                         data &= ~ON_MONITOR_ADD_MASK;
6173                         data |= ON_MONITOR_ADD_EN;
6174                         data |= ON_MONITOR_ADD(0x96);
6175                         if (orig != data)
6176                                 WREG32(CGTS_SM_CTRL_REG, data);
6177                 }
6178         } else {
6179                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6180                 data |= 0x00000003;
6181                 if (orig != data)
6182                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6183
6184                 data = RREG32(RLC_MEM_SLP_CNTL);
6185                 if (data & RLC_MEM_LS_EN) {
6186                         data &= ~RLC_MEM_LS_EN;
6187                         WREG32(RLC_MEM_SLP_CNTL, data);
6188                 }
6189
6190                 data = RREG32(CP_MEM_SLP_CNTL);
6191                 if (data & CP_MEM_LS_EN) {
6192                         data &= ~CP_MEM_LS_EN;
6193                         WREG32(CP_MEM_SLP_CNTL, data);
6194                 }
6195
6196                 orig = data = RREG32(CGTS_SM_CTRL_REG);
6197                 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6198                 if (orig != data)
6199                         WREG32(CGTS_SM_CTRL_REG, data);
6200
6201                 tmp = cik_halt_rlc(rdev);
6202
6203                 mutex_lock(&rdev->grbm_idx_mutex);
6204                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6205                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6206                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6207                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6208                 WREG32(RLC_SERDES_WR_CTRL, data);
6209                 mutex_unlock(&rdev->grbm_idx_mutex);
6210
6211                 cik_update_rlc(rdev, tmp);
6212         }
6213 }
6214
6215 static const u32 mc_cg_registers[] =
6216 {
6217         MC_HUB_MISC_HUB_CG,
6218         MC_HUB_MISC_SIP_CG,
6219         MC_HUB_MISC_VM_CG,
6220         MC_XPB_CLK_GAT,
6221         ATC_MISC_CG,
6222         MC_CITF_MISC_WR_CG,
6223         MC_CITF_MISC_RD_CG,
6224         MC_CITF_MISC_VM_CG,
6225         VM_L2_CG,
6226 };
6227
6228 static void cik_enable_mc_ls(struct radeon_device *rdev,
6229                              bool enable)
6230 {
6231         int i;
6232         u32 orig, data;
6233
6234         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6235                 orig = data = RREG32(mc_cg_registers[i]);
6236                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6237                         data |= MC_LS_ENABLE;
6238                 else
6239                         data &= ~MC_LS_ENABLE;
6240                 if (data != orig)
6241                         WREG32(mc_cg_registers[i], data);
6242         }
6243 }
6244
6245 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6246                                bool enable)
6247 {
6248         int i;
6249         u32 orig, data;
6250
6251         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6252                 orig = data = RREG32(mc_cg_registers[i]);
6253                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6254                         data |= MC_CG_ENABLE;
6255                 else
6256                         data &= ~MC_CG_ENABLE;
6257                 if (data != orig)
6258                         WREG32(mc_cg_registers[i], data);
6259         }
6260 }
6261
6262 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6263                                  bool enable)
6264 {
6265         u32 orig, data;
6266
6267         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6268                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6269                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6270         } else {
6271                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6272                 data |= 0xff000000;
6273                 if (data != orig)
6274                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6275
6276                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6277                 data |= 0xff000000;
6278                 if (data != orig)
6279                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6280         }
6281 }
6282
6283 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6284                                  bool enable)
6285 {
6286         u32 orig, data;
6287
6288         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6289                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6290                 data |= 0x100;
6291                 if (orig != data)
6292                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6293
6294                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6295                 data |= 0x100;
6296                 if (orig != data)
6297                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6298         } else {
6299                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6300                 data &= ~0x100;
6301                 if (orig != data)
6302                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6303
6304                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6305                 data &= ~0x100;
6306                 if (orig != data)
6307                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6308         }
6309 }
6310
6311 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6312                                 bool enable)
6313 {
6314         u32 orig, data;
6315
6316         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6317                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6318                 data = 0xfff;
6319                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6320
6321                 orig = data = RREG32(UVD_CGC_CTRL);
6322                 data |= DCM;
6323                 if (orig != data)
6324                         WREG32(UVD_CGC_CTRL, data);
6325         } else {
6326                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6327                 data &= ~0xfff;
6328                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6329
6330                 orig = data = RREG32(UVD_CGC_CTRL);
6331                 data &= ~DCM;
6332                 if (orig != data)
6333                         WREG32(UVD_CGC_CTRL, data);
6334         }
6335 }
6336
6337 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6338                                bool enable)
6339 {
6340         u32 orig, data;
6341
6342         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6343
6344         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6345                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6346                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6347         else
6348                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6349                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6350
6351         if (orig != data)
6352                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6353 }
6354
6355 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6356                                 bool enable)
6357 {
6358         u32 orig, data;
6359
6360         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6361
6362         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6363                 data &= ~CLOCK_GATING_DIS;
6364         else
6365                 data |= CLOCK_GATING_DIS;
6366
6367         if (orig != data)
6368                 WREG32(HDP_HOST_PATH_CNTL, data);
6369 }
6370
6371 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6372                               bool enable)
6373 {
6374         u32 orig, data;
6375
6376         orig = data = RREG32(HDP_MEM_POWER_LS);
6377
6378         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6379                 data |= HDP_LS_ENABLE;
6380         else
6381                 data &= ~HDP_LS_ENABLE;
6382
6383         if (orig != data)
6384                 WREG32(HDP_MEM_POWER_LS, data);
6385 }
6386
6387 void cik_update_cg(struct radeon_device *rdev,
6388                    u32 block, bool enable)
6389 {
6390
6391         if (block & RADEON_CG_BLOCK_GFX) {
6392                 cik_enable_gui_idle_interrupt(rdev, false);
6393                 /* order matters! */
6394                 if (enable) {
6395                         cik_enable_mgcg(rdev, true);
6396                         cik_enable_cgcg(rdev, true);
6397                 } else {
6398                         cik_enable_cgcg(rdev, false);
6399                         cik_enable_mgcg(rdev, false);
6400                 }
6401                 cik_enable_gui_idle_interrupt(rdev, true);
6402         }
6403
6404         if (block & RADEON_CG_BLOCK_MC) {
6405                 if (!(rdev->flags & RADEON_IS_IGP)) {
6406                         cik_enable_mc_mgcg(rdev, enable);
6407                         cik_enable_mc_ls(rdev, enable);
6408                 }
6409         }
6410
6411         if (block & RADEON_CG_BLOCK_SDMA) {
6412                 cik_enable_sdma_mgcg(rdev, enable);
6413                 cik_enable_sdma_mgls(rdev, enable);
6414         }
6415
6416         if (block & RADEON_CG_BLOCK_BIF) {
6417                 cik_enable_bif_mgls(rdev, enable);
6418         }
6419
6420         if (block & RADEON_CG_BLOCK_UVD) {
6421                 if (rdev->has_uvd)
6422                         cik_enable_uvd_mgcg(rdev, enable);
6423         }
6424
6425         if (block & RADEON_CG_BLOCK_HDP) {
6426                 cik_enable_hdp_mgcg(rdev, enable);
6427                 cik_enable_hdp_ls(rdev, enable);
6428         }
6429
6430         if (block & RADEON_CG_BLOCK_VCE) {
6431                 vce_v2_0_enable_mgcg(rdev, enable);
6432         }
6433 }
6434
6435 static void cik_init_cg(struct radeon_device *rdev)
6436 {
6437
6438         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6439
6440         if (rdev->has_uvd)
6441                 si_init_uvd_internal_cg(rdev);
6442
6443         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6444                              RADEON_CG_BLOCK_SDMA |
6445                              RADEON_CG_BLOCK_BIF |
6446                              RADEON_CG_BLOCK_UVD |
6447                              RADEON_CG_BLOCK_HDP), true);
6448 }
6449
6450 static void cik_fini_cg(struct radeon_device *rdev)
6451 {
6452         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6453                              RADEON_CG_BLOCK_SDMA |
6454                              RADEON_CG_BLOCK_BIF |
6455                              RADEON_CG_BLOCK_UVD |
6456                              RADEON_CG_BLOCK_HDP), false);
6457
6458         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6459 }
6460
6461 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6462                                           bool enable)
6463 {
6464         u32 data, orig;
6465
6466         orig = data = RREG32(RLC_PG_CNTL);
6467         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6468                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6469         else
6470                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6471         if (orig != data)
6472                 WREG32(RLC_PG_CNTL, data);
6473 }
6474
6475 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6476                                           bool enable)
6477 {
6478         u32 data, orig;
6479
6480         orig = data = RREG32(RLC_PG_CNTL);
6481         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6482                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6483         else
6484                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6485         if (orig != data)
6486                 WREG32(RLC_PG_CNTL, data);
6487 }
6488
6489 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6490 {
6491         u32 data, orig;
6492
6493         orig = data = RREG32(RLC_PG_CNTL);
6494         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6495                 data &= ~DISABLE_CP_PG;
6496         else
6497                 data |= DISABLE_CP_PG;
6498         if (orig != data)
6499                 WREG32(RLC_PG_CNTL, data);
6500 }
6501
6502 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6503 {
6504         u32 data, orig;
6505
6506         orig = data = RREG32(RLC_PG_CNTL);
6507         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6508                 data &= ~DISABLE_GDS_PG;
6509         else
6510                 data |= DISABLE_GDS_PG;
6511         if (orig != data)
6512                 WREG32(RLC_PG_CNTL, data);
6513 }
6514
6515 #define CP_ME_TABLE_SIZE    96
6516 #define CP_ME_TABLE_OFFSET  2048
6517 #define CP_MEC_TABLE_OFFSET 4096
6518
6519 void cik_init_cp_pg_table(struct radeon_device *rdev)
6520 {
6521         volatile u32 *dst_ptr;
6522         int me, i, max_me = 4;
6523         u32 bo_offset = 0;
6524         u32 table_offset, table_size;
6525
6526         if (rdev->family == CHIP_KAVERI)
6527                 max_me = 5;
6528
6529         if (rdev->rlc.cp_table_ptr == NULL)
6530                 return;
6531
6532         /* write the cp table buffer */
6533         dst_ptr = rdev->rlc.cp_table_ptr;
6534         for (me = 0; me < max_me; me++) {
6535                 if (rdev->new_fw) {
6536                         const __le32 *fw_data;
6537                         const struct gfx_firmware_header_v1_0 *hdr;
6538
6539                         if (me == 0) {
6540                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6541                                 fw_data = (const __le32 *)
6542                                         (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6543                                 table_offset = le32_to_cpu(hdr->jt_offset);
6544                                 table_size = le32_to_cpu(hdr->jt_size);
6545                         } else if (me == 1) {
6546                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6547                                 fw_data = (const __le32 *)
6548                                         (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6549                                 table_offset = le32_to_cpu(hdr->jt_offset);
6550                                 table_size = le32_to_cpu(hdr->jt_size);
6551                         } else if (me == 2) {
6552                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6553                                 fw_data = (const __le32 *)
6554                                         (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6555                                 table_offset = le32_to_cpu(hdr->jt_offset);
6556                                 table_size = le32_to_cpu(hdr->jt_size);
6557                         } else if (me == 3) {
6558                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6559                                 fw_data = (const __le32 *)
6560                                         (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6561                                 table_offset = le32_to_cpu(hdr->jt_offset);
6562                                 table_size = le32_to_cpu(hdr->jt_size);
6563                         } else {
6564                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6565                                 fw_data = (const __le32 *)
6566                                         (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6567                                 table_offset = le32_to_cpu(hdr->jt_offset);
6568                                 table_size = le32_to_cpu(hdr->jt_size);
6569                         }
6570
6571                         for (i = 0; i < table_size; i ++) {
6572                                 dst_ptr[bo_offset + i] =
6573                                         cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6574                         }
6575                         bo_offset += table_size;
6576                 } else {
6577                         const __be32 *fw_data;
6578                         table_size = CP_ME_TABLE_SIZE;
6579
6580                         if (me == 0) {
6581                                 fw_data = (const __be32 *)rdev->ce_fw->data;
6582                                 table_offset = CP_ME_TABLE_OFFSET;
6583                         } else if (me == 1) {
6584                                 fw_data = (const __be32 *)rdev->pfp_fw->data;
6585                                 table_offset = CP_ME_TABLE_OFFSET;
6586                         } else if (me == 2) {
6587                                 fw_data = (const __be32 *)rdev->me_fw->data;
6588                                 table_offset = CP_ME_TABLE_OFFSET;
6589                         } else {
6590                                 fw_data = (const __be32 *)rdev->mec_fw->data;
6591                                 table_offset = CP_MEC_TABLE_OFFSET;
6592                         }
6593
6594                         for (i = 0; i < table_size; i ++) {
6595                                 dst_ptr[bo_offset + i] =
6596                                         cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6597                         }
6598                         bo_offset += table_size;
6599                 }
6600         }
6601 }
6602
6603 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6604                                 bool enable)
6605 {
6606         u32 data, orig;
6607
6608         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6609                 orig = data = RREG32(RLC_PG_CNTL);
6610                 data |= GFX_PG_ENABLE;
6611                 if (orig != data)
6612                         WREG32(RLC_PG_CNTL, data);
6613
6614                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6615                 data |= AUTO_PG_EN;
6616                 if (orig != data)
6617                         WREG32(RLC_AUTO_PG_CTRL, data);
6618         } else {
6619                 orig = data = RREG32(RLC_PG_CNTL);
6620                 data &= ~GFX_PG_ENABLE;
6621                 if (orig != data)
6622                         WREG32(RLC_PG_CNTL, data);
6623
6624                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6625                 data &= ~AUTO_PG_EN;
6626                 if (orig != data)
6627                         WREG32(RLC_AUTO_PG_CTRL, data);
6628
6629                 data = RREG32(DB_RENDER_CONTROL);
6630         }
6631 }
6632
6633 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6634 {
6635         u32 mask = 0, tmp, tmp1;
6636         int i;
6637
6638         mutex_lock(&rdev->grbm_idx_mutex);
6639         cik_select_se_sh(rdev, se, sh);
6640         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6641         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6642         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6643         mutex_unlock(&rdev->grbm_idx_mutex);
6644
6645         tmp &= 0xffff0000;
6646
6647         tmp |= tmp1;
6648         tmp >>= 16;
6649
6650         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6651                 mask <<= 1;
6652                 mask |= 1;
6653         }
6654
6655         return (~tmp) & mask;
6656 }
6657
6658 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6659 {
6660         u32 i, j, k, active_cu_number = 0;
6661         u32 mask, counter, cu_bitmap;
6662         u32 tmp = 0;
6663
6664         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6665                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6666                         mask = 1;
6667                         cu_bitmap = 0;
6668                         counter = 0;
6669                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6670                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6671                                         if (counter < 2)
6672                                                 cu_bitmap |= mask;
6673                                         counter ++;
6674                                 }
6675                                 mask <<= 1;
6676                         }
6677
6678                         active_cu_number += counter;
6679                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6680                 }
6681         }
6682
6683         WREG32(RLC_PG_AO_CU_MASK, tmp);
6684
6685         tmp = RREG32(RLC_MAX_PG_CU);
6686         tmp &= ~MAX_PU_CU_MASK;
6687         tmp |= MAX_PU_CU(active_cu_number);
6688         WREG32(RLC_MAX_PG_CU, tmp);
6689 }
6690
6691 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6692                                        bool enable)
6693 {
6694         u32 data, orig;
6695
6696         orig = data = RREG32(RLC_PG_CNTL);
6697         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6698                 data |= STATIC_PER_CU_PG_ENABLE;
6699         else
6700                 data &= ~STATIC_PER_CU_PG_ENABLE;
6701         if (orig != data)
6702                 WREG32(RLC_PG_CNTL, data);
6703 }
6704
6705 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6706                                         bool enable)
6707 {
6708         u32 data, orig;
6709
6710         orig = data = RREG32(RLC_PG_CNTL);
6711         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6712                 data |= DYN_PER_CU_PG_ENABLE;
6713         else
6714                 data &= ~DYN_PER_CU_PG_ENABLE;
6715         if (orig != data)
6716                 WREG32(RLC_PG_CNTL, data);
6717 }
6718
6719 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6720 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6721
6722 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6723 {
6724         u32 data, orig;
6725         u32 i;
6726
6727         if (rdev->rlc.cs_data) {
6728                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6729                 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6730                 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6731                 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6732         } else {
6733                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6734                 for (i = 0; i < 3; i++)
6735                         WREG32(RLC_GPM_SCRATCH_DATA, 0);
6736         }
6737         if (rdev->rlc.reg_list) {
6738                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6739                 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6740                         WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6741         }
6742
6743         orig = data = RREG32(RLC_PG_CNTL);
6744         data |= GFX_PG_SRC;
6745         if (orig != data)
6746                 WREG32(RLC_PG_CNTL, data);
6747
6748         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6749         WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6750
6751         data = RREG32(CP_RB_WPTR_POLL_CNTL);
6752         data &= ~IDLE_POLL_COUNT_MASK;
6753         data |= IDLE_POLL_COUNT(0x60);
6754         WREG32(CP_RB_WPTR_POLL_CNTL, data);
6755
6756         data = 0x10101010;
6757         WREG32(RLC_PG_DELAY, data);
6758
6759         data = RREG32(RLC_PG_DELAY_2);
6760         data &= ~0xff;
6761         data |= 0x3;
6762         WREG32(RLC_PG_DELAY_2, data);
6763
6764         data = RREG32(RLC_AUTO_PG_CTRL);
6765         data &= ~GRBM_REG_SGIT_MASK;
6766         data |= GRBM_REG_SGIT(0x700);
6767         WREG32(RLC_AUTO_PG_CTRL, data);
6768
6769 }
6770
6771 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6772 {
6773         cik_enable_gfx_cgpg(rdev, enable);
6774         cik_enable_gfx_static_mgpg(rdev, enable);
6775         cik_enable_gfx_dynamic_mgpg(rdev, enable);
6776 }
6777
6778 u32 cik_get_csb_size(struct radeon_device *rdev)
6779 {
6780         u32 count = 0;
6781         const struct cs_section_def *sect = NULL;
6782         const struct cs_extent_def *ext = NULL;
6783
6784         if (rdev->rlc.cs_data == NULL)
6785                 return 0;
6786
6787         /* begin clear state */
6788         count += 2;
6789         /* context control state */
6790         count += 3;
6791
6792         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6793                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6794                         if (sect->id == SECT_CONTEXT)
6795                                 count += 2 + ext->reg_count;
6796                         else
6797                                 return 0;
6798                 }
6799         }
6800         /* pa_sc_raster_config/pa_sc_raster_config1 */
6801         count += 4;
6802         /* end clear state */
6803         count += 2;
6804         /* clear state */
6805         count += 2;
6806
6807         return count;
6808 }
6809
6810 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6811 {
6812         u32 count = 0, i;
6813         const struct cs_section_def *sect = NULL;
6814         const struct cs_extent_def *ext = NULL;
6815
6816         if (rdev->rlc.cs_data == NULL)
6817                 return;
6818         if (buffer == NULL)
6819                 return;
6820
6821         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6822         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6823
6824         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6825         buffer[count++] = cpu_to_le32(0x80000000);
6826         buffer[count++] = cpu_to_le32(0x80000000);
6827
6828         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6829                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6830                         if (sect->id == SECT_CONTEXT) {
6831                                 buffer[count++] =
6832                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6833                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6834                                 for (i = 0; i < ext->reg_count; i++)
6835                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6836                         } else {
6837                                 return;
6838                         }
6839                 }
6840         }
6841
6842         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6843         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6844         switch (rdev->family) {
6845         case CHIP_BONAIRE:
6846                 buffer[count++] = cpu_to_le32(0x16000012);
6847                 buffer[count++] = cpu_to_le32(0x00000000);
6848                 break;
6849         case CHIP_KAVERI:
6850                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6851                 buffer[count++] = cpu_to_le32(0x00000000);
6852                 break;
6853         case CHIP_KABINI:
6854         case CHIP_MULLINS:
6855                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6856                 buffer[count++] = cpu_to_le32(0x00000000);
6857                 break;
6858         case CHIP_HAWAII:
6859                 buffer[count++] = cpu_to_le32(0x3a00161a);
6860                 buffer[count++] = cpu_to_le32(0x0000002e);
6861                 break;
6862         default:
6863                 buffer[count++] = cpu_to_le32(0x00000000);
6864                 buffer[count++] = cpu_to_le32(0x00000000);
6865                 break;
6866         }
6867
6868         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6869         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6870
6871         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6872         buffer[count++] = cpu_to_le32(0);
6873 }
6874
6875 static void cik_init_pg(struct radeon_device *rdev)
6876 {
6877         if (rdev->pg_flags) {
6878                 cik_enable_sck_slowdown_on_pu(rdev, true);
6879                 cik_enable_sck_slowdown_on_pd(rdev, true);
6880                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6881                         cik_init_gfx_cgpg(rdev);
6882                         cik_enable_cp_pg(rdev, true);
6883                         cik_enable_gds_pg(rdev, true);
6884                 }
6885                 cik_init_ao_cu_mask(rdev);
6886                 cik_update_gfx_pg(rdev, true);
6887         }
6888 }
6889
6890 static void cik_fini_pg(struct radeon_device *rdev)
6891 {
6892         if (rdev->pg_flags) {
6893                 cik_update_gfx_pg(rdev, false);
6894                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6895                         cik_enable_cp_pg(rdev, false);
6896                         cik_enable_gds_pg(rdev, false);
6897                 }
6898         }
6899 }
6900
6901 /*
6902  * Interrupts
6903  * Starting with r6xx, interrupts are handled via a ring buffer.
6904  * Ring buffers are areas of GPU accessible memory that the GPU
6905  * writes interrupt vectors into and the host reads vectors out of.
6906  * There is a rptr (read pointer) that determines where the
6907  * host is currently reading, and a wptr (write pointer)
6908  * which determines where the GPU has written.  When the
6909  * pointers are equal, the ring is idle.  When the GPU
6910  * writes vectors to the ring buffer, it increments the
6911  * wptr.  When there is an interrupt, the host then starts
6912  * fetching commands and processing them until the pointers are
6913  * equal again at which point it updates the rptr.
6914  */
6915
6916 /**
6917  * cik_enable_interrupts - Enable the interrupt ring buffer
6918  *
6919  * @rdev: radeon_device pointer
6920  *
6921  * Enable the interrupt ring buffer (CIK).
6922  */
6923 static void cik_enable_interrupts(struct radeon_device *rdev)
6924 {
6925         u32 ih_cntl = RREG32(IH_CNTL);
6926         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6927
6928         ih_cntl |= ENABLE_INTR;
6929         ih_rb_cntl |= IH_RB_ENABLE;
6930         WREG32(IH_CNTL, ih_cntl);
6931         WREG32(IH_RB_CNTL, ih_rb_cntl);
6932         rdev->ih.enabled = true;
6933 }
6934
6935 /**
6936  * cik_disable_interrupts - Disable the interrupt ring buffer
6937  *
6938  * @rdev: radeon_device pointer
6939  *
6940  * Disable the interrupt ring buffer (CIK).
6941  */
6942 static void cik_disable_interrupts(struct radeon_device *rdev)
6943 {
6944         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6945         u32 ih_cntl = RREG32(IH_CNTL);
6946
6947         ih_rb_cntl &= ~IH_RB_ENABLE;
6948         ih_cntl &= ~ENABLE_INTR;
6949         WREG32(IH_RB_CNTL, ih_rb_cntl);
6950         WREG32(IH_CNTL, ih_cntl);
6951         /* set rptr, wptr to 0 */
6952         WREG32(IH_RB_RPTR, 0);
6953         WREG32(IH_RB_WPTR, 0);
6954         rdev->ih.enabled = false;
6955         rdev->ih.rptr = 0;
6956 }
6957
6958 /**
6959  * cik_disable_interrupt_state - Disable all interrupt sources
6960  *
6961  * @rdev: radeon_device pointer
6962  *
6963  * Clear all interrupt enable bits used by the driver (CIK).
6964  */
6965 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6966 {
6967         u32 tmp;
6968
6969         /* gfx ring */
6970         tmp = RREG32(CP_INT_CNTL_RING0) &
6971                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6972         WREG32(CP_INT_CNTL_RING0, tmp);
6973         /* sdma */
6974         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6975         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6976         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6977         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6978         /* compute queues */
6979         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6980         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6981         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6982         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6983         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6984         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6985         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6986         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6987         /* grbm */
6988         WREG32(GRBM_INT_CNTL, 0);
6989         /* SRBM */
6990         WREG32(SRBM_INT_CNTL, 0);
6991         /* vline/vblank, etc. */
6992         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6993         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6994         if (rdev->num_crtc >= 4) {
6995                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6996                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6997         }
6998         if (rdev->num_crtc >= 6) {
6999                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7000                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7001         }
7002         /* pflip */
7003         if (rdev->num_crtc >= 2) {
7004                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7005                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7006         }
7007         if (rdev->num_crtc >= 4) {
7008                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7009                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7010         }
7011         if (rdev->num_crtc >= 6) {
7012                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7013                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7014         }
7015
7016         /* dac hotplug */
7017         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7018
7019         /* digital hotplug */
7020         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7021         WREG32(DC_HPD1_INT_CONTROL, tmp);
7022         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7023         WREG32(DC_HPD2_INT_CONTROL, tmp);
7024         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7025         WREG32(DC_HPD3_INT_CONTROL, tmp);
7026         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7027         WREG32(DC_HPD4_INT_CONTROL, tmp);
7028         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7029         WREG32(DC_HPD5_INT_CONTROL, tmp);
7030         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7031         WREG32(DC_HPD6_INT_CONTROL, tmp);
7032
7033 }
7034
7035 /**
7036  * cik_irq_init - init and enable the interrupt ring
7037  *
7038  * @rdev: radeon_device pointer
7039  *
7040  * Allocate a ring buffer for the interrupt controller,
7041  * enable the RLC, disable interrupts, enable the IH
7042  * ring buffer and enable it (CIK).
7043  * Called at device load and reume.
7044  * Returns 0 for success, errors for failure.
7045  */
7046 static int cik_irq_init(struct radeon_device *rdev)
7047 {
7048         int ret = 0;
7049         int rb_bufsz;
7050         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7051
7052         /* allocate ring */
7053         ret = r600_ih_ring_alloc(rdev);
7054         if (ret)
7055                 return ret;
7056
7057         /* disable irqs */
7058         cik_disable_interrupts(rdev);
7059
7060         /* init rlc */
7061         ret = cik_rlc_resume(rdev);
7062         if (ret) {
7063                 r600_ih_ring_fini(rdev);
7064                 return ret;
7065         }
7066
7067         /* setup interrupt control */
7068         /* XXX this should actually be a bus address, not an MC address. same on older asics */
7069         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7070         interrupt_cntl = RREG32(INTERRUPT_CNTL);
7071         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7072          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7073          */
7074         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7075         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7076         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7077         WREG32(INTERRUPT_CNTL, interrupt_cntl);
7078
7079         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7080         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7081
7082         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7083                       IH_WPTR_OVERFLOW_CLEAR |
7084                       (rb_bufsz << 1));
7085
7086         if (rdev->wb.enabled)
7087                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7088
7089         /* set the writeback address whether it's enabled or not */
7090         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7091         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7092
7093         WREG32(IH_RB_CNTL, ih_rb_cntl);
7094
7095         /* set rptr, wptr to 0 */
7096         WREG32(IH_RB_RPTR, 0);
7097         WREG32(IH_RB_WPTR, 0);
7098
7099         /* Default settings for IH_CNTL (disabled at first) */
7100         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7101         /* RPTR_REARM only works if msi's are enabled */
7102         if (rdev->msi_enabled)
7103                 ih_cntl |= RPTR_REARM;
7104         WREG32(IH_CNTL, ih_cntl);
7105
7106         /* force the active interrupt state to all disabled */
7107         cik_disable_interrupt_state(rdev);
7108
7109         pci_set_master(rdev->pdev);
7110
7111         /* enable irqs */
7112         cik_enable_interrupts(rdev);
7113
7114         return ret;
7115 }
7116
7117 /**
7118  * cik_irq_set - enable/disable interrupt sources
7119  *
7120  * @rdev: radeon_device pointer
7121  *
7122  * Enable interrupt sources on the GPU (vblanks, hpd,
7123  * etc.) (CIK).
7124  * Returns 0 for success, errors for failure.
7125  */
7126 int cik_irq_set(struct radeon_device *rdev)
7127 {
7128         u32 cp_int_cntl;
7129         u32 cp_m1p0;
7130         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7131         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7132         u32 grbm_int_cntl = 0;
7133         u32 dma_cntl, dma_cntl1;
7134
7135         if (!rdev->irq.installed) {
7136                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7137                 return -EINVAL;
7138         }
7139         /* don't enable anything if the ih is disabled */
7140         if (!rdev->ih.enabled) {
7141                 cik_disable_interrupts(rdev);
7142                 /* force the active interrupt state to all disabled */
7143                 cik_disable_interrupt_state(rdev);
7144                 return 0;
7145         }
7146
7147         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7148                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7149         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7150
7151         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7152         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7153         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7154         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7155         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7156         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7157
7158         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7159         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7160
7161         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7162
7163         /* enable CP interrupts on all rings */
7164         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7165                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7166                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7167         }
7168         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7169                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7170                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7171                 if (ring->me == 1) {
7172                         switch (ring->pipe) {
7173                         case 0:
7174                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7175                                 break;
7176                         default:
7177                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7178                                 break;
7179                         }
7180                 } else {
7181                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7182                 }
7183         }
7184         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7185                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7186                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7187                 if (ring->me == 1) {
7188                         switch (ring->pipe) {
7189                         case 0:
7190                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7191                                 break;
7192                         default:
7193                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7194                                 break;
7195                         }
7196                 } else {
7197                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7198                 }
7199         }
7200
7201         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7202                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7203                 dma_cntl |= TRAP_ENABLE;
7204         }
7205
7206         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7207                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7208                 dma_cntl1 |= TRAP_ENABLE;
7209         }
7210
7211         if (rdev->irq.crtc_vblank_int[0] ||
7212             atomic_read(&rdev->irq.pflip[0])) {
7213                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7214                 crtc1 |= VBLANK_INTERRUPT_MASK;
7215         }
7216         if (rdev->irq.crtc_vblank_int[1] ||
7217             atomic_read(&rdev->irq.pflip[1])) {
7218                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7219                 crtc2 |= VBLANK_INTERRUPT_MASK;
7220         }
7221         if (rdev->irq.crtc_vblank_int[2] ||
7222             atomic_read(&rdev->irq.pflip[2])) {
7223                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7224                 crtc3 |= VBLANK_INTERRUPT_MASK;
7225         }
7226         if (rdev->irq.crtc_vblank_int[3] ||
7227             atomic_read(&rdev->irq.pflip[3])) {
7228                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7229                 crtc4 |= VBLANK_INTERRUPT_MASK;
7230         }
7231         if (rdev->irq.crtc_vblank_int[4] ||
7232             atomic_read(&rdev->irq.pflip[4])) {
7233                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7234                 crtc5 |= VBLANK_INTERRUPT_MASK;
7235         }
7236         if (rdev->irq.crtc_vblank_int[5] ||
7237             atomic_read(&rdev->irq.pflip[5])) {
7238                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7239                 crtc6 |= VBLANK_INTERRUPT_MASK;
7240         }
7241         if (rdev->irq.hpd[0]) {
7242                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7243                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7244         }
7245         if (rdev->irq.hpd[1]) {
7246                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7247                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7248         }
7249         if (rdev->irq.hpd[2]) {
7250                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7251                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7252         }
7253         if (rdev->irq.hpd[3]) {
7254                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7255                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7256         }
7257         if (rdev->irq.hpd[4]) {
7258                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7259                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7260         }
7261         if (rdev->irq.hpd[5]) {
7262                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7263                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7264         }
7265
7266         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7267
7268         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7269         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7270
7271         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7272
7273         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7274
7275         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7276         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7277         if (rdev->num_crtc >= 4) {
7278                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7279                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7280         }
7281         if (rdev->num_crtc >= 6) {
7282                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7283                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7284         }
7285
7286         if (rdev->num_crtc >= 2) {
7287                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7288                        GRPH_PFLIP_INT_MASK);
7289                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7290                        GRPH_PFLIP_INT_MASK);
7291         }
7292         if (rdev->num_crtc >= 4) {
7293                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7294                        GRPH_PFLIP_INT_MASK);
7295                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7296                        GRPH_PFLIP_INT_MASK);
7297         }
7298         if (rdev->num_crtc >= 6) {
7299                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7300                        GRPH_PFLIP_INT_MASK);
7301                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7302                        GRPH_PFLIP_INT_MASK);
7303         }
7304
7305         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7306         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7307         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7308         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7309         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7310         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7311
7312         /* posting read */
7313         RREG32(SRBM_STATUS);
7314
7315         return 0;
7316 }
7317
7318 /**
7319  * cik_irq_ack - ack interrupt sources
7320  *
7321  * @rdev: radeon_device pointer
7322  *
7323  * Ack interrupt sources on the GPU (vblanks, hpd,
7324  * etc.) (CIK).  Certain interrupts sources are sw
7325  * generated and do not require an explicit ack.
7326  */
7327 static inline void cik_irq_ack(struct radeon_device *rdev)
7328 {
7329         u32 tmp;
7330
7331         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7332         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7333         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7334         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7335         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7336         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7337         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7338
7339         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7340                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7341         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7342                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7343         if (rdev->num_crtc >= 4) {
7344                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7345                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7346                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7347                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7348         }
7349         if (rdev->num_crtc >= 6) {
7350                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7351                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7352                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7353                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7354         }
7355
7356         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7357                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7358                        GRPH_PFLIP_INT_CLEAR);
7359         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7360                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7361                        GRPH_PFLIP_INT_CLEAR);
7362         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7363                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7364         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7365                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7366         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7367                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7368         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7369                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7370
7371         if (rdev->num_crtc >= 4) {
7372                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7373                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7374                                GRPH_PFLIP_INT_CLEAR);
7375                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7376                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7377                                GRPH_PFLIP_INT_CLEAR);
7378                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7379                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7380                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7381                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7382                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7383                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7384                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7385                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7386         }
7387
7388         if (rdev->num_crtc >= 6) {
7389                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7390                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7391                                GRPH_PFLIP_INT_CLEAR);
7392                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7393                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7394                                GRPH_PFLIP_INT_CLEAR);
7395                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7396                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7397                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7398                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7399                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7400                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7401                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7402                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7403         }
7404
7405         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7406                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7407                 tmp |= DC_HPDx_INT_ACK;
7408                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7409         }
7410         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7411                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7412                 tmp |= DC_HPDx_INT_ACK;
7413                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7414         }
7415         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7416                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7417                 tmp |= DC_HPDx_INT_ACK;
7418                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7419         }
7420         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7421                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7422                 tmp |= DC_HPDx_INT_ACK;
7423                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7424         }
7425         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7426                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7427                 tmp |= DC_HPDx_INT_ACK;
7428                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7429         }
7430         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7431                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7432                 tmp |= DC_HPDx_INT_ACK;
7433                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7434         }
7435         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7436                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7437                 tmp |= DC_HPDx_RX_INT_ACK;
7438                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7439         }
7440         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7441                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7442                 tmp |= DC_HPDx_RX_INT_ACK;
7443                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7444         }
7445         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7446                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7447                 tmp |= DC_HPDx_RX_INT_ACK;
7448                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7449         }
7450         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7451                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7452                 tmp |= DC_HPDx_RX_INT_ACK;
7453                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7454         }
7455         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7456                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7457                 tmp |= DC_HPDx_RX_INT_ACK;
7458                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7459         }
7460         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7461                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7462                 tmp |= DC_HPDx_RX_INT_ACK;
7463                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7464         }
7465 }
7466
7467 /**
7468  * cik_irq_disable - disable interrupts
7469  *
7470  * @rdev: radeon_device pointer
7471  *
7472  * Disable interrupts on the hw (CIK).
      *
      * Sequence: call cik_disable_interrupts(), wait 1 ms, acknowledge
      * pending interrupt status via cik_irq_ack(), then call
      * cik_disable_interrupt_state().
      * NOTE(review): the ordering is presumably required by the hardware;
      * confirm before reordering any of these calls.
7473  */
7474 static void cik_irq_disable(struct radeon_device *rdev)
7475 {
7476         cik_disable_interrupts(rdev);
7477         /* Wait 1 ms, then acknowledge any irq status that is still pending */
7478         mdelay(1);
7479         cik_irq_ack(rdev);
7480         cik_disable_interrupt_state(rdev);
7481 }
7482
7483 /**
7484  * cik_irq_suspend - disable interrupts for suspend
7485  *
7486  * @rdev: radeon_device pointer
7487  *
7488  * Disable interrupts and stop the RLC (CIK).
7489  * Used for suspend.
      * Interrupts are disabled (cik_irq_disable()) before the RLC is
      * stopped (cik_rlc_stop()).
7490  */
7491 static void cik_irq_suspend(struct radeon_device *rdev)
7492 {
7493         cik_irq_disable(rdev);
7494         cik_rlc_stop(rdev);
7495 }
7496
7497 /**
7498  * cik_irq_fini - tear down interrupt support
7499  *
7500  * @rdev: radeon_device pointer
7501  *
7502  * Disable interrupts on the hw and free the IH ring
7503  * buffer (CIK).
7504  * Used for driver unload.
      * cik_irq_suspend() runs first, so interrupts are disabled and the
      * RLC is stopped before r600_ih_ring_fini() is called.
7505  */
7506 static void cik_irq_fini(struct radeon_device *rdev)
7507 {
7508         cik_irq_suspend(rdev);
7509         r600_ih_ring_fini(rdev);
7510 }
7511
7512 /**
7513  * cik_get_ih_wptr - get the IH ring buffer wptr
7514  *
7515  * @rdev: radeon_device pointer
7516  *
7517  * Get the IH ring buffer wptr from either the register
7518  * or the writeback memory buffer (CIK).  Also check for
7519  * ring buffer overflow and deal with it.
7520  * Used by cik_irq_process().
7521  * Returns the value of the wptr, masked with rdev->ih.ptr_mask.
      *
      * Side effects when the RB_OVERFLOW bit is set in the value read:
      * rdev->ih.rptr is moved to (wptr + 16) & rdev->ih.ptr_mask, a
      * warning is logged, and IH_WPTR_OVERFLOW_CLEAR is written to
      * IH_RB_CNTL.
7522  */
7523 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7524 {
7525         u32 wptr, tmp;
7526
             /* Use the writeback copy in memory when writeback is enabled,
              * otherwise read the IH_RB_WPTR register directly.
              */
7527         if (rdev->wb.enabled)
7528                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7529         else
7530                 wptr = RREG32(IH_RB_WPTR);
7531
7532         if (wptr & RB_OVERFLOW) {
                     /* Strip the overflow flag so wptr is a plain ring offset */
7533                 wptr &= ~RB_OVERFLOW;
7534                 /* When a ring buffer overflow happens, start parsing interrupts
7535                  * from the last not overwritten vector (wptr + 16). Hopefully
7536                  * this should allow us to catch up.
7537                  */
                     /* Logged values: wptr, current rptr, new rptr */
7538                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7539                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7540                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
                     /* Read-modify-write IH_RB_CNTL to set the overflow-clear bit */
7541                 tmp = RREG32(IH_RB_CNTL);
7542                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7543                 WREG32(IH_RB_CNTL, tmp);
7544         }
7545         return (wptr & rdev->ih.ptr_mask);
7546 }
7547
7548 /*        CIK IV Ring
7549  * Each IV ring entry is 128 bits:
7550  * [7:0]    - interrupt source id
7551  * [31:8]   - reserved
7552  * [59:32]  - interrupt source data
7553  * [63:60]  - reserved
7554  * [71:64]  - RINGID
7555  *            CP:
7556  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7557  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7558  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7559  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7560  *            PIPE_ID - ME0 0=3D
7561  *                    - ME1&2 compute dispatcher (4 pipes each)
7562  *            SDMA:
7563  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7564  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7565  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7566  * [79:72]  - VMID
7567  * [95:80]  - PASID
7568  * [127:96] - reserved
7569  */
7570 /**
7571  * cik_irq_process - interrupt handler
7572  *
7573  * @rdev: radeon_device pointer
7574  *
7575  * Interrupt handler (CIK).  Walk the IH ring,
7576  * ack interrupts and schedule work to handle
7577  * interrupt events.
7578  * Returns irq process return code.
7579  */
7580 int cik_irq_process(struct radeon_device *rdev)
7581 {
7582         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7583         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7584         u32 wptr;
7585         u32 rptr;
7586         u32 src_id, src_data, ring_id;
7587         u8 me_id, pipe_id, queue_id;
7588         u32 ring_index;
7589         bool queue_hotplug = false;
7590         bool queue_dp = false;
7591         bool queue_reset = false;
7592         u32 addr, status, mc_client;
7593         bool queue_thermal = false;
7594
7595         if (!rdev->ih.enabled || rdev->shutdown)
7596                 return IRQ_NONE;
7597
7598         wptr = cik_get_ih_wptr(rdev);
7599
7600 restart_ih:
7601         /* is somebody else already processing irqs? */
7602         if (atomic_xchg(&rdev->ih.lock, 1))
7603                 return IRQ_NONE;
7604
7605         rptr = rdev->ih.rptr;
7606         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7607
7608         /* Order reading of wptr vs. reading of IH ring data */
7609         rmb();
7610
7611         /* display interrupts */
7612         cik_irq_ack(rdev);
7613
7614         while (rptr != wptr) {
7615                 /* wptr/rptr are in bytes! */
7616                 ring_index = rptr / 4;
7617
7618                 radeon_kfd_interrupt(rdev,
7619                                 (const void *) &rdev->ih.ring[ring_index]);
7620
7621                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7622                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7623                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7624
7625                 switch (src_id) {
7626                 case 1: /* D1 vblank/vline */
7627                         switch (src_data) {
7628                         case 0: /* D1 vblank */
7629                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7630                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7631
7632                                 if (rdev->irq.crtc_vblank_int[0]) {
7633                                         drm_handle_vblank(rdev->ddev, 0);
7634                                         rdev->pm.vblank_sync = true;
7635                                         wake_up(&rdev->irq.vblank_queue);
7636                                 }
7637                                 if (atomic_read(&rdev->irq.pflip[0]))
7638                                         radeon_crtc_handle_vblank(rdev, 0);
7639                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7640                                 DRM_DEBUG("IH: D1 vblank\n");
7641
7642                                 break;
7643                         case 1: /* D1 vline */
7644                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7645                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7646
7647                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7648                                 DRM_DEBUG("IH: D1 vline\n");
7649
7650                                 break;
7651                         default:
7652                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7653                                 break;
7654                         }
7655                         break;
7656                 case 2: /* D2 vblank/vline */
7657                         switch (src_data) {
7658                         case 0: /* D2 vblank */
7659                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7660                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7661
7662                                 if (rdev->irq.crtc_vblank_int[1]) {
7663                                         drm_handle_vblank(rdev->ddev, 1);
7664                                         rdev->pm.vblank_sync = true;
7665                                         wake_up(&rdev->irq.vblank_queue);
7666                                 }
7667                                 if (atomic_read(&rdev->irq.pflip[1]))
7668                                         radeon_crtc_handle_vblank(rdev, 1);
7669                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7670                                 DRM_DEBUG("IH: D2 vblank\n");
7671
7672                                 break;
7673                         case 1: /* D2 vline */
7674                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7675                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7676
7677                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7678                                 DRM_DEBUG("IH: D2 vline\n");
7679
7680                                 break;
7681                         default:
7682                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7683                                 break;
7684                         }
7685                         break;
7686                 case 3: /* D3 vblank/vline */
7687                         switch (src_data) {
7688                         case 0: /* D3 vblank */
7689                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7690                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7691
7692                                 if (rdev->irq.crtc_vblank_int[2]) {
7693                                         drm_handle_vblank(rdev->ddev, 2);
7694                                         rdev->pm.vblank_sync = true;
7695                                         wake_up(&rdev->irq.vblank_queue);
7696                                 }
7697                                 if (atomic_read(&rdev->irq.pflip[2]))
7698                                         radeon_crtc_handle_vblank(rdev, 2);
7699                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7700                                 DRM_DEBUG("IH: D3 vblank\n");
7701
7702                                 break;
7703                         case 1: /* D3 vline */
7704                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7705                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7706
7707                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7708                                 DRM_DEBUG("IH: D3 vline\n");
7709
7710                                 break;
7711                         default:
7712                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7713                                 break;
7714                         }
7715                         break;
7716                 case 4: /* D4 vblank/vline */
7717                         switch (src_data) {
7718                         case 0: /* D4 vblank */
7719                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7720                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7721
7722                                 if (rdev->irq.crtc_vblank_int[3]) {
7723                                         drm_handle_vblank(rdev->ddev, 3);
7724                                         rdev->pm.vblank_sync = true;
7725                                         wake_up(&rdev->irq.vblank_queue);
7726                                 }
7727                                 if (atomic_read(&rdev->irq.pflip[3]))
7728                                         radeon_crtc_handle_vblank(rdev, 3);
7729                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7730                                 DRM_DEBUG("IH: D4 vblank\n");
7731
7732                                 break;
7733                         case 1: /* D4 vline */
7734                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7735                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7736
7737                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7738                                 DRM_DEBUG("IH: D4 vline\n");
7739
7740                                 break;
7741                         default:
7742                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7743                                 break;
7744                         }
7745                         break;
7746                 case 5: /* D5 vblank/vline */
7747                         switch (src_data) {
7748                         case 0: /* D5 vblank */
7749                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7750                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7751
7752                                 if (rdev->irq.crtc_vblank_int[4]) {
7753                                         drm_handle_vblank(rdev->ddev, 4);
7754                                         rdev->pm.vblank_sync = true;
7755                                         wake_up(&rdev->irq.vblank_queue);
7756                                 }
7757                                 if (atomic_read(&rdev->irq.pflip[4]))
7758                                         radeon_crtc_handle_vblank(rdev, 4);
7759                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7760                                 DRM_DEBUG("IH: D5 vblank\n");
7761
7762                                 break;
7763                         case 1: /* D5 vline */
7764                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7765                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7766
7767                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7768                                 DRM_DEBUG("IH: D5 vline\n");
7769
7770                                 break;
7771                         default:
7772                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7773                                 break;
7774                         }
7775                         break;
7776                 case 6: /* D6 vblank/vline */
7777                         switch (src_data) {
7778                         case 0: /* D6 vblank */
7779                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7780                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7781
7782                                 if (rdev->irq.crtc_vblank_int[5]) {
7783                                         drm_handle_vblank(rdev->ddev, 5);
7784                                         rdev->pm.vblank_sync = true;
7785                                         wake_up(&rdev->irq.vblank_queue);
7786                                 }
7787                                 if (atomic_read(&rdev->irq.pflip[5]))
7788                                         radeon_crtc_handle_vblank(rdev, 5);
7789                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7790                                 DRM_DEBUG("IH: D6 vblank\n");
7791
7792                                 break;
7793                         case 1: /* D6 vline */
7794                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7795                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7796
7797                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7798                                 DRM_DEBUG("IH: D6 vline\n");
7799
7800                                 break;
7801                         default:
7802                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7803                                 break;
7804                         }
7805                         break;
7806                 case 8: /* D1 page flip */
7807                 case 10: /* D2 page flip */
7808                 case 12: /* D3 page flip */
7809                 case 14: /* D4 page flip */
7810                 case 16: /* D5 page flip */
7811                 case 18: /* D6 page flip */
7812                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7813                         if (radeon_use_pflipirq > 0)
7814                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7815                         break;
7816                 case 42: /* HPD hotplug */
7817                         switch (src_data) {
7818                         case 0:
7819                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7820                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7821
7822                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7823                                 queue_hotplug = true;
7824                                 DRM_DEBUG("IH: HPD1\n");
7825
7826                                 break;
7827                         case 1:
7828                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7829                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7830
7831                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7832                                 queue_hotplug = true;
7833                                 DRM_DEBUG("IH: HPD2\n");
7834
7835                                 break;
7836                         case 2:
7837                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7838                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7839
7840                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7841                                 queue_hotplug = true;
7842                                 DRM_DEBUG("IH: HPD3\n");
7843
7844                                 break;
7845                         case 3:
7846                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7847                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7848
7849                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7850                                 queue_hotplug = true;
7851                                 DRM_DEBUG("IH: HPD4\n");
7852
7853                                 break;
7854                         case 4:
7855                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7856                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7857
7858                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7859                                 queue_hotplug = true;
7860                                 DRM_DEBUG("IH: HPD5\n");
7861
7862                                 break;
7863                         case 5:
7864                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7865                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7866
7867                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7868                                 queue_hotplug = true;
7869                                 DRM_DEBUG("IH: HPD6\n");
7870
7871                                 break;
7872                         case 6:
7873                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7874                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7875
7876                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7877                                 queue_dp = true;
7878                                 DRM_DEBUG("IH: HPD_RX 1\n");
7879
7880                                 break;
7881                         case 7:
7882                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7883                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7884
7885                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7886                                 queue_dp = true;
7887                                 DRM_DEBUG("IH: HPD_RX 2\n");
7888
7889                                 break;
7890                         case 8:
7891                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7892                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7893
7894                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7895                                 queue_dp = true;
7896                                 DRM_DEBUG("IH: HPD_RX 3\n");
7897
7898                                 break;
7899                         case 9:
7900                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7901                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7902
7903                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7904                                 queue_dp = true;
7905                                 DRM_DEBUG("IH: HPD_RX 4\n");
7906
7907                                 break;
7908                         case 10:
7909                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7910                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7911
7912                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7913                                 queue_dp = true;
7914                                 DRM_DEBUG("IH: HPD_RX 5\n");
7915
7916                                 break;
7917                         case 11:
7918                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7919                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7920
7921                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7922                                 queue_dp = true;
7923                                 DRM_DEBUG("IH: HPD_RX 6\n");
7924
7925                                 break;
7926                         default:
7927                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7928                                 break;
7929                         }
7930                         break;
7931                 case 96:
7932                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7933                         WREG32(SRBM_INT_ACK, 0x1);
7934                         break;
7935                 case 124: /* UVD */
7936                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7937                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7938                         break;
7939                 case 146:
7940                 case 147:
7941                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7942                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7943                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7944                         /* reset addr and status */
7945                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7946                         if (addr == 0x0 && status == 0x0)
7947                                 break;
7948                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7949                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7950                                 addr);
7951                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7952                                 status);
7953                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7954                         break;
7955                 case 167: /* VCE */
7956                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7957                         switch (src_data) {
7958                         case 0:
7959                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7960                                 break;
7961                         case 1:
7962                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7963                                 break;
7964                         default:
7965                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7966                                 break;
7967                         }
7968                         break;
7969                 case 176: /* GFX RB CP_INT */
7970                 case 177: /* GFX IB CP_INT */
7971                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7972                         break;
7973                 case 181: /* CP EOP event */
7974                         DRM_DEBUG("IH: CP EOP\n");
7975                         /* XXX check the bitfield order! */
7976                         me_id = (ring_id & 0x60) >> 5;
7977                         pipe_id = (ring_id & 0x18) >> 3;
7978                         queue_id = (ring_id & 0x7) >> 0;
7979                         switch (me_id) {
7980                         case 0:
7981                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7982                                 break;
7983                         case 1:
7984                         case 2:
7985                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7986                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7987                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7988                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7989                                 break;
7990                         }
7991                         break;
7992                 case 184: /* CP Privileged reg access */
7993                         DRM_ERROR("Illegal register access in command stream\n");
7994                         /* XXX check the bitfield order! */
7995                         me_id = (ring_id & 0x60) >> 5;
7996                         pipe_id = (ring_id & 0x18) >> 3;
7997                         queue_id = (ring_id & 0x7) >> 0;
7998                         switch (me_id) {
7999                         case 0:
8000                                 /* This results in a full GPU reset, but all we need to do is soft
8001                                  * reset the CP for gfx
8002                                  */
8003                                 queue_reset = true;
8004                                 break;
8005                         case 1:
8006                                 /* XXX compute */
8007                                 queue_reset = true;
8008                                 break;
8009                         case 2:
8010                                 /* XXX compute */
8011                                 queue_reset = true;
8012                                 break;
8013                         }
8014                         break;
8015                 case 185: /* CP Privileged inst */
8016                         DRM_ERROR("Illegal instruction in command stream\n");
8017                         /* XXX check the bitfield order! */
8018                         me_id = (ring_id & 0x60) >> 5;
8019                         pipe_id = (ring_id & 0x18) >> 3;
8020                         queue_id = (ring_id & 0x7) >> 0;
8021                         switch (me_id) {
8022                         case 0:
8023                                 /* This results in a full GPU reset, but all we need to do is soft
8024                                  * reset the CP for gfx
8025                                  */
8026                                 queue_reset = true;
8027                                 break;
8028                         case 1:
8029                                 /* XXX compute */
8030                                 queue_reset = true;
8031                                 break;
8032                         case 2:
8033                                 /* XXX compute */
8034                                 queue_reset = true;
8035                                 break;
8036                         }
8037                         break;
8038                 case 224: /* SDMA trap event */
8039                         /* XXX check the bitfield order! */
8040                         me_id = (ring_id & 0x3) >> 0;
8041                         queue_id = (ring_id & 0xc) >> 2;
8042                         DRM_DEBUG("IH: SDMA trap\n");
8043                         switch (me_id) {
8044                         case 0:
8045                                 switch (queue_id) {
8046                                 case 0:
8047                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8048                                         break;
8049                                 case 1:
8050                                         /* XXX compute */
8051                                         break;
8052                                 case 2:
8053                                         /* XXX compute */
8054                                         break;
8055                                 }
8056                                 break;
8057                         case 1:
8058                                 switch (queue_id) {
8059                                 case 0:
8060                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8061                                         break;
8062                                 case 1:
8063                                         /* XXX compute */
8064                                         break;
8065                                 case 2:
8066                                         /* XXX compute */
8067                                         break;
8068                                 }
8069                                 break;
8070                         }
8071                         break;
8072                 case 230: /* thermal low to high */
8073                         DRM_DEBUG("IH: thermal low to high\n");
8074                         rdev->pm.dpm.thermal.high_to_low = false;
8075                         queue_thermal = true;
8076                         break;
8077                 case 231: /* thermal high to low */
8078                         DRM_DEBUG("IH: thermal high to low\n");
8079                         rdev->pm.dpm.thermal.high_to_low = true;
8080                         queue_thermal = true;
8081                         break;
8082                 case 233: /* GUI IDLE */
8083                         DRM_DEBUG("IH: GUI idle\n");
8084                         break;
8085                 case 241: /* SDMA Privileged inst */
8086                 case 247: /* SDMA Privileged inst */
8087                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8088                         /* XXX check the bitfield order! */
8089                         me_id = (ring_id & 0x3) >> 0;
8090                         queue_id = (ring_id & 0xc) >> 2;
8091                         switch (me_id) {
8092                         case 0:
8093                                 switch (queue_id) {
8094                                 case 0:
8095                                         queue_reset = true;
8096                                         break;
8097                                 case 1:
8098                                         /* XXX compute */
8099                                         queue_reset = true;
8100                                         break;
8101                                 case 2:
8102                                         /* XXX compute */
8103                                         queue_reset = true;
8104                                         break;
8105                                 }
8106                                 break;
8107                         case 1:
8108                                 switch (queue_id) {
8109                                 case 0:
8110                                         queue_reset = true;
8111                                         break;
8112                                 case 1:
8113                                         /* XXX compute */
8114                                         queue_reset = true;
8115                                         break;
8116                                 case 2:
8117                                         /* XXX compute */
8118                                         queue_reset = true;
8119                                         break;
8120                                 }
8121                                 break;
8122                         }
8123                         break;
8124                 default:
8125                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8126                         break;
8127                 }
8128
8129                 /* wptr/rptr are in bytes! */
8130                 rptr += 16;
8131                 rptr &= rdev->ih.ptr_mask;
8132                 WREG32(IH_RB_RPTR, rptr);
8133         }
8134         if (queue_dp)
8135                 schedule_work(&rdev->dp_work);
8136         if (queue_hotplug)
8137                 schedule_delayed_work(&rdev->hotplug_work, 0);
8138         if (queue_reset) {
8139                 rdev->needs_reset = true;
8140                 wake_up_all(&rdev->fence_queue);
8141         }
8142         if (queue_thermal)
8143                 schedule_work(&rdev->pm.dpm.thermal.work);
8144         rdev->ih.rptr = rptr;
8145         atomic_set(&rdev->ih.lock, 0);
8146
8147         /* make sure wptr hasn't changed while processing */
8148         wptr = cik_get_ih_wptr(rdev);
8149         if (wptr != rptr)
8150                 goto restart_ih;
8151
8152         return IRQ_HANDLED;
8153 }
8154
8155 /*
8156  * startup/shutdown callbacks
8157  */
8158 static void cik_uvd_init(struct radeon_device *rdev)
8159 {
8160         int r;
8161
8162         if (!rdev->has_uvd)
8163                 return;
8164
8165         r = radeon_uvd_init(rdev);
8166         if (r) {
8167                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8168                 /*
8169                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8170                  * to early fails cik_uvd_start() and thus nothing happens
8171                  * there. So it is pointless to try to go through that code
8172                  * hence why we disable uvd here.
8173                  */
8174                 rdev->has_uvd = 0;
8175                 return;
8176         }
8177         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8178         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8179 }
8180
8181 static void cik_uvd_start(struct radeon_device *rdev)
8182 {
8183         int r;
8184
8185         if (!rdev->has_uvd)
8186                 return;
8187
8188         r = radeon_uvd_resume(rdev);
8189         if (r) {
8190                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8191                 goto error;
8192         }
8193         r = uvd_v4_2_resume(rdev);
8194         if (r) {
8195                 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8196                 goto error;
8197         }
8198         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8199         if (r) {
8200                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8201                 goto error;
8202         }
8203         return;
8204
8205 error:
8206         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8207 }
8208
8209 static void cik_uvd_resume(struct radeon_device *rdev)
8210 {
8211         struct radeon_ring *ring;
8212         int r;
8213
8214         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8215                 return;
8216
8217         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8218         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, RADEON_CP_PACKET2);
8219         if (r) {
8220                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8221                 return;
8222         }
8223         r = uvd_v1_0_init(rdev);
8224         if (r) {
8225                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8226                 return;
8227         }
8228 }
8229
8230 static void cik_vce_init(struct radeon_device *rdev)
8231 {
8232         int r;
8233
8234         if (!rdev->has_vce)
8235                 return;
8236
8237         r = radeon_vce_init(rdev);
8238         if (r) {
8239                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8240                 /*
8241                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
8242                  * to early fails cik_vce_start() and thus nothing happens
8243                  * there. So it is pointless to try to go through that code
8244                  * hence why we disable vce here.
8245                  */
8246                 rdev->has_vce = 0;
8247                 return;
8248         }
8249         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8250         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8251         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8252         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8253 }
8254
8255 static void cik_vce_start(struct radeon_device *rdev)
8256 {
8257         int r;
8258
8259         if (!rdev->has_vce)
8260                 return;
8261
8262         r = radeon_vce_resume(rdev);
8263         if (r) {
8264                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8265                 goto error;
8266         }
8267         r = vce_v2_0_resume(rdev);
8268         if (r) {
8269                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8270                 goto error;
8271         }
8272         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8273         if (r) {
8274                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8275                 goto error;
8276         }
8277         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8278         if (r) {
8279                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8280                 goto error;
8281         }
8282         return;
8283
8284 error:
8285         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8286         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8287 }
8288
8289 static void cik_vce_resume(struct radeon_device *rdev)
8290 {
8291         struct radeon_ring *ring;
8292         int r;
8293
8294         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8295                 return;
8296
8297         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8298         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8299         if (r) {
8300                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8301                 return;
8302         }
8303         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8304         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8305         if (r) {
8306                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8307                 return;
8308         }
8309         r = vce_v1_0_init(rdev);
8310         if (r) {
8311                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8312                 return;
8313         }
8314 }
8315
8316 /**
8317  * cik_startup - program the asic to a functional state
8318  *
8319  * @rdev: radeon_device pointer
8320  *
8321  * Programs the asic to a functional state (CIK).
8322  * Called by cik_init() and cik_resume().
8323  * Returns 0 for success, error for failure.
8324  */
8325 static int cik_startup(struct radeon_device *rdev)
8326 {
8327         struct radeon_ring *ring;
8328         u32 nop;
8329         int r;
8330
8331         /* enable pcie gen2/3 link */
8332         cik_pcie_gen3_enable(rdev);
8333         /* enable aspm */
8334         cik_program_aspm(rdev);
8335
8336         /* scratch needs to be initialized before MC */
8337         r = r600_vram_scratch_init(rdev);
8338         if (r)
8339                 return r;
8340
8341         cik_mc_program(rdev);
8342
8343         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8344                 r = ci_mc_load_microcode(rdev);
8345                 if (r) {
8346                         DRM_ERROR("Failed to load MC firmware!\n");
8347                         return r;
8348                 }
8349         }
8350
8351         r = cik_pcie_gart_enable(rdev);
8352         if (r)
8353                 return r;
8354         cik_gpu_init(rdev);
8355
8356         /* allocate rlc buffers */
8357         if (rdev->flags & RADEON_IS_IGP) {
8358                 if (rdev->family == CHIP_KAVERI) {
8359                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8360                         rdev->rlc.reg_list_size =
8361                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8362                 } else {
8363                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8364                         rdev->rlc.reg_list_size =
8365                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8366                 }
8367         }
8368         rdev->rlc.cs_data = ci_cs_data;
8369         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4; /* CP JT */
8370         rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8371         r = sumo_rlc_init(rdev);
8372         if (r) {
8373                 DRM_ERROR("Failed to init rlc BOs!\n");
8374                 return r;
8375         }
8376
8377         /* allocate wb buffer */
8378         r = radeon_wb_init(rdev);
8379         if (r)
8380                 return r;
8381
8382         /* allocate mec buffers */
8383         r = cik_mec_init(rdev);
8384         if (r) {
8385                 DRM_ERROR("Failed to init MEC BOs!\n");
8386                 return r;
8387         }
8388
8389         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8390         if (r) {
8391                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8392                 return r;
8393         }
8394
8395         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8396         if (r) {
8397                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8398                 return r;
8399         }
8400
8401         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8402         if (r) {
8403                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8404                 return r;
8405         }
8406
8407         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8408         if (r) {
8409                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8410                 return r;
8411         }
8412
8413         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8414         if (r) {
8415                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8416                 return r;
8417         }
8418
8419         cik_uvd_start(rdev);
8420         cik_vce_start(rdev);
8421
8422         /* Enable IRQ */
8423         if (!rdev->irq.installed) {
8424                 r = radeon_irq_kms_init(rdev);
8425                 if (r)
8426                         return r;
8427         }
8428
8429         r = cik_irq_init(rdev);
8430         if (r) {
8431                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8432                 radeon_irq_kms_fini(rdev);
8433                 return r;
8434         }
8435         cik_irq_set(rdev);
8436
8437         if (rdev->family == CHIP_HAWAII) {
8438                 if (rdev->new_fw)
8439                         nop = PACKET3(PACKET3_NOP, 0x3FFF);
8440                 else
8441                         nop = RADEON_CP_PACKET2;
8442         } else {
8443                 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8444         }
8445
8446         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8447         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8448                              nop);
8449         if (r)
8450                 return r;
8451
8452         /* set up the compute queues */
8453         /* type-2 packets are deprecated on MEC, use type-3 instead */
8454         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8455         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8456                              nop);
8457         if (r)
8458                 return r;
8459         ring->me = 1; /* first MEC */
8460         ring->pipe = 0; /* first pipe */
8461         ring->queue = 0; /* first queue */
8462         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8463
8464         /* type-2 packets are deprecated on MEC, use type-3 instead */
8465         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8466         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8467                              nop);
8468         if (r)
8469                 return r;
8470         /* dGPU only have 1 MEC */
8471         ring->me = 1; /* first MEC */
8472         ring->pipe = 0; /* first pipe */
8473         ring->queue = 1; /* second queue */
8474         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8475
8476         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8477         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8478                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8479         if (r)
8480                 return r;
8481
8482         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8483         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8484                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8485         if (r)
8486                 return r;
8487
8488         r = cik_cp_resume(rdev);
8489         if (r)
8490                 return r;
8491
8492         r = cik_sdma_resume(rdev);
8493         if (r)
8494                 return r;
8495
8496         cik_uvd_resume(rdev);
8497         cik_vce_resume(rdev);
8498
8499         r = radeon_ib_pool_init(rdev);
8500         if (r) {
8501                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8502                 return r;
8503         }
8504
8505         r = radeon_vm_manager_init(rdev);
8506         if (r) {
8507                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8508                 return r;
8509         }
8510
8511         r = radeon_audio_init(rdev);
8512         if (r)
8513                 return r;
8514
8515         r = radeon_kfd_resume(rdev);
8516         if (r)
8517                 return r;
8518
8519         return 0;
8520 }
8521
8522 /**
8523  * cik_resume - resume the asic to a functional state
8524  *
8525  * @rdev: radeon_device pointer
8526  *
8527  * Programs the asic to a functional state (CIK).
8528  * Called at resume.
8529  * Returns 0 for success, error for failure.
8530  */
8531 int cik_resume(struct radeon_device *rdev)
8532 {
8533         int r;
8534
8535         /* post card */
8536         atom_asic_init(rdev->mode_info.atom_context);
8537
8538         /* init golden registers */
8539         cik_init_golden_registers(rdev);
8540
8541         if (rdev->pm.pm_method == PM_METHOD_DPM)
8542                 radeon_pm_resume(rdev);
8543
8544         rdev->accel_working = true;
8545         r = cik_startup(rdev);
8546         if (r) {
8547                 DRM_ERROR("cik startup failed on resume\n");
8548                 rdev->accel_working = false;
8549                 return r;
8550         }
8551
8552         return r;
8553
8554 }
8555
8556 /**
8557  * cik_suspend - suspend the asic
8558  *
8559  * @rdev: radeon_device pointer
8560  *
8561  * Bring the chip into a state suitable for suspend (CIK).
8562  * Called at suspend.
8563  * Returns 0 for success.
8564  */
8565 int cik_suspend(struct radeon_device *rdev)
8566 {
8567         radeon_kfd_suspend(rdev);
8568         radeon_pm_suspend(rdev);
8569         radeon_audio_fini(rdev);
8570         radeon_vm_manager_fini(rdev);
8571         cik_cp_enable(rdev, false);
8572         cik_sdma_enable(rdev, false);
8573         if (rdev->has_uvd) {
8574                 uvd_v1_0_fini(rdev);
8575                 radeon_uvd_suspend(rdev);
8576         }
8577         if (rdev->has_vce)
8578                 radeon_vce_suspend(rdev);
8579         cik_fini_pg(rdev);
8580         cik_fini_cg(rdev);
8581         cik_irq_suspend(rdev);
8582         radeon_wb_disable(rdev);
8583         cik_pcie_gart_disable(rdev);
8584         return 0;
8585 }
8586
/* The plan is to move initialization into the asic init function
 * below and to use helper functions, so that radeon_device_init
 * does little more than call the asic-specific function. This
 * should also allow a number of callback functions, such as
 * vram_info, to be removed.
 */
8593 /**
8594  * cik_init - asic specific driver and hw init
8595  *
8596  * @rdev: radeon_device pointer
8597  *
8598  * Setup asic specific driver variables and program the hw
8599  * to a functional state (CIK).
8600  * Called at driver startup.
8601  * Returns 0 for success, errors for failure.
8602  */
8603 int cik_init(struct radeon_device *rdev)
8604 {
8605         struct radeon_ring *ring;
8606         int r;
8607
8608         /* Read BIOS */
8609         if (!radeon_get_bios(rdev)) {
8610                 if (ASIC_IS_AVIVO(rdev))
8611                         return -EINVAL;
8612         }
8613         /* Must be an ATOMBIOS */
8614         if (!rdev->is_atom_bios) {
8615                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8616                 return -EINVAL;
8617         }
8618         r = radeon_atombios_init(rdev);
8619         if (r)
8620                 return r;
8621
8622         /* Post card if necessary */
8623         if (!radeon_card_posted(rdev)) {
8624                 if (!rdev->bios) {
8625                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8626                         return -EINVAL;
8627                 }
8628                 DRM_INFO("GPU not posted. posting now...\n");
8629                 atom_asic_init(rdev->mode_info.atom_context);
8630         }
8631         /* init golden registers */
8632         cik_init_golden_registers(rdev);
8633         /* Initialize scratch registers */
8634         cik_scratch_init(rdev);
8635         /* Initialize surface registers */
8636         radeon_surface_init(rdev);
8637         /* Initialize clocks */
8638         radeon_get_clock_info(rdev->ddev);
8639
8640         /* Fence driver */
8641         r = radeon_fence_driver_init(rdev);
8642         if (r)
8643                 return r;
8644
8645         /* initialize memory controller */
8646         r = cik_mc_init(rdev);
8647         if (r)
8648                 return r;
8649         /* Memory manager */
8650         r = radeon_bo_init(rdev);
8651         if (r)
8652                 return r;
8653
8654         if (rdev->flags & RADEON_IS_IGP) {
8655                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8656                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8657                         r = cik_init_microcode(rdev);
8658                         if (r) {
8659                                 DRM_ERROR("Failed to load firmware!\n");
8660                                 return r;
8661                         }
8662                 }
8663         } else {
8664                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8665                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8666                     !rdev->mc_fw) {
8667                         r = cik_init_microcode(rdev);
8668                         if (r) {
8669                                 DRM_ERROR("Failed to load firmware!\n");
8670                                 return r;
8671                         }
8672                 }
8673         }
8674
8675         /* Initialize power management */
8676         radeon_pm_init(rdev);
8677
8678         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8679         ring->ring_obj = NULL;
8680         r600_ring_init(rdev, ring, 1024 * 1024);
8681
8682         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8683         ring->ring_obj = NULL;
8684         r600_ring_init(rdev, ring, 1024 * 1024);
8685         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8686         if (r)
8687                 return r;
8688
8689         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8690         ring->ring_obj = NULL;
8691         r600_ring_init(rdev, ring, 1024 * 1024);
8692         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8693         if (r)
8694                 return r;
8695
8696         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8697         ring->ring_obj = NULL;
8698         r600_ring_init(rdev, ring, 256 * 1024);
8699
8700         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8701         ring->ring_obj = NULL;
8702         r600_ring_init(rdev, ring, 256 * 1024);
8703
8704         cik_uvd_init(rdev);
8705         cik_vce_init(rdev);
8706
8707         rdev->ih.ring_obj = NULL;
8708         r600_ih_ring_init(rdev, 64 * 1024);
8709
8710         r = r600_pcie_gart_init(rdev);
8711         if (r)
8712                 return r;
8713
8714         rdev->accel_working = true;
8715         r = cik_startup(rdev);
8716         if (r) {
8717                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8718                 cik_cp_fini(rdev);
8719                 cik_sdma_fini(rdev);
8720                 cik_irq_fini(rdev);
8721                 sumo_rlc_fini(rdev);
8722                 cik_mec_fini(rdev);
8723                 radeon_wb_fini(rdev);
8724                 radeon_ib_pool_fini(rdev);
8725                 radeon_vm_manager_fini(rdev);
8726                 radeon_irq_kms_fini(rdev);
8727                 cik_pcie_gart_fini(rdev);
8728                 rdev->accel_working = false;
8729         }
8730
8731         /* Don't start up if the MC ucode is missing.
8732          * The default clocks and voltages before the MC ucode
8733          * is loaded are not suffient for advanced operations.
8734          */
8735         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8736                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8737                 return -EINVAL;
8738         }
8739
8740         return 0;
8741 }
8742
8743 /**
8744  * cik_fini - asic specific driver and hw fini
8745  *
8746  * @rdev: radeon_device pointer
8747  *
8748  * Tear down the asic specific driver variables and program the hw
8749  * to an idle state (CIK).
8750  * Called at driver unload.
8751  */
8752 void cik_fini(struct radeon_device *rdev)
8753 {
8754         radeon_pm_fini(rdev);
8755         cik_cp_fini(rdev);
8756         cik_sdma_fini(rdev);
8757         cik_fini_pg(rdev);
8758         cik_fini_cg(rdev);
8759         cik_irq_fini(rdev);
8760         sumo_rlc_fini(rdev);
8761         cik_mec_fini(rdev);
8762         radeon_wb_fini(rdev);
8763         radeon_vm_manager_fini(rdev);
8764         radeon_ib_pool_fini(rdev);
8765         radeon_irq_kms_fini(rdev);
8766         uvd_v1_0_fini(rdev);
8767         radeon_uvd_fini(rdev);
8768         radeon_vce_fini(rdev);
8769         cik_pcie_gart_fini(rdev);
8770         r600_vram_scratch_fini(rdev);
8771         radeon_gem_fini(rdev);
8772         radeon_fence_driver_fini(rdev);
8773         radeon_bo_fini(rdev);
8774         radeon_atombios_fini(rdev);
8775         kfree(rdev->bios);
8776         rdev->bios = NULL;
8777 }
8778
8779 void dce8_program_fmt(struct drm_encoder *encoder)
8780 {
8781         struct drm_device *dev = encoder->dev;
8782         struct radeon_device *rdev = dev->dev_private;
8783         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8784         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8785         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8786         int bpc = 0;
8787         u32 tmp = 0;
8788         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8789
8790         if (connector) {
8791                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8792                 bpc = radeon_get_monitor_bpc(connector);
8793                 dither = radeon_connector->dither;
8794         }
8795
8796         /* LVDS/eDP FMT is set up by atom */
8797         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8798                 return;
8799
8800         /* not needed for analog */
8801         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8802             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8803                 return;
8804
8805         if (bpc == 0)
8806                 return;
8807
8808         switch (bpc) {
8809         case 6:
8810                 if (dither == RADEON_FMT_DITHER_ENABLE)
8811                         /* XXX sort out optimal dither settings */
8812                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8813                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8814                 else
8815                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8816                 break;
8817         case 8:
8818                 if (dither == RADEON_FMT_DITHER_ENABLE)
8819                         /* XXX sort out optimal dither settings */
8820                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8821                                 FMT_RGB_RANDOM_ENABLE |
8822                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8823                 else
8824                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8825                 break;
8826         case 10:
8827                 if (dither == RADEON_FMT_DITHER_ENABLE)
8828                         /* XXX sort out optimal dither settings */
8829                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8830                                 FMT_RGB_RANDOM_ENABLE |
8831                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8832                 else
8833                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8834                 break;
8835         default:
8836                 /* not needed */
8837                 break;
8838         }
8839
8840         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8841 }
8842
8843 /* display watermark setup */
8844 /**
8845  * dce8_line_buffer_adjust - Set up the line buffer
8846  *
8847  * @rdev: radeon_device pointer
8848  * @radeon_crtc: the selected display controller
8849  * @mode: the current display mode on the selected display
8850  * controller
8851  *
8852  * Setup up the line buffer allocation for
8853  * the selected display controller (CIK).
8854  * Returns the line buffer size in pixels.
8855  */
8856 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8857                                    struct radeon_crtc *radeon_crtc,
8858                                    struct drm_display_mode *mode)
8859 {
8860         u32 tmp, buffer_alloc, i;
8861         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8862         /*
8863          * Line Buffer Setup
8864          * There are 6 line buffers, one for each display controllers.
8865          * There are 3 partitions per LB. Select the number of partitions
8866          * to enable based on the display width.  For display widths larger
8867          * than 4096, you need use to use 2 display controllers and combine
8868          * them using the stereo blender.
8869          */
8870         if (radeon_crtc->base.enabled && mode) {
8871                 if (mode->crtc_hdisplay < 1920) {
8872                         tmp = 1;
8873                         buffer_alloc = 2;
8874                 } else if (mode->crtc_hdisplay < 2560) {
8875                         tmp = 2;
8876                         buffer_alloc = 2;
8877                 } else if (mode->crtc_hdisplay < 4096) {
8878                         tmp = 0;
8879                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8880                 } else {
8881                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8882                         tmp = 0;
8883                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8884                 }
8885         } else {
8886                 tmp = 1;
8887                 buffer_alloc = 0;
8888         }
8889
8890         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8891                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8892
8893         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8894                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8895         for (i = 0; i < rdev->usec_timeout; i++) {
8896                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8897                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8898                         break;
8899                 udelay(1);
8900         }
8901
8902         if (radeon_crtc->base.enabled && mode) {
8903                 switch (tmp) {
8904                 case 0:
8905                 default:
8906                         return 4096 * 2;
8907                 case 1:
8908                         return 1920 * 2;
8909                 case 2:
8910                         return 2560 * 2;
8911                 }
8912         }
8913
8914         /* controller not enabled, so no lb used */
8915         return 0;
8916 }
8917
8918 /**
8919  * cik_get_number_of_dram_channels - get the number of dram channels
8920  *
8921  * @rdev: radeon_device pointer
8922  *
8923  * Look up the number of video ram channels (CIK).
8924  * Used for display watermark bandwidth calculations
8925  * Returns the number of dram channels
8926  */
8927 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8928 {
8929         u32 tmp = RREG32(MC_SHARED_CHMAP);
8930
8931         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8932         case 0:
8933         default:
8934                 return 1;
8935         case 1:
8936                 return 2;
8937         case 2:
8938                 return 4;
8939         case 3:
8940                 return 8;
8941         case 4:
8942                 return 3;
8943         case 5:
8944                 return 6;
8945         case 6:
8946                 return 10;
8947         case 7:
8948                 return 12;
8949         case 8:
8950                 return 16;
8951         }
8952 }
8953
/*
 * Inputs to the DCE8 display watermark calculations for one display
 * controller at one clock level.  dce8_program_watermarks() fills one
 * instance for the high clocks and one for the low clocks.
 */
struct dce8_wm_params {
        u32 dram_channels; /* number of dram channels */
        u32 yclk;          /* bandwidth per dram data pin in kHz */
        u32 sclk;          /* engine clock in kHz */
        u32 disp_clk;      /* display clock in kHz */
        u32 src_width;     /* viewport width */
        u32 active_time;   /* active display time in ns */
        u32 blank_time;    /* blank time in ns */
        bool interlaced;    /* mode is interlaced */
        fixed20_12 vsc;    /* vertical scale ratio */
        u32 num_heads;     /* number of active crtcs */
        u32 bytes_per_pixel; /* bytes per pixel display + overlay */
        u32 lb_size;       /* line buffer allocated to pipe */
        u32 vtaps;         /* vertical scaler taps */
};
8969
8970 /**
8971  * dce8_dram_bandwidth - get the dram bandwidth
8972  *
8973  * @wm: watermark calculation data
8974  *
8975  * Calculate the raw dram bandwidth (CIK).
8976  * Used for display watermark bandwidth calculations
8977  * Returns the dram bandwidth in MBytes/s
8978  */
8979 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8980 {
8981         /* Calculate raw DRAM Bandwidth */
8982         fixed20_12 dram_efficiency; /* 0.7 */
8983         fixed20_12 yclk, dram_channels, bandwidth;
8984         fixed20_12 a;
8985
8986         a.full = dfixed_const(1000);
8987         yclk.full = dfixed_const(wm->yclk);
8988         yclk.full = dfixed_div(yclk, a);
8989         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8990         a.full = dfixed_const(10);
8991         dram_efficiency.full = dfixed_const(7);
8992         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8993         bandwidth.full = dfixed_mul(dram_channels, yclk);
8994         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8995
8996         return dfixed_trunc(bandwidth);
8997 }
8998
8999 /**
9000  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9001  *
9002  * @wm: watermark calculation data
9003  *
9004  * Calculate the dram bandwidth used for display (CIK).
9005  * Used for display watermark bandwidth calculations
9006  * Returns the dram bandwidth for display in MBytes/s
9007  */
9008 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9009 {
9010         /* Calculate DRAM Bandwidth and the part allocated to display. */
9011         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9012         fixed20_12 yclk, dram_channels, bandwidth;
9013         fixed20_12 a;
9014
9015         a.full = dfixed_const(1000);
9016         yclk.full = dfixed_const(wm->yclk);
9017         yclk.full = dfixed_div(yclk, a);
9018         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9019         a.full = dfixed_const(10);
9020         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9021         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9022         bandwidth.full = dfixed_mul(dram_channels, yclk);
9023         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9024
9025         return dfixed_trunc(bandwidth);
9026 }
9027
9028 /**
9029  * dce8_data_return_bandwidth - get the data return bandwidth
9030  *
9031  * @wm: watermark calculation data
9032  *
9033  * Calculate the data return bandwidth used for display (CIK).
9034  * Used for display watermark bandwidth calculations
9035  * Returns the data return bandwidth in MBytes/s
9036  */
9037 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9038 {
9039         /* Calculate the display Data return Bandwidth */
9040         fixed20_12 return_efficiency; /* 0.8 */
9041         fixed20_12 sclk, bandwidth;
9042         fixed20_12 a;
9043
9044         a.full = dfixed_const(1000);
9045         sclk.full = dfixed_const(wm->sclk);
9046         sclk.full = dfixed_div(sclk, a);
9047         a.full = dfixed_const(10);
9048         return_efficiency.full = dfixed_const(8);
9049         return_efficiency.full = dfixed_div(return_efficiency, a);
9050         a.full = dfixed_const(32);
9051         bandwidth.full = dfixed_mul(a, sclk);
9052         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9053
9054         return dfixed_trunc(bandwidth);
9055 }
9056
9057 /**
9058  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9059  *
9060  * @wm: watermark calculation data
9061  *
9062  * Calculate the dmif bandwidth used for display (CIK).
9063  * Used for display watermark bandwidth calculations
9064  * Returns the dmif bandwidth in MBytes/s
9065  */
9066 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9067 {
9068         /* Calculate the DMIF Request Bandwidth */
9069         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9070         fixed20_12 disp_clk, bandwidth;
9071         fixed20_12 a, b;
9072
9073         a.full = dfixed_const(1000);
9074         disp_clk.full = dfixed_const(wm->disp_clk);
9075         disp_clk.full = dfixed_div(disp_clk, a);
9076         a.full = dfixed_const(32);
9077         b.full = dfixed_mul(a, disp_clk);
9078
9079         a.full = dfixed_const(10);
9080         disp_clk_request_efficiency.full = dfixed_const(8);
9081         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9082
9083         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9084
9085         return dfixed_trunc(bandwidth);
9086 }
9087
9088 /**
9089  * dce8_available_bandwidth - get the min available bandwidth
9090  *
9091  * @wm: watermark calculation data
9092  *
9093  * Calculate the min available bandwidth used for display (CIK).
9094  * Used for display watermark bandwidth calculations
9095  * Returns the min available bandwidth in MBytes/s
9096  */
9097 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9098 {
9099         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9100         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9101         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9102         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9103
9104         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9105 }
9106
9107 /**
9108  * dce8_average_bandwidth - get the average available bandwidth
9109  *
9110  * @wm: watermark calculation data
9111  *
9112  * Calculate the average available bandwidth used for display (CIK).
9113  * Used for display watermark bandwidth calculations
9114  * Returns the average available bandwidth in MBytes/s
9115  */
9116 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9117 {
9118         /* Calculate the display mode Average Bandwidth
9119          * DisplayMode should contain the source and destination dimensions,
9120          * timing, etc.
9121          */
9122         fixed20_12 bpp;
9123         fixed20_12 line_time;
9124         fixed20_12 src_width;
9125         fixed20_12 bandwidth;
9126         fixed20_12 a;
9127
9128         a.full = dfixed_const(1000);
9129         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9130         line_time.full = dfixed_div(line_time, a);
9131         bpp.full = dfixed_const(wm->bytes_per_pixel);
9132         src_width.full = dfixed_const(wm->src_width);
9133         bandwidth.full = dfixed_mul(src_width, bpp);
9134         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9135         bandwidth.full = dfixed_div(bandwidth, line_time);
9136
9137         return dfixed_trunc(bandwidth);
9138 }
9139
9140 /**
9141  * dce8_latency_watermark - get the latency watermark
9142  *
9143  * @wm: watermark calculation data
9144  *
9145  * Calculate the latency watermark (CIK).
9146  * Used for display watermark bandwidth calculations
9147  * Returns the latency watermark in ns
9148  */
9149 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9150 {
9151         /* First calculate the latency in ns */
9152         u32 mc_latency = 2000; /* 2000 ns. */
9153         u32 available_bandwidth = dce8_available_bandwidth(wm);
9154         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9155         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9156         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9157         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9158                 (wm->num_heads * cursor_line_pair_return_time);
9159         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9160         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9161         u32 tmp, dmif_size = 12288;
9162         fixed20_12 a, b, c;
9163
9164         if (wm->num_heads == 0)
9165                 return 0;
9166
9167         a.full = dfixed_const(2);
9168         b.full = dfixed_const(1);
9169         if ((wm->vsc.full > a.full) ||
9170             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9171             (wm->vtaps >= 5) ||
9172             ((wm->vsc.full >= a.full) && wm->interlaced))
9173                 max_src_lines_per_dst_line = 4;
9174         else
9175                 max_src_lines_per_dst_line = 2;
9176
9177         a.full = dfixed_const(available_bandwidth);
9178         b.full = dfixed_const(wm->num_heads);
9179         a.full = dfixed_div(a, b);
9180
9181         b.full = dfixed_const(mc_latency + 512);
9182         c.full = dfixed_const(wm->disp_clk);
9183         b.full = dfixed_div(b, c);
9184
9185         c.full = dfixed_const(dmif_size);
9186         b.full = dfixed_div(c, b);
9187
9188         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9189
9190         b.full = dfixed_const(1000);
9191         c.full = dfixed_const(wm->disp_clk);
9192         b.full = dfixed_div(c, b);
9193         c.full = dfixed_const(wm->bytes_per_pixel);
9194         b.full = dfixed_mul(b, c);
9195
9196         lb_fill_bw = min(tmp, dfixed_trunc(b));
9197
9198         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9199         b.full = dfixed_const(1000);
9200         c.full = dfixed_const(lb_fill_bw);
9201         b.full = dfixed_div(c, b);
9202         a.full = dfixed_div(a, b);
9203         line_fill_time = dfixed_trunc(a);
9204
9205         if (line_fill_time < wm->active_time)
9206                 return latency;
9207         else
9208                 return latency + (line_fill_time - wm->active_time);
9209
9210 }
9211
9212 /**
9213  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9214  * average and available dram bandwidth
9215  *
9216  * @wm: watermark calculation data
9217  *
9218  * Check if the display average bandwidth fits in the display
9219  * dram bandwidth (CIK).
9220  * Used for display watermark bandwidth calculations
9221  * Returns true if the display fits, false if not.
9222  */
9223 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9224 {
9225         if (dce8_average_bandwidth(wm) <=
9226             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9227                 return true;
9228         else
9229                 return false;
9230 }
9231
9232 /**
9233  * dce8_average_bandwidth_vs_available_bandwidth - check
9234  * average and available bandwidth
9235  *
9236  * @wm: watermark calculation data
9237  *
9238  * Check if the display average bandwidth fits in the display
9239  * available bandwidth (CIK).
9240  * Used for display watermark bandwidth calculations
9241  * Returns true if the display fits, false if not.
9242  */
9243 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9244 {
9245         if (dce8_average_bandwidth(wm) <=
9246             (dce8_available_bandwidth(wm) / wm->num_heads))
9247                 return true;
9248         else
9249                 return false;
9250 }
9251
9252 /**
9253  * dce8_check_latency_hiding - check latency hiding
9254  *
9255  * @wm: watermark calculation data
9256  *
9257  * Check latency hiding (CIK).
9258  * Used for display watermark bandwidth calculations
9259  * Returns true if the display fits, false if not.
9260  */
9261 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9262 {
9263         u32 lb_partitions = wm->lb_size / wm->src_width;
9264         u32 line_time = wm->active_time + wm->blank_time;
9265         u32 latency_tolerant_lines;
9266         u32 latency_hiding;
9267         fixed20_12 a;
9268
9269         a.full = dfixed_const(1);
9270         if (wm->vsc.full > a.full)
9271                 latency_tolerant_lines = 1;
9272         else {
9273                 if (lb_partitions <= (wm->vtaps + 1))
9274                         latency_tolerant_lines = 1;
9275                 else
9276                         latency_tolerant_lines = 2;
9277         }
9278
9279         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9280
9281         if (dce8_latency_watermark(wm) <= latency_hiding)
9282                 return true;
9283         else
9284                 return false;
9285 }
9286
9287 /**
9288  * dce8_program_watermarks - program display watermarks
9289  *
9290  * @rdev: radeon_device pointer
9291  * @radeon_crtc: the selected display controller
9292  * @lb_size: line buffer size
9293  * @num_heads: number of display controllers in use
9294  *
9295  * Calculate and program the display watermarks for the
9296  * selected display controller (CIK).
9297  */
9298 static void dce8_program_watermarks(struct radeon_device *rdev,
9299                                     struct radeon_crtc *radeon_crtc,
9300                                     u32 lb_size, u32 num_heads)
9301 {
9302         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9303         struct dce8_wm_params wm_low, wm_high;
9304         u32 pixel_period;
9305         u32 line_time = 0;
9306         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9307         u32 tmp, wm_mask;
9308
9309         if (radeon_crtc->base.enabled && num_heads && mode) {
9310                 pixel_period = 1000000 / (u32)mode->clock;
9311                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9312
9313                 /* watermark for high clocks */
9314                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9315                     rdev->pm.dpm_enabled) {
9316                         wm_high.yclk =
9317                                 radeon_dpm_get_mclk(rdev, false) * 10;
9318                         wm_high.sclk =
9319                                 radeon_dpm_get_sclk(rdev, false) * 10;
9320                 } else {
9321                         wm_high.yclk = rdev->pm.current_mclk * 10;
9322                         wm_high.sclk = rdev->pm.current_sclk * 10;
9323                 }
9324
9325                 wm_high.disp_clk = mode->clock;
9326                 wm_high.src_width = mode->crtc_hdisplay;
9327                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9328                 wm_high.blank_time = line_time - wm_high.active_time;
9329                 wm_high.interlaced = false;
9330                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9331                         wm_high.interlaced = true;
9332                 wm_high.vsc = radeon_crtc->vsc;
9333                 wm_high.vtaps = 1;
9334                 if (radeon_crtc->rmx_type != RMX_OFF)
9335                         wm_high.vtaps = 2;
9336                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9337                 wm_high.lb_size = lb_size;
9338                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9339                 wm_high.num_heads = num_heads;
9340
9341                 /* set for high clocks */
9342                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9343
9344                 /* possibly force display priority to high */
9345                 /* should really do this at mode validation time... */
9346                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9347                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9348                     !dce8_check_latency_hiding(&wm_high) ||
9349                     (rdev->disp_priority == 2)) {
9350                         DRM_DEBUG_KMS("force priority to high\n");
9351                 }
9352
9353                 /* watermark for low clocks */
9354                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9355                     rdev->pm.dpm_enabled) {
9356                         wm_low.yclk =
9357                                 radeon_dpm_get_mclk(rdev, true) * 10;
9358                         wm_low.sclk =
9359                                 radeon_dpm_get_sclk(rdev, true) * 10;
9360                 } else {
9361                         wm_low.yclk = rdev->pm.current_mclk * 10;
9362                         wm_low.sclk = rdev->pm.current_sclk * 10;
9363                 }
9364
9365                 wm_low.disp_clk = mode->clock;
9366                 wm_low.src_width = mode->crtc_hdisplay;
9367                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9368                 wm_low.blank_time = line_time - wm_low.active_time;
9369                 wm_low.interlaced = false;
9370                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9371                         wm_low.interlaced = true;
9372                 wm_low.vsc = radeon_crtc->vsc;
9373                 wm_low.vtaps = 1;
9374                 if (radeon_crtc->rmx_type != RMX_OFF)
9375                         wm_low.vtaps = 2;
9376                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9377                 wm_low.lb_size = lb_size;
9378                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9379                 wm_low.num_heads = num_heads;
9380
9381                 /* set for low clocks */
9382                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9383
9384                 /* possibly force display priority to high */
9385                 /* should really do this at mode validation time... */
9386                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9387                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9388                     !dce8_check_latency_hiding(&wm_low) ||
9389                     (rdev->disp_priority == 2)) {
9390                         DRM_DEBUG_KMS("force priority to high\n");
9391                 }
9392
9393                 /* Save number of lines the linebuffer leads before the scanout */
9394                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9395         }
9396
9397         /* select wm A */
9398         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9399         tmp = wm_mask;
9400         tmp &= ~LATENCY_WATERMARK_MASK(3);
9401         tmp |= LATENCY_WATERMARK_MASK(1);
9402         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9403         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9404                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9405                 LATENCY_HIGH_WATERMARK(line_time)));
9406         /* select wm B */
9407         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9408         tmp &= ~LATENCY_WATERMARK_MASK(3);
9409         tmp |= LATENCY_WATERMARK_MASK(2);
9410         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9411         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9412                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9413                 LATENCY_HIGH_WATERMARK(line_time)));
9414         /* restore original selection */
9415         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9416
9417         /* save values for DPM */
9418         radeon_crtc->line_time = line_time;
9419         radeon_crtc->wm_high = latency_watermark_a;
9420         radeon_crtc->wm_low = latency_watermark_b;
9421 }
9422
9423 /**
9424  * dce8_bandwidth_update - program display watermarks
9425  *
9426  * @rdev: radeon_device pointer
9427  *
9428  * Calculate and program the display watermarks and line
9429  * buffer allocation (CIK).
9430  */
9431 void dce8_bandwidth_update(struct radeon_device *rdev)
9432 {
9433         struct drm_display_mode *mode = NULL;
9434         u32 num_heads = 0, lb_size;
9435         int i;
9436
9437         if (!rdev->mode_info.mode_config_initialized)
9438                 return;
9439
9440         radeon_update_display_priority(rdev);
9441
9442         for (i = 0; i < rdev->num_crtc; i++) {
9443                 if (rdev->mode_info.crtcs[i]->base.enabled)
9444                         num_heads++;
9445         }
9446         for (i = 0; i < rdev->num_crtc; i++) {
9447                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9448                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9449                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9450         }
9451 }
9452
9453 /**
9454  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9455  *
9456  * @rdev: radeon_device pointer
9457  *
9458  * Fetches a GPU clock counter snapshot (SI).
9459  * Returns the 64 bit clock counter snapshot.
9460  */
9461 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9462 {
9463         uint64_t clock;
9464
9465         mutex_lock(&rdev->gpu_clock_mutex);
9466         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9467         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9468                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9469         mutex_unlock(&rdev->gpu_clock_mutex);
9470         return clock;
9471 }
9472
9473 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9474                              u32 cntl_reg, u32 status_reg)
9475 {
9476         int r, i;
9477         struct atom_clock_dividers dividers;
9478         uint32_t tmp;
9479
9480         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9481                                            clock, false, &dividers);
9482         if (r)
9483                 return r;
9484
9485         tmp = RREG32_SMC(cntl_reg);
9486         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9487         tmp |= dividers.post_divider;
9488         WREG32_SMC(cntl_reg, tmp);
9489
9490         for (i = 0; i < 100; i++) {
9491                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9492                         break;
9493                 mdelay(10);
9494         }
9495         if (i == 100)
9496                 return -ETIMEDOUT;
9497
9498         return 0;
9499 }
9500
9501 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9502 {
9503         int r = 0;
9504
9505         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9506         if (r)
9507                 return r;
9508
9509         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9510         return r;
9511 }
9512
9513 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9514 {
9515         int r, i;
9516         struct atom_clock_dividers dividers;
9517         u32 tmp;
9518
9519         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9520                                            ecclk, false, &dividers);
9521         if (r)
9522                 return r;
9523
9524         for (i = 0; i < 100; i++) {
9525                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9526                         break;
9527                 mdelay(10);
9528         }
9529         if (i == 100)
9530                 return -ETIMEDOUT;
9531
9532         tmp = RREG32_SMC(CG_ECLK_CNTL);
9533         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9534         tmp |= dividers.post_divider;
9535         WREG32_SMC(CG_ECLK_CNTL, tmp);
9536
9537         for (i = 0; i < 100; i++) {
9538                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9539                         break;
9540                 mdelay(10);
9541         }
9542         if (i == 100)
9543                 return -ETIMEDOUT;
9544
9545         return 0;
9546 }
9547
9548 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9549 {
9550         struct pci_dev *root = rdev->pdev->bus->self;
9551         int bridge_pos, gpu_pos;
9552         u32 speed_cntl, mask, current_data_rate;
9553         int ret, i;
9554         u16 tmp16;
9555
9556         if (pci_is_root_bus(rdev->pdev->bus))
9557                 return;
9558
9559         if (radeon_pcie_gen2 == 0)
9560                 return;
9561
9562         if (rdev->flags & RADEON_IS_IGP)
9563                 return;
9564
9565         if (!(rdev->flags & RADEON_IS_PCIE))
9566                 return;
9567
9568         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9569         if (ret != 0)
9570                 return;
9571
9572         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9573                 return;
9574
9575         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9576         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9577                 LC_CURRENT_DATA_RATE_SHIFT;
9578         if (mask & DRM_PCIE_SPEED_80) {
9579                 if (current_data_rate == 2) {
9580                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9581                         return;
9582                 }
9583                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9584         } else if (mask & DRM_PCIE_SPEED_50) {
9585                 if (current_data_rate == 1) {
9586                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9587                         return;
9588                 }
9589                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9590         }
9591
9592         bridge_pos = pci_pcie_cap(root);
9593         if (!bridge_pos)
9594                 return;
9595
9596         gpu_pos = pci_pcie_cap(rdev->pdev);
9597         if (!gpu_pos)
9598                 return;
9599
9600         if (mask & DRM_PCIE_SPEED_80) {
9601                 /* re-try equalization if gen3 is not already enabled */
9602                 if (current_data_rate != 2) {
9603                         u16 bridge_cfg, gpu_cfg;
9604                         u16 bridge_cfg2, gpu_cfg2;
9605                         u32 max_lw, current_lw, tmp;
9606
9607                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9608                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9609
9610                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9611                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9612
9613                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9614                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9615
9616                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9617                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9618                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9619
9620                         if (current_lw < max_lw) {
9621                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9622                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9623                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9624                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9625                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9626                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9627                                 }
9628                         }
9629
9630                         for (i = 0; i < 10; i++) {
9631                                 /* check status */
9632                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9633                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9634                                         break;
9635
9636                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9637                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9638
9639                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9640                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9641
9642                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9643                                 tmp |= LC_SET_QUIESCE;
9644                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9645
9646                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9647                                 tmp |= LC_REDO_EQ;
9648                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9649
9650                                 mdelay(100);
9651
9652                                 /* linkctl */
9653                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9654                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9655                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9656                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9657
9658                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9659                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9660                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9661                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9662
9663                                 /* linkctl2 */
9664                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9665                                 tmp16 &= ~((1 << 4) | (7 << 9));
9666                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9667                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9668
9669                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9670                                 tmp16 &= ~((1 << 4) | (7 << 9));
9671                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9672                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9673
9674                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9675                                 tmp &= ~LC_SET_QUIESCE;
9676                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9677                         }
9678                 }
9679         }
9680
9681         /* set the link speed */
9682         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9683         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9684         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9685
9686         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9687         tmp16 &= ~0xf;
9688         if (mask & DRM_PCIE_SPEED_80)
9689                 tmp16 |= 3; /* gen3 */
9690         else if (mask & DRM_PCIE_SPEED_50)
9691                 tmp16 |= 2; /* gen2 */
9692         else
9693                 tmp16 |= 1; /* gen1 */
9694         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9695
9696         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9697         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9698         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9699
9700         for (i = 0; i < rdev->usec_timeout; i++) {
9701                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9702                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9703                         break;
9704                 udelay(1);
9705         }
9706 }
9707
9708 static void cik_program_aspm(struct radeon_device *rdev)
9709 {
9710         u32 data, orig;
9711         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9712         bool disable_clkreq = false;
9713
9714         if (radeon_aspm == 0)
9715                 return;
9716
9717         /* XXX double check IGPs */
9718         if (rdev->flags & RADEON_IS_IGP)
9719                 return;
9720
9721         if (!(rdev->flags & RADEON_IS_PCIE))
9722                 return;
9723
9724         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9725         data &= ~LC_XMIT_N_FTS_MASK;
9726         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9727         if (orig != data)
9728                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9729
9730         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9731         data |= LC_GO_TO_RECOVERY;
9732         if (orig != data)
9733                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9734
9735         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9736         data |= P_IGNORE_EDB_ERR;
9737         if (orig != data)
9738                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9739
9740         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9741         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9742         data |= LC_PMI_TO_L1_DIS;
9743         if (!disable_l0s)
9744                 data |= LC_L0S_INACTIVITY(7);
9745
9746         if (!disable_l1) {
9747                 data |= LC_L1_INACTIVITY(7);
9748                 data &= ~LC_PMI_TO_L1_DIS;
9749                 if (orig != data)
9750                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9751
9752                 if (!disable_plloff_in_l1) {
9753                         bool clk_req_support;
9754
9755                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9756                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9757                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9758                         if (orig != data)
9759                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9760
9761                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9762                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9763                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9764                         if (orig != data)
9765                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9766
9767                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9768                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9769                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9770                         if (orig != data)
9771                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9772
9773                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9774                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9775                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9776                         if (orig != data)
9777                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9778
9779                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9780                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9781                         data |= LC_DYN_LANES_PWR_STATE(3);
9782                         if (orig != data)
9783                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9784
9785                         if (!disable_clkreq &&
9786                             !pci_is_root_bus(rdev->pdev->bus)) {
9787                                 struct pci_dev *root = rdev->pdev->bus->self;
9788                                 u32 lnkcap;
9789
9790                                 clk_req_support = false;
9791                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9792                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9793                                         clk_req_support = true;
9794                         } else {
9795                                 clk_req_support = false;
9796                         }
9797
9798                         if (clk_req_support) {
9799                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9800                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9801                                 if (orig != data)
9802                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9803
9804                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
9805                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9806                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9807                                 if (orig != data)
9808                                         WREG32_SMC(THM_CLK_CNTL, data);
9809
9810                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9811                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9812                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9813                                 if (orig != data)
9814                                         WREG32_SMC(MISC_CLK_CTRL, data);
9815
9816                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9817                                 data &= ~BCLK_AS_XCLK;
9818                                 if (orig != data)
9819                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
9820
9821                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9822                                 data &= ~FORCE_BIF_REFCLK_EN;
9823                                 if (orig != data)
9824                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9825
9826                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9827                                 data &= ~MPLL_CLKOUT_SEL_MASK;
9828                                 data |= MPLL_CLKOUT_SEL(4);
9829                                 if (orig != data)
9830                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9831                         }
9832                 }
9833         } else {
9834                 if (orig != data)
9835                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9836         }
9837
9838         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9839         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9840         if (orig != data)
9841                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9842
9843         if (!disable_l0s) {
9844                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9845                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9846                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9847                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9848                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9849                                 data &= ~LC_L0S_INACTIVITY_MASK;
9850                                 if (orig != data)
9851                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9852                         }
9853                 }
9854         }
9855 }