Merge tag 'mac80211-for-davem-2016-07-06' of git://git.kernel.org/pub/scm/linux/kerne...
[cascardo/linux.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37
38 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
47
48 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
49 MODULE_FIRMWARE("radeon/bonaire_me.bin");
50 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
51 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
52 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
53 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
54 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
55 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
56
57 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
66
67 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
68 MODULE_FIRMWARE("radeon/hawaii_me.bin");
69 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
70 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
71 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
72 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
73 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
74 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
75
76 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
80 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
81 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
82
83 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
84 MODULE_FIRMWARE("radeon/kaveri_me.bin");
85 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
86 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
87 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
88 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
89 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
90
91 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
92 MODULE_FIRMWARE("radeon/KABINI_me.bin");
93 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
94 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
95 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
96 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
97
98 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
99 MODULE_FIRMWARE("radeon/kabini_me.bin");
100 MODULE_FIRMWARE("radeon/kabini_ce.bin");
101 MODULE_FIRMWARE("radeon/kabini_mec.bin");
102 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
103 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
104
105 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
109 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
110 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
111
112 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
113 MODULE_FIRMWARE("radeon/mullins_me.bin");
114 MODULE_FIRMWARE("radeon/mullins_ce.bin");
115 MODULE_FIRMWARE("radeon/mullins_mec.bin");
116 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
117 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
118
119 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
120 extern void r600_ih_ring_fini(struct radeon_device *rdev);
121 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
122 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
123 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
124 extern void sumo_rlc_fini(struct radeon_device *rdev);
125 extern int sumo_rlc_init(struct radeon_device *rdev);
126 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
127 extern void si_rlc_reset(struct radeon_device *rdev);
128 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
129 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
130 extern int cik_sdma_resume(struct radeon_device *rdev);
131 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
132 extern void cik_sdma_fini(struct radeon_device *rdev);
133 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
134 static void cik_rlc_stop(struct radeon_device *rdev);
135 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
136 static void cik_program_aspm(struct radeon_device *rdev);
137 static void cik_init_pg(struct radeon_device *rdev);
138 static void cik_init_cg(struct radeon_device *rdev);
139 static void cik_fini_pg(struct radeon_device *rdev);
140 static void cik_fini_cg(struct radeon_device *rdev);
141 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
142                                           bool enable);
143
144 /**
145  * cik_get_allowed_info_register - fetch the register for the info ioctl
146  *
147  * @rdev: radeon_device pointer
148  * @reg: register offset in bytes
149  * @val: register value
150  *
151  * Returns 0 for success or -EINVAL for an invalid register
152  *
153  */
154 int cik_get_allowed_info_register(struct radeon_device *rdev,
155                                   u32 reg, u32 *val)
156 {
157         switch (reg) {
158         case GRBM_STATUS:
159         case GRBM_STATUS2:
160         case GRBM_STATUS_SE0:
161         case GRBM_STATUS_SE1:
162         case GRBM_STATUS_SE2:
163         case GRBM_STATUS_SE3:
164         case SRBM_STATUS:
165         case SRBM_STATUS2:
166         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
167         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
168         case UVD_STATUS:
169         /* TODO VCE */
170                 *val = RREG32(reg);
171                 return 0;
172         default:
173                 return -EINVAL;
174         }
175 }
176
177 /*
178  * Indirect registers accessor
179  */
180 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
181 {
182         unsigned long flags;
183         u32 r;
184
185         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
186         WREG32(CIK_DIDT_IND_INDEX, (reg));
187         r = RREG32(CIK_DIDT_IND_DATA);
188         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
189         return r;
190 }
191
192 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
193 {
194         unsigned long flags;
195
196         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
197         WREG32(CIK_DIDT_IND_INDEX, (reg));
198         WREG32(CIK_DIDT_IND_DATA, (v));
199         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
200 }
201
202 /* get temperature in millidegrees */
203 int ci_get_temp(struct radeon_device *rdev)
204 {
205         u32 temp;
206         int actual_temp = 0;
207
208         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
209                 CTF_TEMP_SHIFT;
210
211         if (temp & 0x200)
212                 actual_temp = 255;
213         else
214                 actual_temp = temp & 0x1ff;
215
216         actual_temp = actual_temp * 1000;
217
218         return actual_temp;
219 }
220
221 /* get temperature in millidegrees */
222 int kv_get_temp(struct radeon_device *rdev)
223 {
224         u32 temp;
225         int actual_temp = 0;
226
227         temp = RREG32_SMC(0xC0300E0C);
228
229         if (temp)
230                 actual_temp = (temp / 8) - 49;
231         else
232                 actual_temp = 0;
233
234         actual_temp = actual_temp * 1000;
235
236         return actual_temp;
237 }
238
239 /*
240  * Indirect registers accessor
241  */
242 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
243 {
244         unsigned long flags;
245         u32 r;
246
247         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
248         WREG32(PCIE_INDEX, reg);
249         (void)RREG32(PCIE_INDEX);
250         r = RREG32(PCIE_DATA);
251         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
252         return r;
253 }
254
255 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
256 {
257         unsigned long flags;
258
259         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
260         WREG32(PCIE_INDEX, reg);
261         (void)RREG32(PCIE_INDEX);
262         WREG32(PCIE_DATA, v);
263         (void)RREG32(PCIE_DATA);
264         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
265 }
266
/*
 * RLC save/restore register list for Spectre (Kaveri GFX).
 *
 * NOTE(review): format inferred from the values below — confirm against
 * the RLC save/restore microcode documentation.  Most entries come in
 * pairs: an encoded register word (upper 16 bits appear to be an
 * instance/broadcast selector such as 0x0e00, 0x4e00..0xbe00; lower 16
 * bits are the dword register offset, i.e. the byte offset >> 2)
 * followed by a 0x00000000 placeholder value.  The bare small constants
 * (0x3, 0x5) presumably delimit sections of the list.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* bare count/section marker — see NOTE(review) above */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* bare count/section marker; the next 5 entries have no placeholder value */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
713
714 static const u32 kalindi_rlc_save_restore_register_list[] =
715 {
716         (0x0e00 << 16) | (0xc12c >> 2),
717         0x00000000,
718         (0x0e00 << 16) | (0xc140 >> 2),
719         0x00000000,
720         (0x0e00 << 16) | (0xc150 >> 2),
721         0x00000000,
722         (0x0e00 << 16) | (0xc15c >> 2),
723         0x00000000,
724         (0x0e00 << 16) | (0xc168 >> 2),
725         0x00000000,
726         (0x0e00 << 16) | (0xc170 >> 2),
727         0x00000000,
728         (0x0e00 << 16) | (0xc204 >> 2),
729         0x00000000,
730         (0x0e00 << 16) | (0xc2b4 >> 2),
731         0x00000000,
732         (0x0e00 << 16) | (0xc2b8 >> 2),
733         0x00000000,
734         (0x0e00 << 16) | (0xc2bc >> 2),
735         0x00000000,
736         (0x0e00 << 16) | (0xc2c0 >> 2),
737         0x00000000,
738         (0x0e00 << 16) | (0x8228 >> 2),
739         0x00000000,
740         (0x0e00 << 16) | (0x829c >> 2),
741         0x00000000,
742         (0x0e00 << 16) | (0x869c >> 2),
743         0x00000000,
744         (0x0600 << 16) | (0x98f4 >> 2),
745         0x00000000,
746         (0x0e00 << 16) | (0x98f8 >> 2),
747         0x00000000,
748         (0x0e00 << 16) | (0x9900 >> 2),
749         0x00000000,
750         (0x0e00 << 16) | (0xc260 >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0x90e8 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0x3c000 >> 2),
755         0x00000000,
756         (0x0e00 << 16) | (0x3c00c >> 2),
757         0x00000000,
758         (0x0e00 << 16) | (0x8c1c >> 2),
759         0x00000000,
760         (0x0e00 << 16) | (0x9700 >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0xcd20 >> 2),
763         0x00000000,
764         (0x4e00 << 16) | (0xcd20 >> 2),
765         0x00000000,
766         (0x5e00 << 16) | (0xcd20 >> 2),
767         0x00000000,
768         (0x6e00 << 16) | (0xcd20 >> 2),
769         0x00000000,
770         (0x7e00 << 16) | (0xcd20 >> 2),
771         0x00000000,
772         (0x0e00 << 16) | (0x89bc >> 2),
773         0x00000000,
774         (0x0e00 << 16) | (0x8900 >> 2),
775         0x00000000,
776         0x3,
777         (0x0e00 << 16) | (0xc130 >> 2),
778         0x00000000,
779         (0x0e00 << 16) | (0xc134 >> 2),
780         0x00000000,
781         (0x0e00 << 16) | (0xc1fc >> 2),
782         0x00000000,
783         (0x0e00 << 16) | (0xc208 >> 2),
784         0x00000000,
785         (0x0e00 << 16) | (0xc264 >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc268 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc26c >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc270 >> 2),
792         0x00000000,
793         (0x0e00 << 16) | (0xc274 >> 2),
794         0x00000000,
795         (0x0e00 << 16) | (0xc28c >> 2),
796         0x00000000,
797         (0x0e00 << 16) | (0xc290 >> 2),
798         0x00000000,
799         (0x0e00 << 16) | (0xc294 >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0xc298 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0xc2a0 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0xc2a4 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0xc2a8 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0xc2ac >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0x301d0 >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0x30238 >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0x30250 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0x30254 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0x30258 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0x3025c >> 2),
822         0x00000000,
823         (0x4e00 << 16) | (0xc900 >> 2),
824         0x00000000,
825         (0x5e00 << 16) | (0xc900 >> 2),
826         0x00000000,
827         (0x6e00 << 16) | (0xc900 >> 2),
828         0x00000000,
829         (0x7e00 << 16) | (0xc900 >> 2),
830         0x00000000,
831         (0x4e00 << 16) | (0xc904 >> 2),
832         0x00000000,
833         (0x5e00 << 16) | (0xc904 >> 2),
834         0x00000000,
835         (0x6e00 << 16) | (0xc904 >> 2),
836         0x00000000,
837         (0x7e00 << 16) | (0xc904 >> 2),
838         0x00000000,
839         (0x4e00 << 16) | (0xc908 >> 2),
840         0x00000000,
841         (0x5e00 << 16) | (0xc908 >> 2),
842         0x00000000,
843         (0x6e00 << 16) | (0xc908 >> 2),
844         0x00000000,
845         (0x7e00 << 16) | (0xc908 >> 2),
846         0x00000000,
847         (0x4e00 << 16) | (0xc90c >> 2),
848         0x00000000,
849         (0x5e00 << 16) | (0xc90c >> 2),
850         0x00000000,
851         (0x6e00 << 16) | (0xc90c >> 2),
852         0x00000000,
853         (0x7e00 << 16) | (0xc90c >> 2),
854         0x00000000,
855         (0x4e00 << 16) | (0xc910 >> 2),
856         0x00000000,
857         (0x5e00 << 16) | (0xc910 >> 2),
858         0x00000000,
859         (0x6e00 << 16) | (0xc910 >> 2),
860         0x00000000,
861         (0x7e00 << 16) | (0xc910 >> 2),
862         0x00000000,
863         (0x0e00 << 16) | (0xc99c >> 2),
864         0x00000000,
865         (0x0e00 << 16) | (0x9834 >> 2),
866         0x00000000,
867         (0x0000 << 16) | (0x30f00 >> 2),
868         0x00000000,
869         (0x0000 << 16) | (0x30f04 >> 2),
870         0x00000000,
871         (0x0000 << 16) | (0x30f08 >> 2),
872         0x00000000,
873         (0x0000 << 16) | (0x30f0c >> 2),
874         0x00000000,
875         (0x0600 << 16) | (0x9b7c >> 2),
876         0x00000000,
877         (0x0e00 << 16) | (0x8a14 >> 2),
878         0x00000000,
879         (0x0e00 << 16) | (0x8a18 >> 2),
880         0x00000000,
881         (0x0600 << 16) | (0x30a00 >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0x8bf0 >> 2),
884         0x00000000,
885         (0x0e00 << 16) | (0x8bcc >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x8b24 >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x30a04 >> 2),
890         0x00000000,
891         (0x0600 << 16) | (0x30a10 >> 2),
892         0x00000000,
893         (0x0600 << 16) | (0x30a14 >> 2),
894         0x00000000,
895         (0x0600 << 16) | (0x30a18 >> 2),
896         0x00000000,
897         (0x0600 << 16) | (0x30a2c >> 2),
898         0x00000000,
899         (0x0e00 << 16) | (0xc700 >> 2),
900         0x00000000,
901         (0x0e00 << 16) | (0xc704 >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0xc708 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0xc768 >> 2),
906         0x00000000,
907         (0x0400 << 16) | (0xc770 >> 2),
908         0x00000000,
909         (0x0400 << 16) | (0xc774 >> 2),
910         0x00000000,
911         (0x0400 << 16) | (0xc798 >> 2),
912         0x00000000,
913         (0x0400 << 16) | (0xc79c >> 2),
914         0x00000000,
915         (0x0e00 << 16) | (0x9100 >> 2),
916         0x00000000,
917         (0x0e00 << 16) | (0x3c010 >> 2),
918         0x00000000,
919         (0x0e00 << 16) | (0x8c00 >> 2),
920         0x00000000,
921         (0x0e00 << 16) | (0x8c04 >> 2),
922         0x00000000,
923         (0x0e00 << 16) | (0x8c20 >> 2),
924         0x00000000,
925         (0x0e00 << 16) | (0x8c38 >> 2),
926         0x00000000,
927         (0x0e00 << 16) | (0x8c3c >> 2),
928         0x00000000,
929         (0x0e00 << 16) | (0xae00 >> 2),
930         0x00000000,
931         (0x0e00 << 16) | (0x9604 >> 2),
932         0x00000000,
933         (0x0e00 << 16) | (0xac08 >> 2),
934         0x00000000,
935         (0x0e00 << 16) | (0xac0c >> 2),
936         0x00000000,
937         (0x0e00 << 16) | (0xac10 >> 2),
938         0x00000000,
939         (0x0e00 << 16) | (0xac14 >> 2),
940         0x00000000,
941         (0x0e00 << 16) | (0xac58 >> 2),
942         0x00000000,
943         (0x0e00 << 16) | (0xac68 >> 2),
944         0x00000000,
945         (0x0e00 << 16) | (0xac6c >> 2),
946         0x00000000,
947         (0x0e00 << 16) | (0xac70 >> 2),
948         0x00000000,
949         (0x0e00 << 16) | (0xac74 >> 2),
950         0x00000000,
951         (0x0e00 << 16) | (0xac78 >> 2),
952         0x00000000,
953         (0x0e00 << 16) | (0xac7c >> 2),
954         0x00000000,
955         (0x0e00 << 16) | (0xac80 >> 2),
956         0x00000000,
957         (0x0e00 << 16) | (0xac84 >> 2),
958         0x00000000,
959         (0x0e00 << 16) | (0xac88 >> 2),
960         0x00000000,
961         (0x0e00 << 16) | (0xac8c >> 2),
962         0x00000000,
963         (0x0e00 << 16) | (0x970c >> 2),
964         0x00000000,
965         (0x0e00 << 16) | (0x9714 >> 2),
966         0x00000000,
967         (0x0e00 << 16) | (0x9718 >> 2),
968         0x00000000,
969         (0x0e00 << 16) | (0x971c >> 2),
970         0x00000000,
971         (0x0e00 << 16) | (0x31068 >> 2),
972         0x00000000,
973         (0x4e00 << 16) | (0x31068 >> 2),
974         0x00000000,
975         (0x5e00 << 16) | (0x31068 >> 2),
976         0x00000000,
977         (0x6e00 << 16) | (0x31068 >> 2),
978         0x00000000,
979         (0x7e00 << 16) | (0x31068 >> 2),
980         0x00000000,
981         (0x0e00 << 16) | (0xcd10 >> 2),
982         0x00000000,
983         (0x0e00 << 16) | (0xcd14 >> 2),
984         0x00000000,
985         (0x0e00 << 16) | (0x88b0 >> 2),
986         0x00000000,
987         (0x0e00 << 16) | (0x88b4 >> 2),
988         0x00000000,
989         (0x0e00 << 16) | (0x88b8 >> 2),
990         0x00000000,
991         (0x0e00 << 16) | (0x88bc >> 2),
992         0x00000000,
993         (0x0400 << 16) | (0x89c0 >> 2),
994         0x00000000,
995         (0x0e00 << 16) | (0x88c4 >> 2),
996         0x00000000,
997         (0x0e00 << 16) | (0x88c8 >> 2),
998         0x00000000,
999         (0x0e00 << 16) | (0x88d0 >> 2),
1000         0x00000000,
1001         (0x0e00 << 16) | (0x88d4 >> 2),
1002         0x00000000,
1003         (0x0e00 << 16) | (0x88d8 >> 2),
1004         0x00000000,
1005         (0x0e00 << 16) | (0x8980 >> 2),
1006         0x00000000,
1007         (0x0e00 << 16) | (0x30938 >> 2),
1008         0x00000000,
1009         (0x0e00 << 16) | (0x3093c >> 2),
1010         0x00000000,
1011         (0x0e00 << 16) | (0x30940 >> 2),
1012         0x00000000,
1013         (0x0e00 << 16) | (0x89a0 >> 2),
1014         0x00000000,
1015         (0x0e00 << 16) | (0x30900 >> 2),
1016         0x00000000,
1017         (0x0e00 << 16) | (0x30904 >> 2),
1018         0x00000000,
1019         (0x0e00 << 16) | (0x89b4 >> 2),
1020         0x00000000,
1021         (0x0e00 << 16) | (0x3e1fc >> 2),
1022         0x00000000,
1023         (0x0e00 << 16) | (0x3c210 >> 2),
1024         0x00000000,
1025         (0x0e00 << 16) | (0x3c214 >> 2),
1026         0x00000000,
1027         (0x0e00 << 16) | (0x3c218 >> 2),
1028         0x00000000,
1029         (0x0e00 << 16) | (0x8904 >> 2),
1030         0x00000000,
1031         0x5,
1032         (0x0e00 << 16) | (0x8c28 >> 2),
1033         (0x0e00 << 16) | (0x8c2c >> 2),
1034         (0x0e00 << 16) | (0x8c30 >> 2),
1035         (0x0e00 << 16) | (0x8c34 >> 2),
1036         (0x0e00 << 16) | (0x9600 >> 2),
1037 };
1038
/*
 * SPM "golden" register override for Bonaire (CHIP_BONAIRE).
 * Rows are { register byte offset, mask, value }; applied by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 * NOTE(review): exact mask/value semantics are defined by
 * radeon_program_register_sequence() (not visible here) -- confirm there.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1043
/*
 * Common "golden" register overrides for Bonaire (CHIP_BONAIRE).
 * Rows are { register byte offset, mask, value }; applied by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1051
/*
 * Main "golden" (vendor-recommended) register overrides for Bonaire
 * (CHIP_BONAIRE).  Rows are { register byte offset, mask, value };
 * applied by cik_init_golden_registers() via
 * radeon_program_register_sequence().  Values come from AMD hardware
 * bring-up tables -- do not edit individual entries without the
 * register specification.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1096
/*
 * Medium-grain / coarse-grain clock-gating (MGCG/CGCG) init sequence
 * for Bonaire (CHIP_BONAIRE).  Rows are { register byte offset, mask,
 * value }; programmed first by cik_init_golden_registers() before the
 * golden register tables.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	/* 0x3c020..0x3c0a8: repeating 5-entry pattern (per-block CG ramp) */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1182
/*
 * SPM "golden" register override for Spectre (Kaveri GFX, CHIP_KAVERI).
 * Rows are { register byte offset, mask, value }; applied by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1187
/*
 * Common "golden" register overrides for Spectre (CHIP_KAVERI).
 * Same offsets/values as the Bonaire common table; rows are
 * { register byte offset, mask, value }.
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1195
/*
 * Main "golden" register overrides for Spectre (CHIP_KAVERI).
 * Rows are { register byte offset, mask, value }; applied by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1224
/*
 * MGCG/CGCG clock-gating init sequence for Spectre (CHIP_KAVERI).
 * Rows are { register byte offset, mask, value }; programmed first by
 * cik_init_golden_registers().  Differs from the Bonaire table mainly
 * in the 0x3c2c0/0x3c2c8/0x3c2c4 values and a longer 0x3c020.. ramp.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	/* 0x3c020..0x3c0bc: repeating 5-entry pattern (per-block CG ramp) */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1315
/*
 * SPM "golden" register override for Kalindi (Kabini GFX); also reused
 * for Mullins (CHIP_KABINI and CHIP_MULLINS in
 * cik_init_golden_registers()).  Rows are { offset, mask, value }.
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1320
/*
 * Common "golden" register overrides for Kalindi; shared by
 * CHIP_KABINI and CHIP_MULLINS.  Rows are { offset, mask, value }.
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1328
/*
 * Main "golden" register overrides for Kalindi (CHIP_KABINI).
 * Mullins uses the separate godavari_golden_registers table instead.
 * Rows are { register byte offset, mask, value }; applied by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1362
/*
 * MGCG/CGCG clock-gating init sequence for Kalindi; shared by
 * CHIP_KABINI and CHIP_MULLINS.  Rows are { register byte offset,
 * mask, value }; programmed first by cik_init_golden_registers().
 * Shorter 0x3c020.. ramp than the discrete-GPU tables.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	/* 0x3c020..0x3c044: repeating 5-entry pattern (per-block CG ramp) */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1421
/*
 * SPM "golden" register override for Hawaii (CHIP_HAWAII).
 * Rows are { register byte offset, mask, value }.
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1426
/*
 * Common "golden" register overrides for Hawaii (CHIP_HAWAII).
 * Rows are { register byte offset, mask, value }.
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1435
/*
 * Main "golden" register overrides for Hawaii (CHIP_HAWAII).
 * Rows are { register byte offset, mask, value }; applied by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1475
/*
 * MGCG/CGCG clock-gating init sequence for Hawaii (CHIP_HAWAII).
 * Rows are { register byte offset, mask, value }; programmed first by
 * cik_init_golden_registers().  Largest 0x3c020.. ramp of the CIK
 * family (through 0x3c0f8), plus a few Hawaii-only entries
 * (0xc318, 0x3350, 0x15c0, 0x55e8, 0x2f50).
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	/* 0x3c020..0x3c0f8: repeating 5-entry pattern (per-block CG ramp) */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1586
1587 static const u32 godavari_golden_registers[] =
1588 {
1589         0x55e4, 0xff607fff, 0xfc000100,
1590         0x6ed8, 0x00010101, 0x00010000,
1591         0x9830, 0xffffffff, 0x00000000,
1592         0x98302, 0xf00fffff, 0x00000400,
1593         0x6130, 0xffffffff, 0x00010000,
1594         0x5bb0, 0x000000f0, 0x00000070,
1595         0x5bc0, 0xf0311fff, 0x80300000,
1596         0x98f8, 0x73773777, 0x12010001,
1597         0x98fc, 0xffffffff, 0x00000010,
1598         0x8030, 0x00001f0f, 0x0000100a,
1599         0x2f48, 0x73773777, 0x12010001,
1600         0x2408, 0x000fffff, 0x000c007f,
1601         0x8a14, 0xf000003f, 0x00000007,
1602         0x8b24, 0xffffffff, 0x00ff0fff,
1603         0x30a04, 0x0000ff0f, 0x00000000,
1604         0x28a4c, 0x07ffffff, 0x06000000,
1605         0x4d8, 0x00000fff, 0x00000100,
1606         0xd014, 0x00010000, 0x00810001,
1607         0xd814, 0x00010000, 0x00810001,
1608         0x3e78, 0x00000001, 0x00000002,
1609         0xc768, 0x00000008, 0x00000008,
1610         0xc770, 0x00000f00, 0x00000800,
1611         0xc774, 0x00000f00, 0x00000800,
1612         0xc798, 0x00ffffff, 0x00ff7fbf,
1613         0xc79c, 0x00ffffff, 0x00ff7faf,
1614         0x8c00, 0x000000ff, 0x00000001,
1615         0x214f8, 0x01ff01ff, 0x00000002,
1616         0x21498, 0x007ff800, 0x00200000,
1617         0x2015c, 0xffffffff, 0x00000f40,
1618         0x88c4, 0x001f3ae3, 0x00000082,
1619         0x88d4, 0x0000001f, 0x00000010,
1620         0x30934, 0xffffffff, 0x00000000
1621 };
1622
1623
1624 static void cik_init_golden_registers(struct radeon_device *rdev)
1625 {
1626         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1627         mutex_lock(&rdev->grbm_idx_mutex);
1628         switch (rdev->family) {
1629         case CHIP_BONAIRE:
1630                 radeon_program_register_sequence(rdev,
1631                                                  bonaire_mgcg_cgcg_init,
1632                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1633                 radeon_program_register_sequence(rdev,
1634                                                  bonaire_golden_registers,
1635                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1636                 radeon_program_register_sequence(rdev,
1637                                                  bonaire_golden_common_registers,
1638                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1639                 radeon_program_register_sequence(rdev,
1640                                                  bonaire_golden_spm_registers,
1641                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1642                 break;
1643         case CHIP_KABINI:
1644                 radeon_program_register_sequence(rdev,
1645                                                  kalindi_mgcg_cgcg_init,
1646                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1647                 radeon_program_register_sequence(rdev,
1648                                                  kalindi_golden_registers,
1649                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1650                 radeon_program_register_sequence(rdev,
1651                                                  kalindi_golden_common_registers,
1652                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1653                 radeon_program_register_sequence(rdev,
1654                                                  kalindi_golden_spm_registers,
1655                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1656                 break;
1657         case CHIP_MULLINS:
1658                 radeon_program_register_sequence(rdev,
1659                                                  kalindi_mgcg_cgcg_init,
1660                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1661                 radeon_program_register_sequence(rdev,
1662                                                  godavari_golden_registers,
1663                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1664                 radeon_program_register_sequence(rdev,
1665                                                  kalindi_golden_common_registers,
1666                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1667                 radeon_program_register_sequence(rdev,
1668                                                  kalindi_golden_spm_registers,
1669                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1670                 break;
1671         case CHIP_KAVERI:
1672                 radeon_program_register_sequence(rdev,
1673                                                  spectre_mgcg_cgcg_init,
1674                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1675                 radeon_program_register_sequence(rdev,
1676                                                  spectre_golden_registers,
1677                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1678                 radeon_program_register_sequence(rdev,
1679                                                  spectre_golden_common_registers,
1680                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1681                 radeon_program_register_sequence(rdev,
1682                                                  spectre_golden_spm_registers,
1683                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1684                 break;
1685         case CHIP_HAWAII:
1686                 radeon_program_register_sequence(rdev,
1687                                                  hawaii_mgcg_cgcg_init,
1688                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1689                 radeon_program_register_sequence(rdev,
1690                                                  hawaii_golden_registers,
1691                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1692                 radeon_program_register_sequence(rdev,
1693                                                  hawaii_golden_common_registers,
1694                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1695                 radeon_program_register_sequence(rdev,
1696                                                  hawaii_golden_spm_registers,
1697                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1698                 break;
1699         default:
1700                 break;
1701         }
1702         mutex_unlock(&rdev->grbm_idx_mutex);
1703 }
1704
1705 /**
1706  * cik_get_xclk - get the xclk
1707  *
1708  * @rdev: radeon_device pointer
1709  *
1710  * Returns the reference clock used by the gfx engine
1711  * (CIK).
1712  */
1713 u32 cik_get_xclk(struct radeon_device *rdev)
1714 {
1715         u32 reference_clock = rdev->clock.spll.reference_freq;
1716
1717         if (rdev->flags & RADEON_IS_IGP) {
1718                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1719                         return reference_clock / 2;
1720         } else {
1721                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1722                         return reference_clock / 4;
1723         }
1724         return reference_clock;
1725 }
1726
1727 /**
1728  * cik_mm_rdoorbell - read a doorbell dword
1729  *
1730  * @rdev: radeon_device pointer
1731  * @index: doorbell index
1732  *
1733  * Returns the value in the doorbell aperture at the
1734  * requested doorbell index (CIK).
1735  */
1736 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1737 {
1738         if (index < rdev->doorbell.num_doorbells) {
1739                 return readl(rdev->doorbell.ptr + index);
1740         } else {
1741                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1742                 return 0;
1743         }
1744 }
1745
1746 /**
1747  * cik_mm_wdoorbell - write a doorbell dword
1748  *
1749  * @rdev: radeon_device pointer
1750  * @index: doorbell index
1751  * @v: value to write
1752  *
1753  * Writes @v to the doorbell aperture at the
1754  * requested doorbell index (CIK).
1755  */
1756 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1757 {
1758         if (index < rdev->doorbell.num_doorbells) {
1759                 writel(v, rdev->doorbell.ptr + index);
1760         } else {
1761                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1762         }
1763 }
1764
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC IO debug init table: {index, value} pairs written through
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * before programming the MC ucode (legacy, non-"new_fw" path only).
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1806
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC IO debug init table: {index, value} pairs written through
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * before programming the MC ucode (legacy, non-"new_fw" path only).
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1834
1835
1836 /**
1837  * cik_srbm_select - select specific register instances
1838  *
1839  * @rdev: radeon_device pointer
1840  * @me: selected ME (micro engine)
1841  * @pipe: pipe
1842  * @queue: queue
1843  * @vmid: VMID
1844  *
1845  * Switches the currently active registers instances.  Some
1846  * registers are instanced per VMID, others are instanced per
1847  * me/pipe/queue combination.
1848  */
1849 static void cik_srbm_select(struct radeon_device *rdev,
1850                             u32 me, u32 pipe, u32 queue, u32 vmid)
1851 {
1852         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1853                              MEID(me & 0x3) |
1854                              VMID(vmid & 0xf) |
1855                              QUEUEID(queue & 0x7));
1856         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1857 }
1858
1859 /* ucode loading */
1860 /**
1861  * ci_mc_load_microcode - load MC ucode into the hw
1862  *
1863  * @rdev: radeon_device pointer
1864  *
1865  * Load the GDDR MC ucode into the hw (CIK).
1866  * Returns 0 on success, error on failure.
1867  */
1868 int ci_mc_load_microcode(struct radeon_device *rdev)
1869 {
1870         const __be32 *fw_data = NULL;
1871         const __le32 *new_fw_data = NULL;
1872         u32 running, blackout = 0, tmp;
1873         u32 *io_mc_regs = NULL;
1874         const __le32 *new_io_mc_regs = NULL;
1875         int i, regs_size, ucode_size;
1876
1877         if (!rdev->mc_fw)
1878                 return -EINVAL;
1879
1880         if (rdev->new_fw) {
1881                 const struct mc_firmware_header_v1_0 *hdr =
1882                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1883
1884                 radeon_ucode_print_mc_hdr(&hdr->header);
1885
1886                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1887                 new_io_mc_regs = (const __le32 *)
1888                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1889                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1890                 new_fw_data = (const __le32 *)
1891                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1892         } else {
1893                 ucode_size = rdev->mc_fw->size / 4;
1894
1895                 switch (rdev->family) {
1896                 case CHIP_BONAIRE:
1897                         io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1898                         regs_size = BONAIRE_IO_MC_REGS_SIZE;
1899                         break;
1900                 case CHIP_HAWAII:
1901                         io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1902                         regs_size = HAWAII_IO_MC_REGS_SIZE;
1903                         break;
1904                 default:
1905                         return -EINVAL;
1906                 }
1907                 fw_data = (const __be32 *)rdev->mc_fw->data;
1908         }
1909
1910         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1911
1912         if (running == 0) {
1913                 if (running) {
1914                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1915                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1916                 }
1917
1918                 /* reset the engine and set to writable */
1919                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921
1922                 /* load mc io regs */
1923                 for (i = 0; i < regs_size; i++) {
1924                         if (rdev->new_fw) {
1925                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927                         } else {
1928                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930                         }
1931                 }
1932
1933                 tmp = RREG32(MC_SEQ_MISC0);
1934                 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939                 }
1940
1941                 /* load the MC ucode */
1942                 for (i = 0; i < ucode_size; i++) {
1943                         if (rdev->new_fw)
1944                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945                         else
1946                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1947                 }
1948
1949                 /* put the engine back into the active state */
1950                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953
1954                 /* wait for training to complete */
1955                 for (i = 0; i < rdev->usec_timeout; i++) {
1956                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957                                 break;
1958                         udelay(1);
1959                 }
1960                 for (i = 0; i < rdev->usec_timeout; i++) {
1961                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962                                 break;
1963                         udelay(1);
1964                 }
1965
1966                 if (running)
1967                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1968         }
1969
1970         return 0;
1971 }
1972
1973 /**
1974  * cik_init_microcode - load ucode images from disk
1975  *
1976  * @rdev: radeon_device pointer
1977  *
1978  * Use the firmware interface to load the ucode images into
1979  * the driver (not loaded into hw).
1980  * Returns 0 on success, error on failure.
1981  */
1982 static int cik_init_microcode(struct radeon_device *rdev)
1983 {
1984         const char *chip_name;
1985         const char *new_chip_name;
1986         size_t pfp_req_size, me_req_size, ce_req_size,
1987                 mec_req_size, rlc_req_size, mc_req_size = 0,
1988                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1989         char fw_name[30];
1990         int new_fw = 0;
1991         int err;
1992         int num_fw;
1993
1994         DRM_DEBUG("\n");
1995
1996         switch (rdev->family) {
1997         case CHIP_BONAIRE:
1998                 chip_name = "BONAIRE";
1999                 new_chip_name = "bonaire";
2000                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2001                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2002                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2003                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2004                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2005                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2006                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2007                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2008                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2009                 num_fw = 8;
2010                 break;
2011         case CHIP_HAWAII:
2012                 chip_name = "HAWAII";
2013                 new_chip_name = "hawaii";
2014                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2016                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2019                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2020                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2021                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2022                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2023                 num_fw = 8;
2024                 break;
2025         case CHIP_KAVERI:
2026                 chip_name = "KAVERI";
2027                 new_chip_name = "kaveri";
2028                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2030                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2033                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034                 num_fw = 7;
2035                 break;
2036         case CHIP_KABINI:
2037                 chip_name = "KABINI";
2038                 new_chip_name = "kabini";
2039                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2041                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2044                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045                 num_fw = 6;
2046                 break;
2047         case CHIP_MULLINS:
2048                 chip_name = "MULLINS";
2049                 new_chip_name = "mullins";
2050                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2051                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2052                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2053                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2054                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2055                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2056                 num_fw = 6;
2057                 break;
2058         default: BUG();
2059         }
2060
2061         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2062
2063         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2064         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2065         if (err) {
2066                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2067                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068                 if (err)
2069                         goto out;
2070                 if (rdev->pfp_fw->size != pfp_req_size) {
2071                         printk(KERN_ERR
2072                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2073                                rdev->pfp_fw->size, fw_name);
2074                         err = -EINVAL;
2075                         goto out;
2076                 }
2077         } else {
2078                 err = radeon_ucode_validate(rdev->pfp_fw);
2079                 if (err) {
2080                         printk(KERN_ERR
2081                                "cik_fw: validation failed for firmware \"%s\"\n",
2082                                fw_name);
2083                         goto out;
2084                 } else {
2085                         new_fw++;
2086                 }
2087         }
2088
2089         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2090         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2091         if (err) {
2092                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2093                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2094                 if (err)
2095                         goto out;
2096                 if (rdev->me_fw->size != me_req_size) {
2097                         printk(KERN_ERR
2098                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099                                rdev->me_fw->size, fw_name);
2100                         err = -EINVAL;
2101                 }
2102         } else {
2103                 err = radeon_ucode_validate(rdev->me_fw);
2104                 if (err) {
2105                         printk(KERN_ERR
2106                                "cik_fw: validation failed for firmware \"%s\"\n",
2107                                fw_name);
2108                         goto out;
2109                 } else {
2110                         new_fw++;
2111                 }
2112         }
2113
2114         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116         if (err) {
2117                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119                 if (err)
2120                         goto out;
2121                 if (rdev->ce_fw->size != ce_req_size) {
2122                         printk(KERN_ERR
2123                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124                                rdev->ce_fw->size, fw_name);
2125                         err = -EINVAL;
2126                 }
2127         } else {
2128                 err = radeon_ucode_validate(rdev->ce_fw);
2129                 if (err) {
2130                         printk(KERN_ERR
2131                                "cik_fw: validation failed for firmware \"%s\"\n",
2132                                fw_name);
2133                         goto out;
2134                 } else {
2135                         new_fw++;
2136                 }
2137         }
2138
2139         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2140         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2141         if (err) {
2142                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2143                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2144                 if (err)
2145                         goto out;
2146                 if (rdev->mec_fw->size != mec_req_size) {
2147                         printk(KERN_ERR
2148                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2149                                rdev->mec_fw->size, fw_name);
2150                         err = -EINVAL;
2151                 }
2152         } else {
2153                 err = radeon_ucode_validate(rdev->mec_fw);
2154                 if (err) {
2155                         printk(KERN_ERR
2156                                "cik_fw: validation failed for firmware \"%s\"\n",
2157                                fw_name);
2158                         goto out;
2159                 } else {
2160                         new_fw++;
2161                 }
2162         }
2163
2164         if (rdev->family == CHIP_KAVERI) {
2165                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2166                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2167                 if (err) {
2168                         goto out;
2169                 } else {
2170                         err = radeon_ucode_validate(rdev->mec2_fw);
2171                         if (err) {
2172                                 goto out;
2173                         } else {
2174                                 new_fw++;
2175                         }
2176                 }
2177         }
2178
2179         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2180         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2181         if (err) {
2182                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2183                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2184                 if (err)
2185                         goto out;
2186                 if (rdev->rlc_fw->size != rlc_req_size) {
2187                         printk(KERN_ERR
2188                                "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2189                                rdev->rlc_fw->size, fw_name);
2190                         err = -EINVAL;
2191                 }
2192         } else {
2193                 err = radeon_ucode_validate(rdev->rlc_fw);
2194                 if (err) {
2195                         printk(KERN_ERR
2196                                "cik_fw: validation failed for firmware \"%s\"\n",
2197                                fw_name);
2198                         goto out;
2199                 } else {
2200                         new_fw++;
2201                 }
2202         }
2203
2204         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2205         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2206         if (err) {
2207                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2208                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2209                 if (err)
2210                         goto out;
2211                 if (rdev->sdma_fw->size != sdma_req_size) {
2212                         printk(KERN_ERR
2213                                "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2214                                rdev->sdma_fw->size, fw_name);
2215                         err = -EINVAL;
2216                 }
2217         } else {
2218                 err = radeon_ucode_validate(rdev->sdma_fw);
2219                 if (err) {
2220                         printk(KERN_ERR
2221                                "cik_fw: validation failed for firmware \"%s\"\n",
2222                                fw_name);
2223                         goto out;
2224                 } else {
2225                         new_fw++;
2226                 }
2227         }
2228
2229         /* No SMC, MC ucode on APUs */
2230         if (!(rdev->flags & RADEON_IS_IGP)) {
2231                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2232                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2233                 if (err) {
2234                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2235                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2236                         if (err) {
2237                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2238                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2239                                 if (err)
2240                                         goto out;
2241                         }
2242                         if ((rdev->mc_fw->size != mc_req_size) &&
2243                             (rdev->mc_fw->size != mc2_req_size)){
2244                                 printk(KERN_ERR
2245                                        "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2246                                        rdev->mc_fw->size, fw_name);
2247                                 err = -EINVAL;
2248                         }
2249                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2250                 } else {
2251                         err = radeon_ucode_validate(rdev->mc_fw);
2252                         if (err) {
2253                                 printk(KERN_ERR
2254                                        "cik_fw: validation failed for firmware \"%s\"\n",
2255                                        fw_name);
2256                                 goto out;
2257                         } else {
2258                                 new_fw++;
2259                         }
2260                 }
2261
2262                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2263                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2264                 if (err) {
2265                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2266                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2267                         if (err) {
2268                                 printk(KERN_ERR
2269                                        "smc: error loading firmware \"%s\"\n",
2270                                        fw_name);
2271                                 release_firmware(rdev->smc_fw);
2272                                 rdev->smc_fw = NULL;
2273                                 err = 0;
2274                         } else if (rdev->smc_fw->size != smc_req_size) {
2275                                 printk(KERN_ERR
2276                                        "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2277                                        rdev->smc_fw->size, fw_name);
2278                                 err = -EINVAL;
2279                         }
2280                 } else {
2281                         err = radeon_ucode_validate(rdev->smc_fw);
2282                         if (err) {
2283                                 printk(KERN_ERR
2284                                        "cik_fw: validation failed for firmware \"%s\"\n",
2285                                        fw_name);
2286                                 goto out;
2287                         } else {
2288                                 new_fw++;
2289                         }
2290                 }
2291         }
2292
2293         if (new_fw == 0) {
2294                 rdev->new_fw = false;
2295         } else if (new_fw < num_fw) {
2296                 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2297                 err = -EINVAL;
2298         } else {
2299                 rdev->new_fw = true;
2300         }
2301
2302 out:
2303         if (err) {
2304                 if (err != -EINVAL)
2305                         printk(KERN_ERR
2306                                "cik_cp: Failed to load firmware \"%s\"\n",
2307                                fw_name);
2308                 release_firmware(rdev->pfp_fw);
2309                 rdev->pfp_fw = NULL;
2310                 release_firmware(rdev->me_fw);
2311                 rdev->me_fw = NULL;
2312                 release_firmware(rdev->ce_fw);
2313                 rdev->ce_fw = NULL;
2314                 release_firmware(rdev->mec_fw);
2315                 rdev->mec_fw = NULL;
2316                 release_firmware(rdev->mec2_fw);
2317                 rdev->mec2_fw = NULL;
2318                 release_firmware(rdev->rlc_fw);
2319                 rdev->rlc_fw = NULL;
2320                 release_firmware(rdev->sdma_fw);
2321                 rdev->sdma_fw = NULL;
2322                 release_firmware(rdev->mc_fw);
2323                 rdev->mc_fw = NULL;
2324                 release_firmware(rdev->smc_fw);
2325                 rdev->smc_fw = NULL;
2326         }
2327         return err;
2328 }
2329
2330 /*
2331  * Core functions
2332  */
2333 /**
2334  * cik_tiling_mode_table_init - init the hw tiling table
2335  *
2336  * @rdev: radeon_device pointer
2337  *
2338  * Starting with SI, the tiling setup is done globally in a
2339  * set of 32 tiling modes.  Rather than selecting each set of
2340  * parameters per surface as on older asics, we just select
2341  * which index in the tiling table we want to use, and the
2342  * surface uses those parameters (CIK).
2343  */
2344 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2345 {
2346         u32 *tile = rdev->config.cik.tile_mode_array;
2347         u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2348         const u32 num_tile_mode_states =
2349                         ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2350         const u32 num_secondary_tile_mode_states =
2351                         ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2352         u32 reg_offset, split_equal_to_row_size;
2353         u32 num_pipe_configs;
2354         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2355                 rdev->config.cik.max_shader_engines;
2356
2357         switch (rdev->config.cik.mem_row_size_in_kb) {
2358         case 1:
2359                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2360                 break;
2361         case 2:
2362         default:
2363                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2364                 break;
2365         case 4:
2366                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2367                 break;
2368         }
2369
2370         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2371         if (num_pipe_configs > 8)
2372                 num_pipe_configs = 16;
2373
2374         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2375                 tile[reg_offset] = 0;
2376         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2377                 macrotile[reg_offset] = 0;
2378
2379         switch(num_pipe_configs) {
2380         case 16:
2381                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2382                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2383                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2385                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2386                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2387                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2389                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2391                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2393                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2397                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                            TILE_SPLIT(split_equal_to_row_size));
2401                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2402                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2404                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2405                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2406                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2408                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2409                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2410                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411                            TILE_SPLIT(split_equal_to_row_size));
2412                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2413                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2414                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2415                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2417                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2419                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2424                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2426                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2427                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2432                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2434                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2438                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2445                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2446                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2447                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2448                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2449                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2451                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2452                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2453                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2454                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2456                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2457                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2458                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2459
2460                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2462                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2463                            NUM_BANKS(ADDR_SURF_16_BANK));
2464                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2466                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467                            NUM_BANKS(ADDR_SURF_16_BANK));
2468                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471                            NUM_BANKS(ADDR_SURF_16_BANK));
2472                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475                            NUM_BANKS(ADDR_SURF_16_BANK));
2476                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479                            NUM_BANKS(ADDR_SURF_8_BANK));
2480                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483                            NUM_BANKS(ADDR_SURF_4_BANK));
2484                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487                            NUM_BANKS(ADDR_SURF_2_BANK));
2488                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2490                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2491                            NUM_BANKS(ADDR_SURF_16_BANK));
2492                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2494                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495                            NUM_BANKS(ADDR_SURF_16_BANK));
2496                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499                             NUM_BANKS(ADDR_SURF_16_BANK));
2500                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2502                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2503                             NUM_BANKS(ADDR_SURF_8_BANK));
2504                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2507                             NUM_BANKS(ADDR_SURF_4_BANK));
2508                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511                             NUM_BANKS(ADDR_SURF_2_BANK));
2512                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2514                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2515                             NUM_BANKS(ADDR_SURF_2_BANK));
2516
2517                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2518                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2519                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2520                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2521                 break;
2522
2523         case 8:
2524                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2526                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2528                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2530                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2532                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2534                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2536                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2540                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                            TILE_SPLIT(split_equal_to_row_size));
2544                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2545                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2547                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2548                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2549                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2551                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2552                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2553                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554                            TILE_SPLIT(split_equal_to_row_size));
2555                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2556                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2557                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2560                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2561                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2562                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2564                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2565                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2566                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2567                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2569                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2570                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2573                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2575                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2577                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2579                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2580                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2581                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2582                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2584                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2585                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2586                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2588                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2590                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2592                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2594                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2595                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2596                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2597                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2598                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2599                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2600                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2601                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2602
2603                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2605                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2606                                 NUM_BANKS(ADDR_SURF_16_BANK));
2607                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2609                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2610                                 NUM_BANKS(ADDR_SURF_16_BANK));
2611                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2613                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2614                                 NUM_BANKS(ADDR_SURF_16_BANK));
2615                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618                                 NUM_BANKS(ADDR_SURF_16_BANK));
2619                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2622                                 NUM_BANKS(ADDR_SURF_8_BANK));
2623                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626                                 NUM_BANKS(ADDR_SURF_4_BANK));
2627                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630                                 NUM_BANKS(ADDR_SURF_2_BANK));
2631                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2633                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2634                                 NUM_BANKS(ADDR_SURF_16_BANK));
2635                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2637                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638                                 NUM_BANKS(ADDR_SURF_16_BANK));
2639                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2641                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2642                                 NUM_BANKS(ADDR_SURF_16_BANK));
2643                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2646                                 NUM_BANKS(ADDR_SURF_16_BANK));
2647                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2650                                 NUM_BANKS(ADDR_SURF_8_BANK));
2651                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654                                 NUM_BANKS(ADDR_SURF_4_BANK));
2655                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658                                 NUM_BANKS(ADDR_SURF_2_BANK));
2659
2660                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2661                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2662                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2663                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2664                 break;
2665
2666         case 4:
2667                 if (num_rbs == 4) {
2668                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2670                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2672                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2676                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2678                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2680                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2684                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                            TILE_SPLIT(split_equal_to_row_size));
2688                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2689                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2690                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2691                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2692                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2693                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2695                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2696                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2697                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698                            TILE_SPLIT(split_equal_to_row_size));
2699                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2700                            PIPE_CONFIG(ADDR_SURF_P4_16x16));
2701                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2702                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2704                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2706                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2708                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2709                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2711                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2713                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2714                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2717                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2719                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2720                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2721                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2725                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2726                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2728                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2729                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2732                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2734                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2735                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2736                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2738                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2739                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2740                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2741                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2743                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2744                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2746
2747                 } else if (num_rbs < 4) {
2748                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2750                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2751                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2752                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2754                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2756                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2757                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2758                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2760                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2764                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2765                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767                            TILE_SPLIT(split_equal_to_row_size));
2768                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2769                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2770                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2771                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2772                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2773                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2775                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2776                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2777                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778                            TILE_SPLIT(split_equal_to_row_size));
2779                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2780                            PIPE_CONFIG(ADDR_SURF_P4_8x16));
2781                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2782                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2784                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2786                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2788                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2789                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2790                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2793                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2797                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2799                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2800                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2801                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2803                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2805                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2808                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2809                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2812                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2813                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2814                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2815                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2816                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2817                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2818                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2819                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2820                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2821                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2823                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2824                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2825                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2826                 }
2827
2828                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2830                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2831                                 NUM_BANKS(ADDR_SURF_16_BANK));
2832                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2834                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835                                 NUM_BANKS(ADDR_SURF_16_BANK));
2836                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2838                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2839                                 NUM_BANKS(ADDR_SURF_16_BANK));
2840                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2843                                 NUM_BANKS(ADDR_SURF_16_BANK));
2844                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847                                 NUM_BANKS(ADDR_SURF_16_BANK));
2848                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851                                 NUM_BANKS(ADDR_SURF_8_BANK));
2852                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2855                                 NUM_BANKS(ADDR_SURF_4_BANK));
2856                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2857                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2858                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2859                                 NUM_BANKS(ADDR_SURF_16_BANK));
2860                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2861                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2862                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2863                                 NUM_BANKS(ADDR_SURF_16_BANK));
2864                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2865                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2866                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2867                                 NUM_BANKS(ADDR_SURF_16_BANK));
2868                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2870                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2871                                 NUM_BANKS(ADDR_SURF_16_BANK));
2872                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2874                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2875                                 NUM_BANKS(ADDR_SURF_16_BANK));
2876                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2878                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2879                                 NUM_BANKS(ADDR_SURF_8_BANK));
2880                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2883                                 NUM_BANKS(ADDR_SURF_4_BANK));
2884
2885                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2886                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2887                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2888                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2889                 break;
2890
2891         case 2:
2892                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2894                            PIPE_CONFIG(ADDR_SURF_P2) |
2895                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2896                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898                            PIPE_CONFIG(ADDR_SURF_P2) |
2899                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2900                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2902                            PIPE_CONFIG(ADDR_SURF_P2) |
2903                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2904                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906                            PIPE_CONFIG(ADDR_SURF_P2) |
2907                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2908                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910                            PIPE_CONFIG(ADDR_SURF_P2) |
2911                            TILE_SPLIT(split_equal_to_row_size));
2912                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2913                            PIPE_CONFIG(ADDR_SURF_P2) |
2914                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2916                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2917                            PIPE_CONFIG(ADDR_SURF_P2) |
2918                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2919                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2920                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2921                            PIPE_CONFIG(ADDR_SURF_P2) |
2922                            TILE_SPLIT(split_equal_to_row_size));
2923                 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2924                            PIPE_CONFIG(ADDR_SURF_P2);
2925                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2926                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2927                            PIPE_CONFIG(ADDR_SURF_P2));
2928                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2929                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2930                             PIPE_CONFIG(ADDR_SURF_P2) |
2931                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2932                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2933                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2934                             PIPE_CONFIG(ADDR_SURF_P2) |
2935                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2937                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2938                             PIPE_CONFIG(ADDR_SURF_P2) |
2939                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2941                             PIPE_CONFIG(ADDR_SURF_P2) |
2942                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2943                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2944                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2945                             PIPE_CONFIG(ADDR_SURF_P2) |
2946                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2947                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2949                             PIPE_CONFIG(ADDR_SURF_P2) |
2950                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2952                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953                             PIPE_CONFIG(ADDR_SURF_P2) |
2954                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2956                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2957                             PIPE_CONFIG(ADDR_SURF_P2));
2958                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2959                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2960                             PIPE_CONFIG(ADDR_SURF_P2) |
2961                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2962                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2964                             PIPE_CONFIG(ADDR_SURF_P2) |
2965                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2966                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2967                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2968                             PIPE_CONFIG(ADDR_SURF_P2) |
2969                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2970
2971                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2972                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2973                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2974                                 NUM_BANKS(ADDR_SURF_16_BANK));
2975                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2976                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2977                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978                                 NUM_BANKS(ADDR_SURF_16_BANK));
2979                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2980                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2981                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982                                 NUM_BANKS(ADDR_SURF_16_BANK));
2983                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2985                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986                                 NUM_BANKS(ADDR_SURF_16_BANK));
2987                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990                                 NUM_BANKS(ADDR_SURF_16_BANK));
2991                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2993                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994                                 NUM_BANKS(ADDR_SURF_16_BANK));
2995                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2998                                 NUM_BANKS(ADDR_SURF_8_BANK));
2999                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3000                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3001                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002                                 NUM_BANKS(ADDR_SURF_16_BANK));
3003                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3004                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3005                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006                                 NUM_BANKS(ADDR_SURF_16_BANK));
3007                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3008                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3009                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010                                 NUM_BANKS(ADDR_SURF_16_BANK));
3011                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3012                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3013                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3014                                 NUM_BANKS(ADDR_SURF_16_BANK));
3015                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3017                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018                                 NUM_BANKS(ADDR_SURF_16_BANK));
3019                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3021                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022                                 NUM_BANKS(ADDR_SURF_16_BANK));
3023                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3025                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3026                                 NUM_BANKS(ADDR_SURF_8_BANK));
3027
3028                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3029                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3030                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3031                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3032                 break;
3033
3034         default:
3035                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3036         }
3037 }
3038
3039 /**
3040  * cik_select_se_sh - select which SE, SH to address
3041  *
3042  * @rdev: radeon_device pointer
3043  * @se_num: shader engine to address
3044  * @sh_num: sh block to address
3045  *
3046  * Select which SE, SH combinations to address. Certain
3047  * registers are instanced per SE or SH.  0xffffffff means
3048  * broadcast to all SEs or SHs (CIK).
3049  */
3050 static void cik_select_se_sh(struct radeon_device *rdev,
3051                              u32 se_num, u32 sh_num)
3052 {
3053         u32 data = INSTANCE_BROADCAST_WRITES;
3054
3055         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3056                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3057         else if (se_num == 0xffffffff)
3058                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3059         else if (sh_num == 0xffffffff)
3060                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3061         else
3062                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3063         WREG32(GRBM_GFX_INDEX, data);
3064 }
3065
3066 /**
3067  * cik_create_bitmask - create a bitmask
3068  *
3069  * @bit_width: length of the mask
3070  *
3071  * create a variable length bit mask (CIK).
3072  * Returns the bitmask.
3073  */
3074 static u32 cik_create_bitmask(u32 bit_width)
3075 {
3076         u32 i, mask = 0;
3077
3078         for (i = 0; i < bit_width; i++) {
3079                 mask <<= 1;
3080                 mask |= 1;
3081         }
3082         return mask;
3083 }
3084
3085 /**
3086  * cik_get_rb_disabled - computes the mask of disabled RBs
3087  *
3088  * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per shader engine
 * @sh_per_se: number of SH blocks per SE for the asic
3092  *
3093  * Calculates the bitmask of disabled RBs (CIK).
3094  * Returns the disabled RB bitmask.
3095  */
3096 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3097                               u32 max_rb_num_per_se,
3098                               u32 sh_per_se)
3099 {
3100         u32 data, mask;
3101
3102         data = RREG32(CC_RB_BACKEND_DISABLE);
3103         if (data & 1)
3104                 data &= BACKEND_DISABLE_MASK;
3105         else
3106                 data = 0;
3107         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3108
3109         data >>= BACKEND_DISABLE_SHIFT;
3110
3111         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3112
3113         return data & mask;
3114 }
3115
3116 /**
3117  * cik_setup_rb - setup the RBs on the asic
3118  *
3119  * @rdev: radeon_device pointer
3120  * @se_num: number of SEs (shader engines) for the asic
3121  * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per shader engine
3123  *
3124  * Configures per-SE/SH RB registers (CIK).
3125  */
static void cik_setup_rb(struct radeon_device *rdev,
                         u32 se_num, u32 sh_per_se,
                         u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Pass 1: walk every SE/SH instance and collect the per-SH disabled-RB
	 * bits into one packed bitmap.  GRBM_GFX_INDEX selection is global
	 * state, so the whole walk is done under grbm_idx_mutex.
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			/* Hawaii packs more RB bits per SH than the other CIK parts. */
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* Restore broadcast selection before dropping the lock. */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* Invert the disabled bitmap into an enabled bitmap covering all RBs. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Exported so userspace/other code can see which backends are live. */
	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* Pass 2: program PA_SC_RASTER_CONFIG per SE based on which of the
	 * (up to two) RBs of each SH survived.  enabled_rbs is consumed two
	 * bits at a time, one pair per SH.
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* No RBs enabled in this SH: steer packers elsewhere. */
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				/* Only RB 0 enabled. */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* Only RB 1 enabled. */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* Both RBs enabled. */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* Back to broadcast so later instanced writes hit everything. */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);
}
3188
3189 /**
3190  * cik_gpu_init - setup the 3D engine
3191  *
3192  * @rdev: radeon_device pointer
3193  *
3194  * Configures the 3D engine and tiling configuration
3195  * registers so that the 3D engine is usable.
3196  */
3197 static void cik_gpu_init(struct radeon_device *rdev)
3198 {
3199         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3200         u32 mc_shared_chmap, mc_arb_ramcfg;
3201         u32 hdp_host_path_cntl;
3202         u32 tmp;
3203         int i, j;
3204
3205         switch (rdev->family) {
3206         case CHIP_BONAIRE:
3207                 rdev->config.cik.max_shader_engines = 2;
3208                 rdev->config.cik.max_tile_pipes = 4;
3209                 rdev->config.cik.max_cu_per_sh = 7;
3210                 rdev->config.cik.max_sh_per_se = 1;
3211                 rdev->config.cik.max_backends_per_se = 2;
3212                 rdev->config.cik.max_texture_channel_caches = 4;
3213                 rdev->config.cik.max_gprs = 256;
3214                 rdev->config.cik.max_gs_threads = 32;
3215                 rdev->config.cik.max_hw_contexts = 8;
3216
3217                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3218                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3219                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3220                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3221                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3222                 break;
3223         case CHIP_HAWAII:
3224                 rdev->config.cik.max_shader_engines = 4;
3225                 rdev->config.cik.max_tile_pipes = 16;
3226                 rdev->config.cik.max_cu_per_sh = 11;
3227                 rdev->config.cik.max_sh_per_se = 1;
3228                 rdev->config.cik.max_backends_per_se = 4;
3229                 rdev->config.cik.max_texture_channel_caches = 16;
3230                 rdev->config.cik.max_gprs = 256;
3231                 rdev->config.cik.max_gs_threads = 32;
3232                 rdev->config.cik.max_hw_contexts = 8;
3233
3234                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3235                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3236                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3237                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3238                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3239                 break;
3240         case CHIP_KAVERI:
3241                 rdev->config.cik.max_shader_engines = 1;
3242                 rdev->config.cik.max_tile_pipes = 4;
3243                 if ((rdev->pdev->device == 0x1304) ||
3244                     (rdev->pdev->device == 0x1305) ||
3245                     (rdev->pdev->device == 0x130C) ||
3246                     (rdev->pdev->device == 0x130F) ||
3247                     (rdev->pdev->device == 0x1310) ||
3248                     (rdev->pdev->device == 0x1311) ||
3249                     (rdev->pdev->device == 0x131C)) {
3250                         rdev->config.cik.max_cu_per_sh = 8;
3251                         rdev->config.cik.max_backends_per_se = 2;
3252                 } else if ((rdev->pdev->device == 0x1309) ||
3253                            (rdev->pdev->device == 0x130A) ||
3254                            (rdev->pdev->device == 0x130D) ||
3255                            (rdev->pdev->device == 0x1313) ||
3256                            (rdev->pdev->device == 0x131D)) {
3257                         rdev->config.cik.max_cu_per_sh = 6;
3258                         rdev->config.cik.max_backends_per_se = 2;
3259                 } else if ((rdev->pdev->device == 0x1306) ||
3260                            (rdev->pdev->device == 0x1307) ||
3261                            (rdev->pdev->device == 0x130B) ||
3262                            (rdev->pdev->device == 0x130E) ||
3263                            (rdev->pdev->device == 0x1315) ||
3264                            (rdev->pdev->device == 0x1318) ||
3265                            (rdev->pdev->device == 0x131B)) {
3266                         rdev->config.cik.max_cu_per_sh = 4;
3267                         rdev->config.cik.max_backends_per_se = 1;
3268                 } else {
3269                         rdev->config.cik.max_cu_per_sh = 3;
3270                         rdev->config.cik.max_backends_per_se = 1;
3271                 }
3272                 rdev->config.cik.max_sh_per_se = 1;
3273                 rdev->config.cik.max_texture_channel_caches = 4;
3274                 rdev->config.cik.max_gprs = 256;
3275                 rdev->config.cik.max_gs_threads = 16;
3276                 rdev->config.cik.max_hw_contexts = 8;
3277
3278                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3279                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3280                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3281                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3282                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3283                 break;
3284         case CHIP_KABINI:
3285         case CHIP_MULLINS:
3286         default:
3287                 rdev->config.cik.max_shader_engines = 1;
3288                 rdev->config.cik.max_tile_pipes = 2;
3289                 rdev->config.cik.max_cu_per_sh = 2;
3290                 rdev->config.cik.max_sh_per_se = 1;
3291                 rdev->config.cik.max_backends_per_se = 1;
3292                 rdev->config.cik.max_texture_channel_caches = 2;
3293                 rdev->config.cik.max_gprs = 256;
3294                 rdev->config.cik.max_gs_threads = 16;
3295                 rdev->config.cik.max_hw_contexts = 8;
3296
3297                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3298                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3299                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3300                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3301                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3302                 break;
3303         }
3304
3305         /* Initialize HDP */
3306         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3307                 WREG32((0x2c14 + j), 0x00000000);
3308                 WREG32((0x2c18 + j), 0x00000000);
3309                 WREG32((0x2c1c + j), 0x00000000);
3310                 WREG32((0x2c20 + j), 0x00000000);
3311                 WREG32((0x2c24 + j), 0x00000000);
3312         }
3313
3314         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3315         WREG32(SRBM_INT_CNTL, 0x1);
3316         WREG32(SRBM_INT_ACK, 0x1);
3317
3318         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3319
3320         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3321         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3322
3323         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3324         rdev->config.cik.mem_max_burst_length_bytes = 256;
3325         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3326         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3327         if (rdev->config.cik.mem_row_size_in_kb > 4)
3328                 rdev->config.cik.mem_row_size_in_kb = 4;
3329         /* XXX use MC settings? */
3330         rdev->config.cik.shader_engine_tile_size = 32;
3331         rdev->config.cik.num_gpus = 1;
3332         rdev->config.cik.multi_gpu_tile_size = 64;
3333
3334         /* fix up row size */
3335         gb_addr_config &= ~ROW_SIZE_MASK;
3336         switch (rdev->config.cik.mem_row_size_in_kb) {
3337         case 1:
3338         default:
3339                 gb_addr_config |= ROW_SIZE(0);
3340                 break;
3341         case 2:
3342                 gb_addr_config |= ROW_SIZE(1);
3343                 break;
3344         case 4:
3345                 gb_addr_config |= ROW_SIZE(2);
3346                 break;
3347         }
3348
3349         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3350          * not have bank info, so create a custom tiling dword.
3351          * bits 3:0   num_pipes
3352          * bits 7:4   num_banks
3353          * bits 11:8  group_size
3354          * bits 15:12 row_size
3355          */
3356         rdev->config.cik.tile_config = 0;
3357         switch (rdev->config.cik.num_tile_pipes) {
3358         case 1:
3359                 rdev->config.cik.tile_config |= (0 << 0);
3360                 break;
3361         case 2:
3362                 rdev->config.cik.tile_config |= (1 << 0);
3363                 break;
3364         case 4:
3365                 rdev->config.cik.tile_config |= (2 << 0);
3366                 break;
3367         case 8:
3368         default:
3369                 /* XXX what about 12? */
3370                 rdev->config.cik.tile_config |= (3 << 0);
3371                 break;
3372         }
3373         rdev->config.cik.tile_config |=
3374                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3375         rdev->config.cik.tile_config |=
3376                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3377         rdev->config.cik.tile_config |=
3378                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3379
3380         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3381         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3382         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3383         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3384         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3385         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3386         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3387         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3388
3389         cik_tiling_mode_table_init(rdev);
3390
3391         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3392                      rdev->config.cik.max_sh_per_se,
3393                      rdev->config.cik.max_backends_per_se);
3394
3395         rdev->config.cik.active_cus = 0;
3396         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3397                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3398                         rdev->config.cik.active_cus +=
3399                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3400                 }
3401         }
3402
3403         /* set HW defaults for 3D engine */
3404         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3405
3406         mutex_lock(&rdev->grbm_idx_mutex);
3407         /*
3408          * making sure that the following register writes will be broadcasted
3409          * to all the shaders
3410          */
3411         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3412         WREG32(SX_DEBUG_1, 0x20);
3413
3414         WREG32(TA_CNTL_AUX, 0x00010000);
3415
3416         tmp = RREG32(SPI_CONFIG_CNTL);
3417         tmp |= 0x03000000;
3418         WREG32(SPI_CONFIG_CNTL, tmp);
3419
3420         WREG32(SQ_CONFIG, 1);
3421
3422         WREG32(DB_DEBUG, 0);
3423
3424         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3425         tmp |= 0x00000400;
3426         WREG32(DB_DEBUG2, tmp);
3427
3428         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3429         tmp |= 0x00020200;
3430         WREG32(DB_DEBUG3, tmp);
3431
3432         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3433         tmp |= 0x00018208;
3434         WREG32(CB_HW_CONTROL, tmp);
3435
3436         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3437
3438         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3439                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3440                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3441                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3442
3443         WREG32(VGT_NUM_INSTANCES, 1);
3444
3445         WREG32(CP_PERFMON_CNTL, 0);
3446
3447         WREG32(SQ_CONFIG, 0);
3448
3449         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3450                                           FORCE_EOV_MAX_REZ_CNT(255)));
3451
3452         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3453                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3454
3455         WREG32(VGT_GS_VERTEX_REUSE, 16);
3456         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3457
3458         tmp = RREG32(HDP_MISC_CNTL);
3459         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3460         WREG32(HDP_MISC_CNTL, tmp);
3461
3462         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3463         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3464
3465         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3466         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3467         mutex_unlock(&rdev->grbm_idx_mutex);
3468
3469         udelay(50);
3470 }
3471
3472 /*
3473  * GPU scratch registers helpers function.
3474  */
3475 /**
3476  * cik_scratch_init - setup driver info for CP scratch regs
3477  *
3478  * @rdev: radeon_device pointer
3479  *
3480  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3482  * is not used by default on newer asics (r6xx+).  On newer asics,
3483  * memory buffers are used for fences rather than scratch regs.
3484  */
3485 static void cik_scratch_init(struct radeon_device *rdev)
3486 {
3487         int i;
3488
3489         rdev->scratch.num_reg = 7;
3490         rdev->scratch.reg_base = SCRATCH_REG0;
3491         for (i = 0; i < rdev->scratch.num_reg; i++) {
3492                 rdev->scratch.free[i] = true;
3493                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3494         }
3495 }
3496
3497 /**
3498  * cik_ring_test - basic gfx ring test
3499  *
3500  * @rdev: radeon_device pointer
3501  * @ring: radeon_ring structure holding ring information
3502  *
3503  * Allocate a scratch register and write to it using the gfx ring (CIK).
3504  * Provides a basic gfx ring test to verify that the ring is working.
3505  * Used by cik_cp_gfx_resume();
3506  * Returns 0 on success, error on failure.
3507  */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        /* grab a free CP scratch register to use as the test target */
        r = radeon_scratch_get(rdev, &scratch);
        if (r) {
                DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
                return r;
        }
        /* seed the scratch reg so we can tell whether the CP wrote it */
        WREG32(scratch, 0xCAFEDEAD);
        r = radeon_ring_lock(rdev, ring, 3);
        if (r) {
                DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
                radeon_scratch_free(rdev, scratch);
                return r;
        }
        /* 3-dword SET_UCONFIG_REG packet writing 0xDEADBEEF to the scratch reg */
        radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
        radeon_ring_write(ring, 0xDEADBEEF);
        radeon_ring_unlock_commit(rdev, ring, false);

        /* poll until the CP has executed the write or we time out */
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i < rdev->usec_timeout) {
                DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
        } else {
                DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
                          ring->idx, scratch, tmp);
                r = -EINVAL;
        }
        radeon_scratch_free(rdev, scratch);
        return r;
}
3548
3549 /**
3550  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3551  *
3552  * @rdev: radeon_device pointer
3553  * @ridx: radeon ring index
3554  *
3555  * Emits an hdp flush on the cp.
3556  */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
                                       int ridx)
{
        struct radeon_ring *ring = &rdev->ring[ridx];
        u32 ref_and_mask;

        /* Pick the GPU_HDP_FLUSH_REQ/DONE bit for this ring: CP0 for the
         * gfx ring; for compute rings the bit depends on which microengine
         * (me) and pipe the ring runs on. */
        switch (ring->idx) {
        case CAYMAN_RING_TYPE_CP1_INDEX:
        case CAYMAN_RING_TYPE_CP2_INDEX:
        default:
                switch (ring->me) {
                case 0:
                        ref_and_mask = CP2 << ring->pipe;
                        break;
                case 1:
                        ref_and_mask = CP6 << ring->pipe;
                        break;
                default:
                        /* unknown ME: no matching flush bit, emit nothing */
                        return;
                }
                break;
        case RADEON_RING_TYPE_GFX_INDEX:
                ref_and_mask = CP0;
                break;
        }

        /* WAIT_REG_MEM: write the flush request bit, then poll the done
         * register until the same bit is set */
        radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
                                 WAIT_REG_MEM_FUNCTION(3) |  /* == */
                                 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
        radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
        radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
        radeon_ring_write(ring, ref_and_mask);
        radeon_ring_write(ring, ref_and_mask);
        radeon_ring_write(ring, 0x20); /* poll interval */
}
3593
3594 /**
3595  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3596  *
3597  * @rdev: radeon_device pointer
3598  * @fence: radeon fence object
3599  *
 * Emits a fence sequence number on the gfx ring and flushes
3601  * GPU caches.
3602  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
                             struct radeon_fence *fence)
{
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

        /* Workaround for cache flush problems. First send a dummy EOP
         * event down the pipe with seq one below.
         */
        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        radeon_ring_write(ring, addr & 0xfffffffc);
        /* dummy write: send the value (DATA_SEL(1)) but raise no
         * interrupt (INT_SEL(0)) */
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
                                DATA_SEL(1) | INT_SEL(0));
        radeon_ring_write(ring, fence->seq - 1);
        radeon_ring_write(ring, 0);

        /* Then send the real EOP event down the pipe. */
        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        radeon_ring_write(ring, addr & 0xfffffffc);
        /* real write: INT_SEL(2) raises the fence interrupt as well */
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, 0);
}
3634
3635 /**
3636  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3637  *
3638  * @rdev: radeon_device pointer
3639  * @fence: radeon fence object
3640  *
 * Emits a fence sequence number on the compute ring and flushes
3642  * GPU caches.
3643  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
                                 struct radeon_fence *fence)
{
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

        /* RELEASE_MEM - flush caches, send int */
        radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
        radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        /* write the fence seq (DATA_SEL(1)) and raise an interrupt (INT_SEL(2)) */
        radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
        radeon_ring_write(ring, addr & 0xfffffffc);
        radeon_ring_write(ring, upper_32_bits(addr));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, 0);
}
3662
3663 /**
3664  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3665  *
3666  * @rdev: radeon_device pointer
3667  * @ring: radeon ring buffer object
3668  * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
3670  *
3671  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3672  * from running ahead of semaphore waits.
3673  */
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
                             struct radeon_ring *ring,
                             struct radeon_semaphore *semaphore,
                             bool emit_wait)
{
        uint64_t addr = semaphore->gpu_addr;
        unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

        /* MEM_SEMAPHORE: 64-bit semaphore address with the signal/wait
         * select packed into the upper dword */
        radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
        radeon_ring_write(ring, lower_32_bits(addr));
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);

        if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
                /* Prevent the PFP from running ahead of the semaphore wait */
                radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                radeon_ring_write(ring, 0x0);
        }

        /* always succeeds; the return type matches the asic callback signature */
        return true;
}
3694
3695 /**
3696  * cik_copy_cpdma - copy pages using the CP DMA engine
3697  *
3698  * @rdev: radeon_device pointer
3699  * @src_offset: src GPU address
3700  * @dst_offset: dst GPU address
3701  * @num_gpu_pages: number of GPU pages to xfer
3702  * @resv: reservation object to sync to
3703  *
3704  * Copy GPU paging using the CP DMA engine (CIK+).
3705  * Used by the radeon ttm implementation to move pages if
3706  * registered as the asic copy callback.
3707  */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
                                    uint64_t src_offset, uint64_t dst_offset,
                                    unsigned num_gpu_pages,
                                    struct reservation_object *resv)
{
        struct radeon_fence *fence;
        struct radeon_sync sync;
        int ring_index = rdev->asic->copy.blit_ring_index;
        struct radeon_ring *ring = &rdev->ring[ring_index];
        u32 size_in_bytes, cur_size_in_bytes, control;
        int i, num_loops;
        int r = 0;

        radeon_sync_create(&sync);

        size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
        /* each DMA_DATA packet copies at most 0x1fffff bytes */
        num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
        /* 7 dwords per copy packet, plus headroom for sync and fence packets */
        r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
                radeon_sync_free(rdev, &sync, NULL);
                return ERR_PTR(r);
        }

        /* wait for fences on the reservation object before copying */
        radeon_sync_resv(rdev, &sync, resv, false);
        radeon_sync_rings(rdev, &sync, ring->idx);

        for (i = 0; i < num_loops; i++) {
                cur_size_in_bytes = size_in_bytes;
                if (cur_size_in_bytes > 0x1fffff)
                        cur_size_in_bytes = 0x1fffff;
                size_in_bytes -= cur_size_in_bytes;
                control = 0;
                /* only the last chunk needs CP_SYNC */
                if (size_in_bytes == 0)
                        control |= PACKET3_DMA_DATA_CP_SYNC;
                radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
                radeon_ring_write(ring, control);
                radeon_ring_write(ring, lower_32_bits(src_offset));
                radeon_ring_write(ring, upper_32_bits(src_offset));
                radeon_ring_write(ring, lower_32_bits(dst_offset));
                radeon_ring_write(ring, upper_32_bits(dst_offset));
                radeon_ring_write(ring, cur_size_in_bytes);
                src_offset += cur_size_in_bytes;
                dst_offset += cur_size_in_bytes;
        }

        r = radeon_fence_emit(rdev, &fence, ring->idx);
        if (r) {
                radeon_ring_unlock_undo(rdev, ring);
                radeon_sync_free(rdev, &sync, NULL);
                return ERR_PTR(r);
        }

        radeon_ring_unlock_commit(rdev, ring, false);
        radeon_sync_free(rdev, &sync, fence);

        /* caller owns a reference on the returned fence */
        return fence;
}
3766
3767 /*
3768  * IB stuff
3769  */
3770 /**
3771  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3772  *
3773  * @rdev: radeon_device pointer
3774  * @ib: radeon indirect buffer object
3775  *
3776  * Emits a DE (drawing engine) or CE (constant engine) IB
3777  * on the gfx ring.  IBs are usually generated by userspace
3778  * acceleration drivers and submitted to the kernel for
3779  * scheduling on the ring.  This function schedules the IB
3780  * on the gfx ring for execution by the GPU.
3781  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];
        unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
        u32 header, control = INDIRECT_BUFFER_VALID;

        if (ib->is_const_ib) {
                /* set switch buffer packet before const IB */
                radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
                radeon_ring_write(ring, 0);

                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
        } else {
                u32 next_rptr;
                if (ring->rptr_save_reg) {
                        /* record where the rptr will be once this IB has been
                         * consumed: 3 dwords for this write + 4 for the IB packet */
                        next_rptr = ring->wptr + 3 + 4;
                        radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
                        radeon_ring_write(ring, ((ring->rptr_save_reg -
                                                  PACKET3_SET_UCONFIG_REG_START) >> 2));
                        radeon_ring_write(ring, next_rptr);
                } else if (rdev->wb.enabled) {
                        /* same bookkeeping via a memory write when write-back is
                         * enabled: 5 dwords for WRITE_DATA + 4 for the IB packet */
                        next_rptr = ring->wptr + 5 + 4;
                        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                        radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
                        radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
                        radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
                        radeon_ring_write(ring, next_rptr);
                }

                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
        }

        /* IB length in dwords in the low bits, vm id in the top byte */
        control |= ib->length_dw | (vm_id << 24);

        radeon_ring_write(ring, header);
        radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
        radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        radeon_ring_write(ring, control);
}
3821
3822 /**
3823  * cik_ib_test - basic gfx ring IB test
3824  *
3825  * @rdev: radeon_device pointer
3826  * @ring: radeon_ring structure holding ring information
3827  *
3828  * Allocate an IB and execute it on the gfx ring (CIK).
3829  * Provides a basic gfx ring test to verify that IBs are working.
3830  * Returns 0 on success, error on failure.
3831  */
3832 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3833 {
3834         struct radeon_ib ib;
3835         uint32_t scratch;
3836         uint32_t tmp = 0;
3837         unsigned i;
3838         int r;
3839
3840         r = radeon_scratch_get(rdev, &scratch);
3841         if (r) {
3842                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3843                 return r;
3844         }
3845         WREG32(scratch, 0xCAFEDEAD);
3846         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3847         if (r) {
3848                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3849                 radeon_scratch_free(rdev, scratch);
3850                 return r;
3851         }
3852         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3853         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3854         ib.ptr[2] = 0xDEADBEEF;
3855         ib.length_dw = 3;
3856         r = radeon_ib_schedule(rdev, &ib, NULL, false);
3857         if (r) {
3858                 radeon_scratch_free(rdev, scratch);
3859                 radeon_ib_free(rdev, &ib);
3860                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3861                 return r;
3862         }
3863         r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3864                 RADEON_USEC_IB_TEST_TIMEOUT));
3865         if (r < 0) {
3866                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3867                 radeon_scratch_free(rdev, scratch);
3868                 radeon_ib_free(rdev, &ib);
3869                 return r;
3870         } else if (r == 0) {
3871                 DRM_ERROR("radeon: fence wait timed out.\n");
3872                 radeon_scratch_free(rdev, scratch);
3873                 radeon_ib_free(rdev, &ib);
3874                 return -ETIMEDOUT;
3875         }
3876         r = 0;
3877         for (i = 0; i < rdev->usec_timeout; i++) {
3878                 tmp = RREG32(scratch);
3879                 if (tmp == 0xDEADBEEF)
3880                         break;
3881                 DRM_UDELAY(1);
3882         }
3883         if (i < rdev->usec_timeout) {
3884                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3885         } else {
3886                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3887                           scratch, tmp);
3888                 r = -EINVAL;
3889         }
3890         radeon_scratch_free(rdev, scratch);
3891         radeon_ib_free(rdev, &ib);
3892         return r;
3893 }
3894
3895 /*
3896  * CP.
 * On CIK, gfx and compute now have independent command processors.
3898  *
3899  * GFX
3900  * Gfx consists of a single ring and can process both gfx jobs and
3901  * compute jobs.  The gfx CP consists of three microengines (ME):
3902  * PFP - Pre-Fetch Parser
3903  * ME - Micro Engine
3904  * CE - Constant Engine
3905  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3907  * used by the DE so that they can be loaded into cache in parallel
3908  * while the DE is processing state update packets.
3909  *
3910  * Compute
3911  * The compute CP consists of two microengines (ME):
3912  * MEC1 - Compute MicroEngine 1
3913  * MEC2 - Compute MicroEngine 2
3914  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3915  * The queues are exposed to userspace and are programmed directly
3916  * by the compute runtime.
3917  */
3918 /**
3919  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3920  *
3921  * @rdev: radeon_device pointer
3922  * @enable: enable or disable the MEs
3923  *
3924  * Halts or unhalts the gfx MEs.
3925  */
3926 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3927 {
3928         if (enable)
3929                 WREG32(CP_ME_CNTL, 0);
3930         else {
3931                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3932                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3933                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3934                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3935         }
3936         udelay(50);
3937 }
3938
3939 /**
3940  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3941  *
3942  * @rdev: radeon_device pointer
3943  *
3944  * Loads the gfx PFP, ME, and CE ucode.
3945  * Returns 0 for success, -EINVAL if the ucode is not available.
3946  */
3947 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3948 {
3949         int i;
3950
3951         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3952                 return -EINVAL;
3953
3954         cik_cp_gfx_enable(rdev, false);
3955
3956         if (rdev->new_fw) {
3957                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3958                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3959                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3960                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3961                 const struct gfx_firmware_header_v1_0 *me_hdr =
3962                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3963                 const __le32 *fw_data;
3964                 u32 fw_size;
3965
3966                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3967                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3968                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3969
3970                 /* PFP */
3971                 fw_data = (const __le32 *)
3972                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3973                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3974                 WREG32(CP_PFP_UCODE_ADDR, 0);
3975                 for (i = 0; i < fw_size; i++)
3976                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3977                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3978
3979                 /* CE */
3980                 fw_data = (const __le32 *)
3981                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3982                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3983                 WREG32(CP_CE_UCODE_ADDR, 0);
3984                 for (i = 0; i < fw_size; i++)
3985                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3986                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3987
3988                 /* ME */
3989                 fw_data = (const __be32 *)
3990                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3991                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3992                 WREG32(CP_ME_RAM_WADDR, 0);
3993                 for (i = 0; i < fw_size; i++)
3994                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3995                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3996                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3997         } else {
3998                 const __be32 *fw_data;
3999
4000                 /* PFP */
4001                 fw_data = (const __be32 *)rdev->pfp_fw->data;
4002                 WREG32(CP_PFP_UCODE_ADDR, 0);
4003                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4004                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4005                 WREG32(CP_PFP_UCODE_ADDR, 0);
4006
4007                 /* CE */
4008                 fw_data = (const __be32 *)rdev->ce_fw->data;
4009                 WREG32(CP_CE_UCODE_ADDR, 0);
4010                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4011                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4012                 WREG32(CP_CE_UCODE_ADDR, 0);
4013
4014                 /* ME */
4015                 fw_data = (const __be32 *)rdev->me_fw->data;
4016                 WREG32(CP_ME_RAM_WADDR, 0);
4017                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4018                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4019                 WREG32(CP_ME_RAM_WADDR, 0);
4020         }
4021
4022         return 0;
4023 }
4024
4025 /**
4026  * cik_cp_gfx_start - start the gfx ring
4027  *
4028  * @rdev: radeon_device pointer
4029  *
4030  * Enables the ring and loads the clear state context and other
4031  * packets required to init the ring.
4032  * Returns 0 for success, error for failure.
4033  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        int r, i;

        /* init the CP */
        WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
        WREG32(CP_ENDIAN_SWAP, 0);
        WREG32(CP_DEVICE_ID, 1);

        cik_cp_gfx_enable(rdev, true);

        /* clear state table plus 17 dwords of fixed init packets */
        r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
        if (r) {
                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                return r;
        }

        /* init the CE partitions.  CE only used for gfx on CIK */
        radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
        radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
        radeon_ring_write(ring, 0x8000);
        radeon_ring_write(ring, 0x8000);

        /* setup clear context state */
        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        radeon_ring_write(ring, 0x80000000);
        radeon_ring_write(ring, 0x80000000);

        /* emit the golden register state for this asic generation */
        for (i = 0; i < cik_default_size; i++)
                radeon_ring_write(ring, cik_default_state[i]);

        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

        /* set clear context state */
        radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
        radeon_ring_write(ring, 0);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        radeon_ring_write(ring, 0x00000316);
        radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
        radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

        radeon_ring_unlock_commit(rdev, ring, false);

        return 0;
}
4085
4086 /**
4087  * cik_cp_gfx_fini - stop the gfx ring
4088  *
4089  * @rdev: radeon_device pointer
4090  *
4091  * Stop the gfx ring and tear down the driver ring
4092  * info.
4093  */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
        /* halt the gfx MEs, then release the driver's ring state */
        cik_cp_gfx_enable(rdev, false);
        radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
4099
4100 /**
4101  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4102  *
4103  * @rdev: radeon_device pointer
4104  *
4105  * Program the location and size of the gfx ring buffer
4106  * and test it to make sure it's working.
4107  * Returns 0 for success, error for failure.
4108  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
        struct radeon_ring *ring;
        u32 tmp;
        u32 rb_bufsz;
        u64 rb_addr;
        int r;

        WREG32(CP_SEM_WAIT_TIMER, 0x0);
        if (rdev->family != CHIP_HAWAII)
                WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

        /* Set the write pointer delay */
        WREG32(CP_RB_WPTR_DELAY, 0);

        /* set the RB to use vmid 0 */
        WREG32(CP_RB_VMID, 0);

        WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

        /* ring 0 - compute and gfx */
        /* Set ring buffer size */
        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        rb_bufsz = order_base_2(ring->ring_size / 8);
        tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
        tmp |= BUF_SWAP_32BIT;
#endif
        WREG32(CP_RB0_CNTL, tmp);

        /* Initialize the ring buffer's read and write pointers */
        WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
        ring->wptr = 0;
        WREG32(CP_RB0_WPTR, ring->wptr);

        /* set the wb address whether it's enabled or not */
        WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

        /* scratch register shadowing is no longer supported */
        WREG32(SCRATCH_UMSK, 0);

        if (!rdev->wb.enabled)
                tmp |= RB_NO_UPDATE;

        mdelay(1);
        /* final CNTL write drops RB_RPTR_WR_ENA again */
        WREG32(CP_RB0_CNTL, tmp);

        rb_addr = ring->gpu_addr >> 8;
        WREG32(CP_RB0_BASE, rb_addr);
        WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

        /* start the ring */
        cik_cp_gfx_start(rdev);
        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
        r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
        if (r) {
                rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
                return r;
        }

        /* expose all of VRAM to TTM now that the copy ring works */
        if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
                radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

        return 0;
}
4175
4176 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4177                      struct radeon_ring *ring)
4178 {
4179         u32 rptr;
4180
4181         if (rdev->wb.enabled)
4182                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4183         else
4184                 rptr = RREG32(CP_RB0_RPTR);
4185
4186         return rptr;
4187 }
4188
4189 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4190                      struct radeon_ring *ring)
4191 {
4192         u32 wptr;
4193
4194         wptr = RREG32(CP_RB0_WPTR);
4195
4196         return wptr;
4197 }
4198
/* Commit the ring's software write pointer to the GFX ring register. */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back to post the write before anything else happens */
	(void)RREG32(CP_RB0_WPTR);
}
4205
4206 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4207                          struct radeon_ring *ring)
4208 {
4209         u32 rptr;
4210
4211         if (rdev->wb.enabled) {
4212                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4213         } else {
4214                 mutex_lock(&rdev->srbm_mutex);
4215                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4216                 rptr = RREG32(CP_HQD_PQ_RPTR);
4217                 cik_srbm_select(rdev, 0, 0, 0, 0);
4218                 mutex_unlock(&rdev->srbm_mutex);
4219         }
4220
4221         return rptr;
4222 }
4223
4224 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4225                          struct radeon_ring *ring)
4226 {
4227         u32 wptr;
4228
4229         if (rdev->wb.enabled) {
4230                 /* XXX check if swapping is necessary on BE */
4231                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4232         } else {
4233                 mutex_lock(&rdev->srbm_mutex);
4234                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4235                 wptr = RREG32(CP_HQD_PQ_WPTR);
4236                 cik_srbm_select(rdev, 0, 0, 0, 0);
4237                 mutex_unlock(&rdev->srbm_mutex);
4238         }
4239
4240         return wptr;
4241 }
4242
/* Publish a new compute-ring write pointer and notify the MEC. */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	/* update the writeback copy first, then ring the queue's doorbell */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4250
/* Quiesce a single compute hardware queue (HQD).
 *
 * Caller must hold rdev->srbm_mutex (see cik_cp_compute_enable()):
 * cik_srbm_select() switches the SRBM register aperture to this ring's
 * me/pipe/queue and restores it to 0/0/0/0 on exit.
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		/* request a dequeue, then poll (up to usec_timeout us)
		 * for the queue to go idle before clearing state
		 */
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4275
4276 /**
4277  * cik_cp_compute_enable - enable/disable the compute CP MEs
4278  *
4279  * @rdev: radeon_device pointer
4280  * @enable: enable or disable the MEs
4281  *
4282  * Halts or unhalts the compute MEs.
4283  */
4284 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4285 {
4286         if (enable)
4287                 WREG32(CP_MEC_CNTL, 0);
4288         else {
4289                 /*
4290                  * To make hibernation reliable we need to clear compute ring
4291                  * configuration before halting the compute ring.
4292                  */
4293                 mutex_lock(&rdev->srbm_mutex);
4294                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4295                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4296                 mutex_unlock(&rdev->srbm_mutex);
4297
4298                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4299                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4300                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4301         }
4302         udelay(50);
4303 }
4304
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before touching the ucode registers */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		/* new-style firmware image with a parsed header */
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 */
		if (rdev->family == CHIP_KAVERI) {
			/* KV is the only CIK part with a second MEC;
			 * it has its own firmware image (mec2_fw)
			 */
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		/* legacy firmware: raw big-endian words of fixed size,
		 * shared by MEC1 and MEC2
		 */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4375
4376 /**
4377  * cik_cp_compute_start - start the compute queues
4378  *
4379  * @rdev: radeon_device pointer
4380  *
4381  * Enable the compute queues.
4382  * Returns 0 for success, error for failure.
4383  */
4384 static int cik_cp_compute_start(struct radeon_device *rdev)
4385 {
4386         cik_cp_compute_enable(rdev, true);
4387
4388         return 0;
4389 }
4390
4391 /**
4392  * cik_cp_compute_fini - stop the compute queues
4393  *
4394  * @rdev: radeon_device pointer
4395  *
4396  * Stop the compute queues and tear down the driver queue
4397  * info.
4398  */
4399 static void cik_cp_compute_fini(struct radeon_device *rdev)
4400 {
4401         int i, idx, r;
4402
4403         cik_cp_compute_enable(rdev, false);
4404
4405         for (i = 0; i < 2; i++) {
4406                 if (i == 0)
4407                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4408                 else
4409                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4410
4411                 if (rdev->ring[idx].mqd_obj) {
4412                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4413                         if (unlikely(r != 0))
4414                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4415
4416                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4417                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4418
4419                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4420                         rdev->ring[idx].mqd_obj = NULL;
4421                 }
4422         }
4423 }
4424
4425 static void cik_mec_fini(struct radeon_device *rdev)
4426 {
4427         int r;
4428
4429         if (rdev->mec.hpd_eop_obj) {
4430                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4431                 if (unlikely(r != 0))
4432                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4433                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4434                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4435
4436                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4437                 rdev->mec.hpd_eop_obj = NULL;
4438         }
4439 }
4440
4441 #define MEC_HPD_SIZE 2048
4442
4443 static int cik_mec_init(struct radeon_device *rdev)
4444 {
4445         int r;
4446         u32 *hpd;
4447
4448         /*
4449          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4450          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4451          * Nonetheless, we assign only 1 pipe because all other pipes will
4452          * be handled by KFD
4453          */
4454         rdev->mec.num_mec = 1;
4455         rdev->mec.num_pipe = 1;
4456         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4457
4458         if (rdev->mec.hpd_eop_obj == NULL) {
4459                 r = radeon_bo_create(rdev,
4460                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4461                                      PAGE_SIZE, true,
4462                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4463                                      &rdev->mec.hpd_eop_obj);
4464                 if (r) {
4465                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4466                         return r;
4467                 }
4468         }
4469
4470         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4471         if (unlikely(r != 0)) {
4472                 cik_mec_fini(rdev);
4473                 return r;
4474         }
4475         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4476                           &rdev->mec.hpd_eop_gpu_addr);
4477         if (r) {
4478                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4479                 cik_mec_fini(rdev);
4480                 return r;
4481         }
4482         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4483         if (r) {
4484                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4485                 cik_mec_fini(rdev);
4486                 return r;
4487         }
4488
4489         /* clear memory.  Not sure if this is required or not */
4490         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4491
4492         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4493         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4494
4495         return 0;
4496 }
4497
4498 struct hqd_registers
4499 {
4500         u32 cp_mqd_base_addr;
4501         u32 cp_mqd_base_addr_hi;
4502         u32 cp_hqd_active;
4503         u32 cp_hqd_vmid;
4504         u32 cp_hqd_persistent_state;
4505         u32 cp_hqd_pipe_priority;
4506         u32 cp_hqd_queue_priority;
4507         u32 cp_hqd_quantum;
4508         u32 cp_hqd_pq_base;
4509         u32 cp_hqd_pq_base_hi;
4510         u32 cp_hqd_pq_rptr;
4511         u32 cp_hqd_pq_rptr_report_addr;
4512         u32 cp_hqd_pq_rptr_report_addr_hi;
4513         u32 cp_hqd_pq_wptr_poll_addr;
4514         u32 cp_hqd_pq_wptr_poll_addr_hi;
4515         u32 cp_hqd_pq_doorbell_control;
4516         u32 cp_hqd_pq_wptr;
4517         u32 cp_hqd_pq_control;
4518         u32 cp_hqd_ib_base_addr;
4519         u32 cp_hqd_ib_base_addr_hi;
4520         u32 cp_hqd_ib_rptr;
4521         u32 cp_hqd_ib_control;
4522         u32 cp_hqd_iq_timer;
4523         u32 cp_hqd_iq_rptr;
4524         u32 cp_hqd_dequeue_request;
4525         u32 cp_hqd_dma_offload;
4526         u32 cp_hqd_sema_cmd;
4527         u32 cp_hqd_msg_type;
4528         u32 cp_hqd_atomic0_preop_lo;
4529         u32 cp_hqd_atomic0_preop_hi;
4530         u32 cp_hqd_atomic1_preop_lo;
4531         u32 cp_hqd_atomic1_preop_hi;
4532         u32 cp_hqd_hq_scheduler0;
4533         u32 cp_hqd_hq_scheduler1;
4534         u32 cp_mqd_control;
4535 };
4536
4537 struct bonaire_mqd
4538 {
4539         u32 header;
4540         u32 dispatch_initiator;
4541         u32 dimensions[3];
4542         u32 start_idx[3];
4543         u32 num_threads[3];
4544         u32 pipeline_stat_enable;
4545         u32 perf_counter_enable;
4546         u32 pgm[2];
4547         u32 tba[2];
4548         u32 tma[2];
4549         u32 pgm_rsrc[2];
4550         u32 vmid;
4551         u32 resource_limits;
4552         u32 static_thread_mgmt01[2];
4553         u32 tmp_ring_size;
4554         u32 static_thread_mgmt23[2];
4555         u32 restart[3];
4556         u32 thread_trace_enable;
4557         u32 reserved1;
4558         u32 user_data[16];
4559         u32 vgtcs_invoke_count[2];
4560         struct hqd_registers queue_state;
4561         u32 dequeue_cntr;
4562         u32 interrupt_queue[64];
4563 };
4564
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.  For each queue this builds an MQD (memory queue
 * descriptor) in a GTT bo and mirrors it into the CP_HQD_* registers.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;

	cik_srbm_select(rdev, 0, 0, 0, 0);

	/* write the EOP addr */
	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* set the VMID assigned */
	WREG32(CP_HPD_EOP_VMID, 0);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(CP_HPD_EOP_CONTROL);
	tmp &= ~EOP_SIZE_MASK;
	tmp |= order_base_2(MEC_HPD_SIZE / 8);
	WREG32(CP_HPD_EOP_CONTROL, tmp);

	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* allocate the MQD bo on first resume; reused afterwards */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* select this ring's me/pipe/queue so the CP_HQD_*
		 * accesses below hit the right hardware queue
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
4805
/* Enable/disable both the GFX and compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4811
/* Load the GFX and compute CP microcode images.
 * Returns 0 on success or the first failing loader's error code.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (!r)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
4825
/* Tear down both the GFX and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4831
4832 static int cik_cp_resume(struct radeon_device *rdev)
4833 {
4834         int r;
4835
4836         cik_enable_gui_idle_interrupt(rdev, false);
4837
4838         r = cik_cp_load_microcode(rdev);
4839         if (r)
4840                 return r;
4841
4842         r = cik_cp_gfx_resume(rdev);
4843         if (r)
4844                 return r;
4845         r = cik_cp_compute_resume(rdev);
4846         if (r)
4847                 return r;
4848
4849         cik_enable_gui_idle_interrupt(rdev, true);
4850
4851         return 0;
4852 }
4853
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log;
 * used by the soft-reset path to diagnose which blocks are hung.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4893
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS: graphics pipeline and CP busy bits */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2: RLC */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG: same register at the second engine's offset */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS: IH, semaphores, GRBM, VM controller, MC */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
4974
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset (RADEON_RESET_* flags)
 *
 * Soft reset the blocks specified in @reset_mask: halt the CP/MEC/SDMA
 * engines, quiesce the memory controller, translate the mask into
 * GRBM/SRBM soft reset bits, pulse those bits, and restore MC access.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump state to the log to aid post-mortem debugging */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* halt the SDMA engines that are about to be reset */
	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* quiesce memory access before touching the reset bits */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the request mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* skip MC soft reset on IGPs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		/* assert, wait 50us, deassert; the reads flush the writes */
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same assert/deassert sequence for the SRBM blocks */
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5105
/* GMCON state saved across a pci config reset on KV/KB IGPs */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5111
/*
 * Save the GMCON_RENG_EXECUTE/MISC/MISC3 registers into @save, then
 * clear the RENG execute-on-power-up / execute-on-register-update and
 * stutter-enable bits — presumably to keep the renlist engine idle
 * across the pci config reset (IGP only; see cik_gpu_pci_config_reset).
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5123
5124 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5125                                       struct kv_reset_save_regs *save)
5126 {
5127         int i;
5128
5129         WREG32(GMCON_PGFSM_WRITE, 0);
5130         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5131
5132         for (i = 0; i < 5; i++)
5133                 WREG32(GMCON_PGFSM_WRITE, 0);
5134
5135         WREG32(GMCON_PGFSM_WRITE, 0);
5136         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5137
5138         for (i = 0; i < 5; i++)
5139                 WREG32(GMCON_PGFSM_WRITE, 0);
5140
5141         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5142         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5143
5144         for (i = 0; i < 5; i++)
5145                 WREG32(GMCON_PGFSM_WRITE, 0);
5146
5147         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5148         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5149
5150         for (i = 0; i < 5; i++)
5151                 WREG32(GMCON_PGFSM_WRITE, 0);
5152
5153         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5154         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5155
5156         for (i = 0; i < 5; i++)
5157                 WREG32(GMCON_PGFSM_WRITE, 0);
5158
5159         WREG32(GMCON_PGFSM_WRITE, 0);
5160         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5161
5162         for (i = 0; i < 5; i++)
5163                 WREG32(GMCON_PGFSM_WRITE, 0);
5164
5165         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5166         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5167
5168         for (i = 0; i < 5; i++)
5169                 WREG32(GMCON_PGFSM_WRITE, 0);
5170
5171         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5172         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5173
5174         for (i = 0; i < 5; i++)
5175                 WREG32(GMCON_PGFSM_WRITE, 0);
5176
5177         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5178         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5179
5180         for (i = 0; i < 5; i++)
5181                 WREG32(GMCON_PGFSM_WRITE, 0);
5182
5183         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5184         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5185
5186         for (i = 0; i < 5; i++)
5187                 WREG32(GMCON_PGFSM_WRITE, 0);
5188
5189         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5190         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5191
5192         WREG32(GMCON_MISC3, save->gmcon_misc3);
5193         WREG32(GMCON_MISC, save->gmcon_misc);
5194         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5195 }
5196
/*
 * cik_gpu_pci_config_reset - hard reset the GPU through pci config space
 *
 * Halts all engines, quiesces the memory controller, disables bus
 * mastering and triggers a pci config reset, then waits for the asic
 * to come back.  On IGPs (KV/KB) the GMCON state is saved before and
 * restored after the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads back
	 * all 1s until the asic is alive again
	 */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5259
5260 /**
5261  * cik_asic_reset - soft reset GPU
5262  *
5263  * @rdev: radeon_device pointer
5264  * @hard: force hard reset
5265  *
5266  * Look up which blocks are hung and attempt
5267  * to reset them.
5268  * Returns 0 for success.
5269  */
5270 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5271 {
5272         u32 reset_mask;
5273
5274         if (hard) {
5275                 cik_gpu_pci_config_reset(rdev);
5276                 return 0;
5277         }
5278
5279         reset_mask = cik_gpu_check_soft_reset(rdev);
5280
5281         if (reset_mask)
5282                 r600_set_bios_scratch_engine_hung(rdev, true);
5283
5284         /* try soft reset */
5285         cik_gpu_soft_reset(rdev, reset_mask);
5286
5287         reset_mask = cik_gpu_check_soft_reset(rdev);
5288
5289         /* try pci config reset */
5290         if (reset_mask && radeon_hard_reset)
5291                 cik_gpu_pci_config_reset(rdev);
5292
5293         reset_mask = cik_gpu_check_soft_reset(rdev);
5294
5295         if (!reset_mask)
5296                 r600_set_bios_scratch_engine_hung(rdev, false);
5297
5298         return 0;
5299 }
5300
5301 /**
5302  * cik_gfx_is_lockup - check if the 3D engine is locked up
5303  *
5304  * @rdev: radeon_device pointer
5305  * @ring: radeon_ring structure holding ring information
5306  *
5307  * Check if the 3D engine is locked up (CIK).
5308  * Returns true if the engine is locked, false if not.
5309  */
5310 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5311 {
5312         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5313
5314         if (!(reset_mask & (RADEON_RESET_GFX |
5315                             RADEON_RESET_COMPUTE |
5316                             RADEON_RESET_CP))) {
5317                 radeon_ring_lockup_update(rdev, ring);
5318                 return false;
5319         }
5320         return radeon_ring_test_lockup(rdev, ring);
5321 }
5322
5323 /* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP - 32 register sets of 5 regs each, 0x18 apart;
	 * raw offsets, meaning not documented here — TODO confirm vs cikd.h
	 */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* MC must be idle while the aperture registers are reprogrammed */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the upper 16 bits, start in the lower,
	 * both in units of 16MB (>> 24)
	 */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP is unused on CIK: base 0 with TOP == BOT disables it */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5379
5380 /**
5381  * cik_mc_init - initialize the memory controller driver params
5382  *
5383  * @rdev: radeon_device pointer
5384  *
5385  * Look up the amount of vram, vram width, and decide how to place
5386  * vram and gart within the GPU's physical address space (CIK).
5387  * Returns 0 for success.
5388  */
5389 static int cik_mc_init(struct radeon_device *rdev)
5390 {
5391         u32 tmp;
5392         int chansize, numchan;
5393
5394         /* Get VRAM informations */
5395         rdev->mc.vram_is_ddr = true;
5396         tmp = RREG32(MC_ARB_RAMCFG);
5397         if (tmp & CHANSIZE_MASK) {
5398                 chansize = 64;
5399         } else {
5400                 chansize = 32;
5401         }
5402         tmp = RREG32(MC_SHARED_CHMAP);
5403         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5404         case 0:
5405         default:
5406                 numchan = 1;
5407                 break;
5408         case 1:
5409                 numchan = 2;
5410                 break;
5411         case 2:
5412                 numchan = 4;
5413                 break;
5414         case 3:
5415                 numchan = 8;
5416                 break;
5417         case 4:
5418                 numchan = 3;
5419                 break;
5420         case 5:
5421                 numchan = 6;
5422                 break;
5423         case 6:
5424                 numchan = 10;
5425                 break;
5426         case 7:
5427                 numchan = 12;
5428                 break;
5429         case 8:
5430                 numchan = 16;
5431                 break;
5432         }
5433         rdev->mc.vram_width = numchan * chansize;
5434         /* Could aper size report 0 ? */
5435         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5436         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5437         /* size in MB on si */
5438         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5439         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5440         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5441         si_vram_gtt_location(rdev, &rdev->mc);
5442         radeon_update_bandwidth_info(rdev);
5443
5444         return 0;
5445 }
5446
5447 /*
5448  * GART
5449  * VMID 0 is the physical GPU addresses as used by the kernel.
5450  * VMIDs 1-15 are used for userspace clients and are handled
5451  * by the radeon vm/hsa code.
5452  */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 * Only bit 0 (VMID 0) of the invalidate request is set here.
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5468
/*
 * Program the SH_MEM_* apertures for the compute VMIDs (8-15), which
 * are the VMIDs handed to amdkfd (see cik_vm_init).  Uses unaligned
 * access mode and non-cached default mtype; the 0x6000/0x6000 private
 * bases are magic values — TODO confirm against KFD aperture layout.
 */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
	int i;
	uint32_t sh_mem_bases, sh_mem_config;

	sh_mem_bases = 0x6000 | 0x6000 << 16;
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

	/* srbm_mutex serializes SRBM_GFX_CNTL instance selection */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 8; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, sh_mem_bases);
	}
	/* always restore the default (VMID 0) selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}
5490
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: flat page table covering the GTT aperture,
	 * faults redirected to the dummy page
	 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* raw register offsets, purpose not named in cikd.h —
	 * TODO confirm what 0x15D4-0x15DC control
	 */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* per-VMID base addr regs are split into two banks of 8 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 with full protection-fault reporting */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* KV only: make sure the VM is not bypassed */
	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_init_compute_vmid(rdev);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5613
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).  The per-VMID page table
 * base addresses are saved first so cik_pcie_gart_enable() can
 * restore them on resume.
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save context1-15 page table bases (two banks of 8 regs) */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache — note ENABLE_L2_CACHE is deliberately absent */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5652
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK): disables the page
 * tables, frees the GART table in vram and releases the GART
 * bookkeeping.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5666
5667 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5680
5681 /*
5682  * vm
5683  * VMID 0 is the physical GPU addresses as used by the kernel.
5684  * VMIDs 1-15 are used for userspace clients and are handled
5685  * by the radeon vm/hsa code.
5686  */
5687 /**
5688  * cik_vm_init - cik vm init callback
5689  *
5690  * @rdev: radeon_device pointer
5691  *
5692  * Inits cik specific vm parameters (number of VMs, base of vram for
5693  * VMIDs 1-15) (CIK).
5694  * Returns 0 for success.
5695  */
5696 int cik_vm_init(struct radeon_device *rdev)
5697 {
5698         /*
5699          * number of VMs
5700          * VMID 0 is reserved for System
5701          * radeon graphics/compute will use VMIDs 1-7
5702          * amdkfd will use VMIDs 8-15
5703          */
5704         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5705         /* base offset of vram pages */
5706         if (rdev->flags & RADEON_IS_IGP) {
5707                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5708                 tmp <<= 22;
5709                 rdev->vm_manager.vram_base_offset = tmp;
5710         } else
5711                 rdev->vm_manager.vram_base_offset = 0;
5712
5713         return 0;
5714 }
5715
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Nothing to do on CIK; kept to satisfy the asic callback interface.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
5726
/**
 * cik_vm_decode_fault - print human readable fault info
 *
 * @rdev: radeon_device pointer
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: four ASCII bytes naming the memory client that faulted
 *
 * Print human readable fault information (CIK).
 */
static void cik_vm_decode_fault(struct radeon_device *rdev,
				u32 status, u32 addr, u32 mc_client)
{
	u32 mc_id;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	/* unpack the big-endian client name into a NUL-terminated string */
	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };

	/* Hawaii uses a wider client id field than the other CIK parts */
	if (rdev->family == CHIP_HAWAII)
		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	else
		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_client, mc_id);
}
5755
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush packets on
 * @vm_id: VMID whose page table base is being updated
 * @pd_addr: new page directory address
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).  Emits WRITE_DATA packets to program the
 * registers through the ring, then invalidates the TLB and waits
 * for the invalidate to complete.
 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the GFX ring has a PFP; compute rings use the ME engine */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* write the new page directory base for this VMID; the base
	 * addr regs are split into two banks of 8 (see gart enable)
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* restore the SRBM selection back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5837
5838 /*
5839  * RLC
5840  * The RLC is a multi-purpose microengine that handles a
5841  * variety of functions, the most important of which is
5842  * the interrupt controller.
5843  */
5844 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5845                                           bool enable)
5846 {
5847         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5848
5849         if (enable)
5850                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5851         else
5852                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5853         WREG32(CP_INT_CNTL_RING0, tmp);
5854 }
5855
5856 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5857 {
5858         u32 tmp;
5859
5860         tmp = RREG32(RLC_LB_CNTL);
5861         if (enable)
5862                 tmp |= LOAD_BALANCE_ENABLE;
5863         else
5864                 tmp &= ~LOAD_BALANCE_ENABLE;
5865         WREG32(RLC_LB_CNTL, tmp);
5866 }
5867
/**
 * cik_wait_for_rlc_serdes - wait for the RLC serdes masters to go idle
 *
 * @rdev: radeon_device pointer
 *
 * Poll the per-SE/SH CU master busy register for every shader
 * engine / shader array, then poll the non-CU (SE/GC/TC) master busy
 * bits; each poll is bounded by rdev->usec_timeout microseconds.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* grbm_idx_mutex serializes SE/SH selection (GRBM_GFX_INDEX) */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast selection (all SEs / all SHs) */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* now wait for the non-CU masters as well */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5894
5895 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5896 {
5897         u32 tmp;
5898
5899         tmp = RREG32(RLC_CNTL);
5900         if (tmp != rlc)
5901                 WREG32(RLC_CNTL, rlc);
5902 }
5903
/**
 * cik_halt_rlc - halt the RLC if it is currently enabled
 *
 * @rdev: radeon_device pointer
 *
 * Clears RLC_ENABLE (when set), waits for the RLC GPM to go idle and
 * for the serdes masters to settle.  Returns the original RLC_CNTL
 * value so the caller can restore it with cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* bounded wait for the GPM busy bit to clear */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
5927
/**
 * cik_enter_rlc_safe_mode - request RLC safe mode
 *
 * @rdev: radeon_device pointer
 *
 * Posts an enter-safe-mode message to the RLC through RLC_GPR_REG2,
 * waits until RLC_GPM_STAT reports both the gfx power and clock
 * status bits, then waits for the RLC to acknowledge the request by
 * clearing REQ.  Both waits are bounded by rdev->usec_timeout.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to clear the request bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5948
5949 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5950 {
5951         u32 tmp;
5952
5953         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5954         WREG32(RLC_GPR_REG2, tmp);
5955 }
5956
5957 /**
5958  * cik_rlc_stop - stop the RLC ME
5959  *
5960  * @rdev: radeon_device pointer
5961  *
5962  * Halt the RLC ME (MicroEngine) (CIK).
5963  */
5964 static void cik_rlc_stop(struct radeon_device *rdev)
5965 {
5966         WREG32(RLC_CNTL, 0);
5967
5968         cik_enable_gui_idle_interrupt(rdev, false);
5969
5970         cik_wait_for_rlc_serdes(rdev);
5971 }
5972
5973 /**
5974  * cik_rlc_start - start the RLC ME
5975  *
5976  * @rdev: radeon_device pointer
5977  *
5978  * Unhalt the RLC ME (MicroEngine) (CIK).
5979  */
5980 static void cik_rlc_start(struct radeon_device *rdev)
5981 {
5982         WREG32(RLC_CNTL, RLC_ENABLE);
5983
5984         cik_enable_gui_idle_interrupt(rdev, true);
5985
5986         udelay(50);
5987 }
5988
5989 /**
5990  * cik_rlc_resume - setup the RLC hw
5991  *
5992  * @rdev: radeon_device pointer
5993  *
5994  * Initialize the RLC registers, load the ucode,
5995  * and start the RLC (CIK).
5996  * Returns 0 for success, -EINVAL if the ucode is not available.
5997  */
5998 static int cik_rlc_resume(struct radeon_device *rdev)
5999 {
6000         u32 i, size, tmp;
6001
6002         if (!rdev->rlc_fw)
6003                 return -EINVAL;
6004
6005         cik_rlc_stop(rdev);
6006
6007         /* disable CG */
6008         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6009         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6010
6011         si_rlc_reset(rdev);
6012
6013         cik_init_pg(rdev);
6014
6015         cik_init_cg(rdev);
6016
6017         WREG32(RLC_LB_CNTR_INIT, 0);
6018         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6019
6020         mutex_lock(&rdev->grbm_idx_mutex);
6021         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6022         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6023         WREG32(RLC_LB_PARAMS, 0x00600408);
6024         WREG32(RLC_LB_CNTL, 0x80000004);
6025         mutex_unlock(&rdev->grbm_idx_mutex);
6026
6027         WREG32(RLC_MC_CNTL, 0);
6028         WREG32(RLC_UCODE_CNTL, 0);
6029
6030         if (rdev->new_fw) {
6031                 const struct rlc_firmware_header_v1_0 *hdr =
6032                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6033                 const __le32 *fw_data = (const __le32 *)
6034                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6035
6036                 radeon_ucode_print_rlc_hdr(&hdr->header);
6037
6038                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6039                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6040                 for (i = 0; i < size; i++)
6041                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6042                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6043         } else {
6044                 const __be32 *fw_data;
6045
6046                 switch (rdev->family) {
6047                 case CHIP_BONAIRE:
6048                 case CHIP_HAWAII:
6049                 default:
6050                         size = BONAIRE_RLC_UCODE_SIZE;
6051                         break;
6052                 case CHIP_KAVERI:
6053                         size = KV_RLC_UCODE_SIZE;
6054                         break;
6055                 case CHIP_KABINI:
6056                         size = KB_RLC_UCODE_SIZE;
6057                         break;
6058                 case CHIP_MULLINS:
6059                         size = ML_RLC_UCODE_SIZE;
6060                         break;
6061                 }
6062
6063                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6064                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6065                 for (i = 0; i < size; i++)
6066                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6067                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6068         }
6069
6070         /* XXX - find out what chips support lbpw */
6071         cik_enable_lbpw(rdev, false);
6072
6073         if (rdev->family == CHIP_BONAIRE)
6074                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6075
6076         cik_rlc_start(rdev);
6077
6078         return 0;
6079 }
6080
/* Enable/disable coarse-grain clock gating (CGCG) and clock/light
 * sleep (CGLS) for the gfx block, honouring RADEON_CG_SUPPORT_GFX_CGCG.
 * The RLC is halted while the serdes write masks are programmed and
 * restored afterwards via cik_update_rlc().
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		/* restore the saved RLC_CNTL value */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): repeated reads with the value discarded —
		 * presumably posting/flush reads; confirm before removing */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6118
/* Enable/disable medium-grain clock gating (MGCG) for the gfx block,
 * together with the related memory light-sleep (MGLS/CP_LS) and CGTS
 * features, all gated on the corresponding rdev->cg_flags bits.  The
 * RLC is halted around the serdes mask programming and restored after.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		/* restore the saved RLC_CNTL value */
		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* force the overrides on and turn off RLC/CP memory
		 * light sleep (done unconditionally on the disable path) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		/* restore the saved RLC_CNTL value */
		cik_update_rlc(rdev, tmp);
	}
}
6202
/* Memory-controller / ATC / VM clock-gating control registers that are
 * toggled as a group by cik_enable_mc_ls() and cik_enable_mc_mgcg().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6215
6216 static void cik_enable_mc_ls(struct radeon_device *rdev,
6217                              bool enable)
6218 {
6219         int i;
6220         u32 orig, data;
6221
6222         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6223                 orig = data = RREG32(mc_cg_registers[i]);
6224                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6225                         data |= MC_LS_ENABLE;
6226                 else
6227                         data &= ~MC_LS_ENABLE;
6228                 if (data != orig)
6229                         WREG32(mc_cg_registers[i], data);
6230         }
6231 }
6232
6233 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6234                                bool enable)
6235 {
6236         int i;
6237         u32 orig, data;
6238
6239         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6240                 orig = data = RREG32(mc_cg_registers[i]);
6241                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6242                         data |= MC_CG_ENABLE;
6243                 else
6244                         data &= ~MC_CG_ENABLE;
6245                 if (data != orig)
6246                         WREG32(mc_cg_registers[i], data);
6247         }
6248 }
6249
6250 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6251                                  bool enable)
6252 {
6253         u32 orig, data;
6254
6255         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6256                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6257                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6258         } else {
6259                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6260                 data |= 0xff000000;
6261                 if (data != orig)
6262                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6263
6264                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6265                 data |= 0xff000000;
6266                 if (data != orig)
6267                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6268         }
6269 }
6270
6271 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6272                                  bool enable)
6273 {
6274         u32 orig, data;
6275
6276         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6277                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6278                 data |= 0x100;
6279                 if (orig != data)
6280                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6281
6282                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6283                 data |= 0x100;
6284                 if (orig != data)
6285                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6286         } else {
6287                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6288                 data &= ~0x100;
6289                 if (orig != data)
6290                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6291
6292                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6293                 data &= ~0x100;
6294                 if (orig != data)
6295                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6296         }
6297 }
6298
/* Enable/disable UVD medium-grain clock gating: the memory gating
 * bits in UVD_CGC_MEM_CTRL (indirect UVD context space) plus the DCM
 * bit in UVD_CGC_CTRL.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value read above is discarded — the
		 * enable path writes a fixed 0xfff while the disable path
		 * does a read-modify-write (&= ~0xfff).  Possibly |= was
		 * intended here; confirm against the UVD docs before
		 * changing, as 0xfff also clears any upper bits. */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6324
6325 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6326                                bool enable)
6327 {
6328         u32 orig, data;
6329
6330         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6331
6332         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6333                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6334                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6335         else
6336                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6337                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6338
6339         if (orig != data)
6340                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6341 }
6342
6343 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6344                                 bool enable)
6345 {
6346         u32 orig, data;
6347
6348         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6349
6350         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6351                 data &= ~CLOCK_GATING_DIS;
6352         else
6353                 data |= CLOCK_GATING_DIS;
6354
6355         if (orig != data)
6356                 WREG32(HDP_HOST_PATH_CNTL, data);
6357 }
6358
6359 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6360                               bool enable)
6361 {
6362         u32 orig, data;
6363
6364         orig = data = RREG32(HDP_MEM_POWER_LS);
6365
6366         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6367                 data |= HDP_LS_ENABLE;
6368         else
6369                 data &= ~HDP_LS_ENABLE;
6370
6371         if (orig != data)
6372                 WREG32(HDP_MEM_POWER_LS, data);
6373 }
6374
/* Enable/disable clock gating for the requested block(s).  @block is
 * a mask of RADEON_CG_BLOCK_* values; per-feature support is checked
 * by the individual helpers against rdev->cg_flags.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		/* gui idle interrupts are masked while gfx CG changes */
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			/* MGCG before CGCG on enable ... */
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			/* ... and the reverse on disable */
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC CG is only programmed on discrete parts */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6422
/* Enable clock gating at init time: gfx first, then the remaining
 * blocks as a group.  UVD internal CG is set up in between when UVD
 * is present.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6437
/* Disable clock gating at teardown, in the reverse order of
 * cik_init_cg(): the non-gfx blocks first, gfx last.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6448
6449 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6450                                           bool enable)
6451 {
6452         u32 data, orig;
6453
6454         orig = data = RREG32(RLC_PG_CNTL);
6455         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6456                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6457         else
6458                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6459         if (orig != data)
6460                 WREG32(RLC_PG_CNTL, data);
6461 }
6462
6463 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6464                                           bool enable)
6465 {
6466         u32 data, orig;
6467
6468         orig = data = RREG32(RLC_PG_CNTL);
6469         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6470                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6471         else
6472                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6473         if (orig != data)
6474                 WREG32(RLC_PG_CNTL, data);
6475 }
6476
6477 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6478 {
6479         u32 data, orig;
6480
6481         orig = data = RREG32(RLC_PG_CNTL);
6482         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6483                 data &= ~DISABLE_CP_PG;
6484         else
6485                 data |= DISABLE_CP_PG;
6486         if (orig != data)
6487                 WREG32(RLC_PG_CNTL, data);
6488 }
6489
6490 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6491 {
6492         u32 data, orig;
6493
6494         orig = data = RREG32(RLC_PG_CNTL);
6495         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6496                 data &= ~DISABLE_GDS_PG;
6497         else
6498                 data |= DISABLE_GDS_PG;
6499         if (orig != data)
6500                 WREG32(RLC_PG_CNTL, data);
6501 }
6502
/* CP powergating jump tables for legacy (non new_fw) firmware images:
 * table size and offsets are in dwords, used as indices into the
 * firmware data in cik_init_cp_pg_table().
 */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096
6506
/* Copy the per-ME CP jump tables out of the loaded firmware images
 * into the RLC's cp_table buffer (rdev->rlc.cp_table_ptr).  MEs are
 * copied in the order CE, PFP, ME, MEC (and MEC2 on Kaveri, which has
 * max_me = 5).  For new-style firmware the table offset/size come
 * from each image's header; legacy firmware uses the fixed
 * CP_*_TABLE_* constants above.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			/* pick the firmware image for this ME */
			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				/* me == 4: MEC2 (Kaveri only) */
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			/* legacy firmware: fixed-size big-endian tables */
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6590
/* Enable/disable gfx powergating (GFX_PG_ENABLE in RLC_PG_CNTL plus
 * automatic PG via RLC_AUTO_PG_CTRL), gated on
 * RADEON_PG_SUPPORT_GFX_PG.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): value discarded — presumably a posting/
		 * flush read after disabling PG; confirm before removing */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6620
/* Build a CU bitmap for the given shader engine / shader array:
 * combines the upper 16 bits of the fused (CC_) and user (GC_USER_)
 * shader array configs, shifts them down, and returns the complement
 * limited to max_cu_per_sh bits — i.e. a set bit marks a CU not
 * flagged in either config (presumably the active CUs; the register
 * bit semantics are not visible here).
 */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	/* select the requested SE/SH, read both configs, then restore
	 * broadcast selection */
	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	tmp &= 0xffff0000;

	tmp |= tmp1;
	tmp >>= 16;

	/* mask = (1 << max_cu_per_sh) - 1 */
	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
		mask <<= 1;
		mask |= 1;
	}

	return (~tmp) & mask;
}
6645
6646 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6647 {
6648         u32 i, j, k, active_cu_number = 0;
6649         u32 mask, counter, cu_bitmap;
6650         u32 tmp = 0;
6651
6652         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6653                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6654                         mask = 1;
6655                         cu_bitmap = 0;
6656                         counter = 0;
6657                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6658                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6659                                         if (counter < 2)
6660                                                 cu_bitmap |= mask;
6661                                         counter ++;
6662                                 }
6663                                 mask <<= 1;
6664                         }
6665
6666                         active_cu_number += counter;
6667                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6668                 }
6669         }
6670
6671         WREG32(RLC_PG_AO_CU_MASK, tmp);
6672
6673         tmp = RREG32(RLC_MAX_PG_CU);
6674         tmp &= ~MAX_PU_CU_MASK;
6675         tmp |= MAX_PU_CU(active_cu_number);
6676         WREG32(RLC_MAX_PG_CU, tmp);
6677 }
6678
6679 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6680                                        bool enable)
6681 {
6682         u32 data, orig;
6683
6684         orig = data = RREG32(RLC_PG_CNTL);
6685         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6686                 data |= STATIC_PER_CU_PG_ENABLE;
6687         else
6688                 data &= ~STATIC_PER_CU_PG_ENABLE;
6689         if (orig != data)
6690                 WREG32(RLC_PG_CNTL, data);
6691 }
6692
6693 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6694                                         bool enable)
6695 {
6696         u32 data, orig;
6697
6698         orig = data = RREG32(RLC_PG_CNTL);
6699         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6700                 data |= DYN_PER_CU_PG_ENABLE;
6701         else
6702                 data &= ~DYN_PER_CU_PG_ENABLE;
6703         if (orig != data)
6704                 WREG32(RLC_PG_CNTL, data);
6705 }
6706
/* RLC GPM scratch offsets of the save/restore list and the
 * clear state descriptor */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/**
 * cik_init_gfx_cgpg - set up the RLC for gfx powergating
 *
 * @rdev: radeon_device pointer
 *
 * Writes the clear state descriptor and (optionally) the register
 * save/restore list into RLC GPM scratch, points the RLC at the
 * save/restore and CP table buffers, and programs the PG delay and
 * idle-poll parameters.  The register sequence is order-sensitive.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* descriptor: hi address, lo address, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out the three descriptor dwords */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the register save/restore list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	/* magic delay values; NOTE(review): source of 0x10101010/0x3
	 * undocumented here — presumably from AMD reference code */
	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6758
/**
 * cik_update_gfx_pg - enable/disable all gfx powergating features
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable powergating
 *
 * Toggles coarse-grain, static per-CU and dynamic per-CU gfx
 * powergating.  Each helper checks its own pg_flags bit, so this is
 * safe to call regardless of which features are supported.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6765
6766 u32 cik_get_csb_size(struct radeon_device *rdev)
6767 {
6768         u32 count = 0;
6769         const struct cs_section_def *sect = NULL;
6770         const struct cs_extent_def *ext = NULL;
6771
6772         if (rdev->rlc.cs_data == NULL)
6773                 return 0;
6774
6775         /* begin clear state */
6776         count += 2;
6777         /* context control state */
6778         count += 3;
6779
6780         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6781                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6782                         if (sect->id == SECT_CONTEXT)
6783                                 count += 2 + ext->reg_count;
6784                         else
6785                                 return 0;
6786                 }
6787         }
6788         /* pa_sc_raster_config/pa_sc_raster_config1 */
6789         count += 4;
6790         /* end clear state */
6791         count += 2;
6792         /* clear state */
6793         count += 2;
6794
6795         return count;
6796 }
6797
6798 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6799 {
6800         u32 count = 0, i;
6801         const struct cs_section_def *sect = NULL;
6802         const struct cs_extent_def *ext = NULL;
6803
6804         if (rdev->rlc.cs_data == NULL)
6805                 return;
6806         if (buffer == NULL)
6807                 return;
6808
6809         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6810         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6811
6812         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6813         buffer[count++] = cpu_to_le32(0x80000000);
6814         buffer[count++] = cpu_to_le32(0x80000000);
6815
6816         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6817                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6818                         if (sect->id == SECT_CONTEXT) {
6819                                 buffer[count++] =
6820                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6821                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6822                                 for (i = 0; i < ext->reg_count; i++)
6823                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6824                         } else {
6825                                 return;
6826                         }
6827                 }
6828         }
6829
6830         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6831         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6832         switch (rdev->family) {
6833         case CHIP_BONAIRE:
6834                 buffer[count++] = cpu_to_le32(0x16000012);
6835                 buffer[count++] = cpu_to_le32(0x00000000);
6836                 break;
6837         case CHIP_KAVERI:
6838                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6839                 buffer[count++] = cpu_to_le32(0x00000000);
6840                 break;
6841         case CHIP_KABINI:
6842         case CHIP_MULLINS:
6843                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6844                 buffer[count++] = cpu_to_le32(0x00000000);
6845                 break;
6846         case CHIP_HAWAII:
6847                 buffer[count++] = cpu_to_le32(0x3a00161a);
6848                 buffer[count++] = cpu_to_le32(0x0000002e);
6849                 break;
6850         default:
6851                 buffer[count++] = cpu_to_le32(0x00000000);
6852                 buffer[count++] = cpu_to_le32(0x00000000);
6853                 break;
6854         }
6855
6856         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6857         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6858
6859         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6860         buffer[count++] = cpu_to_le32(0);
6861 }
6862
/**
 * cik_init_pg - initialize and enable powergating
 *
 * @rdev: radeon_device pointer
 *
 * No-op when pg_flags is zero.  Otherwise enables SCK slowdown on
 * power up/down, sets up the RLC/CP/GDS gfx PG state when GFX_PG is
 * supported, programs the always-on CU mask, and finally enables the
 * gfx powergating features.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6877
/**
 * cik_fini_pg - disable powergating
 *
 * @rdev: radeon_device pointer
 *
 * Reverse of cik_init_pg(): disables the gfx powergating features
 * first, then CP and GDS powergating when GFX_PG is supported.
 * No-op when pg_flags is zero.
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6888
6889 /*
6890  * Interrupts
6891  * Starting with r6xx, interrupts are handled via a ring buffer.
6892  * Ring buffers are areas of GPU accessible memory that the GPU
6893  * writes interrupt vectors into and the host reads vectors out of.
6894  * There is a rptr (read pointer) that determines where the
6895  * host is currently reading, and a wptr (write pointer)
6896  * which determines where the GPU has written.  When the
6897  * pointers are equal, the ring is idle.  When the GPU
6898  * writes vectors to the ring buffer, it increments the
6899  * wptr.  When there is an interrupt, the host then starts
6900  * fetching commands and processing them until the pointers are
6901  * equal again at which point it updates the rptr.
6902  */
6903
6904 /**
6905  * cik_enable_interrupts - Enable the interrupt ring buffer
6906  *
6907  * @rdev: radeon_device pointer
6908  *
6909  * Enable the interrupt ring buffer (CIK).
6910  */
6911 static void cik_enable_interrupts(struct radeon_device *rdev)
6912 {
6913         u32 ih_cntl = RREG32(IH_CNTL);
6914         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6915
6916         ih_cntl |= ENABLE_INTR;
6917         ih_rb_cntl |= IH_RB_ENABLE;
6918         WREG32(IH_CNTL, ih_cntl);
6919         WREG32(IH_RB_CNTL, ih_rb_cntl);
6920         rdev->ih.enabled = true;
6921 }
6922
6923 /**
6924  * cik_disable_interrupts - Disable the interrupt ring buffer
6925  *
6926  * @rdev: radeon_device pointer
6927  *
6928  * Disable the interrupt ring buffer (CIK).
6929  */
6930 static void cik_disable_interrupts(struct radeon_device *rdev)
6931 {
6932         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6933         u32 ih_cntl = RREG32(IH_CNTL);
6934
6935         ih_rb_cntl &= ~IH_RB_ENABLE;
6936         ih_cntl &= ~ENABLE_INTR;
6937         WREG32(IH_RB_CNTL, ih_rb_cntl);
6938         WREG32(IH_CNTL, ih_cntl);
6939         /* set rptr, wptr to 0 */
6940         WREG32(IH_RB_RPTR, 0);
6941         WREG32(IH_RB_WPTR, 0);
6942         rdev->ih.enabled = false;
6943         rdev->ih.rptr = 0;
6944 }
6945
6946 /**
6947  * cik_disable_interrupt_state - Disable all interrupt sources
6948  *
6949  * @rdev: radeon_device pointer
6950  *
6951  * Clear all interrupt enable bits used by the driver (CIK).
6952  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty enables */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: mask the trap interrupts on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: clear all pipe interrupt enables on ME1/ME2 */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: clear enables, preserve the polarity bit */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7022
7023 /**
7024  * cik_irq_init - init and enable the interrupt ring
7025  *
7026  * @rdev: radeon_device pointer
7027  *
7028  * Allocate a ring buffer for the interrupt controller,
7029  * enable the RLC, disable interrupts, enable the IH
7030  * ring buffer and enable it (CIK).
 * Called at device load and resume.
7032  * Returns 0 for success, errors for failure.
7033  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7104
7105 /**
7106  * cik_irq_set - enable/disable interrupt sources
7107  *
7108  * @rdev: radeon_device pointer
7109  *
7110  * Enable interrupt sources on the GPU (vblanks, hpd,
7111  * etc.) (CIK).
7112  * Returns 0 for success, errors for failure.
7113  */
7114 int cik_irq_set(struct radeon_device *rdev)
7115 {
7116         u32 cp_int_cntl;
7117         u32 cp_m1p0;
7118         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7119         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7120         u32 grbm_int_cntl = 0;
7121         u32 dma_cntl, dma_cntl1;
7122
7123         if (!rdev->irq.installed) {
7124                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7125                 return -EINVAL;
7126         }
7127         /* don't enable anything if the ih is disabled */
7128         if (!rdev->ih.enabled) {
7129                 cik_disable_interrupts(rdev);
7130                 /* force the active interrupt state to all disabled */
7131                 cik_disable_interrupt_state(rdev);
7132                 return 0;
7133         }
7134
7135         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7136                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7137         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7138
7139         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7140         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7141         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7142         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7143         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7144         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7145
7146         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7147         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7148
7149         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7150
7151         /* enable CP interrupts on all rings */
7152         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7153                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7154                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7155         }
7156         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7157                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7158                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7159                 if (ring->me == 1) {
7160                         switch (ring->pipe) {
7161                         case 0:
7162                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7163                                 break;
7164                         default:
7165                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7166                                 break;
7167                         }
7168                 } else {
7169                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7170                 }
7171         }
7172         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7173                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7174                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7175                 if (ring->me == 1) {
7176                         switch (ring->pipe) {
7177                         case 0:
7178                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7179                                 break;
7180                         default:
7181                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7182                                 break;
7183                         }
7184                 } else {
7185                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7186                 }
7187         }
7188
7189         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7190                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7191                 dma_cntl |= TRAP_ENABLE;
7192         }
7193
7194         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7195                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7196                 dma_cntl1 |= TRAP_ENABLE;
7197         }
7198
7199         if (rdev->irq.crtc_vblank_int[0] ||
7200             atomic_read(&rdev->irq.pflip[0])) {
7201                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7202                 crtc1 |= VBLANK_INTERRUPT_MASK;
7203         }
7204         if (rdev->irq.crtc_vblank_int[1] ||
7205             atomic_read(&rdev->irq.pflip[1])) {
7206                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7207                 crtc2 |= VBLANK_INTERRUPT_MASK;
7208         }
7209         if (rdev->irq.crtc_vblank_int[2] ||
7210             atomic_read(&rdev->irq.pflip[2])) {
7211                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7212                 crtc3 |= VBLANK_INTERRUPT_MASK;
7213         }
7214         if (rdev->irq.crtc_vblank_int[3] ||
7215             atomic_read(&rdev->irq.pflip[3])) {
7216                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7217                 crtc4 |= VBLANK_INTERRUPT_MASK;
7218         }
7219         if (rdev->irq.crtc_vblank_int[4] ||
7220             atomic_read(&rdev->irq.pflip[4])) {
7221                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7222                 crtc5 |= VBLANK_INTERRUPT_MASK;
7223         }
7224         if (rdev->irq.crtc_vblank_int[5] ||
7225             atomic_read(&rdev->irq.pflip[5])) {
7226                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7227                 crtc6 |= VBLANK_INTERRUPT_MASK;
7228         }
7229         if (rdev->irq.hpd[0]) {
7230                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7231                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7232         }
7233         if (rdev->irq.hpd[1]) {
7234                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7235                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7236         }
7237         if (rdev->irq.hpd[2]) {
7238                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7239                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7240         }
7241         if (rdev->irq.hpd[3]) {
7242                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7243                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7244         }
7245         if (rdev->irq.hpd[4]) {
7246                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7247                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7248         }
7249         if (rdev->irq.hpd[5]) {
7250                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7251                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7252         }
7253
7254         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7255
7256         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7257         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7258
7259         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7260
7261         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7262
7263         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7264         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7265         if (rdev->num_crtc >= 4) {
7266                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7267                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7268         }
7269         if (rdev->num_crtc >= 6) {
7270                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7271                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7272         }
7273
7274         if (rdev->num_crtc >= 2) {
7275                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7276                        GRPH_PFLIP_INT_MASK);
7277                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7278                        GRPH_PFLIP_INT_MASK);
7279         }
7280         if (rdev->num_crtc >= 4) {
7281                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7282                        GRPH_PFLIP_INT_MASK);
7283                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7284                        GRPH_PFLIP_INT_MASK);
7285         }
7286         if (rdev->num_crtc >= 6) {
7287                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7288                        GRPH_PFLIP_INT_MASK);
7289                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7290                        GRPH_PFLIP_INT_MASK);
7291         }
7292
7293         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7294         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7295         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7296         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7297         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7298         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7299
7300         /* posting read */
7301         RREG32(SRBM_STATUS);
7302
7303         return 0;
7304 }
7305
7306 /**
7307  * cik_irq_ack - ack interrupt sources
7308  *
7309  * @rdev: radeon_device pointer
7310  *
7311  * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
7313  * generated and do not require an explicit ack.
7314  */
7315 static inline void cik_irq_ack(struct radeon_device *rdev)
7316 {
7317         u32 tmp;
7318
7319         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7320         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7321         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7322         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7323         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7324         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7325         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7326
7327         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7328                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7329         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7330                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7331         if (rdev->num_crtc >= 4) {
7332                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7333                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7334                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7335                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7336         }
7337         if (rdev->num_crtc >= 6) {
7338                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7339                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7340                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7341                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7342         }
7343
7344         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7345                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7346                        GRPH_PFLIP_INT_CLEAR);
7347         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7348                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7349                        GRPH_PFLIP_INT_CLEAR);
7350         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7351                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7352         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7353                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7354         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7355                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7356         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7357                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7358
7359         if (rdev->num_crtc >= 4) {
7360                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7361                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7362                                GRPH_PFLIP_INT_CLEAR);
7363                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7364                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7365                                GRPH_PFLIP_INT_CLEAR);
7366                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7367                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7368                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7369                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7370                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7371                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7372                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7373                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7374         }
7375
7376         if (rdev->num_crtc >= 6) {
7377                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7378                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7379                                GRPH_PFLIP_INT_CLEAR);
7380                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7381                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7382                                GRPH_PFLIP_INT_CLEAR);
7383                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7384                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7385                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7386                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7387                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7388                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7389                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7390                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7391         }
7392
7393         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7394                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7395                 tmp |= DC_HPDx_INT_ACK;
7396                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7397         }
7398         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7399                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7400                 tmp |= DC_HPDx_INT_ACK;
7401                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7402         }
7403         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7404                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7405                 tmp |= DC_HPDx_INT_ACK;
7406                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7407         }
7408         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7409                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7410                 tmp |= DC_HPDx_INT_ACK;
7411                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7412         }
7413         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7414                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7415                 tmp |= DC_HPDx_INT_ACK;
7416                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7417         }
7418         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7419                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7420                 tmp |= DC_HPDx_INT_ACK;
7421                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7422         }
7423         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7424                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7425                 tmp |= DC_HPDx_RX_INT_ACK;
7426                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7427         }
7428         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7429                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7430                 tmp |= DC_HPDx_RX_INT_ACK;
7431                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7432         }
7433         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7434                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7435                 tmp |= DC_HPDx_RX_INT_ACK;
7436                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7437         }
7438         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7439                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7440                 tmp |= DC_HPDx_RX_INT_ACK;
7441                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7442         }
7443         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7444                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7445                 tmp |= DC_HPDx_RX_INT_ACK;
7446                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7447         }
7448         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7449                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7450                 tmp |= DC_HPDx_RX_INT_ACK;
7451                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7452         }
7453 }
7454
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 * Masks interrupt generation, then acks any interrupts that were
 * already latched so none are left pending, and finally clears the
 * per-source interrupt enable state.
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	/* 1 ms delay lets any in-flight interrupt reach the status regs
	 * before we ack, so nothing stays asserted after disable.
	 */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
7470
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7484
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw (and RLC) before freeing the IH ring it feeds */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7499
7500 /**
7501  * cik_get_ih_wptr - get the IH ring buffer wptr
7502  *
7503  * @rdev: radeon_device pointer
7504  *
7505  * Get the IH ring buffer wptr from either the register
7506  * or the writeback memory buffer (CIK).  Also check for
7507  * ring buffer overflow and deal with it.
7508  * Used by cik_irq_process().
7509  * Returns the value of the wptr.
7510  */
7511 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7512 {
7513         u32 wptr, tmp;
7514
7515         if (rdev->wb.enabled)
7516                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7517         else
7518                 wptr = RREG32(IH_RB_WPTR);
7519
7520         if (wptr & RB_OVERFLOW) {
7521                 wptr &= ~RB_OVERFLOW;
7522                 /* When a ring buffer overflow happen start parsing interrupt
7523                  * from the last not overwritten vector (wptr + 16). Hopefully
7524                  * this should allow us to catchup.
7525                  */
7526                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7527                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7528                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7529                 tmp = RREG32(IH_RB_CNTL);
7530                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7531                 WREG32(IH_RB_CNTL, tmp);
7532         }
7533         return (wptr & rdev->ih.ptr_mask);
7534 }
7535
7536 /*        CIK IV Ring
7537  * Each IV ring entry is 128 bits:
7538  * [7:0]    - interrupt source id
7539  * [31:8]   - reserved
7540  * [59:32]  - interrupt source data
7541  * [63:60]  - reserved
7542  * [71:64]  - RINGID
7543  *            CP:
7544  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7545  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7546  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7547  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7548  *            PIPE_ID - ME0 0=3D
7549  *                    - ME1&2 compute dispatcher (4 pipes each)
7550  *            SDMA:
7551  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7552  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7553  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7554  * [79:72]  - VMID
7555  * [95:80]  - PASID
7556  * [127:96] - reserved
7557  */
7558 /**
7559  * cik_irq_process - interrupt handler
7560  *
7561  * @rdev: radeon_device pointer
7562  *
 * Interrupt handler (CIK).  Walk the IH ring,
7564  * ack interrupts and schedule work to handle
7565  * interrupt events.
7566  * Returns irq process return code.
7567  */
7568 int cik_irq_process(struct radeon_device *rdev)
7569 {
7570         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7571         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7572         u32 wptr;
7573         u32 rptr;
7574         u32 src_id, src_data, ring_id;
7575         u8 me_id, pipe_id, queue_id;
7576         u32 ring_index;
7577         bool queue_hotplug = false;
7578         bool queue_dp = false;
7579         bool queue_reset = false;
7580         u32 addr, status, mc_client;
7581         bool queue_thermal = false;
7582
7583         if (!rdev->ih.enabled || rdev->shutdown)
7584                 return IRQ_NONE;
7585
7586         wptr = cik_get_ih_wptr(rdev);
7587
7588 restart_ih:
7589         /* is somebody else already processing irqs? */
7590         if (atomic_xchg(&rdev->ih.lock, 1))
7591                 return IRQ_NONE;
7592
7593         rptr = rdev->ih.rptr;
7594         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7595
7596         /* Order reading of wptr vs. reading of IH ring data */
7597         rmb();
7598
7599         /* display interrupts */
7600         cik_irq_ack(rdev);
7601
7602         while (rptr != wptr) {
7603                 /* wptr/rptr are in bytes! */
7604                 ring_index = rptr / 4;
7605
7606                 radeon_kfd_interrupt(rdev,
7607                                 (const void *) &rdev->ih.ring[ring_index]);
7608
7609                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7610                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7611                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7612
7613                 switch (src_id) {
7614                 case 1: /* D1 vblank/vline */
7615                         switch (src_data) {
7616                         case 0: /* D1 vblank */
7617                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7618                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7619
7620                                 if (rdev->irq.crtc_vblank_int[0]) {
7621                                         drm_handle_vblank(rdev->ddev, 0);
7622                                         rdev->pm.vblank_sync = true;
7623                                         wake_up(&rdev->irq.vblank_queue);
7624                                 }
7625                                 if (atomic_read(&rdev->irq.pflip[0]))
7626                                         radeon_crtc_handle_vblank(rdev, 0);
7627                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7628                                 DRM_DEBUG("IH: D1 vblank\n");
7629
7630                                 break;
7631                         case 1: /* D1 vline */
7632                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7633                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7634
7635                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7636                                 DRM_DEBUG("IH: D1 vline\n");
7637
7638                                 break;
7639                         default:
7640                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7641                                 break;
7642                         }
7643                         break;
7644                 case 2: /* D2 vblank/vline */
7645                         switch (src_data) {
7646                         case 0: /* D2 vblank */
7647                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7648                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7649
7650                                 if (rdev->irq.crtc_vblank_int[1]) {
7651                                         drm_handle_vblank(rdev->ddev, 1);
7652                                         rdev->pm.vblank_sync = true;
7653                                         wake_up(&rdev->irq.vblank_queue);
7654                                 }
7655                                 if (atomic_read(&rdev->irq.pflip[1]))
7656                                         radeon_crtc_handle_vblank(rdev, 1);
7657                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7658                                 DRM_DEBUG("IH: D2 vblank\n");
7659
7660                                 break;
7661                         case 1: /* D2 vline */
7662                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7663                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7664
7665                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7666                                 DRM_DEBUG("IH: D2 vline\n");
7667
7668                                 break;
7669                         default:
7670                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7671                                 break;
7672                         }
7673                         break;
7674                 case 3: /* D3 vblank/vline */
7675                         switch (src_data) {
7676                         case 0: /* D3 vblank */
7677                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7678                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7679
7680                                 if (rdev->irq.crtc_vblank_int[2]) {
7681                                         drm_handle_vblank(rdev->ddev, 2);
7682                                         rdev->pm.vblank_sync = true;
7683                                         wake_up(&rdev->irq.vblank_queue);
7684                                 }
7685                                 if (atomic_read(&rdev->irq.pflip[2]))
7686                                         radeon_crtc_handle_vblank(rdev, 2);
7687                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7688                                 DRM_DEBUG("IH: D3 vblank\n");
7689
7690                                 break;
7691                         case 1: /* D3 vline */
7692                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7693                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7694
7695                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7696                                 DRM_DEBUG("IH: D3 vline\n");
7697
7698                                 break;
7699                         default:
7700                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7701                                 break;
7702                         }
7703                         break;
7704                 case 4: /* D4 vblank/vline */
7705                         switch (src_data) {
7706                         case 0: /* D4 vblank */
7707                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7708                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7709
7710                                 if (rdev->irq.crtc_vblank_int[3]) {
7711                                         drm_handle_vblank(rdev->ddev, 3);
7712                                         rdev->pm.vblank_sync = true;
7713                                         wake_up(&rdev->irq.vblank_queue);
7714                                 }
7715                                 if (atomic_read(&rdev->irq.pflip[3]))
7716                                         radeon_crtc_handle_vblank(rdev, 3);
7717                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7718                                 DRM_DEBUG("IH: D4 vblank\n");
7719
7720                                 break;
7721                         case 1: /* D4 vline */
7722                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7723                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7724
7725                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7726                                 DRM_DEBUG("IH: D4 vline\n");
7727
7728                                 break;
7729                         default:
7730                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7731                                 break;
7732                         }
7733                         break;
7734                 case 5: /* D5 vblank/vline */
7735                         switch (src_data) {
7736                         case 0: /* D5 vblank */
7737                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7738                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7739
7740                                 if (rdev->irq.crtc_vblank_int[4]) {
7741                                         drm_handle_vblank(rdev->ddev, 4);
7742                                         rdev->pm.vblank_sync = true;
7743                                         wake_up(&rdev->irq.vblank_queue);
7744                                 }
7745                                 if (atomic_read(&rdev->irq.pflip[4]))
7746                                         radeon_crtc_handle_vblank(rdev, 4);
7747                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7748                                 DRM_DEBUG("IH: D5 vblank\n");
7749
7750                                 break;
7751                         case 1: /* D5 vline */
7752                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7753                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7754
7755                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7756                                 DRM_DEBUG("IH: D5 vline\n");
7757
7758                                 break;
7759                         default:
7760                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7761                                 break;
7762                         }
7763                         break;
7764                 case 6: /* D6 vblank/vline */
7765                         switch (src_data) {
7766                         case 0: /* D6 vblank */
7767                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7768                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7769
7770                                 if (rdev->irq.crtc_vblank_int[5]) {
7771                                         drm_handle_vblank(rdev->ddev, 5);
7772                                         rdev->pm.vblank_sync = true;
7773                                         wake_up(&rdev->irq.vblank_queue);
7774                                 }
7775                                 if (atomic_read(&rdev->irq.pflip[5]))
7776                                         radeon_crtc_handle_vblank(rdev, 5);
7777                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7778                                 DRM_DEBUG("IH: D6 vblank\n");
7779
7780                                 break;
7781                         case 1: /* D6 vline */
7782                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7783                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7784
7785                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7786                                 DRM_DEBUG("IH: D6 vline\n");
7787
7788                                 break;
7789                         default:
7790                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7791                                 break;
7792                         }
7793                         break;
7794                 case 8: /* D1 page flip */
7795                 case 10: /* D2 page flip */
7796                 case 12: /* D3 page flip */
7797                 case 14: /* D4 page flip */
7798                 case 16: /* D5 page flip */
7799                 case 18: /* D6 page flip */
7800                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7801                         if (radeon_use_pflipirq > 0)
7802                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7803                         break;
7804                 case 42: /* HPD hotplug */
7805                         switch (src_data) {
7806                         case 0:
7807                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7808                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7809
7810                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7811                                 queue_hotplug = true;
7812                                 DRM_DEBUG("IH: HPD1\n");
7813
7814                                 break;
7815                         case 1:
7816                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7817                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7818
7819                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7820                                 queue_hotplug = true;
7821                                 DRM_DEBUG("IH: HPD2\n");
7822
7823                                 break;
7824                         case 2:
7825                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7826                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7827
7828                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7829                                 queue_hotplug = true;
7830                                 DRM_DEBUG("IH: HPD3\n");
7831
7832                                 break;
7833                         case 3:
7834                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7835                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7836
7837                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7838                                 queue_hotplug = true;
7839                                 DRM_DEBUG("IH: HPD4\n");
7840
7841                                 break;
7842                         case 4:
7843                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7844                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7845
7846                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7847                                 queue_hotplug = true;
7848                                 DRM_DEBUG("IH: HPD5\n");
7849
7850                                 break;
7851                         case 5:
7852                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7853                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7854
7855                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7856                                 queue_hotplug = true;
7857                                 DRM_DEBUG("IH: HPD6\n");
7858
7859                                 break;
7860                         case 6:
7861                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7862                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7863
7864                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7865                                 queue_dp = true;
7866                                 DRM_DEBUG("IH: HPD_RX 1\n");
7867
7868                                 break;
7869                         case 7:
7870                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7871                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7872
7873                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7874                                 queue_dp = true;
7875                                 DRM_DEBUG("IH: HPD_RX 2\n");
7876
7877                                 break;
7878                         case 8:
7879                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7880                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7881
7882                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7883                                 queue_dp = true;
7884                                 DRM_DEBUG("IH: HPD_RX 3\n");
7885
7886                                 break;
7887                         case 9:
7888                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7889                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7890
7891                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7892                                 queue_dp = true;
7893                                 DRM_DEBUG("IH: HPD_RX 4\n");
7894
7895                                 break;
7896                         case 10:
7897                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7898                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7899
7900                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7901                                 queue_dp = true;
7902                                 DRM_DEBUG("IH: HPD_RX 5\n");
7903
7904                                 break;
7905                         case 11:
7906                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7907                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7908
7909                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7910                                 queue_dp = true;
7911                                 DRM_DEBUG("IH: HPD_RX 6\n");
7912
7913                                 break;
7914                         default:
7915                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7916                                 break;
7917                         }
7918                         break;
7919                 case 96:
7920                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7921                         WREG32(SRBM_INT_ACK, 0x1);
7922                         break;
7923                 case 124: /* UVD */
7924                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7925                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7926                         break;
7927                 case 146:
7928                 case 147:
7929                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7930                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7931                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7932                         /* reset addr and status */
7933                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7934                         if (addr == 0x0 && status == 0x0)
7935                                 break;
7936                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7937                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7938                                 addr);
7939                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7940                                 status);
7941                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7942                         break;
7943                 case 167: /* VCE */
7944                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7945                         switch (src_data) {
7946                         case 0:
7947                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7948                                 break;
7949                         case 1:
7950                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7951                                 break;
7952                         default:
7953                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7954                                 break;
7955                         }
7956                         break;
7957                 case 176: /* GFX RB CP_INT */
7958                 case 177: /* GFX IB CP_INT */
7959                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7960                         break;
7961                 case 181: /* CP EOP event */
7962                         DRM_DEBUG("IH: CP EOP\n");
7963                         /* XXX check the bitfield order! */
7964                         me_id = (ring_id & 0x60) >> 5;
7965                         pipe_id = (ring_id & 0x18) >> 3;
7966                         queue_id = (ring_id & 0x7) >> 0;
7967                         switch (me_id) {
7968                         case 0:
7969                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7970                                 break;
7971                         case 1:
7972                         case 2:
7973                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7974                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7975                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7976                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7977                                 break;
7978                         }
7979                         break;
7980                 case 184: /* CP Privileged reg access */
7981                         DRM_ERROR("Illegal register access in command stream\n");
7982                         /* XXX check the bitfield order! */
7983                         me_id = (ring_id & 0x60) >> 5;
7984                         pipe_id = (ring_id & 0x18) >> 3;
7985                         queue_id = (ring_id & 0x7) >> 0;
7986                         switch (me_id) {
7987                         case 0:
7988                                 /* This results in a full GPU reset, but all we need to do is soft
7989                                  * reset the CP for gfx
7990                                  */
7991                                 queue_reset = true;
7992                                 break;
7993                         case 1:
7994                                 /* XXX compute */
7995                                 queue_reset = true;
7996                                 break;
7997                         case 2:
7998                                 /* XXX compute */
7999                                 queue_reset = true;
8000                                 break;
8001                         }
8002                         break;
8003                 case 185: /* CP Privileged inst */
8004                         DRM_ERROR("Illegal instruction in command stream\n");
8005                         /* XXX check the bitfield order! */
8006                         me_id = (ring_id & 0x60) >> 5;
8007                         pipe_id = (ring_id & 0x18) >> 3;
8008                         queue_id = (ring_id & 0x7) >> 0;
8009                         switch (me_id) {
8010                         case 0:
8011                                 /* This results in a full GPU reset, but all we need to do is soft
8012                                  * reset the CP for gfx
8013                                  */
8014                                 queue_reset = true;
8015                                 break;
8016                         case 1:
8017                                 /* XXX compute */
8018                                 queue_reset = true;
8019                                 break;
8020                         case 2:
8021                                 /* XXX compute */
8022                                 queue_reset = true;
8023                                 break;
8024                         }
8025                         break;
8026                 case 224: /* SDMA trap event */
8027                         /* XXX check the bitfield order! */
8028                         me_id = (ring_id & 0x3) >> 0;
8029                         queue_id = (ring_id & 0xc) >> 2;
8030                         DRM_DEBUG("IH: SDMA trap\n");
8031                         switch (me_id) {
8032                         case 0:
8033                                 switch (queue_id) {
8034                                 case 0:
8035                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8036                                         break;
8037                                 case 1:
8038                                         /* XXX compute */
8039                                         break;
8040                                 case 2:
8041                                         /* XXX compute */
8042                                         break;
8043                                 }
8044                                 break;
8045                         case 1:
8046                                 switch (queue_id) {
8047                                 case 0:
8048                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8049                                         break;
8050                                 case 1:
8051                                         /* XXX compute */
8052                                         break;
8053                                 case 2:
8054                                         /* XXX compute */
8055                                         break;
8056                                 }
8057                                 break;
8058                         }
8059                         break;
8060                 case 230: /* thermal low to high */
8061                         DRM_DEBUG("IH: thermal low to high\n");
8062                         rdev->pm.dpm.thermal.high_to_low = false;
8063                         queue_thermal = true;
8064                         break;
8065                 case 231: /* thermal high to low */
8066                         DRM_DEBUG("IH: thermal high to low\n");
8067                         rdev->pm.dpm.thermal.high_to_low = true;
8068                         queue_thermal = true;
8069                         break;
8070                 case 233: /* GUI IDLE */
8071                         DRM_DEBUG("IH: GUI idle\n");
8072                         break;
8073                 case 241: /* SDMA Privileged inst */
8074                 case 247: /* SDMA Privileged inst */
8075                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8076                         /* XXX check the bitfield order! */
8077                         me_id = (ring_id & 0x3) >> 0;
8078                         queue_id = (ring_id & 0xc) >> 2;
8079                         switch (me_id) {
8080                         case 0:
8081                                 switch (queue_id) {
8082                                 case 0:
8083                                         queue_reset = true;
8084                                         break;
8085                                 case 1:
8086                                         /* XXX compute */
8087                                         queue_reset = true;
8088                                         break;
8089                                 case 2:
8090                                         /* XXX compute */
8091                                         queue_reset = true;
8092                                         break;
8093                                 }
8094                                 break;
8095                         case 1:
8096                                 switch (queue_id) {
8097                                 case 0:
8098                                         queue_reset = true;
8099                                         break;
8100                                 case 1:
8101                                         /* XXX compute */
8102                                         queue_reset = true;
8103                                         break;
8104                                 case 2:
8105                                         /* XXX compute */
8106                                         queue_reset = true;
8107                                         break;
8108                                 }
8109                                 break;
8110                         }
8111                         break;
8112                 default:
8113                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8114                         break;
8115                 }
8116
8117                 /* wptr/rptr are in bytes! */
8118                 rptr += 16;
8119                 rptr &= rdev->ih.ptr_mask;
8120                 WREG32(IH_RB_RPTR, rptr);
8121         }
8122         if (queue_dp)
8123                 schedule_work(&rdev->dp_work);
8124         if (queue_hotplug)
8125                 schedule_delayed_work(&rdev->hotplug_work, 0);
8126         if (queue_reset) {
8127                 rdev->needs_reset = true;
8128                 wake_up_all(&rdev->fence_queue);
8129         }
8130         if (queue_thermal)
8131                 schedule_work(&rdev->pm.dpm.thermal.work);
8132         rdev->ih.rptr = rptr;
8133         atomic_set(&rdev->ih.lock, 0);
8134
8135         /* make sure wptr hasn't changed while processing */
8136         wptr = cik_get_ih_wptr(rdev);
8137         if (wptr != rptr)
8138                 goto restart_ih;
8139
8140         return IRQ_HANDLED;
8141 }
8142
8143 /*
8144  * startup/shutdown callbacks
8145  */
8146 static void cik_uvd_init(struct radeon_device *rdev)
8147 {
8148         int r;
8149
8150         if (!rdev->has_uvd)
8151                 return;
8152
8153         r = radeon_uvd_init(rdev);
8154         if (r) {
8155                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8156                 /*
8157                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8158                  * to early fails cik_uvd_start() and thus nothing happens
8159                  * there. So it is pointless to try to go through that code
8160                  * hence why we disable uvd here.
8161                  */
8162                 rdev->has_uvd = 0;
8163                 return;
8164         }
8165         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8166         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8167 }
8168
8169 static void cik_uvd_start(struct radeon_device *rdev)
8170 {
8171         int r;
8172
8173         if (!rdev->has_uvd)
8174                 return;
8175
8176         r = radeon_uvd_resume(rdev);
8177         if (r) {
8178                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8179                 goto error;
8180         }
8181         r = uvd_v4_2_resume(rdev);
8182         if (r) {
8183                 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8184                 goto error;
8185         }
8186         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8187         if (r) {
8188                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8189                 goto error;
8190         }
8191         return;
8192
8193 error:
8194         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8195 }
8196
8197 static void cik_uvd_resume(struct radeon_device *rdev)
8198 {
8199         struct radeon_ring *ring;
8200         int r;
8201
8202         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8203                 return;
8204
8205         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8206         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, RADEON_CP_PACKET2);
8207         if (r) {
8208                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8209                 return;
8210         }
8211         r = uvd_v1_0_init(rdev);
8212         if (r) {
8213                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8214                 return;
8215         }
8216 }
8217
8218 static void cik_vce_init(struct radeon_device *rdev)
8219 {
8220         int r;
8221
8222         if (!rdev->has_vce)
8223                 return;
8224
8225         r = radeon_vce_init(rdev);
8226         if (r) {
8227                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8228                 /*
8229                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
8230                  * to early fails cik_vce_start() and thus nothing happens
8231                  * there. So it is pointless to try to go through that code
8232                  * hence why we disable vce here.
8233                  */
8234                 rdev->has_vce = 0;
8235                 return;
8236         }
8237         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8238         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8239         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8240         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8241 }
8242
8243 static void cik_vce_start(struct radeon_device *rdev)
8244 {
8245         int r;
8246
8247         if (!rdev->has_vce)
8248                 return;
8249
8250         r = radeon_vce_resume(rdev);
8251         if (r) {
8252                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8253                 goto error;
8254         }
8255         r = vce_v2_0_resume(rdev);
8256         if (r) {
8257                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8258                 goto error;
8259         }
8260         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8261         if (r) {
8262                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8263                 goto error;
8264         }
8265         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8266         if (r) {
8267                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8268                 goto error;
8269         }
8270         return;
8271
8272 error:
8273         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8274         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8275 }
8276
8277 static void cik_vce_resume(struct radeon_device *rdev)
8278 {
8279         struct radeon_ring *ring;
8280         int r;
8281
8282         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8283                 return;
8284
8285         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8286         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8287         if (r) {
8288                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8289                 return;
8290         }
8291         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8292         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8293         if (r) {
8294                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8295                 return;
8296         }
8297         r = vce_v1_0_init(rdev);
8298         if (r) {
8299                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8300                 return;
8301         }
8302 }
8303
8304 /**
8305  * cik_startup - program the asic to a functional state
8306  *
8307  * @rdev: radeon_device pointer
8308  *
8309  * Programs the asic to a functional state (CIK).
8310  * Called by cik_init() and cik_resume().
8311  * Returns 0 for success, error for failure.
8312  */
8313 static int cik_startup(struct radeon_device *rdev)
8314 {
8315         struct radeon_ring *ring;
8316         u32 nop;
8317         int r;
8318
8319         /* enable pcie gen2/3 link */
8320         cik_pcie_gen3_enable(rdev);
8321         /* enable aspm */
8322         cik_program_aspm(rdev);
8323
8324         /* scratch needs to be initialized before MC */
8325         r = r600_vram_scratch_init(rdev);
8326         if (r)
8327                 return r;
8328
8329         cik_mc_program(rdev);
8330
8331         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8332                 r = ci_mc_load_microcode(rdev);
8333                 if (r) {
8334                         DRM_ERROR("Failed to load MC firmware!\n");
8335                         return r;
8336                 }
8337         }
8338
8339         r = cik_pcie_gart_enable(rdev);
8340         if (r)
8341                 return r;
8342         cik_gpu_init(rdev);
8343
8344         /* allocate rlc buffers */
8345         if (rdev->flags & RADEON_IS_IGP) {
8346                 if (rdev->family == CHIP_KAVERI) {
8347                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8348                         rdev->rlc.reg_list_size =
8349                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8350                 } else {
8351                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8352                         rdev->rlc.reg_list_size =
8353                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8354                 }
8355         }
8356         rdev->rlc.cs_data = ci_cs_data;
8357         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8358         r = sumo_rlc_init(rdev);
8359         if (r) {
8360                 DRM_ERROR("Failed to init rlc BOs!\n");
8361                 return r;
8362         }
8363
8364         /* allocate wb buffer */
8365         r = radeon_wb_init(rdev);
8366         if (r)
8367                 return r;
8368
8369         /* allocate mec buffers */
8370         r = cik_mec_init(rdev);
8371         if (r) {
8372                 DRM_ERROR("Failed to init MEC BOs!\n");
8373                 return r;
8374         }
8375
8376         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8377         if (r) {
8378                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8379                 return r;
8380         }
8381
8382         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8383         if (r) {
8384                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8385                 return r;
8386         }
8387
8388         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8389         if (r) {
8390                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8391                 return r;
8392         }
8393
8394         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8395         if (r) {
8396                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8397                 return r;
8398         }
8399
8400         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8401         if (r) {
8402                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8403                 return r;
8404         }
8405
8406         cik_uvd_start(rdev);
8407         cik_vce_start(rdev);
8408
8409         /* Enable IRQ */
8410         if (!rdev->irq.installed) {
8411                 r = radeon_irq_kms_init(rdev);
8412                 if (r)
8413                         return r;
8414         }
8415
8416         r = cik_irq_init(rdev);
8417         if (r) {
8418                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8419                 radeon_irq_kms_fini(rdev);
8420                 return r;
8421         }
8422         cik_irq_set(rdev);
8423
8424         if (rdev->family == CHIP_HAWAII) {
8425                 if (rdev->new_fw)
8426                         nop = PACKET3(PACKET3_NOP, 0x3FFF);
8427                 else
8428                         nop = RADEON_CP_PACKET2;
8429         } else {
8430                 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8431         }
8432
8433         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8434         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8435                              nop);
8436         if (r)
8437                 return r;
8438
8439         /* set up the compute queues */
8440         /* type-2 packets are deprecated on MEC, use type-3 instead */
8441         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8442         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8443                              nop);
8444         if (r)
8445                 return r;
8446         ring->me = 1; /* first MEC */
8447         ring->pipe = 0; /* first pipe */
8448         ring->queue = 0; /* first queue */
8449         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8450
8451         /* type-2 packets are deprecated on MEC, use type-3 instead */
8452         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8453         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8454                              nop);
8455         if (r)
8456                 return r;
8457         /* dGPU only have 1 MEC */
8458         ring->me = 1; /* first MEC */
8459         ring->pipe = 0; /* first pipe */
8460         ring->queue = 1; /* second queue */
8461         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8462
8463         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8464         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8465                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8466         if (r)
8467                 return r;
8468
8469         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8470         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8471                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8472         if (r)
8473                 return r;
8474
8475         r = cik_cp_resume(rdev);
8476         if (r)
8477                 return r;
8478
8479         r = cik_sdma_resume(rdev);
8480         if (r)
8481                 return r;
8482
8483         cik_uvd_resume(rdev);
8484         cik_vce_resume(rdev);
8485
8486         r = radeon_ib_pool_init(rdev);
8487         if (r) {
8488                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8489                 return r;
8490         }
8491
8492         r = radeon_vm_manager_init(rdev);
8493         if (r) {
8494                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8495                 return r;
8496         }
8497
8498         r = radeon_audio_init(rdev);
8499         if (r)
8500                 return r;
8501
8502         r = radeon_kfd_resume(rdev);
8503         if (r)
8504                 return r;
8505
8506         return 0;
8507 }
8508
8509 /**
8510  * cik_resume - resume the asic to a functional state
8511  *
8512  * @rdev: radeon_device pointer
8513  *
8514  * Programs the asic to a functional state (CIK).
8515  * Called at resume.
8516  * Returns 0 for success, error for failure.
8517  */
8518 int cik_resume(struct radeon_device *rdev)
8519 {
8520         int r;
8521
8522         /* post card */
8523         atom_asic_init(rdev->mode_info.atom_context);
8524
8525         /* init golden registers */
8526         cik_init_golden_registers(rdev);
8527
8528         if (rdev->pm.pm_method == PM_METHOD_DPM)
8529                 radeon_pm_resume(rdev);
8530
8531         rdev->accel_working = true;
8532         r = cik_startup(rdev);
8533         if (r) {
8534                 DRM_ERROR("cik startup failed on resume\n");
8535                 rdev->accel_working = false;
8536                 return r;
8537         }
8538
8539         return r;
8540
8541 }
8542
8543 /**
8544  * cik_suspend - suspend the asic
8545  *
8546  * @rdev: radeon_device pointer
8547  *
8548  * Bring the chip into a state suitable for suspend (CIK).
8549  * Called at suspend.
8550  * Returns 0 for success.
8551  */
8552 int cik_suspend(struct radeon_device *rdev)
8553 {
8554         radeon_kfd_suspend(rdev);
8555         radeon_pm_suspend(rdev);
8556         radeon_audio_fini(rdev);
8557         radeon_vm_manager_fini(rdev);
8558         cik_cp_enable(rdev, false);
8559         cik_sdma_enable(rdev, false);
8560         if (rdev->has_uvd) {
8561                 uvd_v1_0_fini(rdev);
8562                 radeon_uvd_suspend(rdev);
8563         }
8564         if (rdev->has_vce)
8565                 radeon_vce_suspend(rdev);
8566         cik_fini_pg(rdev);
8567         cik_fini_cg(rdev);
8568         cik_irq_suspend(rdev);
8569         radeon_wb_disable(rdev);
8570         cik_pcie_gart_disable(rdev);
8571         return 0;
8572 }
8573
8574 /* Plan is to move initialization in that function and use
8575  * helper function so that radeon_device_init pretty much
8576  * do nothing more than calling asic specific function. This
8577  * should also allow to remove a bunch of callback function
8578  * like vram_info.
8579  */
8580 /**
8581  * cik_init - asic specific driver and hw init
8582  *
8583  * @rdev: radeon_device pointer
8584  *
8585  * Setup asic specific driver variables and program the hw
8586  * to a functional state (CIK).
8587  * Called at driver startup.
8588  * Returns 0 for success, errors for failure.
8589  */
8590 int cik_init(struct radeon_device *rdev)
8591 {
8592         struct radeon_ring *ring;
8593         int r;
8594
8595         /* Read BIOS */
8596         if (!radeon_get_bios(rdev)) {
8597                 if (ASIC_IS_AVIVO(rdev))
8598                         return -EINVAL;
8599         }
8600         /* Must be an ATOMBIOS */
8601         if (!rdev->is_atom_bios) {
8602                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8603                 return -EINVAL;
8604         }
8605         r = radeon_atombios_init(rdev);
8606         if (r)
8607                 return r;
8608
8609         /* Post card if necessary */
8610         if (!radeon_card_posted(rdev)) {
8611                 if (!rdev->bios) {
8612                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8613                         return -EINVAL;
8614                 }
8615                 DRM_INFO("GPU not posted. posting now...\n");
8616                 atom_asic_init(rdev->mode_info.atom_context);
8617         }
8618         /* init golden registers */
8619         cik_init_golden_registers(rdev);
8620         /* Initialize scratch registers */
8621         cik_scratch_init(rdev);
8622         /* Initialize surface registers */
8623         radeon_surface_init(rdev);
8624         /* Initialize clocks */
8625         radeon_get_clock_info(rdev->ddev);
8626
8627         /* Fence driver */
8628         r = radeon_fence_driver_init(rdev);
8629         if (r)
8630                 return r;
8631
8632         /* initialize memory controller */
8633         r = cik_mc_init(rdev);
8634         if (r)
8635                 return r;
8636         /* Memory manager */
8637         r = radeon_bo_init(rdev);
8638         if (r)
8639                 return r;
8640
8641         if (rdev->flags & RADEON_IS_IGP) {
8642                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8643                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8644                         r = cik_init_microcode(rdev);
8645                         if (r) {
8646                                 DRM_ERROR("Failed to load firmware!\n");
8647                                 return r;
8648                         }
8649                 }
8650         } else {
8651                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8652                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8653                     !rdev->mc_fw) {
8654                         r = cik_init_microcode(rdev);
8655                         if (r) {
8656                                 DRM_ERROR("Failed to load firmware!\n");
8657                                 return r;
8658                         }
8659                 }
8660         }
8661
8662         /* Initialize power management */
8663         radeon_pm_init(rdev);
8664
8665         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8666         ring->ring_obj = NULL;
8667         r600_ring_init(rdev, ring, 1024 * 1024);
8668
8669         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8670         ring->ring_obj = NULL;
8671         r600_ring_init(rdev, ring, 1024 * 1024);
8672         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8673         if (r)
8674                 return r;
8675
8676         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8677         ring->ring_obj = NULL;
8678         r600_ring_init(rdev, ring, 1024 * 1024);
8679         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8680         if (r)
8681                 return r;
8682
8683         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8684         ring->ring_obj = NULL;
8685         r600_ring_init(rdev, ring, 256 * 1024);
8686
8687         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8688         ring->ring_obj = NULL;
8689         r600_ring_init(rdev, ring, 256 * 1024);
8690
8691         cik_uvd_init(rdev);
8692         cik_vce_init(rdev);
8693
8694         rdev->ih.ring_obj = NULL;
8695         r600_ih_ring_init(rdev, 64 * 1024);
8696
8697         r = r600_pcie_gart_init(rdev);
8698         if (r)
8699                 return r;
8700
8701         rdev->accel_working = true;
8702         r = cik_startup(rdev);
8703         if (r) {
8704                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8705                 cik_cp_fini(rdev);
8706                 cik_sdma_fini(rdev);
8707                 cik_irq_fini(rdev);
8708                 sumo_rlc_fini(rdev);
8709                 cik_mec_fini(rdev);
8710                 radeon_wb_fini(rdev);
8711                 radeon_ib_pool_fini(rdev);
8712                 radeon_vm_manager_fini(rdev);
8713                 radeon_irq_kms_fini(rdev);
8714                 cik_pcie_gart_fini(rdev);
8715                 rdev->accel_working = false;
8716         }
8717
8718         /* Don't start up if the MC ucode is missing.
8719          * The default clocks and voltages before the MC ucode
8720          * is loaded are not suffient for advanced operations.
8721          */
8722         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8723                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8724                 return -EINVAL;
8725         }
8726
8727         return 0;
8728 }
8729
8730 /**
8731  * cik_fini - asic specific driver and hw fini
8732  *
8733  * @rdev: radeon_device pointer
8734  *
8735  * Tear down the asic specific driver variables and program the hw
8736  * to an idle state (CIK).
8737  * Called at driver unload.
8738  */
8739 void cik_fini(struct radeon_device *rdev)
8740 {
8741         radeon_pm_fini(rdev);
8742         cik_cp_fini(rdev);
8743         cik_sdma_fini(rdev);
8744         cik_fini_pg(rdev);
8745         cik_fini_cg(rdev);
8746         cik_irq_fini(rdev);
8747         sumo_rlc_fini(rdev);
8748         cik_mec_fini(rdev);
8749         radeon_wb_fini(rdev);
8750         radeon_vm_manager_fini(rdev);
8751         radeon_ib_pool_fini(rdev);
8752         radeon_irq_kms_fini(rdev);
8753         uvd_v1_0_fini(rdev);
8754         radeon_uvd_fini(rdev);
8755         radeon_vce_fini(rdev);
8756         cik_pcie_gart_fini(rdev);
8757         r600_vram_scratch_fini(rdev);
8758         radeon_gem_fini(rdev);
8759         radeon_fence_driver_fini(rdev);
8760         radeon_bo_fini(rdev);
8761         radeon_atombios_fini(rdev);
8762         kfree(rdev->bios);
8763         rdev->bios = NULL;
8764 }
8765
8766 void dce8_program_fmt(struct drm_encoder *encoder)
8767 {
8768         struct drm_device *dev = encoder->dev;
8769         struct radeon_device *rdev = dev->dev_private;
8770         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8771         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8772         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8773         int bpc = 0;
8774         u32 tmp = 0;
8775         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8776
8777         if (connector) {
8778                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8779                 bpc = radeon_get_monitor_bpc(connector);
8780                 dither = radeon_connector->dither;
8781         }
8782
8783         /* LVDS/eDP FMT is set up by atom */
8784         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8785                 return;
8786
8787         /* not needed for analog */
8788         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8789             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8790                 return;
8791
8792         if (bpc == 0)
8793                 return;
8794
8795         switch (bpc) {
8796         case 6:
8797                 if (dither == RADEON_FMT_DITHER_ENABLE)
8798                         /* XXX sort out optimal dither settings */
8799                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8800                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8801                 else
8802                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8803                 break;
8804         case 8:
8805                 if (dither == RADEON_FMT_DITHER_ENABLE)
8806                         /* XXX sort out optimal dither settings */
8807                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8808                                 FMT_RGB_RANDOM_ENABLE |
8809                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8810                 else
8811                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8812                 break;
8813         case 10:
8814                 if (dither == RADEON_FMT_DITHER_ENABLE)
8815                         /* XXX sort out optimal dither settings */
8816                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8817                                 FMT_RGB_RANDOM_ENABLE |
8818                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8819                 else
8820                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8821                 break;
8822         default:
8823                 /* not needed */
8824                 break;
8825         }
8826
8827         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8828 }
8829
8830 /* display watermark setup */
8831 /**
8832  * dce8_line_buffer_adjust - Set up the line buffer
8833  *
8834  * @rdev: radeon_device pointer
8835  * @radeon_crtc: the selected display controller
8836  * @mode: the current display mode on the selected display
8837  * controller
8838  *
 * Set up the line buffer allocation for
8840  * the selected display controller (CIK).
8841  * Returns the line buffer size in pixels.
8842  */
8843 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8844                                    struct radeon_crtc *radeon_crtc,
8845                                    struct drm_display_mode *mode)
8846 {
8847         u32 tmp, buffer_alloc, i;
8848         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8849         /*
8850          * Line Buffer Setup
8851          * There are 6 line buffers, one for each display controllers.
8852          * There are 3 partitions per LB. Select the number of partitions
8853          * to enable based on the display width.  For display widths larger
8854          * than 4096, you need use to use 2 display controllers and combine
8855          * them using the stereo blender.
8856          */
8857         if (radeon_crtc->base.enabled && mode) {
8858                 if (mode->crtc_hdisplay < 1920) {
8859                         tmp = 1;
8860                         buffer_alloc = 2;
8861                 } else if (mode->crtc_hdisplay < 2560) {
8862                         tmp = 2;
8863                         buffer_alloc = 2;
8864                 } else if (mode->crtc_hdisplay < 4096) {
8865                         tmp = 0;
8866                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8867                 } else {
8868                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8869                         tmp = 0;
8870                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8871                 }
8872         } else {
8873                 tmp = 1;
8874                 buffer_alloc = 0;
8875         }
8876
8877         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8878                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8879
8880         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8881                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8882         for (i = 0; i < rdev->usec_timeout; i++) {
8883                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8884                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8885                         break;
8886                 udelay(1);
8887         }
8888
8889         if (radeon_crtc->base.enabled && mode) {
8890                 switch (tmp) {
8891                 case 0:
8892                 default:
8893                         return 4096 * 2;
8894                 case 1:
8895                         return 1920 * 2;
8896                 case 2:
8897                         return 2560 * 2;
8898                 }
8899         }
8900
8901         /* controller not enabled, so no lb used */
8902         return 0;
8903 }
8904
8905 /**
8906  * cik_get_number_of_dram_channels - get the number of dram channels
8907  *
8908  * @rdev: radeon_device pointer
8909  *
8910  * Look up the number of video ram channels (CIK).
8911  * Used for display watermark bandwidth calculations
8912  * Returns the number of dram channels
8913  */
8914 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8915 {
8916         u32 tmp = RREG32(MC_SHARED_CHMAP);
8917
8918         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8919         case 0:
8920         default:
8921                 return 1;
8922         case 1:
8923                 return 2;
8924         case 2:
8925                 return 4;
8926         case 3:
8927                 return 8;
8928         case 4:
8929                 return 3;
8930         case 5:
8931                 return 6;
8932         case 6:
8933                 return 10;
8934         case 7:
8935                 return 12;
8936         case 8:
8937                 return 16;
8938         }
8939 }
8940
/* dce8_wm_params - inputs to the dce8 display watermark calculations
 * below; filled in per-crtc by dce8_program_watermarks().
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8956
8957 /**
8958  * dce8_dram_bandwidth - get the dram bandwidth
8959  *
8960  * @wm: watermark calculation data
8961  *
8962  * Calculate the raw dram bandwidth (CIK).
8963  * Used for display watermark bandwidth calculations
8964  * Returns the dram bandwidth in MBytes/s
8965  */
8966 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8967 {
8968         /* Calculate raw DRAM Bandwidth */
8969         fixed20_12 dram_efficiency; /* 0.7 */
8970         fixed20_12 yclk, dram_channels, bandwidth;
8971         fixed20_12 a;
8972
8973         a.full = dfixed_const(1000);
8974         yclk.full = dfixed_const(wm->yclk);
8975         yclk.full = dfixed_div(yclk, a);
8976         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8977         a.full = dfixed_const(10);
8978         dram_efficiency.full = dfixed_const(7);
8979         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8980         bandwidth.full = dfixed_mul(dram_channels, yclk);
8981         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8982
8983         return dfixed_trunc(bandwidth);
8984 }
8985
8986 /**
8987  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8988  *
8989  * @wm: watermark calculation data
8990  *
8991  * Calculate the dram bandwidth used for display (CIK).
8992  * Used for display watermark bandwidth calculations
8993  * Returns the dram bandwidth for display in MBytes/s
8994  */
8995 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8996 {
8997         /* Calculate DRAM Bandwidth and the part allocated to display. */
8998         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8999         fixed20_12 yclk, dram_channels, bandwidth;
9000         fixed20_12 a;
9001
9002         a.full = dfixed_const(1000);
9003         yclk.full = dfixed_const(wm->yclk);
9004         yclk.full = dfixed_div(yclk, a);
9005         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9006         a.full = dfixed_const(10);
9007         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9008         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9009         bandwidth.full = dfixed_mul(dram_channels, yclk);
9010         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9011
9012         return dfixed_trunc(bandwidth);
9013 }
9014
9015 /**
9016  * dce8_data_return_bandwidth - get the data return bandwidth
9017  *
9018  * @wm: watermark calculation data
9019  *
9020  * Calculate the data return bandwidth used for display (CIK).
9021  * Used for display watermark bandwidth calculations
9022  * Returns the data return bandwidth in MBytes/s
9023  */
9024 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9025 {
9026         /* Calculate the display Data return Bandwidth */
9027         fixed20_12 return_efficiency; /* 0.8 */
9028         fixed20_12 sclk, bandwidth;
9029         fixed20_12 a;
9030
9031         a.full = dfixed_const(1000);
9032         sclk.full = dfixed_const(wm->sclk);
9033         sclk.full = dfixed_div(sclk, a);
9034         a.full = dfixed_const(10);
9035         return_efficiency.full = dfixed_const(8);
9036         return_efficiency.full = dfixed_div(return_efficiency, a);
9037         a.full = dfixed_const(32);
9038         bandwidth.full = dfixed_mul(a, sclk);
9039         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9040
9041         return dfixed_trunc(bandwidth);
9042 }
9043
9044 /**
9045  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9046  *
9047  * @wm: watermark calculation data
9048  *
9049  * Calculate the dmif bandwidth used for display (CIK).
9050  * Used for display watermark bandwidth calculations
9051  * Returns the dmif bandwidth in MBytes/s
9052  */
9053 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9054 {
9055         /* Calculate the DMIF Request Bandwidth */
9056         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9057         fixed20_12 disp_clk, bandwidth;
9058         fixed20_12 a, b;
9059
9060         a.full = dfixed_const(1000);
9061         disp_clk.full = dfixed_const(wm->disp_clk);
9062         disp_clk.full = dfixed_div(disp_clk, a);
9063         a.full = dfixed_const(32);
9064         b.full = dfixed_mul(a, disp_clk);
9065
9066         a.full = dfixed_const(10);
9067         disp_clk_request_efficiency.full = dfixed_const(8);
9068         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9069
9070         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9071
9072         return dfixed_trunc(bandwidth);
9073 }
9074
9075 /**
9076  * dce8_available_bandwidth - get the min available bandwidth
9077  *
9078  * @wm: watermark calculation data
9079  *
9080  * Calculate the min available bandwidth used for display (CIK).
9081  * Used for display watermark bandwidth calculations
9082  * Returns the min available bandwidth in MBytes/s
9083  */
9084 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9085 {
9086         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9087         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9088         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9089         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9090
9091         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9092 }
9093
9094 /**
9095  * dce8_average_bandwidth - get the average available bandwidth
9096  *
9097  * @wm: watermark calculation data
9098  *
9099  * Calculate the average available bandwidth used for display (CIK).
9100  * Used for display watermark bandwidth calculations
9101  * Returns the average available bandwidth in MBytes/s
9102  */
9103 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9104 {
9105         /* Calculate the display mode Average Bandwidth
9106          * DisplayMode should contain the source and destination dimensions,
9107          * timing, etc.
9108          */
9109         fixed20_12 bpp;
9110         fixed20_12 line_time;
9111         fixed20_12 src_width;
9112         fixed20_12 bandwidth;
9113         fixed20_12 a;
9114
9115         a.full = dfixed_const(1000);
9116         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9117         line_time.full = dfixed_div(line_time, a);
9118         bpp.full = dfixed_const(wm->bytes_per_pixel);
9119         src_width.full = dfixed_const(wm->src_width);
9120         bandwidth.full = dfixed_mul(src_width, bpp);
9121         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9122         bandwidth.full = dfixed_div(bandwidth, line_time);
9123
9124         return dfixed_trunc(bandwidth);
9125 }
9126
9127 /**
9128  * dce8_latency_watermark - get the latency watermark
9129  *
9130  * @wm: watermark calculation data
9131  *
9132  * Calculate the latency watermark (CIK).
9133  * Used for display watermark bandwidth calculations
9134  * Returns the latency watermark in ns
9135  */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* ns to transfer a 512B*8 chunk / a 128B*4 cursor line pair at the
	 * available bandwidth (MB/s per dce8_available_bandwidth) */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* worst-case wait for the chunk and cursor data of the other heads */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288; /* presumably DMIF buffer bytes -- matches other DCE code */
	fixed20_12 a, b, c;

	/* no active heads, nothing to hide latency for */
	if (wm->num_heads == 0)
		return 0;

	/* downscaling, >=3 vtaps while scaling, >=5 vtaps, or interlace with
	 * vsc >= 2 can need up to 4 source lines per destination line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk): rate at which the
	 * DMIF buffer can absorb data over the latency window */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* the line buffer fill rate is also capped by
	 * disp_clk * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill max_src_lines_per_dst_line source lines at lb_fill_bw */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if a line fills within the active period the latency alone is the
	 * watermark, otherwise add the overrun */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
9198
9199 /**
9200  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9201  * average and available dram bandwidth
9202  *
9203  * @wm: watermark calculation data
9204  *
9205  * Check if the display average bandwidth fits in the display
9206  * dram bandwidth (CIK).
9207  * Used for display watermark bandwidth calculations
9208  * Returns true if the display fits, false if not.
9209  */
9210 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9211 {
9212         if (dce8_average_bandwidth(wm) <=
9213             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9214                 return true;
9215         else
9216                 return false;
9217 }
9218
9219 /**
9220  * dce8_average_bandwidth_vs_available_bandwidth - check
9221  * average and available bandwidth
9222  *
9223  * @wm: watermark calculation data
9224  *
9225  * Check if the display average bandwidth fits in the display
9226  * available bandwidth (CIK).
9227  * Used for display watermark bandwidth calculations
9228  * Returns true if the display fits, false if not.
9229  */
9230 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9231 {
9232         if (dce8_average_bandwidth(wm) <=
9233             (dce8_available_bandwidth(wm) / wm->num_heads))
9234                 return true;
9235         else
9236                 return false;
9237 }
9238
9239 /**
9240  * dce8_check_latency_hiding - check latency hiding
9241  *
9242  * @wm: watermark calculation data
9243  *
9244  * Check latency hiding (CIK).
9245  * Used for display watermark bandwidth calculations
9246  * Returns true if the display fits, false if not.
9247  */
9248 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9249 {
9250         u32 lb_partitions = wm->lb_size / wm->src_width;
9251         u32 line_time = wm->active_time + wm->blank_time;
9252         u32 latency_tolerant_lines;
9253         u32 latency_hiding;
9254         fixed20_12 a;
9255
9256         a.full = dfixed_const(1);
9257         if (wm->vsc.full > a.full)
9258                 latency_tolerant_lines = 1;
9259         else {
9260                 if (lb_partitions <= (wm->vtaps + 1))
9261                         latency_tolerant_lines = 1;
9262                 else
9263                         latency_tolerant_lines = 2;
9264         }
9265
9266         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9267
9268         if (dce8_latency_watermark(wm) <= latency_hiding)
9269                 return true;
9270         else
9271                 return false;
9272 }
9273
9274 /**
9275  * dce8_program_watermarks - program display watermarks
9276  *
9277  * @rdev: radeon_device pointer
9278  * @radeon_crtc: the selected display controller
9279  * @lb_size: line buffer size
9280  * @num_heads: number of display controllers in use
9281  *
9282  * Calculate and program the display watermarks for the
9283  * selected display controller (CIK).
9284  */
9285 static void dce8_program_watermarks(struct radeon_device *rdev,
9286                                     struct radeon_crtc *radeon_crtc,
9287                                     u32 lb_size, u32 num_heads)
9288 {
9289         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9290         struct dce8_wm_params wm_low, wm_high;
9291         u32 pixel_period;
9292         u32 line_time = 0;
9293         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9294         u32 tmp, wm_mask;
9295
9296         if (radeon_crtc->base.enabled && num_heads && mode) {
9297                 pixel_period = 1000000 / (u32)mode->clock;
9298                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9299
9300                 /* watermark for high clocks */
9301                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9302                     rdev->pm.dpm_enabled) {
9303                         wm_high.yclk =
9304                                 radeon_dpm_get_mclk(rdev, false) * 10;
9305                         wm_high.sclk =
9306                                 radeon_dpm_get_sclk(rdev, false) * 10;
9307                 } else {
9308                         wm_high.yclk = rdev->pm.current_mclk * 10;
9309                         wm_high.sclk = rdev->pm.current_sclk * 10;
9310                 }
9311
9312                 wm_high.disp_clk = mode->clock;
9313                 wm_high.src_width = mode->crtc_hdisplay;
9314                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9315                 wm_high.blank_time = line_time - wm_high.active_time;
9316                 wm_high.interlaced = false;
9317                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9318                         wm_high.interlaced = true;
9319                 wm_high.vsc = radeon_crtc->vsc;
9320                 wm_high.vtaps = 1;
9321                 if (radeon_crtc->rmx_type != RMX_OFF)
9322                         wm_high.vtaps = 2;
9323                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9324                 wm_high.lb_size = lb_size;
9325                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9326                 wm_high.num_heads = num_heads;
9327
9328                 /* set for high clocks */
9329                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9330
9331                 /* possibly force display priority to high */
9332                 /* should really do this at mode validation time... */
9333                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9334                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9335                     !dce8_check_latency_hiding(&wm_high) ||
9336                     (rdev->disp_priority == 2)) {
9337                         DRM_DEBUG_KMS("force priority to high\n");
9338                 }
9339
9340                 /* watermark for low clocks */
9341                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9342                     rdev->pm.dpm_enabled) {
9343                         wm_low.yclk =
9344                                 radeon_dpm_get_mclk(rdev, true) * 10;
9345                         wm_low.sclk =
9346                                 radeon_dpm_get_sclk(rdev, true) * 10;
9347                 } else {
9348                         wm_low.yclk = rdev->pm.current_mclk * 10;
9349                         wm_low.sclk = rdev->pm.current_sclk * 10;
9350                 }
9351
9352                 wm_low.disp_clk = mode->clock;
9353                 wm_low.src_width = mode->crtc_hdisplay;
9354                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9355                 wm_low.blank_time = line_time - wm_low.active_time;
9356                 wm_low.interlaced = false;
9357                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9358                         wm_low.interlaced = true;
9359                 wm_low.vsc = radeon_crtc->vsc;
9360                 wm_low.vtaps = 1;
9361                 if (radeon_crtc->rmx_type != RMX_OFF)
9362                         wm_low.vtaps = 2;
9363                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9364                 wm_low.lb_size = lb_size;
9365                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9366                 wm_low.num_heads = num_heads;
9367
9368                 /* set for low clocks */
9369                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9370
9371                 /* possibly force display priority to high */
9372                 /* should really do this at mode validation time... */
9373                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9374                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9375                     !dce8_check_latency_hiding(&wm_low) ||
9376                     (rdev->disp_priority == 2)) {
9377                         DRM_DEBUG_KMS("force priority to high\n");
9378                 }
9379
9380                 /* Save number of lines the linebuffer leads before the scanout */
9381                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9382         }
9383
9384         /* select wm A */
9385         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9386         tmp = wm_mask;
9387         tmp &= ~LATENCY_WATERMARK_MASK(3);
9388         tmp |= LATENCY_WATERMARK_MASK(1);
9389         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9390         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9391                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9392                 LATENCY_HIGH_WATERMARK(line_time)));
9393         /* select wm B */
9394         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9395         tmp &= ~LATENCY_WATERMARK_MASK(3);
9396         tmp |= LATENCY_WATERMARK_MASK(2);
9397         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9398         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9399                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9400                 LATENCY_HIGH_WATERMARK(line_time)));
9401         /* restore original selection */
9402         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9403
9404         /* save values for DPM */
9405         radeon_crtc->line_time = line_time;
9406         radeon_crtc->wm_high = latency_watermark_a;
9407         radeon_crtc->wm_low = latency_watermark_b;
9408 }
9409
9410 /**
9411  * dce8_bandwidth_update - program display watermarks
9412  *
9413  * @rdev: radeon_device pointer
9414  *
9415  * Calculate and program the display watermarks and line
9416  * buffer allocation (CIK).
9417  */
9418 void dce8_bandwidth_update(struct radeon_device *rdev)
9419 {
9420         struct drm_display_mode *mode = NULL;
9421         u32 num_heads = 0, lb_size;
9422         int i;
9423
9424         if (!rdev->mode_info.mode_config_initialized)
9425                 return;
9426
9427         radeon_update_display_priority(rdev);
9428
9429         for (i = 0; i < rdev->num_crtc; i++) {
9430                 if (rdev->mode_info.crtcs[i]->base.enabled)
9431                         num_heads++;
9432         }
9433         for (i = 0; i < rdev->num_crtc; i++) {
9434                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9435                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9436                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9437         }
9438 }
9439
9440 /**
9441  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9442  *
9443  * @rdev: radeon_device pointer
9444  *
 * Fetches a GPU clock counter snapshot (CIK).
9446  * Returns the 64 bit clock counter snapshot.
9447  */
9448 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9449 {
9450         uint64_t clock;
9451
9452         mutex_lock(&rdev->gpu_clock_mutex);
9453         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9454         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9455                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9456         mutex_unlock(&rdev->gpu_clock_mutex);
9457         return clock;
9458 }
9459
9460 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9461                              u32 cntl_reg, u32 status_reg)
9462 {
9463         int r, i;
9464         struct atom_clock_dividers dividers;
9465         uint32_t tmp;
9466
9467         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9468                                            clock, false, &dividers);
9469         if (r)
9470                 return r;
9471
9472         tmp = RREG32_SMC(cntl_reg);
9473         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9474         tmp |= dividers.post_divider;
9475         WREG32_SMC(cntl_reg, tmp);
9476
9477         for (i = 0; i < 100; i++) {
9478                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9479                         break;
9480                 mdelay(10);
9481         }
9482         if (i == 100)
9483                 return -ETIMEDOUT;
9484
9485         return 0;
9486 }
9487
9488 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9489 {
9490         int r = 0;
9491
9492         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9493         if (r)
9494                 return r;
9495
9496         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9497         return r;
9498 }
9499
9500 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9501 {
9502         int r, i;
9503         struct atom_clock_dividers dividers;
9504         u32 tmp;
9505
9506         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9507                                            ecclk, false, &dividers);
9508         if (r)
9509                 return r;
9510
9511         for (i = 0; i < 100; i++) {
9512                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9513                         break;
9514                 mdelay(10);
9515         }
9516         if (i == 100)
9517                 return -ETIMEDOUT;
9518
9519         tmp = RREG32_SMC(CG_ECLK_CNTL);
9520         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9521         tmp |= dividers.post_divider;
9522         WREG32_SMC(CG_ECLK_CNTL, tmp);
9523
9524         for (i = 0; i < 100; i++) {
9525                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9526                         break;
9527                 mdelay(10);
9528         }
9529         if (i == 100)
9530                 return -ETIMEDOUT;
9531
9532         return 0;
9533 }
9534
/**
 * cik_pcie_gen3_enable - retrain the PCIE link at gen2/gen3 speeds
 * @rdev: radeon_device pointer
 *
 * Queries the platform's supported link speeds via DRM's speed cap
 * mask and, if a faster rate than the current one is available,
 * retrains the link to gen2 or gen3.  For gen3 this includes the
 * equalization retry sequence, touching the PCIe link control
 * registers of both the upstream bridge (root) and the GPU.
 * Silently does nothing for root-bus devices, IGPs, non-PCIE parts,
 * or when disabled with radeon.pcie_gen2=0.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
        struct pci_dev *root = rdev->pdev->bus->self;
        int bridge_pos, gpu_pos;
        u32 speed_cntl, mask, current_data_rate;
        int ret, i;
        u16 tmp16;

        if (pci_is_root_bus(rdev->pdev->bus))
                return;

        /* honor the radeon.pcie_gen2 module parameter */
        if (radeon_pcie_gen2 == 0)
                return;

        if (rdev->flags & RADEON_IS_IGP)
                return;

        if (!(rdev->flags & RADEON_IS_PCIE))
                return;

        ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
        if (ret != 0)
                return;

        /* nothing to do if the platform tops out at gen1 */
        if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
                return;

        /* current data rate encoding: 1 = gen2, 2 = gen3 (per the checks below) */
        speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
        current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
                LC_CURRENT_DATA_RATE_SHIFT;
        if (mask & DRM_PCIE_SPEED_80) {
                if (current_data_rate == 2) {
                        DRM_INFO("PCIE gen 3 link speeds already enabled\n");
                        return;
                }
                DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
        } else if (mask & DRM_PCIE_SPEED_50) {
                if (current_data_rate == 1) {
                        DRM_INFO("PCIE gen 2 link speeds already enabled\n");
                        return;
                }
                DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
        }

        /* need the PCIe capability offsets of both ends of the link */
        bridge_pos = pci_pcie_cap(root);
        if (!bridge_pos)
                return;

        gpu_pos = pci_pcie_cap(rdev->pdev);
        if (!gpu_pos)
                return;

        if (mask & DRM_PCIE_SPEED_80) {
                /* re-try equalization if gen3 is not already enabled */
                if (current_data_rate != 2) {
                        u16 bridge_cfg, gpu_cfg;
                        u16 bridge_cfg2, gpu_cfg2;
                        u32 max_lw, current_lw, tmp;

                        /* save LNKCTL on both ends; the HAWD bit is restored below */
                        pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
                        pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

                        /* set HW Autonomous Width Disable during retraining */
                        tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
                        pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

                        tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
                        pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

                        tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
                        max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
                        current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

                        /* widen the link back to the max detected width if
                         * renegotiation is supported
                         */
                        if (current_lw < max_lw) {
                                tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
                                if (tmp & LC_RENEGOTIATION_SUPPORT) {
                                        tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
                                        tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
                                        tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
                                        WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
                                }
                        }

                        /* up to 10 equalization retries */
                        for (i = 0; i < 10; i++) {
                                /* check status */
                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
                                /* stop retrying once the GPU reports transactions pending */
                                if (tmp16 & PCI_EXP_DEVSTA_TRPND)
                                        break;

                                /* re-snapshot LNKCTL/LNKCTL2 each pass so the
                                 * restores below put back current values
                                 */
                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

                                /* quiesce the link controller... */
                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
                                tmp |= LC_SET_QUIESCE;
                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

                                /* ...and ask it to redo equalization */
                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
                                tmp |= LC_REDO_EQ;
                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

                                /* presumably gives equalization time to run -- TODO confirm */
                                mdelay(100);

                                /* linkctl */
                                /* restore the saved HAWD setting on both ends */
                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
                                tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
                                tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
                                pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
                                tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
                                tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
                                pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

                                /* linkctl2 */
                                /* NOTE(review): mask (1 << 4) | (7 << 9) targets Enter
                                 * Compliance (bit 4) plus bits 11:9, but the LNKCTL2
                                 * Transmit Margin field is bits 9:7 per the PCIe spec;
                                 * verify against PCI_EXP_LNKCTL2_* definitions
                                 */
                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
                                tmp16 &= ~((1 << 4) | (7 << 9));
                                tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
                                pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
                                tmp16 &= ~((1 << 4) | (7 << 9));
                                tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
                                pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

                                /* release the quiesce */
                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
                                tmp &= ~LC_SET_QUIESCE;
                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
                        }
                }
        }

        /* set the link speed */
        speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
        speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
        WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

        /* program the GPU's Target Link Speed (LNKCTL2 bits 3:0) */
        pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
        tmp16 &= ~0xf;
        if (mask & DRM_PCIE_SPEED_80)
                tmp16 |= 3; /* gen3 */
        else if (mask & DRM_PCIE_SPEED_50)
                tmp16 |= 2; /* gen2 */
        else
                tmp16 |= 1; /* gen1 */
        pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

        /* kick off the speed change */
        speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
        speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
        WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

        /* wait for the hw to clear the initiate bit (change complete) */
        for (i = 0; i < rdev->usec_timeout; i++) {
                speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
                if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
                        break;
                udelay(1);
        }
}
9694
/**
 * cik_program_aspm - program PCIE ASPM (Active State Power Management)
 * @rdev: radeon_device pointer
 *
 * Configures the L0s/L1 link power-state inactivity timers and, when
 * the upstream bridge advertises clock power management
 * (PCI_EXP_LNKCAP_CLKPM), additionally allows PLL powerdown in L1/L23
 * and re-routes several clocks (thermal monitor, deep sleep, zclk,
 * MPLL bypass) away from the reference clock.  The disable_* locals
 * are hard-coded false, so the L0s, L1 and PLL-off-in-L1 paths are
 * always taken.  Skipped for IGPs, non-PCIE parts, or radeon.aspm=0.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
        u32 data, orig;
        /* all currently hard-coded: every feature below is enabled */
        bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
        bool disable_clkreq = false;

        /* honor the radeon.aspm module parameter */
        if (radeon_aspm == 0)
                return;

        /* XXX double check IGPs */
        if (rdev->flags & RADEON_IS_IGP)
                return;

        if (!(rdev->flags & RADEON_IS_PCIE))
                return;

        /* override the number of fast training sequences sent on L0s exit */
        orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
        data &= ~LC_XMIT_N_FTS_MASK;
        data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

        orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
        data |= LC_GO_TO_RECOVERY;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

        orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
        data |= P_IGNORE_EDB_ERR;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_P_CNTL, data);

        /* set the L0s/L1 inactivity timers according to the disable_* flags */
        orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
        data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
        data |= LC_PMI_TO_L1_DIS;
        if (!disable_l0s)
                data |= LC_L0S_INACTIVITY(7);

        if (!disable_l1) {
                data |= LC_L1_INACTIVITY(7);
                data &= ~LC_PMI_TO_L1_DIS;
                if (orig != data)
                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

                if (!disable_plloff_in_l1) {
                        bool clk_req_support;

                        /* let the PLLs power down in the off/TXS2 states on
                         * both PIF blocks, lanes 0-7 and 8-15
                         */
                        orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
                        data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

                        orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
                        data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

                        orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
                        data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

                        orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
                        data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

                        orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
                        data &= ~LC_DYN_LANES_PWR_STATE_MASK;
                        data |= LC_DYN_LANES_PWR_STATE(3);
                        if (orig != data)
                                WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

                        /* CLKREQ# support: only if the upstream bridge
                         * advertises clock PM in its link capabilities.
                         * NOTE(review): this checks PCI_EXP_LNKCAP_CLKPM on
                         * the parent bridge, not the GPU itself -- confirm
                         * that is the intended end of the link.
                         */
                        if (!disable_clkreq &&
                            !pci_is_root_bus(rdev->pdev->bus)) {
                                struct pci_dev *root = rdev->pdev->bus->self;
                                u32 lnkcap;

                                clk_req_support = false;
                                pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
                                if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
                                        clk_req_support = true;
                        } else {
                                clk_req_support = false;
                        }

                        if (clk_req_support) {
                                /* allow PLL powerdown in L1 and L2/L3 */
                                orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
                                data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
                                if (orig != data)
                                        WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

                                /* switch clock sources so the reference clock
                                 * can gate while the link is in a low-power
                                 * state
                                 */
                                orig = data = RREG32_SMC(THM_CLK_CNTL);
                                data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
                                data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
                                if (orig != data)
                                        WREG32_SMC(THM_CLK_CNTL, data);

                                orig = data = RREG32_SMC(MISC_CLK_CTRL);
                                data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
                                data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
                                if (orig != data)
                                        WREG32_SMC(MISC_CLK_CTRL, data);

                                orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
                                data &= ~BCLK_AS_XCLK;
                                if (orig != data)
                                        WREG32_SMC(CG_CLKPIN_CNTL, data);

                                orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
                                data &= ~FORCE_BIF_REFCLK_EN;
                                if (orig != data)
                                        WREG32_SMC(CG_CLKPIN_CNTL_2, data);

                                orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
                                data &= ~MPLL_CLKOUT_SEL_MASK;
                                data |= MPLL_CLKOUT_SEL(4);
                                if (orig != data)
                                        WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
                        }
                }
        } else {
                /* unreachable while disable_l1 is hard-coded false above */
                if (orig != data)
                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
        }

        orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
        data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_CNTL2, data);

        if (!disable_l0s) {
                /* if the received N_FTS field reads back all-ones and the
                 * link is reversed in both directions, clear the L0s
                 * inactivity timer (effectively disabling L0s entry)
                 */
                data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
                if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
                        data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
                        if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
                                orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
                                data &= ~LC_L0S_INACTIVITY_MASK;
                                if (orig != data)
                                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
                        }
                }
        }
}