Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi...
[cascardo/linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "clearstate_vi.h"
32
33 #include "gmc/gmc_8_2_d.h"
34 #include "gmc/gmc_8_2_sh_mask.h"
35
36 #include "oss/oss_3_0_d.h"
37 #include "oss/oss_3_0_sh_mask.h"
38
39 #include "bif/bif_5_0_d.h"
40 #include "bif/bif_5_0_sh_mask.h"
41
42 #include "gca/gfx_8_0_d.h"
43 #include "gca/gfx_8_0_enum.h"
44 #include "gca/gfx_8_0_sh_mask.h"
45 #include "gca/gfx_8_0_enum.h"
46
47 #include "dce/dce_10_0_d.h"
48 #include "dce/dce_10_0_sh_mask.h"
49
50 #include "smu/smu_7_1_3_d.h"
51
/*
 * Ring counts for the GFX v8 block, going by the macro names: one graphics
 * ring and eight compute rings. Their users lie outside this part of the file.
 */
52 #define GFX8_NUM_GFX_RINGS     1
53 #define GFX8_NUM_COMPUTE_RINGS 8
54
/*
 * Per-ASIC "golden" values for the GB_ADDR_CONFIG register.
 * The Topaz/Carrizo value equals the mmGB_ADDR_CONFIG entry in
 * iceland_golden_common_all and cz_golden_common_all, the Polaris11 value
 * equals the one in polaris11_golden_common_all, and the Tonga value equals
 * the one in tonga_golden_common_all; keep them in step when editing either.
 */
55 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
56 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
58 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
59
/*
 * Field builders for tile-mode register words.
 * Each macro shifts its argument left by the matching *__SHIFT constant of
 * GB_TILE_MODE0 (first five) or GB_MACROTILE_MODE0 (last four), so several
 * fields can be OR-ed together into one register value.
 * No field mask is applied: the caller must pass a value that already fits
 * the field, otherwise the excess bits spill into neighbouring fields.
 */
60 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
61 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
62 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
63 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
64 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
65 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
66 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
67 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
68 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
69
/*
 * Single-bit masks for the RLC_CGTT_MGCG_OVERRIDE register, occupying
 * bits 0 through 5 in the order CPF, RLC, MGCG, CGCG, CGLS, GRBM.
 * The 'L' suffix makes each constant a long.
 */
70 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
71 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
72 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
73 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
74 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
75 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
76
77 /* BPM SERDES command values: 1 = set, 0 = clear ("CLE" presumably abbreviates "clear") */
78 #define SET_BPM_SERDES_CMD    1
79 #define CLE_BPM_SERDES_CMD    0
80
81 /* BPM register addresses; enumerators count up from 0, so BPM_REG_FGCG_MAX (5) is the number of addresses, not an address itself */
82 enum {
83         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
84         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
85         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
86         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
87         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
88         BPM_REG_FGCG_MAX
89 };
90
/*
 * Going by the name, the length of the RLC "format direct" register list.
 * NOTE(review): the unit (entries vs. dwords) and the user of this constant
 * are not visible in this part of the file -- confirm before changing.
 */
91 #define RLC_FormatDirectRegListLength        14
92
/*
 * Firmware file names declared for this driver, grouped by ASIC
 * (carrizo, stoney, tonga, topaz, fiji, polaris11, polaris10).
 * Every group lists ce, pfp, me, mec and rlc images; all groups except
 * stoney and topaz additionally list a mec2 image.
 */
93 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
94 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
99
100 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
101 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
105
106 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
107 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
112
113 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
114 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
118
119 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
120 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
125
126 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
127 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
132
133 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
139
/*
 * GDS register offsets, one entry per VMID (16 entries, VMID0..VMID15).
 * Each entry lists that VMID's BASE, SIZE, GWS and OA registers, in that
 * order; the array index is the VMID number.
 * NOTE(review): the struct's field names are declared elsewhere -- the
 * positional initializers here rely on that declaration order.
 */
140 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
141 {
142         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
143         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
144         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
145         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
146         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
147         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
148         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
149         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
150         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
151         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
152         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
153         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
154         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
155         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
156         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
157         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
158 };
159
/*
 * Golden register settings for Tonga ("a11" per the table name).
 * Flat u32 array laid out three words per row: a register offset followed
 * by two 32-bit words -- presumably a mask and a value as consumed by
 * amdgpu_program_register_sequence(); confirm against that helper.
 * The array length must stay a multiple of three.
 * No user of this table is visible in this part of the file.
 */
160 static const u32 golden_settings_tonga_a11[] =
161 {
162         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
163         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
164         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
165         mmGB_GPU_ID, 0x0000000f, 0x00000000,
166         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
167         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
168         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
169         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
170         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
171         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
172         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
173         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
174         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
175         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
176         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
177 };
178
/*
 * Common golden register values for Tonga.
 * Rows are u32 triples: register offset, then two 32-bit words (presumably
 * mask and value -- confirm against amdgpu_program_register_sequence()).
 * Every row here carries 0xffffffff as its middle word.
 * The mmGB_ADDR_CONFIG value equals TONGA_GB_ADDR_CONFIG_GOLDEN.
 */
179 static const u32 tonga_golden_common_all[] =
180 {
181         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
182         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
183         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
184         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
185         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
186         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
187         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
188         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
189 };
190
/*
 * Clock-gating (MGCG/CGCG, per the table name) initial register values for
 * Tonga.
 * Rows are u32 triples: register offset, then two 32-bit words (presumably
 * mask and value -- confirm against amdgpu_program_register_sequence()).
 * Layout: the RLC override register, a block of CGTT clock-control
 * registers, then per-CU CGTS registers for CU0..CU7 (five rows per CU;
 * CU0 and CU4 use the *_TA_SQC_CTRL_REG name where the others use
 * *_TA_CTRL_REG), then four trailing control registers.
 * mmGRBM_GFX_INDEX is written again before the per-CU block; row order is
 * presumably significant, so do not sort or deduplicate.
 */
191 static const u32 tonga_mgcg_cgcg_init[] =
192 {
193         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
194         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
195         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
196         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
197         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
198         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
199         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
200         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
201         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
202         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
203         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
204         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
205         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
206         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
207         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
208         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
209         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
210         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
211         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
212         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
213         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
214         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
215         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
216         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
217         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
218         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
219         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
220         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
221         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
222         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
223         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
224         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
225         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
226         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
227         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
228         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
229         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
230         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
231         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
232         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
233         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
234         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
235         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
236         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
237         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
238         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
239         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
240         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
241         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
242         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
243         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
244         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
245         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
246         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
247         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
248         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
249         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
250         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
251         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
252         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
253         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
254         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
255         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
256         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
257         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
258         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
259         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
260         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
261         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
262         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
263         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
264         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
265         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
266         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
267         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
268 };
269
/*
 * Golden register settings for Polaris11 ("a11" per the table name).
 * Rows are u32 triples: register offset, then two 32-bit words (presumably
 * mask and value -- confirm against amdgpu_program_register_sequence()).
 * No user of this table is visible in this part of the file.
 */
270 static const u32 golden_settings_polaris11_a11[] =
271 {
272         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
273         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
274         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
275         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
276         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
277         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
278         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
279         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
280         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
281         mmSQ_CONFIG, 0x07f80000, 0x07180000,
282         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
283         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
284         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
285         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
286         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
287 };
288
/*
 * Common golden register values for Polaris11.
 * Rows are u32 triples: register offset, then two 32-bit words (presumably
 * mask and value -- confirm against amdgpu_program_register_sequence()).
 * Unlike the other *_golden_common_all tables visible here, this one has no
 * mmPA_SC_RASTER_CONFIG / mmPA_SC_RASTER_CONFIG_1 rows.
 * The mmGB_ADDR_CONFIG value equals POLARIS11_GB_ADDR_CONFIG_GOLDEN.
 */
289 static const u32 polaris11_golden_common_all[] =
290 {
291         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
292         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
293         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
294         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
295         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
296         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
297 };
298
/*
 * Golden register settings for Polaris10 ("a11" per the table name).
 * Rows are u32 triples: register offset, then two 32-bit words (presumably
 * mask and value -- confirm against amdgpu_program_register_sequence()).
 * NOTE(review): the mmCB_HW_CONTROL_2 row has a middle word of 0, unlike
 * every other row; whether that is intentional depends on how the consumer
 * treats a zero mask -- confirm before touching it.
 * No user of this table is visible in this part of the file.
 */
299 static const u32 golden_settings_polaris10_a11[] =
300 {
301         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
302         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
303         mmCB_HW_CONTROL_2, 0, 0x0f000000,
304         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
305         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
306         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
307         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
308         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
309         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
310         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
311         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
312         mmSQ_CONFIG, 0x07f80000, 0x07180000,
313         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
314         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
315         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
316         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
317 };
318
/*
 * Common golden register values for Polaris10.
 * Rows are u32 triples: register offset, then two 32-bit words (presumably
 * mask and value -- confirm against amdgpu_program_register_sequence()).
 * The rows and values match tonga_golden_common_all one for one.
 */
319 static const u32 polaris10_golden_common_all[] =
320 {
321         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
322         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
323         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
324         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
325         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
326         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
327         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
328         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
329 };
330
/*
 * Common golden register values for Fiji.
 * Rows are u32 triples: register offset, then two 32-bit words (presumably
 * mask and value -- confirm against amdgpu_program_register_sequence()).
 * mmGRBM_GFX_INDEX appears twice, the second time directly before
 * mmSPI_CONFIG_CNTL_1; row order is presumably significant, so do not
 * deduplicate.
 */
331 static const u32 fiji_golden_common_all[] =
332 {
333         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
334         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
335         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
336         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
337         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
338         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
339         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
340         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
341         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
342         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
343 };
344
/*
 * Golden register settings for Fiji ("a10" per the table name).
 * Rows are u32 triples: register offset, then two 32-bit words (presumably
 * mask and value -- confirm against amdgpu_program_register_sequence()).
 * Passed to amdgpu_program_register_sequence() in the CHIP_FIJI case of
 * gfx_v8_0_init_golden_registers(), after fiji_mgcg_cgcg_init.
 */
345 static const u32 golden_settings_fiji_a10[] =
346 {
347         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
348         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
349         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
350         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
351         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
352         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
353         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
354         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
355         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
356         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
357         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
358 };
359
/*
 * Clock-gating (MGCG/CGCG, per the table name) initial register values for
 * Fiji.
 * Rows are u32 triples: register offset, then two 32-bit words (presumably
 * mask and value -- confirm against amdgpu_program_register_sequence()).
 * Unlike the Tonga, Iceland and Carrizo tables in this file, there are no
 * per-CU mmCGTS_CUn_* rows here: the second mmGRBM_GFX_INDEX row is followed
 * directly by the four trailing control registers.
 * First table programmed in the CHIP_FIJI case of
 * gfx_v8_0_init_golden_registers().
 */
360 static const u32 fiji_mgcg_cgcg_init[] =
361 {
362         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
363         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
364         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
365         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
366         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
367         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
368         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
369         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
370         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
371         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
372         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
373         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
374         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
375         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
376         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
377         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
378         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
379         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
380         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
381         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
382         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
383         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
384         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
385         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
386         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
387         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
388         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
389         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
390         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
391         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
392         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
393         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
394         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
395         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
396         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
397 };
398
/*
 * Golden register settings for Iceland ("a11" per the table name).
 * Rows are u32 triples: register offset, then two 32-bit words (presumably
 * mask and value -- confirm against amdgpu_program_register_sequence()).
 * Second of the three tables programmed in the CHIP_TOPAZ case of
 * gfx_v8_0_init_golden_registers().
 */
399 static const u32 golden_settings_iceland_a11[] =
400 {
401         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
402         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
403         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
404         mmGB_GPU_ID, 0x0000000f, 0x00000000,
405         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
406         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
407         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
408         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
409         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
410         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
411         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
412         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
413         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
414         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
415         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
416 };
417
/*
 * Common golden register values for Iceland.
 * Rows are u32 triples: register offset, then two 32-bit words (presumably
 * mask and value -- confirm against amdgpu_program_register_sequence()).
 * Last of the three tables programmed in the CHIP_TOPAZ case of
 * gfx_v8_0_init_golden_registers().
 * The mmGB_ADDR_CONFIG value equals TOPAZ_GB_ADDR_CONFIG_GOLDEN.
 */
418 static const u32 iceland_golden_common_all[] =
419 {
420         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
421         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
422         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
423         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
424         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
425         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
426         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
427         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
428 };
429
/*
 * Clock-gating (MGCG/CGCG, per the table name) initial register values for
 * Iceland.
 * Rows are u32 triples: register offset, then two 32-bit words (presumably
 * mask and value -- confirm against amdgpu_program_register_sequence()).
 * Layout: the RLC override register, a block of CGTT clock-control
 * registers, per-CU CGTS registers for CU0..CU5 only (five rows per CU),
 * then three trailing control registers. Unlike the Tonga, Fiji and Carrizo
 * tables in this file, there is no mmCP_MEM_SLP_CNTL row at the end.
 * First of the three tables programmed in the CHIP_TOPAZ case of
 * gfx_v8_0_init_golden_registers().
 */
430 static const u32 iceland_mgcg_cgcg_init[] =
431 {
432         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
433         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
434         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
435         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
436         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
437         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
438         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
439         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
441         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
443         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
450         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
452         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
453         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
454         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
455         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
456         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
457         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
458         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
459         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
460         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
461         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
462         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
463         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
464         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
465         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
466         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
467         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
468         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
469         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
470         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
471         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
472         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
473         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
474         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
475         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
476         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
477         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
478         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
479         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
480         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
481         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
482         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
483         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
484         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
485         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
486         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
487         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
488         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
489         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
490         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
491         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
492         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
493         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
494         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
495         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
496 };
497
/*
 * Golden register settings for Carrizo ("cz", "a11" per the table name).
 * Rows are u32 triples: register offset, then two 32-bit words (presumably
 * mask and value -- confirm against amdgpu_program_register_sequence()).
 * NOTE(review): in the mmTCP_ADDR_CONFIG row the third word (0xf3) has bits
 * set outside the middle word (0x0f); whether those bits are applied depends
 * on the consumer -- confirm before changing either word.
 * No user of this table is visible in this part of the file.
 */
498 static const u32 cz_golden_settings_a11[] =
499 {
500         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
501         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
502         mmGB_GPU_ID, 0x0000000f, 0x00000000,
503         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
504         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
505         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
506         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
507         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
508         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
509         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
510 };
511
/*
 * Common golden register values for Carrizo ("cz").
 * Rows are u32 triples: register offset, then two 32-bit words (presumably
 * mask and value -- confirm against amdgpu_program_register_sequence()).
 * The mmGB_ADDR_CONFIG value equals CARRIZO_GB_ADDR_CONFIG_GOLDEN.
 */
512 static const u32 cz_golden_common_all[] =
513 {
514         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
515         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
516         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
517         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
518         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
519         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
520         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
521         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
522 };
523
/*
 * Clock-gating (MGCG/CGCG, per the table name) initial register values for
 * Carrizo ("cz").
 * Rows are u32 triples: register offset, then two 32-bit words (presumably
 * mask and value -- confirm against amdgpu_program_register_sequence()).
 * Layout: the RLC override register, a block of CGTT clock-control
 * registers, per-CU CGTS registers for CU0..CU7 (five rows per CU), then
 * four trailing control registers. The mmRLC_CGCG_CGLS_CTRL value here is
 * 0x0020003f, where the Tonga, Fiji and Iceland tables use 0x0020003c.
 * No user of this table is visible in this part of the file.
 */
524 static const u32 cz_mgcg_cgcg_init[] =
525 {
526         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
527         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
528         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
529         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
530         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
531         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
532         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
533         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
534         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
535         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
536         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
537         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
538         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
539         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
540         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
541         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
542         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
543         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
544         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
545         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
546         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
547         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
548         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
549         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
550         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
551         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
552         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
553         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
554         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
555         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
556         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
557         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
558         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
559         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
560         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
561         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
562         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
563         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
564         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
565         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
566         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
567         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
568         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
569         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
570         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
571         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
572         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
573         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
574         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
575         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
576         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
577         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
578         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
579         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
580         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
581         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
582         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
583         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
584         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
585         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
586         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
587         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
588         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
589         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
590         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
591         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
592         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
593         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
594         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
595         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
596         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
597         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
598         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
599         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
600         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
601 };
602
/*
 * Golden register settings for Stoney ("a11" per the table name).
 * Rows are u32 triples: register offset, then two 32-bit words (presumably
 * mask and value -- confirm against amdgpu_program_register_sequence()).
 * NOTE(review): in the mmTCP_ADDR_CONFIG row the third word (0xf1) has bits
 * set outside the middle word (0x0f); confirm how the consumer treats that
 * before changing either word.
 * No user of this table is visible in this part of the file.
 */
603 static const u32 stoney_golden_settings_a11[] =
604 {
605         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
606         mmGB_GPU_ID, 0x0000000f, 0x00000000,
607         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
608         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
609         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
610         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
611         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
612         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
613         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
614         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
615 };
616
/*
 * Common golden register values for Stoney.
 * Rows are u32 triples: register offset, then two 32-bit words (presumably
 * mask and value -- confirm against amdgpu_program_register_sequence()).
 * The mmGB_ADDR_CONFIG value (0x12010001) differs from every
 * *_GB_ADDR_CONFIG_GOLDEN macro defined at the top of this file.
 */
617 static const u32 stoney_golden_common_all[] =
618 {
619         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
620         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
621         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
622         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
623         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
624         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
625         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
626         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
627 };
628
/*
 * Clock-gating (MGCG/CGCG, per the table name) initial register values for
 * Stoney.
 * Rows are u32 triples: register offset, then two 32-bit words (presumably
 * mask and value -- confirm against amdgpu_program_register_sequence()).
 * Much shorter than the other *_mgcg_cgcg_init tables in this file: six
 * rows, with no CGTT clock-control block and no per-CU CGTS rows.
 * No user of this table is visible in this part of the file.
 */
629 static const u32 stoney_mgcg_cgcg_init[] =
630 {
631         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
632         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
633         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
634         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
635         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
636         mmATC_MISC_CG, 0xffffffff, 0x000c0200,
637 };
638
639 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
640 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
641 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
642 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
643 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
644 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
645
646 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
647 {
648         switch (adev->asic_type) {
649         case CHIP_TOPAZ:
650                 amdgpu_program_register_sequence(adev,
651                                                  iceland_mgcg_cgcg_init,
652                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
653                 amdgpu_program_register_sequence(adev,
654                                                  golden_settings_iceland_a11,
655                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
656                 amdgpu_program_register_sequence(adev,
657                                                  iceland_golden_common_all,
658                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
659                 break;
660         case CHIP_FIJI:
661                 amdgpu_program_register_sequence(adev,
662                                                  fiji_mgcg_cgcg_init,
663                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
664                 amdgpu_program_register_sequence(adev,
665                                                  golden_settings_fiji_a10,
666                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
667                 amdgpu_program_register_sequence(adev,
668                                                  fiji_golden_common_all,
669                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
670                 break;
671
672         case CHIP_TONGA:
673                 amdgpu_program_register_sequence(adev,
674                                                  tonga_mgcg_cgcg_init,
675                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
676                 amdgpu_program_register_sequence(adev,
677                                                  golden_settings_tonga_a11,
678                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
679                 amdgpu_program_register_sequence(adev,
680                                                  tonga_golden_common_all,
681                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
682                 break;
683         case CHIP_POLARIS11:
684                 amdgpu_program_register_sequence(adev,
685                                                  golden_settings_polaris11_a11,
686                                                  (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
687                 amdgpu_program_register_sequence(adev,
688                                                  polaris11_golden_common_all,
689                                                  (const u32)ARRAY_SIZE(polaris11_golden_common_all));
690                 break;
691         case CHIP_POLARIS10:
692                 amdgpu_program_register_sequence(adev,
693                                                  golden_settings_polaris10_a11,
694                                                  (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
695                 amdgpu_program_register_sequence(adev,
696                                                  polaris10_golden_common_all,
697                                                  (const u32)ARRAY_SIZE(polaris10_golden_common_all));
698                 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
699                 break;
700         case CHIP_CARRIZO:
701                 amdgpu_program_register_sequence(adev,
702                                                  cz_mgcg_cgcg_init,
703                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
704                 amdgpu_program_register_sequence(adev,
705                                                  cz_golden_settings_a11,
706                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
707                 amdgpu_program_register_sequence(adev,
708                                                  cz_golden_common_all,
709                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
710                 break;
711         case CHIP_STONEY:
712                 amdgpu_program_register_sequence(adev,
713                                                  stoney_mgcg_cgcg_init,
714                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
715                 amdgpu_program_register_sequence(adev,
716                                                  stoney_golden_settings_a11,
717                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
718                 amdgpu_program_register_sequence(adev,
719                                                  stoney_golden_common_all,
720                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
721                 break;
722         default:
723                 break;
724         }
725 }
726
727 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
728 {
729         int i;
730
731         adev->gfx.scratch.num_reg = 7;
732         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
733         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
734                 adev->gfx.scratch.free[i] = true;
735                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
736         }
737 }
738
739 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
740 {
741         struct amdgpu_device *adev = ring->adev;
742         uint32_t scratch;
743         uint32_t tmp = 0;
744         unsigned i;
745         int r;
746
747         r = amdgpu_gfx_scratch_get(adev, &scratch);
748         if (r) {
749                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
750                 return r;
751         }
752         WREG32(scratch, 0xCAFEDEAD);
753         r = amdgpu_ring_alloc(ring, 3);
754         if (r) {
755                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
756                           ring->idx, r);
757                 amdgpu_gfx_scratch_free(adev, scratch);
758                 return r;
759         }
760         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
761         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
762         amdgpu_ring_write(ring, 0xDEADBEEF);
763         amdgpu_ring_commit(ring);
764
765         for (i = 0; i < adev->usec_timeout; i++) {
766                 tmp = RREG32(scratch);
767                 if (tmp == 0xDEADBEEF)
768                         break;
769                 DRM_UDELAY(1);
770         }
771         if (i < adev->usec_timeout) {
772                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
773                          ring->idx, i);
774         } else {
775                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
776                           ring->idx, scratch, tmp);
777                 r = -EINVAL;
778         }
779         amdgpu_gfx_scratch_free(adev, scratch);
780         return r;
781 }
782
783 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
784 {
785         struct amdgpu_device *adev = ring->adev;
786         struct amdgpu_ib ib;
787         struct fence *f = NULL;
788         uint32_t scratch;
789         uint32_t tmp = 0;
790         unsigned i;
791         int r;
792
793         r = amdgpu_gfx_scratch_get(adev, &scratch);
794         if (r) {
795                 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
796                 return r;
797         }
798         WREG32(scratch, 0xCAFEDEAD);
799         memset(&ib, 0, sizeof(ib));
800         r = amdgpu_ib_get(adev, NULL, 256, &ib);
801         if (r) {
802                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
803                 goto err1;
804         }
805         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
806         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
807         ib.ptr[2] = 0xDEADBEEF;
808         ib.length_dw = 3;
809
810         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
811         if (r)
812                 goto err2;
813
814         r = fence_wait(f, false);
815         if (r) {
816                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
817                 goto err2;
818         }
819         for (i = 0; i < adev->usec_timeout; i++) {
820                 tmp = RREG32(scratch);
821                 if (tmp == 0xDEADBEEF)
822                         break;
823                 DRM_UDELAY(1);
824         }
825         if (i < adev->usec_timeout) {
826                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
827                          ring->idx, i);
828                 goto err2;
829         } else {
830                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
831                           scratch, tmp);
832                 r = -EINVAL;
833         }
834 err2:
835         fence_put(f);
836         amdgpu_ib_free(adev, &ib, NULL);
837         fence_put(f);
838 err1:
839         amdgpu_gfx_scratch_free(adev, scratch);
840         return r;
841 }
842
843
844 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
845         release_firmware(adev->gfx.pfp_fw);
846         adev->gfx.pfp_fw = NULL;
847         release_firmware(adev->gfx.me_fw);
848         adev->gfx.me_fw = NULL;
849         release_firmware(adev->gfx.ce_fw);
850         adev->gfx.ce_fw = NULL;
851         release_firmware(adev->gfx.rlc_fw);
852         adev->gfx.rlc_fw = NULL;
853         release_firmware(adev->gfx.mec_fw);
854         adev->gfx.mec_fw = NULL;
855         if ((adev->asic_type != CHIP_STONEY) &&
856             (adev->asic_type != CHIP_TOPAZ))
857                 release_firmware(adev->gfx.mec2_fw);
858         adev->gfx.mec2_fw = NULL;
859
860         kfree(adev->gfx.rlc.register_list_format);
861 }
862
863 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
864 {
865         const char *chip_name;
866         char fw_name[30];
867         int err;
868         struct amdgpu_firmware_info *info = NULL;
869         const struct common_firmware_header *header = NULL;
870         const struct gfx_firmware_header_v1_0 *cp_hdr;
871         const struct rlc_firmware_header_v2_0 *rlc_hdr;
872         unsigned int *tmp = NULL, i;
873
874         DRM_DEBUG("\n");
875
876         switch (adev->asic_type) {
877         case CHIP_TOPAZ:
878                 chip_name = "topaz";
879                 break;
880         case CHIP_TONGA:
881                 chip_name = "tonga";
882                 break;
883         case CHIP_CARRIZO:
884                 chip_name = "carrizo";
885                 break;
886         case CHIP_FIJI:
887                 chip_name = "fiji";
888                 break;
889         case CHIP_POLARIS11:
890                 chip_name = "polaris11";
891                 break;
892         case CHIP_POLARIS10:
893                 chip_name = "polaris10";
894                 break;
895         case CHIP_STONEY:
896                 chip_name = "stoney";
897                 break;
898         default:
899                 BUG();
900         }
901
902         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
903         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
904         if (err)
905                 goto out;
906         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
907         if (err)
908                 goto out;
909         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
910         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
911         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
912
913         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
914         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
915         if (err)
916                 goto out;
917         err = amdgpu_ucode_validate(adev->gfx.me_fw);
918         if (err)
919                 goto out;
920         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
921         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
922         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
923
924         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
925         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
926         if (err)
927                 goto out;
928         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
929         if (err)
930                 goto out;
931         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
932         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
933         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
934
935         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
936         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
937         if (err)
938                 goto out;
939         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
940         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
941         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
942         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
943
944         adev->gfx.rlc.save_and_restore_offset =
945                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
946         adev->gfx.rlc.clear_state_descriptor_offset =
947                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
948         adev->gfx.rlc.avail_scratch_ram_locations =
949                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
950         adev->gfx.rlc.reg_restore_list_size =
951                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
952         adev->gfx.rlc.reg_list_format_start =
953                         le32_to_cpu(rlc_hdr->reg_list_format_start);
954         adev->gfx.rlc.reg_list_format_separate_start =
955                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
956         adev->gfx.rlc.starting_offsets_start =
957                         le32_to_cpu(rlc_hdr->starting_offsets_start);
958         adev->gfx.rlc.reg_list_format_size_bytes =
959                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
960         adev->gfx.rlc.reg_list_size_bytes =
961                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
962
963         adev->gfx.rlc.register_list_format =
964                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
965                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
966
967         if (!adev->gfx.rlc.register_list_format) {
968                 err = -ENOMEM;
969                 goto out;
970         }
971
972         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
973                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
974         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
975                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
976
977         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
978
979         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
980                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
981         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
982                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
983
984         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
985         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
986         if (err)
987                 goto out;
988         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
989         if (err)
990                 goto out;
991         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
992         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
993         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
994
995         if ((adev->asic_type != CHIP_STONEY) &&
996             (adev->asic_type != CHIP_TOPAZ)) {
997                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
998                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
999                 if (!err) {
1000                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1001                         if (err)
1002                                 goto out;
1003                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1004                                 adev->gfx.mec2_fw->data;
1005                         adev->gfx.mec2_fw_version =
1006                                 le32_to_cpu(cp_hdr->header.ucode_version);
1007                         adev->gfx.mec2_feature_version =
1008                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1009                 } else {
1010                         err = 0;
1011                         adev->gfx.mec2_fw = NULL;
1012                 }
1013         }
1014
1015         if (adev->firmware.smu_load) {
1016                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1017                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1018                 info->fw = adev->gfx.pfp_fw;
1019                 header = (const struct common_firmware_header *)info->fw->data;
1020                 adev->firmware.fw_size +=
1021                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1022
1023                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1024                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1025                 info->fw = adev->gfx.me_fw;
1026                 header = (const struct common_firmware_header *)info->fw->data;
1027                 adev->firmware.fw_size +=
1028                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1029
1030                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1031                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1032                 info->fw = adev->gfx.ce_fw;
1033                 header = (const struct common_firmware_header *)info->fw->data;
1034                 adev->firmware.fw_size +=
1035                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1036
1037                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1038                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1039                 info->fw = adev->gfx.rlc_fw;
1040                 header = (const struct common_firmware_header *)info->fw->data;
1041                 adev->firmware.fw_size +=
1042                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1043
1044                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1045                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1046                 info->fw = adev->gfx.mec_fw;
1047                 header = (const struct common_firmware_header *)info->fw->data;
1048                 adev->firmware.fw_size +=
1049                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1050
1051                 if (adev->gfx.mec2_fw) {
1052                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1053                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1054                         info->fw = adev->gfx.mec2_fw;
1055                         header = (const struct common_firmware_header *)info->fw->data;
1056                         adev->firmware.fw_size +=
1057                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1058                 }
1059
1060         }
1061
1062 out:
1063         if (err) {
1064                 dev_err(adev->dev,
1065                         "gfx8: Failed to load firmware \"%s\"\n",
1066                         fw_name);
1067                 release_firmware(adev->gfx.pfp_fw);
1068                 adev->gfx.pfp_fw = NULL;
1069                 release_firmware(adev->gfx.me_fw);
1070                 adev->gfx.me_fw = NULL;
1071                 release_firmware(adev->gfx.ce_fw);
1072                 adev->gfx.ce_fw = NULL;
1073                 release_firmware(adev->gfx.rlc_fw);
1074                 adev->gfx.rlc_fw = NULL;
1075                 release_firmware(adev->gfx.mec_fw);
1076                 adev->gfx.mec_fw = NULL;
1077                 release_firmware(adev->gfx.mec2_fw);
1078                 adev->gfx.mec2_fw = NULL;
1079         }
1080         return err;
1081 }
1082
1083 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1084                                     volatile u32 *buffer)
1085 {
1086         u32 count = 0, i;
1087         const struct cs_section_def *sect = NULL;
1088         const struct cs_extent_def *ext = NULL;
1089
1090         if (adev->gfx.rlc.cs_data == NULL)
1091                 return;
1092         if (buffer == NULL)
1093                 return;
1094
1095         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1096         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1097
1098         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1099         buffer[count++] = cpu_to_le32(0x80000000);
1100         buffer[count++] = cpu_to_le32(0x80000000);
1101
1102         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1103                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1104                         if (sect->id == SECT_CONTEXT) {
1105                                 buffer[count++] =
1106                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1107                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1108                                                 PACKET3_SET_CONTEXT_REG_START);
1109                                 for (i = 0; i < ext->reg_count; i++)
1110                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1111                         } else {
1112                                 return;
1113                         }
1114                 }
1115         }
1116
1117         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1118         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1119                         PACKET3_SET_CONTEXT_REG_START);
1120         switch (adev->asic_type) {
1121         case CHIP_TONGA:
1122         case CHIP_POLARIS10:
1123                 buffer[count++] = cpu_to_le32(0x16000012);
1124                 buffer[count++] = cpu_to_le32(0x0000002A);
1125                 break;
1126         case CHIP_POLARIS11:
1127                 buffer[count++] = cpu_to_le32(0x16000012);
1128                 buffer[count++] = cpu_to_le32(0x00000000);
1129                 break;
1130         case CHIP_FIJI:
1131                 buffer[count++] = cpu_to_le32(0x3a00161a);
1132                 buffer[count++] = cpu_to_le32(0x0000002e);
1133                 break;
1134         case CHIP_TOPAZ:
1135         case CHIP_CARRIZO:
1136                 buffer[count++] = cpu_to_le32(0x00000002);
1137                 buffer[count++] = cpu_to_le32(0x00000000);
1138                 break;
1139         case CHIP_STONEY:
1140                 buffer[count++] = cpu_to_le32(0x00000000);
1141                 buffer[count++] = cpu_to_le32(0x00000000);
1142                 break;
1143         default:
1144                 buffer[count++] = cpu_to_le32(0x00000000);
1145                 buffer[count++] = cpu_to_le32(0x00000000);
1146                 break;
1147         }
1148
1149         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1150         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1151
1152         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1153         buffer[count++] = cpu_to_le32(0);
1154 }
1155
1156 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1157 {
1158         int r;
1159
1160         /* clear state block */
1161         if (adev->gfx.rlc.clear_state_obj) {
1162                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1163                 if (unlikely(r != 0))
1164                         dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1165                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1166                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1167
1168                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1169                 adev->gfx.rlc.clear_state_obj = NULL;
1170         }
1171 }
1172
1173 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1174 {
1175         volatile u32 *dst_ptr;
1176         u32 dws;
1177         const struct cs_section_def *cs_data;
1178         int r;
1179
1180         adev->gfx.rlc.cs_data = vi_cs_data;
1181
1182         cs_data = adev->gfx.rlc.cs_data;
1183
1184         if (cs_data) {
1185                 /* clear state block */
1186                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1187
1188                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1189                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1190                                              AMDGPU_GEM_DOMAIN_VRAM,
1191                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1192                                              NULL, NULL,
1193                                              &adev->gfx.rlc.clear_state_obj);
1194                         if (r) {
1195                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1196                                 gfx_v8_0_rlc_fini(adev);
1197                                 return r;
1198                         }
1199                 }
1200                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1201                 if (unlikely(r != 0)) {
1202                         gfx_v8_0_rlc_fini(adev);
1203                         return r;
1204                 }
1205                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1206                                   &adev->gfx.rlc.clear_state_gpu_addr);
1207                 if (r) {
1208                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1209                         dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
1210                         gfx_v8_0_rlc_fini(adev);
1211                         return r;
1212                 }
1213
1214                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1215                 if (r) {
1216                         dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
1217                         gfx_v8_0_rlc_fini(adev);
1218                         return r;
1219                 }
1220                 /* set up the cs buffer */
1221                 dst_ptr = adev->gfx.rlc.cs_ptr;
1222                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1223                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1224                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1225         }
1226
1227         return 0;
1228 }
1229
1230 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1231 {
1232         int r;
1233
1234         if (adev->gfx.mec.hpd_eop_obj) {
1235                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1236                 if (unlikely(r != 0))
1237                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1238                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1239                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1240
1241                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1242                 adev->gfx.mec.hpd_eop_obj = NULL;
1243         }
1244 }
1245
1246 #define MEC_HPD_SIZE 2048
1247
1248 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1249 {
1250         int r;
1251         u32 *hpd;
1252
1253         /*
1254          * we assign only 1 pipe because all other pipes will
1255          * be handled by KFD
1256          */
1257         adev->gfx.mec.num_mec = 1;
1258         adev->gfx.mec.num_pipe = 1;
1259         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1260
1261         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1262                 r = amdgpu_bo_create(adev,
1263                                      adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1264                                      PAGE_SIZE, true,
1265                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1266                                      &adev->gfx.mec.hpd_eop_obj);
1267                 if (r) {
1268                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1269                         return r;
1270                 }
1271         }
1272
1273         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1274         if (unlikely(r != 0)) {
1275                 gfx_v8_0_mec_fini(adev);
1276                 return r;
1277         }
1278         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1279                           &adev->gfx.mec.hpd_eop_gpu_addr);
1280         if (r) {
1281                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1282                 gfx_v8_0_mec_fini(adev);
1283                 return r;
1284         }
1285         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1286         if (r) {
1287                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1288                 gfx_v8_0_mec_fini(adev);
1289                 return r;
1290         }
1291
1292         memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1293
1294         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1295         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1296
1297         return 0;
1298 }
1299
1300 static const u32 vgpr_init_compute_shader[] =
1301 {
1302         0x7e000209, 0x7e020208,
1303         0x7e040207, 0x7e060206,
1304         0x7e080205, 0x7e0a0204,
1305         0x7e0c0203, 0x7e0e0202,
1306         0x7e100201, 0x7e120200,
1307         0x7e140209, 0x7e160208,
1308         0x7e180207, 0x7e1a0206,
1309         0x7e1c0205, 0x7e1e0204,
1310         0x7e200203, 0x7e220202,
1311         0x7e240201, 0x7e260200,
1312         0x7e280209, 0x7e2a0208,
1313         0x7e2c0207, 0x7e2e0206,
1314         0x7e300205, 0x7e320204,
1315         0x7e340203, 0x7e360202,
1316         0x7e380201, 0x7e3a0200,
1317         0x7e3c0209, 0x7e3e0208,
1318         0x7e400207, 0x7e420206,
1319         0x7e440205, 0x7e460204,
1320         0x7e480203, 0x7e4a0202,
1321         0x7e4c0201, 0x7e4e0200,
1322         0x7e500209, 0x7e520208,
1323         0x7e540207, 0x7e560206,
1324         0x7e580205, 0x7e5a0204,
1325         0x7e5c0203, 0x7e5e0202,
1326         0x7e600201, 0x7e620200,
1327         0x7e640209, 0x7e660208,
1328         0x7e680207, 0x7e6a0206,
1329         0x7e6c0205, 0x7e6e0204,
1330         0x7e700203, 0x7e720202,
1331         0x7e740201, 0x7e760200,
1332         0x7e780209, 0x7e7a0208,
1333         0x7e7c0207, 0x7e7e0206,
1334         0xbf8a0000, 0xbf810000,
1335 };
1336
1337 static const u32 sgpr_init_compute_shader[] =
1338 {
1339         0xbe8a0100, 0xbe8c0102,
1340         0xbe8e0104, 0xbe900106,
1341         0xbe920108, 0xbe940100,
1342         0xbe960102, 0xbe980104,
1343         0xbe9a0106, 0xbe9c0108,
1344         0xbe9e0100, 0xbea00102,
1345         0xbea20104, 0xbea40106,
1346         0xbea60108, 0xbea80100,
1347         0xbeaa0102, 0xbeac0104,
1348         0xbeae0106, 0xbeb00108,
1349         0xbeb20100, 0xbeb40102,
1350         0xbeb60104, 0xbeb80106,
1351         0xbeba0108, 0xbebc0100,
1352         0xbebe0102, 0xbec00104,
1353         0xbec20106, 0xbec40108,
1354         0xbec60100, 0xbec80102,
1355         0xbee60004, 0xbee70005,
1356         0xbeea0006, 0xbeeb0007,
1357         0xbee80008, 0xbee90009,
1358         0xbefc0000, 0xbf8a0000,
1359         0xbf810000, 0x00000000,
1360 };
1361
1362 static const u32 vgpr_init_regs[] =
1363 {
1364         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1365         mmCOMPUTE_RESOURCE_LIMITS, 0,
1366         mmCOMPUTE_NUM_THREAD_X, 256*4,
1367         mmCOMPUTE_NUM_THREAD_Y, 1,
1368         mmCOMPUTE_NUM_THREAD_Z, 1,
1369         mmCOMPUTE_PGM_RSRC2, 20,
1370         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1371         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1372         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1373         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1374         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1375         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1376         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1377         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1378         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1379         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1380 };
1381
1382 static const u32 sgpr1_init_regs[] =
1383 {
1384         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1385         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1386         mmCOMPUTE_NUM_THREAD_X, 256*5,
1387         mmCOMPUTE_NUM_THREAD_Y, 1,
1388         mmCOMPUTE_NUM_THREAD_Z, 1,
1389         mmCOMPUTE_PGM_RSRC2, 20,
1390         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1391         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1392         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1393         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1394         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1395         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1396         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1397         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1398         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1399         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1400 };
1401
1402 static const u32 sgpr2_init_regs[] =
1403 {
1404         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1405         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1406         mmCOMPUTE_NUM_THREAD_X, 256*5,
1407         mmCOMPUTE_NUM_THREAD_Y, 1,
1408         mmCOMPUTE_NUM_THREAD_Z, 1,
1409         mmCOMPUTE_PGM_RSRC2, 20,
1410         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1411         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1412         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1413         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1414         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1415         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1416         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1417         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1418         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1419         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1420 };
1421
1422 static const u32 sec_ded_counter_registers[] =
1423 {
1424         mmCPC_EDC_ATC_CNT,
1425         mmCPC_EDC_SCRATCH_CNT,
1426         mmCPC_EDC_UCODE_CNT,
1427         mmCPF_EDC_ATC_CNT,
1428         mmCPF_EDC_ROQ_CNT,
1429         mmCPF_EDC_TAG_CNT,
1430         mmCPG_EDC_ATC_CNT,
1431         mmCPG_EDC_DMA_CNT,
1432         mmCPG_EDC_TAG_CNT,
1433         mmDC_EDC_CSINVOC_CNT,
1434         mmDC_EDC_RESTORE_CNT,
1435         mmDC_EDC_STATE_CNT,
1436         mmGDS_EDC_CNT,
1437         mmGDS_EDC_GRBM_CNT,
1438         mmGDS_EDC_OA_DED,
1439         mmSPI_EDC_CNT,
1440         mmSQC_ATC_EDC_GATCL1_CNT,
1441         mmSQC_EDC_CNT,
1442         mmSQ_EDC_DED_CNT,
1443         mmSQ_EDC_INFO,
1444         mmSQ_EDC_SEC_CNT,
1445         mmTCC_EDC_CNT,
1446         mmTCP_ATC_EDC_GATCL1_CNT,
1447         mmTCP_EDC_CNT,
1448         mmTD_EDC_CNT
1449 };
1450
1451 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1452 {
1453         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1454         struct amdgpu_ib ib;
1455         struct fence *f = NULL;
1456         int r, i;
1457         u32 tmp;
1458         unsigned total_size, vgpr_offset, sgpr_offset;
1459         u64 gpu_addr;
1460
1461         /* only supported on CZ */
1462         if (adev->asic_type != CHIP_CARRIZO)
1463                 return 0;
1464
1465         /* bail if the compute ring is not ready */
1466         if (!ring->ready)
1467                 return 0;
1468
1469         tmp = RREG32(mmGB_EDC_MODE);
1470         WREG32(mmGB_EDC_MODE, 0);
1471
1472         total_size =
1473                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1474         total_size +=
1475                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1476         total_size +=
1477                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1478         total_size = ALIGN(total_size, 256);
1479         vgpr_offset = total_size;
1480         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1481         sgpr_offset = total_size;
1482         total_size += sizeof(sgpr_init_compute_shader);
1483
1484         /* allocate an indirect buffer to put the commands in */
1485         memset(&ib, 0, sizeof(ib));
1486         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1487         if (r) {
1488                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1489                 return r;
1490         }
1491
1492         /* load the compute shaders */
1493         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1494                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1495
1496         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1497                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1498
1499         /* init the ib length to 0 */
1500         ib.length_dw = 0;
1501
1502         /* VGPR */
1503         /* write the register state for the compute dispatch */
1504         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1505                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1506                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1507                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1508         }
1509         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1510         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1511         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1512         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1513         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1514         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1515
1516         /* write dispatch packet */
1517         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1518         ib.ptr[ib.length_dw++] = 8; /* x */
1519         ib.ptr[ib.length_dw++] = 1; /* y */
1520         ib.ptr[ib.length_dw++] = 1; /* z */
1521         ib.ptr[ib.length_dw++] =
1522                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1523
1524         /* write CS partial flush packet */
1525         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1526         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1527
1528         /* SGPR1 */
1529         /* write the register state for the compute dispatch */
1530         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1531                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1532                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1533                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1534         }
1535         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1536         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1537         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1538         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1539         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1540         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1541
1542         /* write dispatch packet */
1543         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1544         ib.ptr[ib.length_dw++] = 8; /* x */
1545         ib.ptr[ib.length_dw++] = 1; /* y */
1546         ib.ptr[ib.length_dw++] = 1; /* z */
1547         ib.ptr[ib.length_dw++] =
1548                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1549
1550         /* write CS partial flush packet */
1551         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1552         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1553
1554         /* SGPR2 */
1555         /* write the register state for the compute dispatch */
1556         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1557                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1558                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1559                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1560         }
1561         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1562         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1563         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1564         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1565         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1566         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1567
1568         /* write dispatch packet */
1569         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1570         ib.ptr[ib.length_dw++] = 8; /* x */
1571         ib.ptr[ib.length_dw++] = 1; /* y */
1572         ib.ptr[ib.length_dw++] = 1; /* z */
1573         ib.ptr[ib.length_dw++] =
1574                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1575
1576         /* write CS partial flush packet */
1577         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1578         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1579
1580         /* shedule the ib on the ring */
1581         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1582         if (r) {
1583                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1584                 goto fail;
1585         }
1586
1587         /* wait for the GPU to finish processing the IB */
1588         r = fence_wait(f, false);
1589         if (r) {
1590                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1591                 goto fail;
1592         }
1593
1594         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1595         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1596         WREG32(mmGB_EDC_MODE, tmp);
1597
1598         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1599         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1600         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1601
1602
1603         /* read back registers to clear the counters */
1604         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1605                 RREG32(sec_ded_counter_registers[i]);
1606
1607 fail:
1608         fence_put(f);
1609         amdgpu_ib_free(adev, &ib, NULL);
1610         fence_put(f);
1611
1612         return r;
1613 }
1614
1615 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1616 {
1617         u32 gb_addr_config;
1618         u32 mc_shared_chmap, mc_arb_ramcfg;
1619         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1620         u32 tmp;
1621         int ret;
1622
1623         switch (adev->asic_type) {
1624         case CHIP_TOPAZ:
1625                 adev->gfx.config.max_shader_engines = 1;
1626                 adev->gfx.config.max_tile_pipes = 2;
1627                 adev->gfx.config.max_cu_per_sh = 6;
1628                 adev->gfx.config.max_sh_per_se = 1;
1629                 adev->gfx.config.max_backends_per_se = 2;
1630                 adev->gfx.config.max_texture_channel_caches = 2;
1631                 adev->gfx.config.max_gprs = 256;
1632                 adev->gfx.config.max_gs_threads = 32;
1633                 adev->gfx.config.max_hw_contexts = 8;
1634
1635                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1636                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1637                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1638                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1639                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1640                 break;
1641         case CHIP_FIJI:
1642                 adev->gfx.config.max_shader_engines = 4;
1643                 adev->gfx.config.max_tile_pipes = 16;
1644                 adev->gfx.config.max_cu_per_sh = 16;
1645                 adev->gfx.config.max_sh_per_se = 1;
1646                 adev->gfx.config.max_backends_per_se = 4;
1647                 adev->gfx.config.max_texture_channel_caches = 16;
1648                 adev->gfx.config.max_gprs = 256;
1649                 adev->gfx.config.max_gs_threads = 32;
1650                 adev->gfx.config.max_hw_contexts = 8;
1651
1652                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1653                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1654                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1655                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1656                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1657                 break;
1658         case CHIP_POLARIS11:
1659                 ret = amdgpu_atombios_get_gfx_info(adev);
1660                 if (ret)
1661                         return ret;
1662                 adev->gfx.config.max_gprs = 256;
1663                 adev->gfx.config.max_gs_threads = 32;
1664                 adev->gfx.config.max_hw_contexts = 8;
1665
1666                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1667                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1668                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1669                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1670                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1671                 break;
1672         case CHIP_POLARIS10:
1673                 ret = amdgpu_atombios_get_gfx_info(adev);
1674                 if (ret)
1675                         return ret;
1676                 adev->gfx.config.max_gprs = 256;
1677                 adev->gfx.config.max_gs_threads = 32;
1678                 adev->gfx.config.max_hw_contexts = 8;
1679
1680                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1681                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1682                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1683                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1684                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1685                 break;
1686         case CHIP_TONGA:
1687                 adev->gfx.config.max_shader_engines = 4;
1688                 adev->gfx.config.max_tile_pipes = 8;
1689                 adev->gfx.config.max_cu_per_sh = 8;
1690                 adev->gfx.config.max_sh_per_se = 1;
1691                 adev->gfx.config.max_backends_per_se = 2;
1692                 adev->gfx.config.max_texture_channel_caches = 8;
1693                 adev->gfx.config.max_gprs = 256;
1694                 adev->gfx.config.max_gs_threads = 32;
1695                 adev->gfx.config.max_hw_contexts = 8;
1696
1697                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1698                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1699                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1700                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1701                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1702                 break;
1703         case CHIP_CARRIZO:
1704                 adev->gfx.config.max_shader_engines = 1;
1705                 adev->gfx.config.max_tile_pipes = 2;
1706                 adev->gfx.config.max_sh_per_se = 1;
1707                 adev->gfx.config.max_backends_per_se = 2;
1708
1709                 switch (adev->pdev->revision) {
1710                 case 0xc4:
1711                 case 0x84:
1712                 case 0xc8:
1713                 case 0xcc:
1714                 case 0xe1:
1715                 case 0xe3:
1716                         /* B10 */
1717                         adev->gfx.config.max_cu_per_sh = 8;
1718                         break;
1719                 case 0xc5:
1720                 case 0x81:
1721                 case 0x85:
1722                 case 0xc9:
1723                 case 0xcd:
1724                 case 0xe2:
1725                 case 0xe4:
1726                         /* B8 */
1727                         adev->gfx.config.max_cu_per_sh = 6;
1728                         break;
1729                 case 0xc6:
1730                 case 0xca:
1731                 case 0xce:
1732                 case 0x88:
1733                         /* B6 */
1734                         adev->gfx.config.max_cu_per_sh = 6;
1735                         break;
1736                 case 0xc7:
1737                 case 0x87:
1738                 case 0xcb:
1739                 case 0xe5:
1740                 case 0x89:
1741                 default:
1742                         /* B4 */
1743                         adev->gfx.config.max_cu_per_sh = 4;
1744                         break;
1745                 }
1746
1747                 adev->gfx.config.max_texture_channel_caches = 2;
1748                 adev->gfx.config.max_gprs = 256;
1749                 adev->gfx.config.max_gs_threads = 32;
1750                 adev->gfx.config.max_hw_contexts = 8;
1751
1752                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1753                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1754                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1755                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1756                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1757                 break;
1758         case CHIP_STONEY:
1759                 adev->gfx.config.max_shader_engines = 1;
1760                 adev->gfx.config.max_tile_pipes = 2;
1761                 adev->gfx.config.max_sh_per_se = 1;
1762                 adev->gfx.config.max_backends_per_se = 1;
1763
1764                 switch (adev->pdev->revision) {
1765                 case 0xc0:
1766                 case 0xc1:
1767                 case 0xc2:
1768                 case 0xc4:
1769                 case 0xc8:
1770                 case 0xc9:
1771                         adev->gfx.config.max_cu_per_sh = 3;
1772                         break;
1773                 case 0xd0:
1774                 case 0xd1:
1775                 case 0xd2:
1776                 default:
1777                         adev->gfx.config.max_cu_per_sh = 2;
1778                         break;
1779                 }
1780
1781                 adev->gfx.config.max_texture_channel_caches = 2;
1782                 adev->gfx.config.max_gprs = 256;
1783                 adev->gfx.config.max_gs_threads = 16;
1784                 adev->gfx.config.max_hw_contexts = 8;
1785
1786                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1787                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1788                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1789                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1790                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1791                 break;
1792         default:
1793                 adev->gfx.config.max_shader_engines = 2;
1794                 adev->gfx.config.max_tile_pipes = 4;
1795                 adev->gfx.config.max_cu_per_sh = 2;
1796                 adev->gfx.config.max_sh_per_se = 1;
1797                 adev->gfx.config.max_backends_per_se = 2;
1798                 adev->gfx.config.max_texture_channel_caches = 4;
1799                 adev->gfx.config.max_gprs = 256;
1800                 adev->gfx.config.max_gs_threads = 32;
1801                 adev->gfx.config.max_hw_contexts = 8;
1802
1803                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1804                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1805                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1806                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1807                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1808                 break;
1809         }
1810
1811         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1812         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1813         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1814
1815         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1816         adev->gfx.config.mem_max_burst_length_bytes = 256;
1817         if (adev->flags & AMD_IS_APU) {
1818                 /* Get memory bank mapping mode. */
1819                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1820                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1821                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1822
1823                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1824                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1825                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1826
1827                 /* Validate settings in case only one DIMM installed. */
1828                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1829                         dimm00_addr_map = 0;
1830                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1831                         dimm01_addr_map = 0;
1832                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1833                         dimm10_addr_map = 0;
1834                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1835                         dimm11_addr_map = 0;
1836
1837                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1838                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1839                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1840                         adev->gfx.config.mem_row_size_in_kb = 2;
1841                 else
1842                         adev->gfx.config.mem_row_size_in_kb = 1;
1843         } else {
1844                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1845                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1846                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1847                         adev->gfx.config.mem_row_size_in_kb = 4;
1848         }
1849
1850         adev->gfx.config.shader_engine_tile_size = 32;
1851         adev->gfx.config.num_gpus = 1;
1852         adev->gfx.config.multi_gpu_tile_size = 64;
1853
1854         /* fix up row size */
1855         switch (adev->gfx.config.mem_row_size_in_kb) {
1856         case 1:
1857         default:
1858                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1859                 break;
1860         case 2:
1861                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1862                 break;
1863         case 4:
1864                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1865                 break;
1866         }
1867         adev->gfx.config.gb_addr_config = gb_addr_config;
1868
1869         return 0;
1870 }
1871
1872 static int gfx_v8_0_sw_init(void *handle)
1873 {
1874         int i, r;
1875         struct amdgpu_ring *ring;
1876         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1877
1878         /* EOP Event */
1879         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1880         if (r)
1881                 return r;
1882
1883         /* Privileged reg */
1884         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1885         if (r)
1886                 return r;
1887
1888         /* Privileged inst */
1889         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1890         if (r)
1891                 return r;
1892
1893         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1894
1895         gfx_v8_0_scratch_init(adev);
1896
1897         r = gfx_v8_0_init_microcode(adev);
1898         if (r) {
1899                 DRM_ERROR("Failed to load gfx firmware!\n");
1900                 return r;
1901         }
1902
1903         r = gfx_v8_0_rlc_init(adev);
1904         if (r) {
1905                 DRM_ERROR("Failed to init rlc BOs!\n");
1906                 return r;
1907         }
1908
1909         r = gfx_v8_0_mec_init(adev);
1910         if (r) {
1911                 DRM_ERROR("Failed to init MEC BOs!\n");
1912                 return r;
1913         }
1914
1915         /* set up the gfx ring */
1916         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1917                 ring = &adev->gfx.gfx_ring[i];
1918                 ring->ring_obj = NULL;
1919                 sprintf(ring->name, "gfx");
1920                 /* no gfx doorbells on iceland */
1921                 if (adev->asic_type != CHIP_TOPAZ) {
1922                         ring->use_doorbell = true;
1923                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1924                 }
1925
1926                 r = amdgpu_ring_init(adev, ring, 1024,
1927                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1928                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1929                                      AMDGPU_RING_TYPE_GFX);
1930                 if (r)
1931                         return r;
1932         }
1933
1934         /* set up the compute queues */
1935         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1936                 unsigned irq_type;
1937
1938                 /* max 32 queues per MEC */
1939                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1940                         DRM_ERROR("Too many (%d) compute rings!\n", i);
1941                         break;
1942                 }
1943                 ring = &adev->gfx.compute_ring[i];
1944                 ring->ring_obj = NULL;
1945                 ring->use_doorbell = true;
1946                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1947                 ring->me = 1; /* first MEC */
1948                 ring->pipe = i / 8;
1949                 ring->queue = i % 8;
1950                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1951                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1952                 /* type-2 packets are deprecated on MEC, use type-3 instead */
1953                 r = amdgpu_ring_init(adev, ring, 1024,
1954                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1955                                      &adev->gfx.eop_irq, irq_type,
1956                                      AMDGPU_RING_TYPE_COMPUTE);
1957                 if (r)
1958                         return r;
1959         }
1960
1961         /* reserve GDS, GWS and OA resource for gfx */
1962         r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1963                         PAGE_SIZE, true,
1964                         AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1965                         NULL, &adev->gds.gds_gfx_bo);
1966         if (r)
1967                 return r;
1968
1969         r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1970                 PAGE_SIZE, true,
1971                 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1972                 NULL, &adev->gds.gws_gfx_bo);
1973         if (r)
1974                 return r;
1975
1976         r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1977                         PAGE_SIZE, true,
1978                         AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1979                         NULL, &adev->gds.oa_gfx_bo);
1980         if (r)
1981                 return r;
1982
1983         adev->gfx.ce_ram_size = 0x8000;
1984
1985         r = gfx_v8_0_gpu_early_init(adev);
1986         if (r)
1987                 return r;
1988
1989         return 0;
1990 }
1991
1992 static int gfx_v8_0_sw_fini(void *handle)
1993 {
1994         int i;
1995         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1996
1997         amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1998         amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1999         amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
2000
2001         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2002                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2003         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2004                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2005
2006         gfx_v8_0_mec_fini(adev);
2007
2008         gfx_v8_0_rlc_fini(adev);
2009
2010         gfx_v8_0_free_microcode(adev);
2011
2012         return 0;
2013 }
2014
2015 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2016 {
2017         uint32_t *modearray, *mod2array;
2018         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2019         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2020         u32 reg_offset;
2021
2022         modearray = adev->gfx.config.tile_mode_array;
2023         mod2array = adev->gfx.config.macrotile_mode_array;
2024
2025         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2026                 modearray[reg_offset] = 0;
2027
2028         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2029                 mod2array[reg_offset] = 0;
2030
2031         switch (adev->asic_type) {
2032         case CHIP_TOPAZ:
2033                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2034                                 PIPE_CONFIG(ADDR_SURF_P2) |
2035                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2036                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2037                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2038                                 PIPE_CONFIG(ADDR_SURF_P2) |
2039                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2040                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2041                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2042                                 PIPE_CONFIG(ADDR_SURF_P2) |
2043                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2044                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2045                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2046                                 PIPE_CONFIG(ADDR_SURF_P2) |
2047                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2048                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2049                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2050                                 PIPE_CONFIG(ADDR_SURF_P2) |
2051                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2052                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2053                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2054                                 PIPE_CONFIG(ADDR_SURF_P2) |
2055                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2056                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2057                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2058                                 PIPE_CONFIG(ADDR_SURF_P2) |
2059                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2060                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2061                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2062                                 PIPE_CONFIG(ADDR_SURF_P2));
2063                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2064                                 PIPE_CONFIG(ADDR_SURF_P2) |
2065                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2066                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2067                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2068                                  PIPE_CONFIG(ADDR_SURF_P2) |
2069                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2070                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2071                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2072                                  PIPE_CONFIG(ADDR_SURF_P2) |
2073                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2074                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2075                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2076                                  PIPE_CONFIG(ADDR_SURF_P2) |
2077                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2079                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2080                                  PIPE_CONFIG(ADDR_SURF_P2) |
2081                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2082                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2083                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2084                                  PIPE_CONFIG(ADDR_SURF_P2) |
2085                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2086                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2087                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2088                                  PIPE_CONFIG(ADDR_SURF_P2) |
2089                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2091                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2092                                  PIPE_CONFIG(ADDR_SURF_P2) |
2093                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2094                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2095                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2096                                  PIPE_CONFIG(ADDR_SURF_P2) |
2097                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2098                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2099                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2100                                  PIPE_CONFIG(ADDR_SURF_P2) |
2101                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2102                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2103                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2104                                  PIPE_CONFIG(ADDR_SURF_P2) |
2105                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2106                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2107                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2108                                  PIPE_CONFIG(ADDR_SURF_P2) |
2109                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2110                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2111                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2112                                  PIPE_CONFIG(ADDR_SURF_P2) |
2113                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2114                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2115                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2116                                  PIPE_CONFIG(ADDR_SURF_P2) |
2117                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2118                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2119                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2120                                  PIPE_CONFIG(ADDR_SURF_P2) |
2121                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2122                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2123                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2124                                  PIPE_CONFIG(ADDR_SURF_P2) |
2125                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2126                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2127                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2128                                  PIPE_CONFIG(ADDR_SURF_P2) |
2129                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2130                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2131                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2132                                  PIPE_CONFIG(ADDR_SURF_P2) |
2133                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2134                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2135
2136                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2137                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2138                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2139                                 NUM_BANKS(ADDR_SURF_8_BANK));
2140                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2141                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2142                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2143                                 NUM_BANKS(ADDR_SURF_8_BANK));
2144                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2145                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2146                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2147                                 NUM_BANKS(ADDR_SURF_8_BANK));
2148                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2149                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2150                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2151                                 NUM_BANKS(ADDR_SURF_8_BANK));
2152                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2153                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2154                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2155                                 NUM_BANKS(ADDR_SURF_8_BANK));
2156                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2157                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2158                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2159                                 NUM_BANKS(ADDR_SURF_8_BANK));
2160                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2161                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2162                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2163                                 NUM_BANKS(ADDR_SURF_8_BANK));
2164                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2165                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2166                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2167                                 NUM_BANKS(ADDR_SURF_16_BANK));
2168                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2169                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2170                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2171                                 NUM_BANKS(ADDR_SURF_16_BANK));
2172                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2173                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2174                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2175                                  NUM_BANKS(ADDR_SURF_16_BANK));
2176                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2177                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2178                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2179                                  NUM_BANKS(ADDR_SURF_16_BANK));
2180                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2181                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2182                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2183                                  NUM_BANKS(ADDR_SURF_16_BANK));
2184                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2185                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2186                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2187                                  NUM_BANKS(ADDR_SURF_16_BANK));
2188                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2189                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2190                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2191                                  NUM_BANKS(ADDR_SURF_8_BANK));
2192
2193                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2194                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2195                             reg_offset != 23)
2196                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2197
2198                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2199                         if (reg_offset != 7)
2200                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2201
2202                 break;
2203         case CHIP_FIJI:
2204                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2205                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2206                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2207                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2208                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2209                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2210                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2211                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2212                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2213                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2214                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2215                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2216                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2217                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2218                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2219                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2220                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2221                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2222                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2223                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2224                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2225                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2226                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2227                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2228                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2229                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2230                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2231                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2232                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2233                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2234                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2235                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2237                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2238                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2239                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2240                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2241                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2242                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2244                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2245                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2246                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2247                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2248                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2249                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2250                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2251                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2252                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2253                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2254                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2255                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2256                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2257                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2258                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2259                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2260                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2261                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2262                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2263                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2264                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2265                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2266                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2267                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2268                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2269                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2270                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2271                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2272                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2273                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2274                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2275                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2276                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2278                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2279                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2280                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2281                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2282                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2283                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2284                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2285                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2286                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2287                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2289                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2290                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2291                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2293                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2295                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2296                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2297                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2299                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2301                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2302                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2303                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2305                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2307                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2309                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2311                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2313                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2314                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2317                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2318                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2319                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2321                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2322                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2324                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2326
2327                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2329                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2330                                 NUM_BANKS(ADDR_SURF_8_BANK));
2331                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2333                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2334                                 NUM_BANKS(ADDR_SURF_8_BANK));
2335                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2337                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2338                                 NUM_BANKS(ADDR_SURF_8_BANK));
2339                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2341                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2342                                 NUM_BANKS(ADDR_SURF_8_BANK));
2343                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2345                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2346                                 NUM_BANKS(ADDR_SURF_8_BANK));
2347                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2349                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2350                                 NUM_BANKS(ADDR_SURF_8_BANK));
2351                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2353                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2354                                 NUM_BANKS(ADDR_SURF_8_BANK));
2355                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2357                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358                                 NUM_BANKS(ADDR_SURF_8_BANK));
2359                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2360                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2361                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2362                                 NUM_BANKS(ADDR_SURF_8_BANK));
2363                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2364                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2365                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2366                                  NUM_BANKS(ADDR_SURF_8_BANK));
2367                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2369                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2370                                  NUM_BANKS(ADDR_SURF_8_BANK));
2371                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2374                                  NUM_BANKS(ADDR_SURF_8_BANK));
2375                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2377                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2378                                  NUM_BANKS(ADDR_SURF_8_BANK));
2379                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2382                                  NUM_BANKS(ADDR_SURF_4_BANK));
2383
2384                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2385                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2386
2387                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2388                         if (reg_offset != 7)
2389                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2390
2391                 break;
2392         case CHIP_TONGA:
2393                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2395                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2396                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2397                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2399                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2400                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2401                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2403                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2404                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2405                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2407                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2408                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2409                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2410                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2411                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2412                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2413                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2414                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2415                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2416                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2417                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2418                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2419                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2420                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2421                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2423                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2424                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2425                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2426                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2427                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2428                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2429                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2430                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2432                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2433                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2434                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2436                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2437                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2438                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2439                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2440                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2442                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2443                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2444                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2445                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2446                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2447                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2448                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2449                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2450                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2451                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2452                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2453                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2454                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2456                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2457                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2458                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2459                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2460                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2461                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2462                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2463                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2464                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2465                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2466                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2467                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2468                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2469                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2470                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2471                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2472                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2473                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2474                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2475                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2476                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2477                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2478                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2479                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2480                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2482                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2483                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2484                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2485                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2486                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2487                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2488                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2490                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2491                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2492                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2494                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2495                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2496                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2498                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2499                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2500                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2502                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2503                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2506                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2507                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2508                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2510                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2511                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2512                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2513                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2514                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2515
2516                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2518                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2519                                 NUM_BANKS(ADDR_SURF_16_BANK));
2520                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2523                                 NUM_BANKS(ADDR_SURF_16_BANK));
2524                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2525                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2526                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2527                                 NUM_BANKS(ADDR_SURF_16_BANK));
2528                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2530                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2531                                 NUM_BANKS(ADDR_SURF_16_BANK));
2532                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2534                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2535                                 NUM_BANKS(ADDR_SURF_16_BANK));
2536                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2538                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2539                                 NUM_BANKS(ADDR_SURF_16_BANK));
2540                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2541                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2542                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2543                                 NUM_BANKS(ADDR_SURF_16_BANK));
2544                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2546                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2547                                 NUM_BANKS(ADDR_SURF_16_BANK));
2548                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2550                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2551                                 NUM_BANKS(ADDR_SURF_16_BANK));
2552                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2553                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2554                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2555                                  NUM_BANKS(ADDR_SURF_16_BANK));
2556                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2558                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2559                                  NUM_BANKS(ADDR_SURF_16_BANK));
2560                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2562                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2563                                  NUM_BANKS(ADDR_SURF_8_BANK));
2564                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2565                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2566                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2567                                  NUM_BANKS(ADDR_SURF_4_BANK));
2568                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2570                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2571                                  NUM_BANKS(ADDR_SURF_4_BANK));
2572
2573                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2574                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2575
2576                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2577                         if (reg_offset != 7)
2578                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2579
2580                 break;
2581         case CHIP_POLARIS11:
2582                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2584                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2585                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2586                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2587                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2588                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2589                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2590                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2592                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2593                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2594                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2596                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2597                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2598                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2599                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2600                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2601                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2602                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2603                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2604                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2605                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2606                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2607                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2608                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2609                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2610                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2611                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2612                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2613                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2614                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2615                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2616                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2617                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2618                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2619                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2621                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2622                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2623                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2624                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2626                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2627                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2628                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2629                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2630                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2631                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2632                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2633                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2634                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2635                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2636                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2637                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2638                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2639                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2640                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2641                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2642                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2643                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2645                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2646                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2647                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2648                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2649                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2650                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2651                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2652                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2653                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2654                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2655                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2656                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2657                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2658                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2659                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2660                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2661                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2662                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2663                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2664                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2665                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2666                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2667                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2668                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2669                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2670                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2671                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2672                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2673                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2674                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2675                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2676                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2677                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2678                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2679                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2680                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2681                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2682                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2683                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2684                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2685                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2686                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2687                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2688                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2689                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2690                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2691                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2693                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2695                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2696                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2697                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2699                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2700                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2701                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2703                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2704
2705                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2706                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2707                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2708                                 NUM_BANKS(ADDR_SURF_16_BANK));
2709
2710                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2712                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2713                                 NUM_BANKS(ADDR_SURF_16_BANK));
2714
2715                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2716                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2717                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2718                                 NUM_BANKS(ADDR_SURF_16_BANK));
2719
2720                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2722                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2723                                 NUM_BANKS(ADDR_SURF_16_BANK));
2724
2725                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2727                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2728                                 NUM_BANKS(ADDR_SURF_16_BANK));
2729
2730                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2732                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2733                                 NUM_BANKS(ADDR_SURF_16_BANK));
2734
2735                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2737                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2738                                 NUM_BANKS(ADDR_SURF_16_BANK));
2739
2740                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2741                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2742                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2743                                 NUM_BANKS(ADDR_SURF_16_BANK));
2744
2745                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2746                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2747                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2748                                 NUM_BANKS(ADDR_SURF_16_BANK));
2749
2750                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2752                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2753                                 NUM_BANKS(ADDR_SURF_16_BANK));
2754
2755                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2757                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2758                                 NUM_BANKS(ADDR_SURF_16_BANK));
2759
2760                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2761                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2762                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2763                                 NUM_BANKS(ADDR_SURF_16_BANK));
2764
2765                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2766                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2767                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2768                                 NUM_BANKS(ADDR_SURF_8_BANK));
2769
2770                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2772                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2773                                 NUM_BANKS(ADDR_SURF_4_BANK));
2774
2775                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2776                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2777
2778                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2779                         if (reg_offset != 7)
2780                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2781
2782                 break;
2783         case CHIP_POLARIS10:
2784                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2786                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2787                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2788                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2790                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2791                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2792                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2793                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2794                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2795                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2796                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2798                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2799                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2800                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2801                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2802                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2803                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2804                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2805                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2806                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2807                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2808                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2809                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2810                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2811                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2812                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2813                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2814                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2815                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2816                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2817                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2818                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2819                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2820                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2821                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2823                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2824                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2825                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2826                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2827                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2828                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2829                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2830                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2831                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2832                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2833                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2834                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2835                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2836                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2837                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2838                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2839                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2840                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2841                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2842                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2843                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2844                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2845                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2846                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2847                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2848                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2849                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2850                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2851                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2852                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2853                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2854                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2855                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2856                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2857                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2858                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2859                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2860                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2861                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2862                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2863                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2864                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2865                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2866                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2867                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2868                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2869                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2870                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2871                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2872                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2873                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2874                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2875                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2876                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2877                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2878                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2879                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2880                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2881                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2882                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2883                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2884                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2885                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2886                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2887                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2888                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2889                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2890                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2891                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2892                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2893                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2894                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2895                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2896                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2897                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2898                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2899                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2900                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2901                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2902                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2903                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2904                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2905                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2906
2907                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2908                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2909                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2910                                 NUM_BANKS(ADDR_SURF_16_BANK));
2911
2912                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2913                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2914                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2915                                 NUM_BANKS(ADDR_SURF_16_BANK));
2916
2917                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2918                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2919                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2920                                 NUM_BANKS(ADDR_SURF_16_BANK));
2921
2922                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2923                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2924                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2925                                 NUM_BANKS(ADDR_SURF_16_BANK));
2926
2927                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2928                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2929                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2930                                 NUM_BANKS(ADDR_SURF_16_BANK));
2931
2932                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2933                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2934                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2935                                 NUM_BANKS(ADDR_SURF_16_BANK));
2936
2937                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2938                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2939                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2940                                 NUM_BANKS(ADDR_SURF_16_BANK));
2941
2942                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2943                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2944                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2945                                 NUM_BANKS(ADDR_SURF_16_BANK));
2946
2947                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2948                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2949                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2950                                 NUM_BANKS(ADDR_SURF_16_BANK));
2951
2952                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2953                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2954                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2955                                 NUM_BANKS(ADDR_SURF_16_BANK));
2956
2957                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2958                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2959                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2960                                 NUM_BANKS(ADDR_SURF_16_BANK));
2961
2962                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2963                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2964                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2965                                 NUM_BANKS(ADDR_SURF_8_BANK));
2966
2967                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2969                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2970                                 NUM_BANKS(ADDR_SURF_4_BANK));
2971
2972                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2974                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2975                                 NUM_BANKS(ADDR_SURF_4_BANK));
2976
2977                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2978                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2979
2980                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2981                         if (reg_offset != 7)
2982                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2983
2984                 break;
2985         case CHIP_STONEY:
2986                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2987                                 PIPE_CONFIG(ADDR_SURF_P2) |
2988                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2989                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2990                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2991                                 PIPE_CONFIG(ADDR_SURF_P2) |
2992                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2993                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2994                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2995                                 PIPE_CONFIG(ADDR_SURF_P2) |
2996                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2997                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2998                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2999                                 PIPE_CONFIG(ADDR_SURF_P2) |
3000                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3001                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3002                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3003                                 PIPE_CONFIG(ADDR_SURF_P2) |
3004                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3005                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3006                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3007                                 PIPE_CONFIG(ADDR_SURF_P2) |
3008                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3009                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3010                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3011                                 PIPE_CONFIG(ADDR_SURF_P2) |
3012                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3013                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3014                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3015                                 PIPE_CONFIG(ADDR_SURF_P2));
3016                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3017                                 PIPE_CONFIG(ADDR_SURF_P2) |
3018                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3019                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3020                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3021                                  PIPE_CONFIG(ADDR_SURF_P2) |
3022                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3023                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3024                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3025                                  PIPE_CONFIG(ADDR_SURF_P2) |
3026                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3027                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3028                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3029                                  PIPE_CONFIG(ADDR_SURF_P2) |
3030                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3031                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3032                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3033                                  PIPE_CONFIG(ADDR_SURF_P2) |
3034                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3035                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3036                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3037                                  PIPE_CONFIG(ADDR_SURF_P2) |
3038                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3039                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3040                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3041                                  PIPE_CONFIG(ADDR_SURF_P2) |
3042                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3043                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3044                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3045                                  PIPE_CONFIG(ADDR_SURF_P2) |
3046                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3047                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3048                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3049                                  PIPE_CONFIG(ADDR_SURF_P2) |
3050                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3051                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3052                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3053                                  PIPE_CONFIG(ADDR_SURF_P2) |
3054                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3055                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3056                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3057                                  PIPE_CONFIG(ADDR_SURF_P2) |
3058                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3059                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3060                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3061                                  PIPE_CONFIG(ADDR_SURF_P2) |
3062                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3063                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3064                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3065                                  PIPE_CONFIG(ADDR_SURF_P2) |
3066                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3067                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3068                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3069                                  PIPE_CONFIG(ADDR_SURF_P2) |
3070                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3071                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3072                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3073                                  PIPE_CONFIG(ADDR_SURF_P2) |
3074                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3075                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3076                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3077                                  PIPE_CONFIG(ADDR_SURF_P2) |
3078                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3079                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3080                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3081                                  PIPE_CONFIG(ADDR_SURF_P2) |
3082                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3083                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3084                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3085                                  PIPE_CONFIG(ADDR_SURF_P2) |
3086                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3087                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3088
3089                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3090                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3091                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3092                                 NUM_BANKS(ADDR_SURF_8_BANK));
3093                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3095                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3096                                 NUM_BANKS(ADDR_SURF_8_BANK));
3097                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3098                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3099                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3100                                 NUM_BANKS(ADDR_SURF_8_BANK));
3101                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3102                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3103                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3104                                 NUM_BANKS(ADDR_SURF_8_BANK));
3105                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3106                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3107                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3108                                 NUM_BANKS(ADDR_SURF_8_BANK));
3109                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3110                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3111                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3112                                 NUM_BANKS(ADDR_SURF_8_BANK));
3113                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3114                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3115                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3116                                 NUM_BANKS(ADDR_SURF_8_BANK));
3117                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3118                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3119                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3120                                 NUM_BANKS(ADDR_SURF_16_BANK));
3121                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3122                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3123                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3124                                 NUM_BANKS(ADDR_SURF_16_BANK));
3125                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3126                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3127                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3128                                  NUM_BANKS(ADDR_SURF_16_BANK));
3129                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3130                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3131                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3132                                  NUM_BANKS(ADDR_SURF_16_BANK));
3133                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3134                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3135                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3136                                  NUM_BANKS(ADDR_SURF_16_BANK));
3137                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3138                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3139                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3140                                  NUM_BANKS(ADDR_SURF_16_BANK));
3141                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3142                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3143                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3144                                  NUM_BANKS(ADDR_SURF_8_BANK));
3145
3146                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3147                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3148                             reg_offset != 23)
3149                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3150
3151                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3152                         if (reg_offset != 7)
3153                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3154
3155                 break;
3156         default:
3157                 dev_warn(adev->dev,
3158                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3159                          adev->asic_type);
3160
3161         case CHIP_CARRIZO:
3162                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3163                                 PIPE_CONFIG(ADDR_SURF_P2) |
3164                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3165                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3166                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3167                                 PIPE_CONFIG(ADDR_SURF_P2) |
3168                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3169                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3170                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3171                                 PIPE_CONFIG(ADDR_SURF_P2) |
3172                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3173                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3174                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3175                                 PIPE_CONFIG(ADDR_SURF_P2) |
3176                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3177                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3178                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3179                                 PIPE_CONFIG(ADDR_SURF_P2) |
3180                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3181                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3182                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3183                                 PIPE_CONFIG(ADDR_SURF_P2) |
3184                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3185                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3186                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3187                                 PIPE_CONFIG(ADDR_SURF_P2) |
3188                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3189                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3190                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3191                                 PIPE_CONFIG(ADDR_SURF_P2));
3192                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3193                                 PIPE_CONFIG(ADDR_SURF_P2) |
3194                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3195                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3196                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3197                                  PIPE_CONFIG(ADDR_SURF_P2) |
3198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3200                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3201                                  PIPE_CONFIG(ADDR_SURF_P2) |
3202                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3203                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3204                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3205                                  PIPE_CONFIG(ADDR_SURF_P2) |
3206                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3207                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3208                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3209                                  PIPE_CONFIG(ADDR_SURF_P2) |
3210                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3211                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3212                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3213                                  PIPE_CONFIG(ADDR_SURF_P2) |
3214                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3215                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3216                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3217                                  PIPE_CONFIG(ADDR_SURF_P2) |
3218                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3219                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3220                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3221                                  PIPE_CONFIG(ADDR_SURF_P2) |
3222                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3223                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3224                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3225                                  PIPE_CONFIG(ADDR_SURF_P2) |
3226                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3227                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3228                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3229                                  PIPE_CONFIG(ADDR_SURF_P2) |
3230                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3231                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3232                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3233                                  PIPE_CONFIG(ADDR_SURF_P2) |
3234                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3235                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3236                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3237                                  PIPE_CONFIG(ADDR_SURF_P2) |
3238                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3239                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3240                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3241                                  PIPE_CONFIG(ADDR_SURF_P2) |
3242                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3243                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3244                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3245                                  PIPE_CONFIG(ADDR_SURF_P2) |
3246                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3247                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3248                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3249                                  PIPE_CONFIG(ADDR_SURF_P2) |
3250                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3251                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3252                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3253                                  PIPE_CONFIG(ADDR_SURF_P2) |
3254                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3255                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3256                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3257                                  PIPE_CONFIG(ADDR_SURF_P2) |
3258                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3259                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3260                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3261                                  PIPE_CONFIG(ADDR_SURF_P2) |
3262                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3263                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3264
3265                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3266                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3267                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3268                                 NUM_BANKS(ADDR_SURF_8_BANK));
3269                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3270                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3271                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3272                                 NUM_BANKS(ADDR_SURF_8_BANK));
3273                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3274                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3275                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3276                                 NUM_BANKS(ADDR_SURF_8_BANK));
3277                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3278                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3279                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3280                                 NUM_BANKS(ADDR_SURF_8_BANK));
3281                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3282                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3283                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3284                                 NUM_BANKS(ADDR_SURF_8_BANK));
3285                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3286                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3287                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3288                                 NUM_BANKS(ADDR_SURF_8_BANK));
3289                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3290                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3291                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3292                                 NUM_BANKS(ADDR_SURF_8_BANK));
3293                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3294                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3295                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3296                                 NUM_BANKS(ADDR_SURF_16_BANK));
3297                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3298                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3299                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3300                                 NUM_BANKS(ADDR_SURF_16_BANK));
3301                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3302                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3303                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3304                                  NUM_BANKS(ADDR_SURF_16_BANK));
3305                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3306                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3307                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3308                                  NUM_BANKS(ADDR_SURF_16_BANK));
3309                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3310                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3311                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3312                                  NUM_BANKS(ADDR_SURF_16_BANK));
3313                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3314                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3315                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3316                                  NUM_BANKS(ADDR_SURF_16_BANK));
3317                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3318                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3319                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3320                                  NUM_BANKS(ADDR_SURF_8_BANK));
3321
3322                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3323                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3324                             reg_offset != 23)
3325                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3326
3327                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3328                         if (reg_offset != 7)
3329                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3330
3331                 break;
3332         }
3333 }
3334
3335 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
3336 {
3337         u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3338
3339         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3340                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3341                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3342         } else if (se_num == 0xffffffff) {
3343                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3344                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3345         } else if (sh_num == 0xffffffff) {
3346                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3347                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3348         } else {
3349                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3350                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3351         }
3352         WREG32(mmGRBM_GFX_INDEX, data);
3353 }
3354
/*
 * Return a mask with the low @bit_width bits set.  The shift is done in
 * 64 bits so that a width of 32 yields 0xffffffff.
 */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	unsigned long long above_mask = 1ULL << bit_width;

	return (u32)(above_mask - 1);
}
3359
3360 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3361 {
3362         u32 data, mask;
3363
3364         data = RREG32(mmCC_RB_BACKEND_DISABLE);
3365         data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3366
3367         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3368         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3369
3370         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3371                                        adev->gfx.config.max_sh_per_se);
3372
3373         return (~data) & mask;
3374 }
3375
3376 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3377 {
3378         int i, j;
3379         u32 data;
3380         u32 active_rbs = 0;
3381         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3382                                         adev->gfx.config.max_sh_per_se;
3383
3384         mutex_lock(&adev->grbm_idx_mutex);
3385         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3386                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3387                         gfx_v8_0_select_se_sh(adev, i, j);
3388                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3389                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3390                                                rb_bitmap_width_per_sh);
3391                 }
3392         }
3393         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3394         mutex_unlock(&adev->grbm_idx_mutex);
3395
3396         adev->gfx.config.backend_enable_mask = active_rbs;
3397         adev->gfx.config.num_rbs = hweight32(active_rbs);
3398 }
3399
3400 /**
3401  * gfx_v8_0_init_compute_vmid - gart enable
3402  *
3403  * @rdev: amdgpu_device pointer
3404  *
3405  * Initialize compute vmid sh_mem registers
3406  *
3407  */
3408 #define DEFAULT_SH_MEM_BASES    (0x6000)
3409 #define FIRST_COMPUTE_VMID      (8)
3410 #define LAST_COMPUTE_VMID       (16)
3411 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3412 {
3413         int i;
3414         uint32_t sh_mem_config;
3415         uint32_t sh_mem_bases;
3416
3417         /*
3418          * Configure apertures:
3419          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3420          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3421          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3422          */
3423         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3424
3425         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3426                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3427                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3428                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3429                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3430                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3431
3432         mutex_lock(&adev->srbm_mutex);
3433         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3434                 vi_srbm_select(adev, 0, 0, 0, i);
3435                 /* CP and shaders */
3436                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3437                 WREG32(mmSH_MEM_APE1_BASE, 1);
3438                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3439                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3440         }
3441         vi_srbm_select(adev, 0, 0, 0, 0);
3442         mutex_unlock(&adev->srbm_mutex);
3443 }
3444
3445 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3446 {
3447         u32 tmp;
3448         int i;
3449
3450         tmp = RREG32(mmGRBM_CNTL);
3451         tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3452         WREG32(mmGRBM_CNTL, tmp);
3453
3454         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3455         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3456         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3457
3458         gfx_v8_0_tiling_mode_table_init(adev);
3459
3460         gfx_v8_0_setup_rb(adev);
3461         gfx_v8_0_get_cu_info(adev);
3462
3463         /* XXX SH_MEM regs */
3464         /* where to put LDS, scratch, GPUVM in FSA64 space */
3465         mutex_lock(&adev->srbm_mutex);
3466         for (i = 0; i < 16; i++) {
3467                 vi_srbm_select(adev, 0, 0, 0, i);
3468                 /* CP and shaders */
3469                 if (i == 0) {
3470                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3471                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3472                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3473                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3474                         WREG32(mmSH_MEM_CONFIG, tmp);
3475                 } else {
3476                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3477                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3478                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3479                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3480                         WREG32(mmSH_MEM_CONFIG, tmp);
3481                 }
3482
3483                 WREG32(mmSH_MEM_APE1_BASE, 1);
3484                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3485                 WREG32(mmSH_MEM_BASES, 0);
3486         }
3487         vi_srbm_select(adev, 0, 0, 0, 0);
3488         mutex_unlock(&adev->srbm_mutex);
3489
3490         gfx_v8_0_init_compute_vmid(adev);
3491
3492         mutex_lock(&adev->grbm_idx_mutex);
3493         /*
3494          * making sure that the following register writes will be broadcasted
3495          * to all the shaders
3496          */
3497         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3498
3499         WREG32(mmPA_SC_FIFO_SIZE,
3500                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3501                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3502                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3503                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3504                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3505                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3506                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3507                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3508         mutex_unlock(&adev->grbm_idx_mutex);
3509
3510 }
3511
3512 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3513 {
3514         u32 i, j, k;
3515         u32 mask;
3516
3517         mutex_lock(&adev->grbm_idx_mutex);
3518         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3519                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3520                         gfx_v8_0_select_se_sh(adev, i, j);
3521                         for (k = 0; k < adev->usec_timeout; k++) {
3522                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3523                                         break;
3524                                 udelay(1);
3525                         }
3526                 }
3527         }
3528         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3529         mutex_unlock(&adev->grbm_idx_mutex);
3530
3531         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3532                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3533                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3534                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3535         for (k = 0; k < adev->usec_timeout; k++) {
3536                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3537                         break;
3538                 udelay(1);
3539         }
3540 }
3541
3542 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3543                                                bool enable)
3544 {
3545         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3546
3547         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3548         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3549         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3550         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3551
3552         WREG32(mmCP_INT_CNTL_RING0, tmp);
3553 }
3554
3555 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3556 {
3557         /* csib */
3558         WREG32(mmRLC_CSIB_ADDR_HI,
3559                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3560         WREG32(mmRLC_CSIB_ADDR_LO,
3561                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3562         WREG32(mmRLC_CSIB_LENGTH,
3563                         adev->gfx.rlc.clear_state_size);
3564 }
3565
/*
 * gfx_v8_0_parse_ind_reg_list - post-process the indirect register list
 *
 * @register_list_format: list to parse; modified in place
 * @ind_offset: index of the first word to look at
 * @list_size: number of words in @register_list_format
 * @unique_indices: output table of distinct index values found
 * @indices_count: in/out number of used slots in @unique_indices
 * @max_indices: capacity of @unique_indices
 * @ind_start_offsets: output table of the start offset of each sub-list
 * @offset_count: in/out number of used slots in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets
 *
 * The list is a sequence of sub-lists, each terminated by a 0xFFFFFFFF
 * word.  Within a sub-list the code advances three words at a time and
 * treats the third word as an index value: the value is looked up in (or
 * appended to) @unique_indices and the word is replaced by its slot
 * number in that table.  The offset of the first word of every sub-list
 * is appended to @ind_start_offsets.
 *
 * BUG()s when either output table becomes full.  A trailing entry that is
 * cut short by @list_size ends the parse instead of being accessed past
 * the end of the list.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		/* end-of-sub-list marker: the next word starts a new one */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/*
		 * The index word sits two words further on.  Stop if the
		 * entry is truncated, otherwise the accesses below would
		 * read and write beyond the end of the list.
		 */
		if (list_size - ind_offset <= 2)
			break;

		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: append it to the table */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw value by its slot in unique_indices */
		register_list_format[ind_offset] = indices;
	}
}
3615
3616 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3617 {
3618         int i, temp, data;
3619         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3620         int indices_count = 0;
3621         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3622         int offset_count = 0;
3623
3624         int list_size;
3625         unsigned int *register_list_format =
3626                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3627         if (register_list_format == NULL)
3628                 return -ENOMEM;
3629         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3630                         adev->gfx.rlc.reg_list_format_size_bytes);
3631
3632         gfx_v8_0_parse_ind_reg_list(register_list_format,
3633                                 RLC_FormatDirectRegListLength,
3634                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3635                                 unique_indices,
3636                                 &indices_count,
3637                                 sizeof(unique_indices) / sizeof(int),
3638                                 indirect_start_offsets,
3639                                 &offset_count,
3640                                 sizeof(indirect_start_offsets)/sizeof(int));
3641
3642         /* save and restore list */
3643         temp = RREG32(mmRLC_SRM_CNTL);
3644         temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
3645         WREG32(mmRLC_SRM_CNTL, temp);
3646
3647         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3648         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3649                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3650
3651         /* indirect list */
3652         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3653         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3654                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3655
3656         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3657         list_size = list_size >> 1;
3658         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3659         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3660
3661         /* starting offsets starts */
3662         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3663                 adev->gfx.rlc.starting_offsets_start);
3664         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3665                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3666                                 indirect_start_offsets[i]);
3667
3668         /* unique indices */
3669         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3670         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3671         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3672                 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3673                 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3674         }
3675         kfree(register_list_format);
3676
3677         return 0;
3678 }
3679
3680 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3681 {
3682         uint32_t data;
3683
3684         data = RREG32(mmRLC_SRM_CNTL);
3685         data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3686         WREG32(mmRLC_SRM_CNTL, data);
3687 }
3688
3689 static void polaris11_init_power_gating(struct amdgpu_device *adev)
3690 {
3691         uint32_t data;
3692
3693         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3694                         AMD_PG_SUPPORT_GFX_SMG |
3695                         AMD_PG_SUPPORT_GFX_DMG)) {
3696                 data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3697                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3698                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3699                 WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3700
3701                 data = 0;
3702                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
3703                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
3704                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
3705                 data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
3706                 WREG32(mmRLC_PG_DELAY, data);
3707
3708                 data = RREG32(mmRLC_PG_DELAY_2);
3709                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
3710                 data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
3711                 WREG32(mmRLC_PG_DELAY_2, data);
3712
3713                 data = RREG32(mmRLC_AUTO_PG_CTRL);
3714                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3715                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3716                 WREG32(mmRLC_AUTO_PG_CTRL, data);
3717         }
3718 }
3719
3720 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3721 {
3722         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3723                               AMD_PG_SUPPORT_GFX_SMG |
3724                               AMD_PG_SUPPORT_GFX_DMG |
3725                               AMD_PG_SUPPORT_CP |
3726                               AMD_PG_SUPPORT_GDS |
3727                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
3728                 gfx_v8_0_init_csb(adev);
3729                 gfx_v8_0_init_save_restore_list(adev);
3730                 gfx_v8_0_enable_save_restore_machine(adev);
3731
3732                 if (adev->asic_type == CHIP_POLARIS11)
3733                         polaris11_init_power_gating(adev);
3734         }
3735 }
3736
3737 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3738 {
3739         u32 tmp = RREG32(mmRLC_CNTL);
3740
3741         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3742         WREG32(mmRLC_CNTL, tmp);
3743
3744         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3745
3746         gfx_v8_0_wait_for_rlc_serdes(adev);
3747 }
3748
3749 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3750 {
3751         u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3752
3753         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3754         WREG32(mmGRBM_SOFT_RESET, tmp);
3755         udelay(50);
3756         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3757         WREG32(mmGRBM_SOFT_RESET, tmp);
3758         udelay(50);
3759 }
3760
3761 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3762 {
3763         u32 tmp = RREG32(mmRLC_CNTL);
3764
3765         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3766         WREG32(mmRLC_CNTL, tmp);
3767
3768         /* carrizo do enable cp interrupt after cp inited */
3769         if (!(adev->flags & AMD_IS_APU))
3770                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3771
3772         udelay(50);
3773 }
3774
3775 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3776 {
3777         const struct rlc_firmware_header_v2_0 *hdr;
3778         const __le32 *fw_data;
3779         unsigned i, fw_size;
3780
3781         if (!adev->gfx.rlc_fw)
3782                 return -EINVAL;
3783
3784         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3785         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3786
3787         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3788                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3789         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3790
3791         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3792         for (i = 0; i < fw_size; i++)
3793                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3794         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3795
3796         return 0;
3797 }
3798
3799 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3800 {
3801         int r;
3802
3803         gfx_v8_0_rlc_stop(adev);
3804
3805         /* disable CG */
3806         WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
3807         if (adev->asic_type == CHIP_POLARIS11 ||
3808                 adev->asic_type == CHIP_POLARIS10)
3809                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
3810
3811         /* disable PG */
3812         WREG32(mmRLC_PG_CNTL, 0);
3813
3814         gfx_v8_0_rlc_reset(adev);
3815
3816         gfx_v8_0_init_pg(adev);
3817
3818         if (!adev->pp_enabled) {
3819                 if (!adev->firmware.smu_load) {
3820                         /* legacy rlc firmware loading */
3821                         r = gfx_v8_0_rlc_load_microcode(adev);
3822                         if (r)
3823                                 return r;
3824                 } else {
3825                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3826                                                         AMDGPU_UCODE_ID_RLC_G);
3827                         if (r)
3828                                 return -EINVAL;
3829                 }
3830         }
3831
3832         gfx_v8_0_rlc_start(adev);
3833
3834         return 0;
3835 }
3836
3837 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3838 {
3839         int i;
3840         u32 tmp = RREG32(mmCP_ME_CNTL);
3841
3842         if (enable) {
3843                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
3844                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
3845                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
3846         } else {
3847                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
3848                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
3849                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
3850                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3851                         adev->gfx.gfx_ring[i].ready = false;
3852         }
3853         WREG32(mmCP_ME_CNTL, tmp);
3854         udelay(50);
3855 }
3856
3857 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3858 {
3859         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3860         const struct gfx_firmware_header_v1_0 *ce_hdr;
3861         const struct gfx_firmware_header_v1_0 *me_hdr;
3862         const __le32 *fw_data;
3863         unsigned i, fw_size;
3864
3865         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3866                 return -EINVAL;
3867
3868         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3869                 adev->gfx.pfp_fw->data;
3870         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3871                 adev->gfx.ce_fw->data;
3872         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3873                 adev->gfx.me_fw->data;
3874
3875         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3876         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3877         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3878
3879         gfx_v8_0_cp_gfx_enable(adev, false);
3880
3881         /* PFP */
3882         fw_data = (const __le32 *)
3883                 (adev->gfx.pfp_fw->data +
3884                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3885         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3886         WREG32(mmCP_PFP_UCODE_ADDR, 0);
3887         for (i = 0; i < fw_size; i++)
3888                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3889         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3890
3891         /* CE */
3892         fw_data = (const __le32 *)
3893                 (adev->gfx.ce_fw->data +
3894                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3895         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3896         WREG32(mmCP_CE_UCODE_ADDR, 0);
3897         for (i = 0; i < fw_size; i++)
3898                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3899         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3900
3901         /* ME */
3902         fw_data = (const __le32 *)
3903                 (adev->gfx.me_fw->data +
3904                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3905         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3906         WREG32(mmCP_ME_RAM_WADDR, 0);
3907         for (i = 0; i < fw_size; i++)
3908                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3909         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3910
3911         return 0;
3912 }
3913
3914 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3915 {
3916         u32 count = 0;
3917         const struct cs_section_def *sect = NULL;
3918         const struct cs_extent_def *ext = NULL;
3919
3920         /* begin clear state */
3921         count += 2;
3922         /* context control state */
3923         count += 3;
3924
3925         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3926                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3927                         if (sect->id == SECT_CONTEXT)
3928                                 count += 2 + ext->reg_count;
3929                         else
3930                                 return 0;
3931                 }
3932         }
3933         /* pa_sc_raster_config/pa_sc_raster_config1 */
3934         count += 4;
3935         /* end clear state */
3936         count += 2;
3937         /* clear state */
3938         count += 2;
3939
3940         return count;
3941 }
3942
3943 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
3944 {
3945         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3946         const struct cs_section_def *sect = NULL;
3947         const struct cs_extent_def *ext = NULL;
3948         int r, i;
3949
3950         /* init the CP */
3951         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3952         WREG32(mmCP_ENDIAN_SWAP, 0);
3953         WREG32(mmCP_DEVICE_ID, 1);
3954
3955         gfx_v8_0_cp_gfx_enable(adev, true);
3956
3957         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
3958         if (r) {
3959                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3960                 return r;
3961         }
3962
3963         /* clear state buffer */
3964         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3965         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3966
3967         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3968         amdgpu_ring_write(ring, 0x80000000);
3969         amdgpu_ring_write(ring, 0x80000000);
3970
3971         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3972                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3973                         if (sect->id == SECT_CONTEXT) {
3974                                 amdgpu_ring_write(ring,
3975                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3976                                                ext->reg_count));
3977                                 amdgpu_ring_write(ring,
3978                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3979                                 for (i = 0; i < ext->reg_count; i++)
3980                                         amdgpu_ring_write(ring, ext->extent[i]);
3981                         }
3982                 }
3983         }
3984
3985         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3986         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3987         switch (adev->asic_type) {
3988         case CHIP_TONGA:
3989         case CHIP_POLARIS10:
3990                 amdgpu_ring_write(ring, 0x16000012);
3991                 amdgpu_ring_write(ring, 0x0000002A);
3992                 break;
3993         case CHIP_POLARIS11:
3994                 amdgpu_ring_write(ring, 0x16000012);
3995                 amdgpu_ring_write(ring, 0x00000000);
3996                 break;
3997         case CHIP_FIJI:
3998                 amdgpu_ring_write(ring, 0x3a00161a);
3999                 amdgpu_ring_write(ring, 0x0000002e);
4000                 break;
4001         case CHIP_CARRIZO:
4002                 amdgpu_ring_write(ring, 0x00000002);
4003                 amdgpu_ring_write(ring, 0x00000000);
4004                 break;
4005         case CHIP_TOPAZ:
4006                 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4007                                 0x00000000 : 0x00000002);
4008                 amdgpu_ring_write(ring, 0x00000000);
4009                 break;
4010         case CHIP_STONEY:
4011                 amdgpu_ring_write(ring, 0x00000000);
4012                 amdgpu_ring_write(ring, 0x00000000);
4013                 break;
4014         default:
4015                 BUG();
4016         }
4017
4018         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4019         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4020
4021         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4022         amdgpu_ring_write(ring, 0);
4023
4024         /* init the CE partitions */
4025         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4026         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4027         amdgpu_ring_write(ring, 0x8000);
4028         amdgpu_ring_write(ring, 0x8000);
4029
4030         amdgpu_ring_commit(ring);
4031
4032         return 0;
4033 }
4034
4035 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4036 {
4037         struct amdgpu_ring *ring;
4038         u32 tmp;
4039         u32 rb_bufsz;
4040         u64 rb_addr, rptr_addr;
4041         int r;
4042
4043         /* Set the write pointer delay */
4044         WREG32(mmCP_RB_WPTR_DELAY, 0);
4045
4046         /* set the RB to use vmid 0 */
4047         WREG32(mmCP_RB_VMID, 0);
4048
4049         /* Set ring buffer size */
4050         ring = &adev->gfx.gfx_ring[0];
4051         rb_bufsz = order_base_2(ring->ring_size / 8);
4052         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4053         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4054         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4055         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4056 #ifdef __BIG_ENDIAN
4057         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4058 #endif
4059         WREG32(mmCP_RB0_CNTL, tmp);
4060
4061         /* Initialize the ring buffer's read and write pointers */
4062         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4063         ring->wptr = 0;
4064         WREG32(mmCP_RB0_WPTR, ring->wptr);
4065
4066         /* set the wb address wether it's enabled or not */
4067         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4068         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4069         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4070
4071         mdelay(1);
4072         WREG32(mmCP_RB0_CNTL, tmp);
4073
4074         rb_addr = ring->gpu_addr >> 8;
4075         WREG32(mmCP_RB0_BASE, rb_addr);
4076         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4077
4078         /* no gfx doorbells on iceland */
4079         if (adev->asic_type != CHIP_TOPAZ) {
4080                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4081                 if (ring->use_doorbell) {
4082                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4083                                             DOORBELL_OFFSET, ring->doorbell_index);
4084                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4085                                             DOORBELL_HIT, 0);
4086                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4087                                             DOORBELL_EN, 1);
4088                 } else {
4089                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4090                                             DOORBELL_EN, 0);
4091                 }
4092                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4093
4094                 if (adev->asic_type == CHIP_TONGA) {
4095                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4096                                             DOORBELL_RANGE_LOWER,
4097                                             AMDGPU_DOORBELL_GFX_RING0);
4098                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4099
4100                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4101                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4102                 }
4103
4104         }
4105
4106         /* start the ring */
4107         gfx_v8_0_cp_gfx_start(adev);
4108         ring->ready = true;
4109         r = amdgpu_ring_test_ring(ring);
4110         if (r) {
4111                 ring->ready = false;
4112                 return r;
4113         }
4114
4115         return 0;
4116 }
4117
4118 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4119 {
4120         int i;
4121
4122         if (enable) {
4123                 WREG32(mmCP_MEC_CNTL, 0);
4124         } else {
4125                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4126                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4127                         adev->gfx.compute_ring[i].ready = false;
4128         }
4129         udelay(50);
4130 }
4131
4132 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4133 {
4134         const struct gfx_firmware_header_v1_0 *mec_hdr;
4135         const __le32 *fw_data;
4136         unsigned i, fw_size;
4137
4138         if (!adev->gfx.mec_fw)
4139                 return -EINVAL;
4140
4141         gfx_v8_0_cp_compute_enable(adev, false);
4142
4143         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4144         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4145
4146         fw_data = (const __le32 *)
4147                 (adev->gfx.mec_fw->data +
4148                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4149         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4150
4151         /* MEC1 */
4152         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4153         for (i = 0; i < fw_size; i++)
4154                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4155         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4156
4157         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4158         if (adev->gfx.mec2_fw) {
4159                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4160
4161                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4162                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4163
4164                 fw_data = (const __le32 *)
4165                         (adev->gfx.mec2_fw->data +
4166                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4167                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4168
4169                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4170                 for (i = 0; i < fw_size; i++)
4171                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4172                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4173         }
4174
4175         return 0;
4176 }
4177
/*
 * struct vi_mqd - in-memory image of a VI compute queue descriptor ("MQD").
 *
 * The structure is a flat sequence of 32-bit words: 256 named dwords (the
 * "ordinalN" comment on each field is its dword index) followed by a
 * 256-dword area reserved for the ucode.
 *
 * gfx_v8_0_cp_compute_resume() allocates one of these per compute ring,
 * zeroes it, and stores into the cp_mqd_* / cp_hqd_* fields the same values
 * it writes to the matching CP_MQD_* / CP_HQD_* registers.
 *
 * NOTE(review): the layout is presumably consumed as-is by the CP ucode /
 * hardware, so field order and sizes should be treated as fixed -- confirm
 * before adding, removing or reordering members.
 */
4178 struct vi_mqd {
4179         uint32_t header;  /* ordinal0 */
4180         uint32_t compute_dispatch_initiator;  /* ordinal1 */
4181         uint32_t compute_dim_x;  /* ordinal2 */
4182         uint32_t compute_dim_y;  /* ordinal3 */
4183         uint32_t compute_dim_z;  /* ordinal4 */
4184         uint32_t compute_start_x;  /* ordinal5 */
4185         uint32_t compute_start_y;  /* ordinal6 */
4186         uint32_t compute_start_z;  /* ordinal7 */
4187         uint32_t compute_num_thread_x;  /* ordinal8 */
4188         uint32_t compute_num_thread_y;  /* ordinal9 */
4189         uint32_t compute_num_thread_z;  /* ordinal10 */
4190         uint32_t compute_pipelinestat_enable;  /* ordinal11 */
4191         uint32_t compute_perfcount_enable;  /* ordinal12 */
4192         uint32_t compute_pgm_lo;  /* ordinal13 */
4193         uint32_t compute_pgm_hi;  /* ordinal14 */
4194         uint32_t compute_tba_lo;  /* ordinal15 */
4195         uint32_t compute_tba_hi;  /* ordinal16 */
4196         uint32_t compute_tma_lo;  /* ordinal17 */
4197         uint32_t compute_tma_hi;  /* ordinal18 */
4198         uint32_t compute_pgm_rsrc1;  /* ordinal19 */
4199         uint32_t compute_pgm_rsrc2;  /* ordinal20 */
4200         uint32_t compute_vmid;  /* ordinal21 */
4201         uint32_t compute_resource_limits;  /* ordinal22 */
4202         uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
4203         uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
4204         uint32_t compute_tmpring_size;  /* ordinal25 */
4205         uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
4206         uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
4207         uint32_t compute_restart_x;  /* ordinal28 */
4208         uint32_t compute_restart_y;  /* ordinal29 */
4209         uint32_t compute_restart_z;  /* ordinal30 */
4210         uint32_t compute_thread_trace_enable;  /* ordinal31 */
4211         uint32_t compute_misc_reserved;  /* ordinal32 */
4212         uint32_t compute_dispatch_id;  /* ordinal33 */
4213         uint32_t compute_threadgroup_id;  /* ordinal34 */
4214         uint32_t compute_relaunch;  /* ordinal35 */
4215         uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
4216         uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
4217         uint32_t compute_wave_restore_control;  /* ordinal38 */
4218         uint32_t reserved9;  /* ordinal39 */
4219         uint32_t reserved10;  /* ordinal40 */
4220         uint32_t reserved11;  /* ordinal41 */
4221         uint32_t reserved12;  /* ordinal42 */
4222         uint32_t reserved13;  /* ordinal43 */
4223         uint32_t reserved14;  /* ordinal44 */
4224         uint32_t reserved15;  /* ordinal45 */
4225         uint32_t reserved16;  /* ordinal46 */
4226         uint32_t reserved17;  /* ordinal47 */
4227         uint32_t reserved18;  /* ordinal48 */
4228         uint32_t reserved19;  /* ordinal49 */
4229         uint32_t reserved20;  /* ordinal50 */
4230         uint32_t reserved21;  /* ordinal51 */
4231         uint32_t reserved22;  /* ordinal52 */
4232         uint32_t reserved23;  /* ordinal53 */
4233         uint32_t reserved24;  /* ordinal54 */
4234         uint32_t reserved25;  /* ordinal55 */
4235         uint32_t reserved26;  /* ordinal56 */
4236         uint32_t reserved27;  /* ordinal57 */
4237         uint32_t reserved28;  /* ordinal58 */
4238         uint32_t reserved29;  /* ordinal59 */
4239         uint32_t reserved30;  /* ordinal60 */
4240         uint32_t reserved31;  /* ordinal61 */
4241         uint32_t reserved32;  /* ordinal62 */
4242         uint32_t reserved33;  /* ordinal63 */
4243         uint32_t reserved34;  /* ordinal64 */
4244         uint32_t compute_user_data_0;  /* ordinal65 */
4245         uint32_t compute_user_data_1;  /* ordinal66 */
4246         uint32_t compute_user_data_2;  /* ordinal67 */
4247         uint32_t compute_user_data_3;  /* ordinal68 */
4248         uint32_t compute_user_data_4;  /* ordinal69 */
4249         uint32_t compute_user_data_5;  /* ordinal70 */
4250         uint32_t compute_user_data_6;  /* ordinal71 */
4251         uint32_t compute_user_data_7;  /* ordinal72 */
4252         uint32_t compute_user_data_8;  /* ordinal73 */
4253         uint32_t compute_user_data_9;  /* ordinal74 */
4254         uint32_t compute_user_data_10;  /* ordinal75 */
4255         uint32_t compute_user_data_11;  /* ordinal76 */
4256         uint32_t compute_user_data_12;  /* ordinal77 */
4257         uint32_t compute_user_data_13;  /* ordinal78 */
4258         uint32_t compute_user_data_14;  /* ordinal79 */
4259         uint32_t compute_user_data_15;  /* ordinal80 */
4260         uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
4261         uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
4262         uint32_t reserved35;  /* ordinal83 */
4263         uint32_t reserved36;  /* ordinal84 */
4264         uint32_t reserved37;  /* ordinal85 */
4265         uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
4266         uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
4267         uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
4268         uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
4269         uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
4270         uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
4271         uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
4272         uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
4273         uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
4274         uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
4275         uint32_t reserved38;  /* ordinal96 */
4276         uint32_t reserved39;  /* ordinal97 */
4277         uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
4278         uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
4279         uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
4280         uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
4281         uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
4282         uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
4283         uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
4284         uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
4285         uint32_t reserved40;  /* ordinal106 */
4286         uint32_t reserved41;  /* ordinal107 */
4287         uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
4288         uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
4289         uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
4290         uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
4291         uint32_t reserved42;  /* ordinal112 */
4292         uint32_t reserved43;  /* ordinal113 */
4293         uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
4294         uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
4295         uint32_t cp_packet_id_lo;  /* ordinal116 */
4296         uint32_t cp_packet_id_hi;  /* ordinal117 */
4297         uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
4298         uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
4299         uint32_t gds_save_base_addr_lo;  /* ordinal120 */
4300         uint32_t gds_save_base_addr_hi;  /* ordinal121 */
4301         uint32_t gds_save_mask_lo;  /* ordinal122 */
4302         uint32_t gds_save_mask_hi;  /* ordinal123 */
4303         uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
4304         uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
4305         uint32_t reserved44;  /* ordinal126 */
4306         uint32_t reserved45;  /* ordinal127 */
4307         uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
4308         uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
4309         uint32_t cp_hqd_active;  /* ordinal130 */
4310         uint32_t cp_hqd_vmid;  /* ordinal131 */
4311         uint32_t cp_hqd_persistent_state;  /* ordinal132 */
4312         uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
4313         uint32_t cp_hqd_queue_priority;  /* ordinal134 */
4314         uint32_t cp_hqd_quantum;  /* ordinal135 */
4315         uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
4316         uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
4317         uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
4318         uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
4319         uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
4320         uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
4321         uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
4322         uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
4323         uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
4324         uint32_t cp_hqd_pq_control;  /* ordinal145 */
4325         uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
4326         uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
4327         uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
4328         uint32_t cp_hqd_ib_control;  /* ordinal149 */
4329         uint32_t cp_hqd_iq_timer;  /* ordinal150 */
4330         uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
4331         uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
4332         uint32_t cp_hqd_dma_offload;  /* ordinal153 */
4333         uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
4334         uint32_t cp_hqd_msg_type;  /* ordinal155 */
4335         uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
4336         uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
4337         uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
4338         uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
4339         uint32_t cp_hqd_hq_status0;  /* ordinal160 */
4340         uint32_t cp_hqd_hq_control0;  /* ordinal161 */
4341         uint32_t cp_mqd_control;  /* ordinal162 */
4342         uint32_t cp_hqd_hq_status1;  /* ordinal163 */
4343         uint32_t cp_hqd_hq_control1;  /* ordinal164 */
4344         uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
4345         uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
4346         uint32_t cp_hqd_eop_control;  /* ordinal167 */
4347         uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
4348         uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
4349         uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
4350         uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
4351         uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
4352         uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
4353         uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
4354         uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
4355         uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
4356         uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
4357         uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
4358         uint32_t cp_hqd_error;  /* ordinal179 */
4359         uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
4360         uint32_t cp_hqd_eop_dones;  /* ordinal181 */
4361         uint32_t reserved46;  /* ordinal182 */
4362         uint32_t reserved47;  /* ordinal183 */
4363         uint32_t reserved48;  /* ordinal184 */
4364         uint32_t reserved49;  /* ordinal185 */
4365         uint32_t reserved50;  /* ordinal186 */
4366         uint32_t reserved51;  /* ordinal187 */
4367         uint32_t reserved52;  /* ordinal188 */
4368         uint32_t reserved53;  /* ordinal189 */
4369         uint32_t reserved54;  /* ordinal190 */
4370         uint32_t reserved55;  /* ordinal191 */
4371         uint32_t iqtimer_pkt_header;  /* ordinal192 */
4372         uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
4373         uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
4374         uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
4375         uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
4376         uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
4377         uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
4378         uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
4379         uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
4380         uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
4381         uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
4382         uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
4383         uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
4384         uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
4385         uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
4386         uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
4387         uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
4388         uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
4389         uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
4390         uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
4391         uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
4392         uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
4393         uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
4394         uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
4395         uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
4396         uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
4397         uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
4398         uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
4399         uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
4400         uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
4401         uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
4402         uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
4403         uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
4404         uint32_t reserved56;  /* ordinal225 */
4405         uint32_t reserved57;  /* ordinal226 */
4406         uint32_t reserved58;  /* ordinal227 */
4407         uint32_t set_resources_header;  /* ordinal228 */
4408         uint32_t set_resources_dw1;  /* ordinal229 */
4409         uint32_t set_resources_dw2;  /* ordinal230 */
4410         uint32_t set_resources_dw3;  /* ordinal231 */
4411         uint32_t set_resources_dw4;  /* ordinal232 */
4412         uint32_t set_resources_dw5;  /* ordinal233 */
4413         uint32_t set_resources_dw6;  /* ordinal234 */
4414         uint32_t set_resources_dw7;  /* ordinal235 */
4415         uint32_t reserved59;  /* ordinal236 */
4416         uint32_t reserved60;  /* ordinal237 */
4417         uint32_t reserved61;  /* ordinal238 */
4418         uint32_t reserved62;  /* ordinal239 */
4419         uint32_t reserved63;  /* ordinal240 */
4420         uint32_t reserved64;  /* ordinal241 */
4421         uint32_t reserved65;  /* ordinal242 */
4422         uint32_t reserved66;  /* ordinal243 */
4423         uint32_t reserved67;  /* ordinal244 */
4424         uint32_t reserved68;  /* ordinal245 */
4425         uint32_t reserved69;  /* ordinal246 */
4426         uint32_t reserved70;  /* ordinal247 */
4427         uint32_t reserved71;  /* ordinal248 */
4428         uint32_t reserved72;  /* ordinal249 */
4429         uint32_t reserved73;  /* ordinal250 */
4430         uint32_t reserved74;  /* ordinal251 */
4431         uint32_t reserved75;  /* ordinal252 */
4432         uint32_t reserved76;  /* ordinal253 */
4433         uint32_t reserved77;  /* ordinal254 */
4434         uint32_t reserved78;  /* ordinal255 */
4435
4436         uint32_t reserved_t[256]; /* 256-dword buffer reserved for use by the ucode */
4437 };
4438
4439 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4440 {
4441         int i, r;
4442
4443         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4444                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4445
4446                 if (ring->mqd_obj) {
4447                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4448                         if (unlikely(r != 0))
4449                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4450
4451                         amdgpu_bo_unpin(ring->mqd_obj);
4452                         amdgpu_bo_unreserve(ring->mqd_obj);
4453
4454                         amdgpu_bo_unref(&ring->mqd_obj);
4455                         ring->mqd_obj = NULL;
4456                 }
4457         }
4458 }
4459
4460 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4461 {
4462         int r, i, j;
4463         u32 tmp;
4464         bool use_doorbell = true;
4465         u64 hqd_gpu_addr;
4466         u64 mqd_gpu_addr;
4467         u64 eop_gpu_addr;
4468         u64 wb_gpu_addr;
4469         u32 *buf;
4470         struct vi_mqd *mqd;
4471
4472         /* init the pipes */
4473         mutex_lock(&adev->srbm_mutex);
4474         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4475                 int me = (i < 4) ? 1 : 2;
4476                 int pipe = (i < 4) ? i : (i - 4);
4477
4478                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4479                 eop_gpu_addr >>= 8;
4480
4481                 vi_srbm_select(adev, me, pipe, 0, 0);
4482
4483                 /* write the EOP addr */
4484                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4485                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4486
4487                 /* set the VMID assigned */
4488                 WREG32(mmCP_HQD_VMID, 0);
4489
4490                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4491                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4492                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4493                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
4494                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4495         }
4496         vi_srbm_select(adev, 0, 0, 0, 0);
4497         mutex_unlock(&adev->srbm_mutex);
4498
4499         /* init the queues.  Just two for now. */
4500         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4501                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4502
4503                 if (ring->mqd_obj == NULL) {
4504                         r = amdgpu_bo_create(adev,
4505                                              sizeof(struct vi_mqd),
4506                                              PAGE_SIZE, true,
4507                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4508                                              NULL, &ring->mqd_obj);
4509                         if (r) {
4510                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4511                                 return r;
4512                         }
4513                 }
4514
4515                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4516                 if (unlikely(r != 0)) {
4517                         gfx_v8_0_cp_compute_fini(adev);
4518                         return r;
4519                 }
4520                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4521                                   &mqd_gpu_addr);
4522                 if (r) {
4523                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4524                         gfx_v8_0_cp_compute_fini(adev);
4525                         return r;
4526                 }
4527                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4528                 if (r) {
4529                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4530                         gfx_v8_0_cp_compute_fini(adev);
4531                         return r;
4532                 }
4533
4534                 /* init the mqd struct */
4535                 memset(buf, 0, sizeof(struct vi_mqd));
4536
4537                 mqd = (struct vi_mqd *)buf;
4538                 mqd->header = 0xC0310800;
4539                 mqd->compute_pipelinestat_enable = 0x00000001;
4540                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4541                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4542                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4543                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4544                 mqd->compute_misc_reserved = 0x00000003;
4545
4546                 mutex_lock(&adev->srbm_mutex);
4547                 vi_srbm_select(adev, ring->me,
4548                                ring->pipe,
4549                                ring->queue, 0);
4550
4551                 /* disable wptr polling */
4552                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4553                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4554                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4555
4556                 mqd->cp_hqd_eop_base_addr_lo =
4557                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
4558                 mqd->cp_hqd_eop_base_addr_hi =
4559                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4560
4561                 /* enable doorbell? */
4562                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4563                 if (use_doorbell) {
4564                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4565                 } else {
4566                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4567                 }
4568                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4569                 mqd->cp_hqd_pq_doorbell_control = tmp;
4570
4571                 /* disable the queue if it's active */
4572                 mqd->cp_hqd_dequeue_request = 0;
4573                 mqd->cp_hqd_pq_rptr = 0;
4574                 mqd->cp_hqd_pq_wptr= 0;
4575                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4576                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4577                         for (j = 0; j < adev->usec_timeout; j++) {
4578                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4579                                         break;
4580                                 udelay(1);
4581                         }
4582                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4583                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4584                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4585                 }
4586
4587                 /* set the pointer to the MQD */
4588                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4589                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4590                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4591                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4592
4593                 /* set MQD vmid to 0 */
4594                 tmp = RREG32(mmCP_MQD_CONTROL);
4595                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4596                 WREG32(mmCP_MQD_CONTROL, tmp);
4597                 mqd->cp_mqd_control = tmp;
4598
4599                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4600                 hqd_gpu_addr = ring->gpu_addr >> 8;
4601                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4602                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4603                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4604                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4605
4606                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4607                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4608                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4609                                     (order_base_2(ring->ring_size / 4) - 1));
4610                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4611                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4612 #ifdef __BIG_ENDIAN
4613                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4614 #endif
4615                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4616                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4617                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4618                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4619                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4620                 mqd->cp_hqd_pq_control = tmp;
4621
4622                 /* set the wb address wether it's enabled or not */
4623                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4624                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4625                 mqd->cp_hqd_pq_rptr_report_addr_hi =
4626                         upper_32_bits(wb_gpu_addr) & 0xffff;
4627                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4628                        mqd->cp_hqd_pq_rptr_report_addr_lo);
4629                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4630                        mqd->cp_hqd_pq_rptr_report_addr_hi);
4631
4632                 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4633                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4634                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4635                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4636                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4637                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4638                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
4639
4640                 /* enable the doorbell if requested */
4641                 if (use_doorbell) {
4642                         if ((adev->asic_type == CHIP_CARRIZO) ||
4643                             (adev->asic_type == CHIP_FIJI) ||
4644                             (adev->asic_type == CHIP_STONEY) ||
4645                             (adev->asic_type == CHIP_POLARIS11) ||
4646                             (adev->asic_type == CHIP_POLARIS10)) {
4647                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4648                                        AMDGPU_DOORBELL_KIQ << 2);
4649                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4650                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
4651                         }
4652                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4653                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4654                                             DOORBELL_OFFSET, ring->doorbell_index);
4655                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4656                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4657                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4658                         mqd->cp_hqd_pq_doorbell_control = tmp;
4659
4660                 } else {
4661                         mqd->cp_hqd_pq_doorbell_control = 0;
4662                 }
4663                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4664                        mqd->cp_hqd_pq_doorbell_control);
4665
4666                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4667                 ring->wptr = 0;
4668                 mqd->cp_hqd_pq_wptr = ring->wptr;
4669                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4670                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4671
4672                 /* set the vmid for the queue */
4673                 mqd->cp_hqd_vmid = 0;
4674                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4675
4676                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4677                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4678                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4679                 mqd->cp_hqd_persistent_state = tmp;
4680                 if (adev->asic_type == CHIP_STONEY ||
4681                         adev->asic_type == CHIP_POLARIS11 ||
4682                         adev->asic_type == CHIP_POLARIS10) {
4683                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4684                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4685                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4686                 }
4687
4688                 /* activate the queue */
4689                 mqd->cp_hqd_active = 1;
4690                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4691
4692                 vi_srbm_select(adev, 0, 0, 0, 0);
4693                 mutex_unlock(&adev->srbm_mutex);
4694
4695                 amdgpu_bo_kunmap(ring->mqd_obj);
4696                 amdgpu_bo_unreserve(ring->mqd_obj);
4697         }
4698
4699         if (use_doorbell) {
4700                 tmp = RREG32(mmCP_PQ_STATUS);
4701                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4702                 WREG32(mmCP_PQ_STATUS, tmp);
4703         }
4704
4705         gfx_v8_0_cp_compute_enable(adev, true);
4706
4707         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4708                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4709
4710                 ring->ready = true;
4711                 r = amdgpu_ring_test_ring(ring);
4712                 if (r)
4713                         ring->ready = false;
4714         }
4715
4716         return 0;
4717 }
4718
4719 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4720 {
4721         int r;
4722
4723         if (!(adev->flags & AMD_IS_APU))
4724                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4725
4726         if (!adev->pp_enabled) {
4727                 if (!adev->firmware.smu_load) {
4728                         /* legacy firmware loading */
4729                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
4730                         if (r)
4731                                 return r;
4732
4733                         r = gfx_v8_0_cp_compute_load_microcode(adev);
4734                         if (r)
4735                                 return r;
4736                 } else {
4737                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4738                                                         AMDGPU_UCODE_ID_CP_CE);
4739                         if (r)
4740                                 return -EINVAL;
4741
4742                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4743                                                         AMDGPU_UCODE_ID_CP_PFP);
4744                         if (r)
4745                                 return -EINVAL;
4746
4747                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4748                                                         AMDGPU_UCODE_ID_CP_ME);
4749                         if (r)
4750                                 return -EINVAL;
4751
4752                         if (adev->asic_type == CHIP_TOPAZ) {
4753                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4754                                 if (r)
4755                                         return r;
4756                         } else {
4757                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4758                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
4759                                 if (r)
4760                                         return -EINVAL;
4761                         }
4762                 }
4763         }
4764
4765         r = gfx_v8_0_cp_gfx_resume(adev);
4766         if (r)
4767                 return r;
4768
4769         r = gfx_v8_0_cp_compute_resume(adev);
4770         if (r)
4771                 return r;
4772
4773         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4774
4775         return 0;
4776 }
4777
4778 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4779 {
4780         gfx_v8_0_cp_gfx_enable(adev, enable);
4781         gfx_v8_0_cp_compute_enable(adev, enable);
4782 }
4783
/**
 * gfx_v8_0_hw_init - hardware init callback of the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Calls gfx_v8_0_init_golden_registers() and gfx_v8_0_gpu_init(), then
 * resumes the RLC followed by the CP.
 *
 * Returns 0 on success or the error code of the resume step that failed.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int ret;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	ret = gfx_v8_0_rlc_resume(adev);
	if (ret)
		return ret;

	/* the CP is resumed only after the RLC resumed successfully */
	return gfx_v8_0_cp_resume(adev);
}
4803
4804 static int gfx_v8_0_hw_fini(void *handle)
4805 {
4806         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4807
4808         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4809         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4810         gfx_v8_0_cp_enable(adev, false);
4811         gfx_v8_0_rlc_stop(adev);
4812         gfx_v8_0_cp_compute_fini(adev);
4813
4814         amdgpu_set_powergating_state(adev,
4815                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
4816
4817         return 0;
4818 }
4819
/**
 * gfx_v8_0_suspend - suspend callback of the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Suspend is implemented as a plain hardware teardown.
 * Returns whatever gfx_v8_0_hw_fini() returns.
 */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4826
/**
 * gfx_v8_0_resume - resume callback of the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Resume is implemented as a plain hardware init.
 * Returns whatever gfx_v8_0_hw_init() returns.
 */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4833
4834 static bool gfx_v8_0_is_idle(void *handle)
4835 {
4836         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4837
4838         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4839                 return false;
4840         else
4841                 return true;
4842 }
4843
4844 static int gfx_v8_0_wait_for_idle(void *handle)
4845 {
4846         unsigned i;
4847         u32 tmp;
4848         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4849
4850         for (i = 0; i < adev->usec_timeout; i++) {
4851                 /* read MC_STATUS */
4852                 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4853
4854                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4855                         return 0;
4856                 udelay(1);
4857         }
4858         return -ETIMEDOUT;
4859 }
4860
4861 static int gfx_v8_0_soft_reset(void *handle)
4862 {
4863         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4864         u32 tmp;
4865         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4866
4867         /* GRBM_STATUS */
4868         tmp = RREG32(mmGRBM_STATUS);
4869         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4870                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4871                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4872                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4873                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4874                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4875                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4876                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4877                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4878                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4879         }
4880
4881         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4882                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4883                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4884                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4885                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4886         }
4887
4888         /* GRBM_STATUS2 */
4889         tmp = RREG32(mmGRBM_STATUS2);
4890         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4891                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4892                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4893
4894         /* SRBM_STATUS */
4895         tmp = RREG32(mmSRBM_STATUS);
4896         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4897                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4898                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4899
4900         if (grbm_soft_reset || srbm_soft_reset) {
4901                 /* stop the rlc */
4902                 gfx_v8_0_rlc_stop(adev);
4903
4904                 /* Disable GFX parsing/prefetching */
4905                 gfx_v8_0_cp_gfx_enable(adev, false);
4906
4907                 /* Disable MEC parsing/prefetching */
4908                 gfx_v8_0_cp_compute_enable(adev, false);
4909
4910                 if (grbm_soft_reset || srbm_soft_reset) {
4911                         tmp = RREG32(mmGMCON_DEBUG);
4912                         tmp = REG_SET_FIELD(tmp,
4913                                             GMCON_DEBUG, GFX_STALL, 1);
4914                         tmp = REG_SET_FIELD(tmp,
4915                                             GMCON_DEBUG, GFX_CLEAR, 1);
4916                         WREG32(mmGMCON_DEBUG, tmp);
4917
4918                         udelay(50);
4919                 }
4920
4921                 if (grbm_soft_reset) {
4922                         tmp = RREG32(mmGRBM_SOFT_RESET);
4923                         tmp |= grbm_soft_reset;
4924                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4925                         WREG32(mmGRBM_SOFT_RESET, tmp);
4926                         tmp = RREG32(mmGRBM_SOFT_RESET);
4927
4928                         udelay(50);
4929
4930                         tmp &= ~grbm_soft_reset;
4931                         WREG32(mmGRBM_SOFT_RESET, tmp);
4932                         tmp = RREG32(mmGRBM_SOFT_RESET);
4933                 }
4934
4935                 if (srbm_soft_reset) {
4936                         tmp = RREG32(mmSRBM_SOFT_RESET);
4937                         tmp |= srbm_soft_reset;
4938                         dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4939                         WREG32(mmSRBM_SOFT_RESET, tmp);
4940                         tmp = RREG32(mmSRBM_SOFT_RESET);
4941
4942                         udelay(50);
4943
4944                         tmp &= ~srbm_soft_reset;
4945                         WREG32(mmSRBM_SOFT_RESET, tmp);
4946                         tmp = RREG32(mmSRBM_SOFT_RESET);
4947                 }
4948
4949                 if (grbm_soft_reset || srbm_soft_reset) {
4950                         tmp = RREG32(mmGMCON_DEBUG);
4951                         tmp = REG_SET_FIELD(tmp,
4952                                             GMCON_DEBUG, GFX_STALL, 0);
4953                         tmp = REG_SET_FIELD(tmp,
4954                                             GMCON_DEBUG, GFX_CLEAR, 0);
4955                         WREG32(mmGMCON_DEBUG, tmp);
4956                 }
4957
4958                 /* Wait a little for things to settle down */
4959                 udelay(50);
4960         }
4961         return 0;
4962 }
4963
4964 /**
4965  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4966  *
4967  * @adev: amdgpu_device pointer
4968  *
4969  * Fetches a GPU clock counter snapshot.
4970  * Returns the 64 bit clock counter snapshot.
4971  */
4972 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4973 {
4974         uint64_t clock;
4975
4976         mutex_lock(&adev->gfx.gpu_clock_mutex);
4977         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4978         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4979                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4980         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4981         return clock;
4982 }
4983
4984 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4985                                           uint32_t vmid,
4986                                           uint32_t gds_base, uint32_t gds_size,
4987                                           uint32_t gws_base, uint32_t gws_size,
4988                                           uint32_t oa_base, uint32_t oa_size)
4989 {
4990         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4991         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4992
4993         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4994         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4995
4996         oa_base = oa_base >> AMDGPU_OA_SHIFT;
4997         oa_size = oa_size >> AMDGPU_OA_SHIFT;
4998
4999         /* GDS Base */
5000         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5001         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5002                                 WRITE_DATA_DST_SEL(0)));
5003         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5004         amdgpu_ring_write(ring, 0);
5005         amdgpu_ring_write(ring, gds_base);
5006
5007         /* GDS Size */
5008         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5009         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5010                                 WRITE_DATA_DST_SEL(0)));
5011         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5012         amdgpu_ring_write(ring, 0);
5013         amdgpu_ring_write(ring, gds_size);
5014
5015         /* GWS */
5016         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5017         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5018                                 WRITE_DATA_DST_SEL(0)));
5019         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5020         amdgpu_ring_write(ring, 0);
5021         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5022
5023         /* OA */
5024         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5025         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5026                                 WRITE_DATA_DST_SEL(0)));
5027         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5028         amdgpu_ring_write(ring, 0);
5029         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5030 }
5031
5032 static int gfx_v8_0_early_init(void *handle)
5033 {
5034         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5035
5036         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5037         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5038         gfx_v8_0_set_ring_funcs(adev);
5039         gfx_v8_0_set_irq_funcs(adev);
5040         gfx_v8_0_set_gds_init(adev);
5041         gfx_v8_0_set_rlc_funcs(adev);
5042
5043         return 0;
5044 }
5045
5046 static int gfx_v8_0_late_init(void *handle)
5047 {
5048         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5049         int r;
5050
5051         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5052         if (r)
5053                 return r;
5054
5055         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5056         if (r)
5057                 return r;
5058
5059         /* requires IBs so do in late init after IB pool is initialized */
5060         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5061         if (r)
5062                 return r;
5063
5064         amdgpu_set_powergating_state(adev,
5065                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5066
5067         return 0;
5068 }
5069
5070 static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5071                 bool enable)
5072 {
5073         uint32_t data, temp;
5074
5075         /* Send msg to SMU via Powerplay */
5076         amdgpu_set_powergating_state(adev,
5077                         AMD_IP_BLOCK_TYPE_SMC,
5078                         enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5079
5080         if (enable) {
5081                 /* Enable static MGPG */
5082                 temp = data = RREG32(mmRLC_PG_CNTL);
5083                 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5084
5085                 if (temp != data)
5086                         WREG32(mmRLC_PG_CNTL, data);
5087         } else {
5088                 temp = data = RREG32(mmRLC_PG_CNTL);
5089                 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5090
5091                 if (temp != data)
5092                         WREG32(mmRLC_PG_CNTL, data);
5093         }
5094 }
5095
5096 static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5097                 bool enable)
5098 {
5099         uint32_t data, temp;
5100
5101         if (enable) {
5102                 /* Enable dynamic MGPG */
5103                 temp = data = RREG32(mmRLC_PG_CNTL);
5104                 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5105
5106                 if (temp != data)
5107                         WREG32(mmRLC_PG_CNTL, data);
5108         } else {
5109                 temp = data = RREG32(mmRLC_PG_CNTL);
5110                 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5111
5112                 if (temp != data)
5113                         WREG32(mmRLC_PG_CNTL, data);
5114         }
5115 }
5116
5117 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5118                 bool enable)
5119 {
5120         uint32_t data, temp;
5121
5122         if (enable) {
5123                 /* Enable quick PG */
5124                 temp = data = RREG32(mmRLC_PG_CNTL);
5125                 data |= 0x100000;
5126
5127                 if (temp != data)
5128                         WREG32(mmRLC_PG_CNTL, data);
5129         } else {
5130                 temp = data = RREG32(mmRLC_PG_CNTL);
5131                 data &= ~0x100000;
5132
5133                 if (temp != data)
5134                         WREG32(mmRLC_PG_CNTL, data);
5135         }
5136 }
5137
5138 static int gfx_v8_0_set_powergating_state(void *handle,
5139                                           enum amd_powergating_state state)
5140 {
5141         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5142
5143         if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5144                 return 0;
5145
5146         switch (adev->asic_type) {
5147         case CHIP_POLARIS11:
5148                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG)
5149                         polaris11_enable_gfx_static_mg_power_gating(adev,
5150                                         state == AMD_PG_STATE_GATE ? true : false);
5151                 else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)
5152                         polaris11_enable_gfx_dynamic_mg_power_gating(adev,
5153                                         state == AMD_PG_STATE_GATE ? true : false);
5154                 else
5155                         polaris11_enable_gfx_quick_mg_power_gating(adev,
5156                                         state == AMD_PG_STATE_GATE ? true : false);
5157                 break;
5158         default:
5159                 break;
5160         }
5161
5162         return 0;
5163 }
5164
5165 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5166                                      uint32_t reg_addr, uint32_t cmd)
5167 {
5168         uint32_t data;
5169
5170         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5171
5172         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5173         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5174
5175         data = RREG32(mmRLC_SERDES_WR_CTRL);
5176         if (adev->asic_type == CHIP_STONEY)
5177                         data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5178                         RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5179                         RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5180                         RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5181                         RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5182                         RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5183                         RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5184                         RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5185                         RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5186         else
5187                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5188                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5189                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5190                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5191                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5192                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5193                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5194                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5195                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5196                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5197                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5198         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5199                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5200                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5201                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5202
5203         WREG32(mmRLC_SERDES_WR_CTRL, data);
5204 }
5205
/* message codes placed in the MESSAGE field of RLC_GPR_REG2 */
#define MSG_ENTER_RLC_SAFE_MODE		1
#define MSG_EXIT_RLC_SAFE_MODE		0

/* RLC_GPR_REG2 layout used by the cz safe-mode helpers: REQ bit and MESSAGE field */
#define RLC_GPR_REG2__REQ_MASK		0x00000001
#define RLC_GPR_REG2__MESSAGE__SHIFT	0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK	0x0000001e
5212
5213 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5214 {
5215         u32 data = 0;
5216         unsigned i;
5217
5218         data = RREG32(mmRLC_CNTL);
5219         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5220                 return;
5221
5222         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5223             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5224                                AMD_PG_SUPPORT_GFX_DMG))) {
5225                 data |= RLC_GPR_REG2__REQ_MASK;
5226                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5227                 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5228                 WREG32(mmRLC_GPR_REG2, data);
5229
5230                 for (i = 0; i < adev->usec_timeout; i++) {
5231                         if ((RREG32(mmRLC_GPM_STAT) &
5232                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5233                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5234                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5235                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5236                                 break;
5237                         udelay(1);
5238                 }
5239
5240                 for (i = 0; i < adev->usec_timeout; i++) {
5241                         if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5242                                 break;
5243                         udelay(1);
5244                 }
5245                 adev->gfx.rlc.in_safe_mode = true;
5246         }
5247 }
5248
5249 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5250 {
5251         u32 data;
5252         unsigned i;
5253
5254         data = RREG32(mmRLC_CNTL);
5255         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5256                 return;
5257
5258         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5259             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5260                                AMD_PG_SUPPORT_GFX_DMG))) {
5261                 data |= RLC_GPR_REG2__REQ_MASK;
5262                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5263                 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5264                 WREG32(mmRLC_GPR_REG2, data);
5265                 adev->gfx.rlc.in_safe_mode = false;
5266         }
5267
5268         for (i = 0; i < adev->usec_timeout; i++) {
5269                 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5270                         break;
5271                 udelay(1);
5272         }
5273 }
5274
5275 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5276 {
5277         u32 data;
5278         unsigned i;
5279
5280         data = RREG32(mmRLC_CNTL);
5281         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5282                 return;
5283
5284         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5285                 data |= RLC_SAFE_MODE__CMD_MASK;
5286                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5287                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5288                 WREG32(mmRLC_SAFE_MODE, data);
5289
5290                 for (i = 0; i < adev->usec_timeout; i++) {
5291                         if ((RREG32(mmRLC_GPM_STAT) &
5292                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5293                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5294                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5295                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5296                                 break;
5297                         udelay(1);
5298                 }
5299
5300                 for (i = 0; i < adev->usec_timeout; i++) {
5301                         if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5302                                 break;
5303                         udelay(1);
5304                 }
5305                 adev->gfx.rlc.in_safe_mode = true;
5306         }
5307 }
5308
5309 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5310 {
5311         u32 data = 0;
5312         unsigned i;
5313
5314         data = RREG32(mmRLC_CNTL);
5315         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5316                 return;
5317
5318         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5319                 if (adev->gfx.rlc.in_safe_mode) {
5320                         data |= RLC_SAFE_MODE__CMD_MASK;
5321                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5322                         WREG32(mmRLC_SAFE_MODE, data);
5323                         adev->gfx.rlc.in_safe_mode = false;
5324                 }
5325         }
5326
5327         for (i = 0; i < adev->usec_timeout; i++) {
5328                 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5329                         break;
5330                 udelay(1);
5331         }
5332 }
5333
5334 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5335 {
5336         adev->gfx.rlc.in_safe_mode = true;
5337 }
5338
5339 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5340 {
5341         adev->gfx.rlc.in_safe_mode = false;
5342 }
5343
5344 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5345         .enter_safe_mode = cz_enter_rlc_safe_mode,
5346         .exit_safe_mode = cz_exit_rlc_safe_mode
5347 };
5348
5349 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5350         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5351         .exit_safe_mode = iceland_exit_rlc_safe_mode
5352 };
5353
5354 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5355         .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5356         .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5357 };
5358
5359 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5360                                                       bool enable)
5361 {
5362         uint32_t temp, data;
5363
5364         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5365
5366         /* It is disabled by HW by default */
5367         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5368                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5369                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5370                                 /* 1 - RLC memory Light sleep */
5371                                 temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
5372                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5373                                 if (temp != data)
5374                                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5375                         }
5376
5377                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5378                                 /* 2 - CP memory Light sleep */
5379                                 temp = data = RREG32(mmCP_MEM_SLP_CNTL);
5380                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5381                                 if (temp != data)
5382                                         WREG32(mmCP_MEM_SLP_CNTL, data);
5383                         }
5384                 }
5385
5386                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5387                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5388                 if (adev->flags & AMD_IS_APU)
5389                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5390                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5391                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5392                 else
5393                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5394                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5395                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5396                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5397
5398                 if (temp != data)
5399                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5400
5401                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5402                 gfx_v8_0_wait_for_rlc_serdes(adev);
5403
5404                 /* 5 - clear mgcg override */
5405                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5406
5407                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5408                         /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5409                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5410                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5411                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5412                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5413                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5414                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5415                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5416                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5417                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5418                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5419                         if (temp != data)
5420                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5421                 }
5422                 udelay(50);
5423
5424                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5425                 gfx_v8_0_wait_for_rlc_serdes(adev);
5426         } else {
5427                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5428                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5429                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5430                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5431                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5432                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5433                 if (temp != data)
5434                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5435
5436                 /* 2 - disable MGLS in RLC */
5437                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5438                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5439                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5440                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5441                 }
5442
5443                 /* 3 - disable MGLS in CP */
5444                 data = RREG32(mmCP_MEM_SLP_CNTL);
5445                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5446                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5447                         WREG32(mmCP_MEM_SLP_CNTL, data);
5448                 }
5449
5450                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5451                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5452                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5453                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5454                 if (temp != data)
5455                         WREG32(mmCGTS_SM_CTRL_REG, data);
5456
5457                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5458                 gfx_v8_0_wait_for_rlc_serdes(adev);
5459
5460                 /* 6 - set mgcg override */
5461                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5462
5463                 udelay(50);
5464
5465                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5466                 gfx_v8_0_wait_for_rlc_serdes(adev);
5467         }
5468
5469         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5470 }
5471
5472 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5473                                                       bool enable)
5474 {
5475         uint32_t temp, temp1, data, data1;
5476
5477         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5478
5479         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5480
5481         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5482                 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
5483                  * Cmp_busy/GFX_Idle interrupts
5484                  */
5485                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5486
5487                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5488                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5489                 if (temp1 != data1)
5490                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5491
5492                 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5493                 gfx_v8_0_wait_for_rlc_serdes(adev);
5494
5495                 /* 3 - clear cgcg override */
5496                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5497
5498                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5499                 gfx_v8_0_wait_for_rlc_serdes(adev);
5500
5501                 /* 4 - write cmd to set CGLS */
5502                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5503
5504                 /* 5 - enable cgcg */
5505                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5506
5507                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5508                         /* enable cgls*/
5509                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5510
5511                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5512                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5513
5514                         if (temp1 != data1)
5515                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5516                 } else {
5517                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5518                 }
5519
5520                 if (temp != data)
5521                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5522         } else {
5523                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5524                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5525
5526                 /* TEST CGCG */
5527                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5528                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5529                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5530                 if (temp1 != data1)
5531                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5532
5533                 /* read gfx register to wake up cgcg */
5534                 RREG32(mmCB_CGTT_SCLK_CTRL);
5535                 RREG32(mmCB_CGTT_SCLK_CTRL);
5536                 RREG32(mmCB_CGTT_SCLK_CTRL);
5537                 RREG32(mmCB_CGTT_SCLK_CTRL);
5538
5539                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5540                 gfx_v8_0_wait_for_rlc_serdes(adev);
5541
5542                 /* write cmd to Set CGCG Overrride */
5543                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5544
5545                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5546                 gfx_v8_0_wait_for_rlc_serdes(adev);
5547
5548                 /* write cmd to Clear CGLS */
5549                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5550
5551                 /* disable cgcg, cgls should be disabled too. */
5552                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5553                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5554                 if (temp != data)
5555                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5556         }
5557
5558         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5559 }
5560 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5561                                             bool enable)
5562 {
5563         if (enable) {
5564                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5565                  * ===  MGCG + MGLS + TS(CG/LS) ===
5566                  */
5567                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5568                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5569         } else {
5570                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5571                  * ===  CGCG + CGLS ===
5572                  */
5573                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5574                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5575         }
5576         return 0;
5577 }
5578
5579 static int gfx_v8_0_set_clockgating_state(void *handle,
5580                                           enum amd_clockgating_state state)
5581 {
5582         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5583
5584         switch (adev->asic_type) {
5585         case CHIP_FIJI:
5586         case CHIP_CARRIZO:
5587         case CHIP_STONEY:
5588                 gfx_v8_0_update_gfx_clock_gating(adev,
5589                                                  state == AMD_CG_STATE_GATE ? true : false);
5590                 break;
5591         default:
5592                 break;
5593         }
5594         return 0;
5595 }
5596
5597 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5598 {
5599         u32 rptr;
5600
5601         rptr = ring->adev->wb.wb[ring->rptr_offs];
5602
5603         return rptr;
5604 }
5605
5606 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5607 {
5608         struct amdgpu_device *adev = ring->adev;
5609         u32 wptr;
5610
5611         if (ring->use_doorbell)
5612                 /* XXX check if swapping is necessary on BE */
5613                 wptr = ring->adev->wb.wb[ring->wptr_offs];
5614         else
5615                 wptr = RREG32(mmCP_RB0_WPTR);
5616
5617         return wptr;
5618 }
5619
5620 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5621 {
5622         struct amdgpu_device *adev = ring->adev;
5623
5624         if (ring->use_doorbell) {
5625                 /* XXX check if swapping is necessary on BE */
5626                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5627                 WDOORBELL32(ring->doorbell_index, ring->wptr);
5628         } else {
5629                 WREG32(mmCP_RB0_WPTR, ring->wptr);
5630                 (void)RREG32(mmCP_RB0_WPTR);
5631         }
5632 }
5633
5634 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5635 {
5636         u32 ref_and_mask, reg_mem_engine;
5637
5638         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
5639                 switch (ring->me) {
5640                 case 1:
5641                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5642                         break;
5643                 case 2:
5644                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5645                         break;
5646                 default:
5647                         return;
5648                 }
5649                 reg_mem_engine = 0;
5650         } else {
5651                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
5652                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
5653         }
5654
5655         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5656         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
5657                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
5658                                  reg_mem_engine));
5659         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
5660         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
5661         amdgpu_ring_write(ring, ref_and_mask);
5662         amdgpu_ring_write(ring, ref_and_mask);
5663         amdgpu_ring_write(ring, 0x20); /* poll interval */
5664 }
5665
5666 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
5667 {
5668         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5669         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5670                                  WRITE_DATA_DST_SEL(0) |
5671                                  WR_CONFIRM));
5672         amdgpu_ring_write(ring, mmHDP_DEBUG0);
5673         amdgpu_ring_write(ring, 0);
5674         amdgpu_ring_write(ring, 1);
5675
5676 }
5677
5678 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5679                                       struct amdgpu_ib *ib,
5680                                       unsigned vm_id, bool ctx_switch)
5681 {
5682         u32 header, control = 0;
5683         u32 next_rptr = ring->wptr + 5;
5684
5685         if (ctx_switch)
5686                 next_rptr += 2;
5687
5688         next_rptr += 4;
5689         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5690         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5691         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5692         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5693         amdgpu_ring_write(ring, next_rptr);
5694
5695         /* insert SWITCH_BUFFER packet before first IB in the ring frame */
5696         if (ctx_switch) {
5697                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5698                 amdgpu_ring_write(ring, 0);
5699         }
5700
5701         if (ib->flags & AMDGPU_IB_FLAG_CE)
5702                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5703         else
5704                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5705
5706         control |= ib->length_dw | (vm_id << 24);
5707
5708         amdgpu_ring_write(ring, header);
5709         amdgpu_ring_write(ring,
5710 #ifdef __BIG_ENDIAN
5711                           (2 << 0) |
5712 #endif
5713                           (ib->gpu_addr & 0xFFFFFFFC));
5714         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5715         amdgpu_ring_write(ring, control);
5716 }
5717
5718 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5719                                           struct amdgpu_ib *ib,
5720                                           unsigned vm_id, bool ctx_switch)
5721 {
5722         u32 header, control = 0;
5723         u32 next_rptr = ring->wptr + 5;
5724
5725         control |= INDIRECT_BUFFER_VALID;
5726
5727         next_rptr += 4;
5728         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5729         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5730         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5731         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5732         amdgpu_ring_write(ring, next_rptr);
5733
5734         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5735
5736         control |= ib->length_dw | (vm_id << 24);
5737
5738         amdgpu_ring_write(ring, header);
5739         amdgpu_ring_write(ring,
5740 #ifdef __BIG_ENDIAN
5741                                           (2 << 0) |
5742 #endif
5743                                           (ib->gpu_addr & 0xFFFFFFFC));
5744         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5745         amdgpu_ring_write(ring, control);
5746 }
5747
5748 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
5749                                          u64 seq, unsigned flags)
5750 {
5751         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5752         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5753
5754         /* EVENT_WRITE_EOP - flush caches, send int */
5755         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
5756         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5757                                  EOP_TC_ACTION_EN |
5758                                  EOP_TC_WB_ACTION_EN |
5759                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5760                                  EVENT_INDEX(5)));
5761         amdgpu_ring_write(ring, addr & 0xfffffffc);
5762         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
5763                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5764         amdgpu_ring_write(ring, lower_32_bits(seq));
5765         amdgpu_ring_write(ring, upper_32_bits(seq));
5766
5767 }
5768
5769 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5770 {
5771         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5772         uint32_t seq = ring->fence_drv.sync_seq;
5773         uint64_t addr = ring->fence_drv.gpu_addr;
5774
5775         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5776         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
5777                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
5778                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
5779         amdgpu_ring_write(ring, addr & 0xfffffffc);
5780         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
5781         amdgpu_ring_write(ring, seq);
5782         amdgpu_ring_write(ring, 0xffffffff);
5783         amdgpu_ring_write(ring, 4); /* poll interval */
5784
5785         if (usepfp) {
5786                 /* synce CE with ME to prevent CE fetch CEIB before context switch done */
5787                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5788                 amdgpu_ring_write(ring, 0);
5789                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5790                 amdgpu_ring_write(ring, 0);
5791         }
5792 }
5793
5794 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5795                                         unsigned vm_id, uint64_t pd_addr)
5796 {
5797         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5798
5799         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5800         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5801                                  WRITE_DATA_DST_SEL(0)) |
5802                                  WR_CONFIRM);
5803         if (vm_id < 8) {
5804                 amdgpu_ring_write(ring,
5805                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
5806         } else {
5807                 amdgpu_ring_write(ring,
5808                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
5809         }
5810         amdgpu_ring_write(ring, 0);
5811         amdgpu_ring_write(ring, pd_addr >> 12);
5812
5813         /* bits 0-15 are the VM contexts0-15 */
5814         /* invalidate the cache */
5815         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5816         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5817                                  WRITE_DATA_DST_SEL(0)));
5818         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5819         amdgpu_ring_write(ring, 0);
5820         amdgpu_ring_write(ring, 1 << vm_id);
5821
5822         /* wait for the invalidate to complete */
5823         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5824         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5825                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
5826                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5827         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5828         amdgpu_ring_write(ring, 0);
5829         amdgpu_ring_write(ring, 0); /* ref */
5830         amdgpu_ring_write(ring, 0); /* mask */
5831         amdgpu_ring_write(ring, 0x20); /* poll interval */
5832
5833         /* compute doesn't have PFP */
5834         if (usepfp) {
5835                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5836                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5837                 amdgpu_ring_write(ring, 0x0);
5838                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5839                 amdgpu_ring_write(ring, 0);
5840                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5841                 amdgpu_ring_write(ring, 0);
5842         }
5843 }
5844
5845 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5846 {
5847         return ring->adev->wb.wb[ring->rptr_offs];
5848 }
5849
5850 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5851 {
5852         return ring->adev->wb.wb[ring->wptr_offs];
5853 }
5854
5855 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5856 {
5857         struct amdgpu_device *adev = ring->adev;
5858
5859         /* XXX check if swapping is necessary on BE */
5860         adev->wb.wb[ring->wptr_offs] = ring->wptr;
5861         WDOORBELL32(ring->doorbell_index, ring->wptr);
5862 }
5863
5864 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
5865                                              u64 addr, u64 seq,
5866                                              unsigned flags)
5867 {
5868         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5869         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5870
5871         /* RELEASE_MEM - flush caches, send int */
5872         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
5873         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5874                                  EOP_TC_ACTION_EN |
5875                                  EOP_TC_WB_ACTION_EN |
5876                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5877                                  EVENT_INDEX(5)));
5878         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5879         amdgpu_ring_write(ring, addr & 0xfffffffc);
5880         amdgpu_ring_write(ring, upper_32_bits(addr));
5881         amdgpu_ring_write(ring, lower_32_bits(seq));
5882         amdgpu_ring_write(ring, upper_32_bits(seq));
5883 }
5884
5885 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5886                                                  enum amdgpu_interrupt_state state)
5887 {
5888         u32 cp_int_cntl;
5889
5890         switch (state) {
5891         case AMDGPU_IRQ_STATE_DISABLE:
5892                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5893                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5894                                             TIME_STAMP_INT_ENABLE, 0);
5895                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5896                 break;
5897         case AMDGPU_IRQ_STATE_ENABLE:
5898                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5899                 cp_int_cntl =
5900                         REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5901                                       TIME_STAMP_INT_ENABLE, 1);
5902                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5903                 break;
5904         default:
5905                 break;
5906         }
5907 }
5908
5909 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5910                                                      int me, int pipe,
5911                                                      enum amdgpu_interrupt_state state)
5912 {
5913         u32 mec_int_cntl, mec_int_cntl_reg;
5914
5915         /*
5916          * amdgpu controls only pipe 0 of MEC1. That's why this function only
5917          * handles the setting of interrupts for this specific pipe. All other
5918          * pipes' interrupts are set by amdkfd.
5919          */
5920
5921         if (me == 1) {
5922                 switch (pipe) {
5923                 case 0:
5924                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
5925                         break;
5926                 default:
5927                         DRM_DEBUG("invalid pipe %d\n", pipe);
5928                         return;
5929                 }
5930         } else {
5931                 DRM_DEBUG("invalid me %d\n", me);
5932                 return;
5933         }
5934
5935         switch (state) {
5936         case AMDGPU_IRQ_STATE_DISABLE:
5937                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5938                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5939                                              TIME_STAMP_INT_ENABLE, 0);
5940                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5941                 break;
5942         case AMDGPU_IRQ_STATE_ENABLE:
5943                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5944                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5945                                              TIME_STAMP_INT_ENABLE, 1);
5946                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5947                 break;
5948         default:
5949                 break;
5950         }
5951 }
5952
5953 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5954                                              struct amdgpu_irq_src *source,
5955                                              unsigned type,
5956                                              enum amdgpu_interrupt_state state)
5957 {
5958         u32 cp_int_cntl;
5959
5960         switch (state) {
5961         case AMDGPU_IRQ_STATE_DISABLE:
5962                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5963                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5964                                             PRIV_REG_INT_ENABLE, 0);
5965                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5966                 break;
5967         case AMDGPU_IRQ_STATE_ENABLE:
5968                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5969                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5970                                             PRIV_REG_INT_ENABLE, 1);
5971                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5972                 break;
5973         default:
5974                 break;
5975         }
5976
5977         return 0;
5978 }
5979
5980 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5981                                               struct amdgpu_irq_src *source,
5982                                               unsigned type,
5983                                               enum amdgpu_interrupt_state state)
5984 {
5985         u32 cp_int_cntl;
5986
5987         switch (state) {
5988         case AMDGPU_IRQ_STATE_DISABLE:
5989                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5990                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5991                                             PRIV_INSTR_INT_ENABLE, 0);
5992                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5993                 break;
5994         case AMDGPU_IRQ_STATE_ENABLE:
5995                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5996                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5997                                             PRIV_INSTR_INT_ENABLE, 1);
5998                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5999                 break;
6000         default:
6001                 break;
6002         }
6003
6004         return 0;
6005 }
6006
6007 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6008                                             struct amdgpu_irq_src *src,
6009                                             unsigned type,
6010                                             enum amdgpu_interrupt_state state)
6011 {
6012         switch (type) {
6013         case AMDGPU_CP_IRQ_GFX_EOP:
6014                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6015                 break;
6016         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6017                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6018                 break;
6019         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6020                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6021                 break;
6022         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6023                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6024                 break;
6025         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6026                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6027                 break;
6028         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6029                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6030                 break;
6031         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6032                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6033                 break;
6034         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6035                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6036                 break;
6037         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6038                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6039                 break;
6040         default:
6041                 break;
6042         }
6043         return 0;
6044 }
6045
6046 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6047                             struct amdgpu_irq_src *source,
6048                             struct amdgpu_iv_entry *entry)
6049 {
6050         int i;
6051         u8 me_id, pipe_id, queue_id;
6052         struct amdgpu_ring *ring;
6053
6054         DRM_DEBUG("IH: CP EOP\n");
6055         me_id = (entry->ring_id & 0x0c) >> 2;
6056         pipe_id = (entry->ring_id & 0x03) >> 0;
6057         queue_id = (entry->ring_id & 0x70) >> 4;
6058
6059         switch (me_id) {
6060         case 0:
6061                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6062                 break;
6063         case 1:
6064         case 2:
6065                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6066                         ring = &adev->gfx.compute_ring[i];
6067                         /* Per-queue interrupt is supported for MEC starting from VI.
6068                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6069                           */
6070                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6071                                 amdgpu_fence_process(ring);
6072                 }
6073                 break;
6074         }
6075         return 0;
6076 }
6077
6078 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6079                                  struct amdgpu_irq_src *source,
6080                                  struct amdgpu_iv_entry *entry)
6081 {
6082         DRM_ERROR("Illegal register access in command stream\n");
6083         schedule_work(&adev->reset_work);
6084         return 0;
6085 }
6086
6087 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6088                                   struct amdgpu_irq_src *source,
6089                                   struct amdgpu_iv_entry *entry)
6090 {
6091         DRM_ERROR("Illegal instruction in command stream\n");
6092         schedule_work(&adev->reset_work);
6093         return 0;
6094 }
6095
6096 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6097         .name = "gfx_v8_0",
6098         .early_init = gfx_v8_0_early_init,
6099         .late_init = gfx_v8_0_late_init,
6100         .sw_init = gfx_v8_0_sw_init,
6101         .sw_fini = gfx_v8_0_sw_fini,
6102         .hw_init = gfx_v8_0_hw_init,
6103         .hw_fini = gfx_v8_0_hw_fini,
6104         .suspend = gfx_v8_0_suspend,
6105         .resume = gfx_v8_0_resume,
6106         .is_idle = gfx_v8_0_is_idle,
6107         .wait_for_idle = gfx_v8_0_wait_for_idle,
6108         .soft_reset = gfx_v8_0_soft_reset,
6109         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6110         .set_powergating_state = gfx_v8_0_set_powergating_state,
6111 };
6112
6113 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6114         .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
6115         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6116         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6117         .parse_cs = NULL,
6118         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6119         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6120         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6121         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6122         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6123         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6124         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6125         .test_ring = gfx_v8_0_ring_test_ring,
6126         .test_ib = gfx_v8_0_ring_test_ib,
6127         .insert_nop = amdgpu_ring_insert_nop,
6128         .pad_ib = amdgpu_ring_generic_pad_ib,
6129 };
6130
6131 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6132         .get_rptr = gfx_v8_0_ring_get_rptr_compute,
6133         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6134         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6135         .parse_cs = NULL,
6136         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6137         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6138         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6139         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6140         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6141         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6142         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6143         .test_ring = gfx_v8_0_ring_test_ring,
6144         .test_ib = gfx_v8_0_ring_test_ib,
6145         .insert_nop = amdgpu_ring_insert_nop,
6146         .pad_ib = amdgpu_ring_generic_pad_ib,
6147 };
6148
6149 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6150 {
6151         int i;
6152
6153         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6154                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6155
6156         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6157                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6158 }
6159
6160 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6161         .set = gfx_v8_0_set_eop_interrupt_state,
6162         .process = gfx_v8_0_eop_irq,
6163 };
6164
6165 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6166         .set = gfx_v8_0_set_priv_reg_fault_state,
6167         .process = gfx_v8_0_priv_reg_irq,
6168 };
6169
6170 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6171         .set = gfx_v8_0_set_priv_inst_fault_state,
6172         .process = gfx_v8_0_priv_inst_irq,
6173 };
6174
6175 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6176 {
6177         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6178         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6179
6180         adev->gfx.priv_reg_irq.num_types = 1;
6181         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6182
6183         adev->gfx.priv_inst_irq.num_types = 1;
6184         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6185 }
6186
6187 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6188 {
6189         switch (adev->asic_type) {
6190         case CHIP_TOPAZ:
6191         case CHIP_STONEY:
6192                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6193                 break;
6194         case CHIP_CARRIZO:
6195                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6196                 break;
6197         default:
6198                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6199                 break;
6200         }
6201 }
6202
6203 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6204 {
6205         /* init asci gds info */
6206         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6207         adev->gds.gws.total_size = 64;
6208         adev->gds.oa.total_size = 16;
6209
6210         if (adev->gds.mem.total_size == 64 * 1024) {
6211                 adev->gds.mem.gfx_partition_size = 4096;
6212                 adev->gds.mem.cs_partition_size = 4096;
6213
6214                 adev->gds.gws.gfx_partition_size = 4;
6215                 adev->gds.gws.cs_partition_size = 4;
6216
6217                 adev->gds.oa.gfx_partition_size = 4;
6218                 adev->gds.oa.cs_partition_size = 1;
6219         } else {
6220                 adev->gds.mem.gfx_partition_size = 1024;
6221                 adev->gds.mem.cs_partition_size = 1024;
6222
6223                 adev->gds.gws.gfx_partition_size = 16;
6224                 adev->gds.gws.cs_partition_size = 16;
6225
6226                 adev->gds.oa.gfx_partition_size = 4;
6227                 adev->gds.oa.cs_partition_size = 4;
6228         }
6229 }
6230
6231 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6232 {
6233         u32 data, mask;
6234
6235         data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
6236         data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6237
6238         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6239         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6240
6241         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6242
6243         return (~data) & mask;
6244 }
6245
6246 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6247 {
6248         int i, j, k, counter, active_cu_number = 0;
6249         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6250         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6251
6252         memset(cu_info, 0, sizeof(*cu_info));
6253
6254         mutex_lock(&adev->grbm_idx_mutex);
6255         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6256                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6257                         mask = 1;
6258                         ao_bitmap = 0;
6259                         counter = 0;
6260                         gfx_v8_0_select_se_sh(adev, i, j);
6261                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6262                         cu_info->bitmap[i][j] = bitmap;
6263
6264                         for (k = 0; k < 16; k ++) {
6265                                 if (bitmap & mask) {
6266                                         if (counter < 2)
6267                                                 ao_bitmap |= mask;
6268                                         counter ++;
6269                                 }
6270                                 mask <<= 1;
6271                         }
6272                         active_cu_number += counter;
6273                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6274                 }
6275         }
6276         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
6277         mutex_unlock(&adev->grbm_idx_mutex);
6278
6279         cu_info->number = active_cu_number;
6280         cu_info->ao_cu_mask = ao_cu_mask;
6281 }