Merge branch 'x86/cpu' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into...
[cascardo/linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "clearstate_vi.h"
32
33 #include "gmc/gmc_8_2_d.h"
34 #include "gmc/gmc_8_2_sh_mask.h"
35
36 #include "oss/oss_3_0_d.h"
37 #include "oss/oss_3_0_sh_mask.h"
38
39 #include "bif/bif_5_0_d.h"
40 #include "bif/bif_5_0_sh_mask.h"
41
42 #include "gca/gfx_8_0_d.h"
43 #include "gca/gfx_8_0_enum.h"
44 #include "gca/gfx_8_0_sh_mask.h"
45 #include "gca/gfx_8_0_enum.h"
46
47 #include "dce/dce_10_0_d.h"
48 #include "dce/dce_10_0_sh_mask.h"
49
/* Ring counts for GFX8 (1 gfx, 8 compute); the code that consumes them is outside this excerpt. */
50 #define GFX8_NUM_GFX_RINGS     1
51 #define GFX8_NUM_COMPUTE_RINGS 8
52
/*
 * Per-ASIC "golden" GB_ADDR_CONFIG values.  TOPAZ and CARRIZO share one
 * value.  The same constants also appear as literal mmGB_ADDR_CONFIG entries
 * in the *_golden_common_all tables further down in this file.
 */
53 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
54 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
55 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
56 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
57
/*
 * Field helpers: shift a value into the named field of GB_TILE_MODE0 or
 * GB_MACROTILE_MODE0 using the *__SHIFT constants (defined outside this
 * file excerpt, presumably in the gca register headers).  The macros only
 * shift; they do not mask, so the argument must already fit the field.
 */
58 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
59 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
60 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
61 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
62 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
63 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
64 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
65 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
66 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
67
/*
 * Single-bit masks (bits 0..5) for the RLC_CGTT_MGCG_OVERRIDE register.
 * NOTE(review): presumably defined locally because the included register
 * headers do not provide them -- confirm.
 */
68 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
69 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
70 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
71 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
72 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
73 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
74
75 /* BPM SERDES CMD: set (1) / clear (0) command codes; "CLE" presumably abbreviates "clear" */
76 #define SET_BPM_SERDES_CMD    1
77 #define CLE_BPM_SERDES_CMD    0
78
79 /* BPM Register Address: indices start at 0; BPM_REG_FGCG_MAX is the count sentinel */
80 enum {
81         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
82         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
83         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
84         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
85         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
86         BPM_REG_FGCG_MAX
87 };
88
/* NOTE(review): presumably the length of the RLC "format direct" register list; its user is outside this excerpt -- confirm. */
89 #define RLC_FormatDirectRegListLength        14
90
/*
 * Firmware images declared for this module, grouped per ASIC
 * (carrizo, stoney, tonga, topaz, fiji, polaris11, polaris10).
 * Each group lists ce, pfp, me, mec, optionally mec2, and rlc images;
 * the stoney and topaz groups declare no mec2 image.
 */
91 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
92 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
94 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
97
98 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
99 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
100 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
101 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
103
104 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
105 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
107 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
110
111 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
112 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
113 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
114 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
116
117 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
118 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
119 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
120 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
123
124 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
125 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
126 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
127 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
130
131 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
132 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
133 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
137
/*
 * Per-VMID GDS register table: one row for each of VMID 0..15, listing that
 * VMID's GDS BASE, GDS SIZE, GWS and OA registers in that order.
 * NOTE(review): the row order is assumed to match the member order of
 * struct amdgpu_gds_reg_offset, which is declared outside this excerpt.
 */
138 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
139 {
140         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
141         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
142         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
143         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
144         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
145         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
146         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
147         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
148         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
149         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
150         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
151         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
152         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
153         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
154         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
155         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
156 };
157
/*
 * Tonga golden register settings, three u32 values per row.
 * NOTE(review): rows look like {register, mask, value} triples for
 * amdgpu_program_register_sequence(); the code that programs this table is
 * outside this excerpt -- confirm.
 * NOTE(review): the mmTCC_CTRL value (0xf31fff7f) has bits outside its mask
 * (0x00100000); the effect depends on how the helper applies the mask.
 */
158 static const u32 golden_settings_tonga_a11[] =
159 {
160         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
161         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
162         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
163         mmGB_GPU_ID, 0x0000000f, 0x00000000,
164         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
165         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
166         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
167         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
168         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
169         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
170         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
171         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
172         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
173         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
174         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
175 };
176
/*
 * Tonga common golden settings, three u32 values per row (presumably
 * {register, mask, value} -- confirm against the helper that programs it).
 * Every row uses a full 0xffffffff second column.  The mmGB_ADDR_CONFIG
 * value equals TONGA_GB_ADDR_CONFIG_GOLDEN (0x22011003).
 */
177 static const u32 tonga_golden_common_all[] =
178 {
179         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
180         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
181         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
182         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
183         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
184         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
185         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
186         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
187 };
188
/*
 * Tonga clock-gating (MGCG/CGCG) init table, three u32 values per row
 * (presumably {register, mask, value} -- confirm against the helper that
 * programs it).  Row order is significant: mmGRBM_GFX_INDEX is written
 * before the CGTT_* rows and again before the per-CU CGTS_* rows.
 * The per-CU section covers CU0..CU7; CU0 and CU4 use the *_TA_SQC_CTRL_REG
 * register where the other CUs use *_TA_CTRL_REG.
 */
189 static const u32 tonga_mgcg_cgcg_init[] =
190 {
191         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
192         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
193         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
194         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
195         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
196         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
197         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
198         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
199         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
200         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
201         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
202         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
203         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
204         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
205         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
206         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
207         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
208         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
209         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
210         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
211         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
212         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
213         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
214         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
215         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
216         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
217         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
218         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
219         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
220         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
221         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
222         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
223         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
224         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
225         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
226         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
227         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
228         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
229         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
230         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
231         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
232         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
233         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
234         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
235         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
236         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
237         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
238         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
239         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
240         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
241         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
242         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
243         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
244         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
245         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
246         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
247         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
248         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
249         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
250         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
251         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
252         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
253         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
254         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
255         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
256         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
257         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
258         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
259         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
260         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
261         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
262         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
263         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
264         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
265         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
266 };
267
/*
 * Polaris11 golden register settings, three u32 values per row (presumably
 * {register, mask, value} -- confirm against the helper that programs it;
 * that code is outside this excerpt).
 * NOTE(review): the mmTCC_CTRL and mmRLC_CGCG_CGLS_CTRL values have bits
 * outside their masks; the effect depends on how the helper applies the mask.
 */
268 static const u32 golden_settings_polaris11_a11[] =
269 {
270         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
271         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
272         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
273         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
274         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
275         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
276         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
277         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
278         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
279         mmSQ_CONFIG, 0x07f80000, 0x07180000,
280         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
281         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
282         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
283         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
284         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
285 };
286
/*
 * Polaris11 common golden settings, three u32 values per row (presumably
 * {register, mask, value} -- confirm).  Unlike the tonga/polaris10/fiji
 * common tables, this one has no mmPA_SC_RASTER_CONFIG rows.  The
 * mmGB_ADDR_CONFIG value equals POLARIS11_GB_ADDR_CONFIG_GOLDEN (0x22011002).
 */
287 static const u32 polaris11_golden_common_all[] =
288 {
289         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
290         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
291         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
292         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
293         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
294         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
295 };
296
/*
 * Polaris10 golden register settings, three u32 values per row (presumably
 * {register, mask, value} -- confirm against the helper that programs it;
 * that code is outside this excerpt).
 * Unlike the polaris11 table, this one starts with mmATC_MISC_CG and has no
 * mmTCP_CHAN_STEER_LO row.
 */
297 static const u32 golden_settings_polaris10_a11[] =
298 {
299         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
300         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
301         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
302         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
303         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
304         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
305         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
306         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
307         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
308         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
309         mmSQ_CONFIG, 0x07f80000, 0x07180000,
310         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
311         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
312         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
313         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
314 };
315
/*
 * Polaris10 common golden settings, three u32 values per row (presumably
 * {register, mask, value} -- confirm).  The rows and values are the same as
 * in tonga_golden_common_all, including mmGB_ADDR_CONFIG = 0x22011003.
 */
316 static const u32 polaris10_golden_common_all[] =
317 {
318         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
319         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
320         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
321         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
322         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
323         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
324         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
325         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
326 };
327
/*
 * Fiji common golden settings, three u32 values per row (presumably
 * {register, mask, value} -- confirm; the code that programs this table is
 * outside this excerpt).  Row order is significant: mmGRBM_GFX_INDEX is
 * written a second time before the mmSPI_CONFIG_CNTL_1 row.
 */
328 static const u32 fiji_golden_common_all[] =
329 {
330         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
331         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
332         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
333         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
334         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
335         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
336         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
337         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
338         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
339         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
340 };
341
/*
 * Fiji golden register settings, three u32 values per row (presumably
 * {register, mask, value} -- confirm against
 * amdgpu_program_register_sequence()).  Programmed by
 * gfx_v8_0_init_golden_registers() in the CHIP_FIJI case.
 */
342 static const u32 golden_settings_fiji_a10[] =
343 {
344         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
345         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
346         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
347         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
348         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
349         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
350         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
351         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
352         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
353         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
354         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
355 };
356
/*
 * Fiji clock-gating (MGCG/CGCG) init table, three u32 values per row
 * (presumably {register, mask, value} -- confirm against
 * amdgpu_program_register_sequence()).  Programmed first by
 * gfx_v8_0_init_golden_registers() in the CHIP_FIJI case.
 * Unlike the tonga/iceland/cz tables, there are no per-CU CGTS_CUn_* rows.
 */
357 static const u32 fiji_mgcg_cgcg_init[] =
358 {
359         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
360         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
361         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
362         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
363         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
364         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
365         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
366         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
367         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
368         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
369         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
370         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
371         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
372         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
373         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
374         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
375         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
376         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
377         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
378         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
379         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
380         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
381         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
382         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
383         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
384         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
385         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
386         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
387         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
388         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
389         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
390         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
391         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
392         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
393         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
394 };
395
/*
 * Iceland golden register settings, three u32 values per row (presumably
 * {register, mask, value} -- confirm against
 * amdgpu_program_register_sequence()).  Programmed second by
 * gfx_v8_0_init_golden_registers() in the CHIP_TOPAZ case.
 */
396 static const u32 golden_settings_iceland_a11[] =
397 {
398         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
399         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
400         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
401         mmGB_GPU_ID, 0x0000000f, 0x00000000,
402         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
403         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
404         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
405         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
406         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
407         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
408         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
409         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
410         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
411         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
412         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
413 };
414
/*
 * Iceland common golden settings, three u32 values per row (presumably
 * {register, mask, value} -- confirm against
 * amdgpu_program_register_sequence()).  Programmed last by
 * gfx_v8_0_init_golden_registers() in the CHIP_TOPAZ case.  The
 * mmGB_ADDR_CONFIG value equals TOPAZ_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
415 static const u32 iceland_golden_common_all[] =
416 {
417         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
418         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
419         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
420         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
421         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
422         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
423         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
424         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
425 };
426
/*
 * Iceland clock-gating (MGCG/CGCG) init table, three u32 values per row
 * (presumably {register, mask, value} -- confirm against
 * amdgpu_program_register_sequence()).  Programmed first by
 * gfx_v8_0_init_golden_registers() in the CHIP_TOPAZ case.
 * Differences visible against the tonga/cz tables in this file: the per-CU
 * section covers only CU0..CU5, the CU0/CU4 *_TA_SQC_CTRL_REG value is
 * 0x0f840f87, the CP/CPC/CPF and TCI CGTT values differ, and there is no
 * trailing mmCP_MEM_SLP_CNTL row.
 */
427 static const u32 iceland_mgcg_cgcg_init[] =
428 {
429         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
430         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
431         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
432         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
433         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
434         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
435         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
436         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
437         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
438         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
439         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
440         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
441         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
447         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
448         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
449         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
450         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
451         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
452         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
453         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
454         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
455         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
456         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
457         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
458         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
459         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
460         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
461         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
462         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
463         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
464         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
465         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
466         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
467         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
468         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
469         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
470         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
471         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
472         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
473         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
474         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
475         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
476         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
477         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
478         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
479         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
480         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
481         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
482         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
483         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
484         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
485         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
486         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
487         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
488         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
489         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
490         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
491         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
492         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
493 };
494
/*
 * Carrizo ("cz") golden register settings, three u32 values per row
 * (presumably {register, mask, value} -- confirm against the helper that
 * programs it; that code is outside this excerpt).
 * NOTE(review): the mmTCP_ADDR_CONFIG value (0x000000f3) has bits outside
 * its mask (0x0000000f); the effect depends on how the helper applies the
 * mask -- confirm this is intended.
 */
495 static const u32 cz_golden_settings_a11[] =
496 {
497         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
498         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
499         mmGB_GPU_ID, 0x0000000f, 0x00000000,
500         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
501         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
502         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
503         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
504         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
505         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
506         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
507 };
508
/*
 * Carrizo ("cz") common golden settings, three u32 values per row
 * (presumably {register, mask, value} -- confirm).  The mmGB_ADDR_CONFIG
 * value equals CARRIZO_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
509 static const u32 cz_golden_common_all[] =
510 {
511         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
512         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
513         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
514         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
515         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
516         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
517         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
518         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
519 };
520
/*
 * Carrizo ("cz") clock-gating (MGCG/CGCG) init table, three u32 values per
 * row (presumably {register, mask, value} -- confirm against the helper that
 * programs it).  Row order is significant: mmGRBM_GFX_INDEX is written
 * before the CGTT_* rows and again before the per-CU CGTS_* rows.
 * The per-CU section covers CU0..CU7.  Compared with tonga_mgcg_cgcg_init,
 * mmCGTT_CPF_CLK_CTRL is 0x00000100 and mmRLC_CGCG_CGLS_CTRL is 0x0020003f.
 */
521 static const u32 cz_mgcg_cgcg_init[] =
522 {
523         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
524         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
525         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
526         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
527         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
528         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
529         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
530         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
531         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
532         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
533         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
534         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
535         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
536         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
537         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
538         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
539         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
540         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
541         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
542         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
543         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
544         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
545         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
546         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
547         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
548         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
549         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
550         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
551         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
552         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
553         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
554         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
555         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
556         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
557         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
558         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
559         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
560         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
561         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
562         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
563         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
564         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
565         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
566         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
567         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
568         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
569         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
570         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
571         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
572         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
573         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
574         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
575         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
576         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
577         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
578         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
579         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
580         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
581         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
582         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
583         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
584         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
585         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
586         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
587         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
588         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
589         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
590         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
591         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
592         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
593         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
594         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
595         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
596         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
597         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
598 };
599
/*
 * Stoney golden register settings, three u32 values per row (presumably
 * {register, mask, value} -- confirm against the helper that programs it;
 * that code is outside this excerpt).
 * NOTE(review): the mmTCP_ADDR_CONFIG value (0x000000f1) has bits outside
 * its mask (0x0000000f); the effect depends on how the helper applies the
 * mask -- confirm this is intended.
 */
600 static const u32 stoney_golden_settings_a11[] =
601 {
602         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
603         mmGB_GPU_ID, 0x0000000f, 0x00000000,
604         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
605         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
606         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
607         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
608         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
609         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
610         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
611         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
612 };
613
/*
 * Stoney common golden settings, three u32 values per row (presumably
 * {register, mask, value} -- confirm).  The mmGB_ADDR_CONFIG value
 * (0x12010001) is written as a literal; no *_GB_ADDR_CONFIG_GOLDEN macro
 * with that value is defined in this file excerpt.
 */
614 static const u32 stoney_golden_common_all[] =
615 {
616         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
617         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
618         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
619         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
620         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
621         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
622         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
623         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
624 };
625
/*
 * Stoney clock-gating (MGCG/CGCG) init table, three u32 values per row
 * (presumably {register, mask, value} -- confirm).  Much shorter than the
 * other *_mgcg_cgcg_init tables in this file: it has no
 * mmRLC_CGTT_MGCG_OVERRIDE row, no CGTT_* rows and no per-CU CGTS_CUn_* rows.
 */
626 static const u32 stoney_mgcg_cgcg_init[] =
627 {
628         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
629         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
630         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
631         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
632         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
633         mmATC_MISC_CG, 0xffffffff, 0x000c0200,
634 };
635
/*
 * Forward declarations of file-local helpers that are referenced before
 * their definitions (e.g. gfx_v8_0_get_csb_size() is called from
 * gfx_v8_0_rlc_init()).
 * NOTE(review): the definitions are presumably further down in this file.
 */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
642
643 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
644 {
645         switch (adev->asic_type) {
646         case CHIP_TOPAZ:
647                 amdgpu_program_register_sequence(adev,
648                                                  iceland_mgcg_cgcg_init,
649                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
650                 amdgpu_program_register_sequence(adev,
651                                                  golden_settings_iceland_a11,
652                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
653                 amdgpu_program_register_sequence(adev,
654                                                  iceland_golden_common_all,
655                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
656                 break;
657         case CHIP_FIJI:
658                 amdgpu_program_register_sequence(adev,
659                                                  fiji_mgcg_cgcg_init,
660                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
661                 amdgpu_program_register_sequence(adev,
662                                                  golden_settings_fiji_a10,
663                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
664                 amdgpu_program_register_sequence(adev,
665                                                  fiji_golden_common_all,
666                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
667                 break;
668
669         case CHIP_TONGA:
670                 amdgpu_program_register_sequence(adev,
671                                                  tonga_mgcg_cgcg_init,
672                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
673                 amdgpu_program_register_sequence(adev,
674                                                  golden_settings_tonga_a11,
675                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
676                 amdgpu_program_register_sequence(adev,
677                                                  tonga_golden_common_all,
678                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
679                 break;
680         case CHIP_POLARIS11:
681                 amdgpu_program_register_sequence(adev,
682                                                  golden_settings_polaris11_a11,
683                                                  (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
684                 amdgpu_program_register_sequence(adev,
685                                                  polaris11_golden_common_all,
686                                                  (const u32)ARRAY_SIZE(polaris11_golden_common_all));
687                 break;
688         case CHIP_POLARIS10:
689                 amdgpu_program_register_sequence(adev,
690                                                  golden_settings_polaris10_a11,
691                                                  (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
692                 amdgpu_program_register_sequence(adev,
693                                                  polaris10_golden_common_all,
694                                                  (const u32)ARRAY_SIZE(polaris10_golden_common_all));
695                 break;
696         case CHIP_CARRIZO:
697                 amdgpu_program_register_sequence(adev,
698                                                  cz_mgcg_cgcg_init,
699                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
700                 amdgpu_program_register_sequence(adev,
701                                                  cz_golden_settings_a11,
702                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
703                 amdgpu_program_register_sequence(adev,
704                                                  cz_golden_common_all,
705                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
706                 break;
707         case CHIP_STONEY:
708                 amdgpu_program_register_sequence(adev,
709                                                  stoney_mgcg_cgcg_init,
710                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
711                 amdgpu_program_register_sequence(adev,
712                                                  stoney_golden_settings_a11,
713                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
714                 amdgpu_program_register_sequence(adev,
715                                                  stoney_golden_common_all,
716                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
717                 break;
718         default:
719                 break;
720         }
721 }
722
723 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
724 {
725         int i;
726
727         adev->gfx.scratch.num_reg = 7;
728         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
729         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
730                 adev->gfx.scratch.free[i] = true;
731                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
732         }
733 }
734
735 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
736 {
737         struct amdgpu_device *adev = ring->adev;
738         uint32_t scratch;
739         uint32_t tmp = 0;
740         unsigned i;
741         int r;
742
743         r = amdgpu_gfx_scratch_get(adev, &scratch);
744         if (r) {
745                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
746                 return r;
747         }
748         WREG32(scratch, 0xCAFEDEAD);
749         r = amdgpu_ring_alloc(ring, 3);
750         if (r) {
751                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
752                           ring->idx, r);
753                 amdgpu_gfx_scratch_free(adev, scratch);
754                 return r;
755         }
756         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
757         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
758         amdgpu_ring_write(ring, 0xDEADBEEF);
759         amdgpu_ring_commit(ring);
760
761         for (i = 0; i < adev->usec_timeout; i++) {
762                 tmp = RREG32(scratch);
763                 if (tmp == 0xDEADBEEF)
764                         break;
765                 DRM_UDELAY(1);
766         }
767         if (i < adev->usec_timeout) {
768                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
769                          ring->idx, i);
770         } else {
771                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
772                           ring->idx, scratch, tmp);
773                 r = -EINVAL;
774         }
775         amdgpu_gfx_scratch_free(adev, scratch);
776         return r;
777 }
778
779 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
780 {
781         struct amdgpu_device *adev = ring->adev;
782         struct amdgpu_ib ib;
783         struct fence *f = NULL;
784         uint32_t scratch;
785         uint32_t tmp = 0;
786         unsigned i;
787         int r;
788
789         r = amdgpu_gfx_scratch_get(adev, &scratch);
790         if (r) {
791                 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
792                 return r;
793         }
794         WREG32(scratch, 0xCAFEDEAD);
795         memset(&ib, 0, sizeof(ib));
796         r = amdgpu_ib_get(adev, NULL, 256, &ib);
797         if (r) {
798                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
799                 goto err1;
800         }
801         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
802         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
803         ib.ptr[2] = 0xDEADBEEF;
804         ib.length_dw = 3;
805
806         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
807         if (r)
808                 goto err2;
809
810         r = fence_wait(f, false);
811         if (r) {
812                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
813                 goto err2;
814         }
815         for (i = 0; i < adev->usec_timeout; i++) {
816                 tmp = RREG32(scratch);
817                 if (tmp == 0xDEADBEEF)
818                         break;
819                 DRM_UDELAY(1);
820         }
821         if (i < adev->usec_timeout) {
822                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
823                          ring->idx, i);
824                 goto err2;
825         } else {
826                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
827                           scratch, tmp);
828                 r = -EINVAL;
829         }
830 err2:
831         fence_put(f);
832         amdgpu_ib_free(adev, &ib, NULL);
833         fence_put(f);
834 err1:
835         amdgpu_gfx_scratch_free(adev, scratch);
836         return r;
837 }
838
839
840 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
841         release_firmware(adev->gfx.pfp_fw);
842         adev->gfx.pfp_fw = NULL;
843         release_firmware(adev->gfx.me_fw);
844         adev->gfx.me_fw = NULL;
845         release_firmware(adev->gfx.ce_fw);
846         adev->gfx.ce_fw = NULL;
847         release_firmware(adev->gfx.rlc_fw);
848         adev->gfx.rlc_fw = NULL;
849         release_firmware(adev->gfx.mec_fw);
850         adev->gfx.mec_fw = NULL;
851         if ((adev->asic_type != CHIP_STONEY) &&
852             (adev->asic_type != CHIP_TOPAZ))
853                 release_firmware(adev->gfx.mec2_fw);
854         adev->gfx.mec2_fw = NULL;
855
856         kfree(adev->gfx.rlc.register_list_format);
857 }
858
859 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
860 {
861         const char *chip_name;
862         char fw_name[30];
863         int err;
864         struct amdgpu_firmware_info *info = NULL;
865         const struct common_firmware_header *header = NULL;
866         const struct gfx_firmware_header_v1_0 *cp_hdr;
867         const struct rlc_firmware_header_v2_0 *rlc_hdr;
868         unsigned int *tmp = NULL, i;
869
870         DRM_DEBUG("\n");
871
872         switch (adev->asic_type) {
873         case CHIP_TOPAZ:
874                 chip_name = "topaz";
875                 break;
876         case CHIP_TONGA:
877                 chip_name = "tonga";
878                 break;
879         case CHIP_CARRIZO:
880                 chip_name = "carrizo";
881                 break;
882         case CHIP_FIJI:
883                 chip_name = "fiji";
884                 break;
885         case CHIP_POLARIS11:
886                 chip_name = "polaris11";
887                 break;
888         case CHIP_POLARIS10:
889                 chip_name = "polaris10";
890                 break;
891         case CHIP_STONEY:
892                 chip_name = "stoney";
893                 break;
894         default:
895                 BUG();
896         }
897
898         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
899         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
900         if (err)
901                 goto out;
902         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
903         if (err)
904                 goto out;
905         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
906         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
907         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
908
909         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
910         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
911         if (err)
912                 goto out;
913         err = amdgpu_ucode_validate(adev->gfx.me_fw);
914         if (err)
915                 goto out;
916         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
917         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
918         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
919
920         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
921         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
922         if (err)
923                 goto out;
924         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
925         if (err)
926                 goto out;
927         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
928         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
929         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
930
931         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
932         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
933         if (err)
934                 goto out;
935         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
936         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
937         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
938         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
939
940         adev->gfx.rlc.save_and_restore_offset =
941                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
942         adev->gfx.rlc.clear_state_descriptor_offset =
943                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
944         adev->gfx.rlc.avail_scratch_ram_locations =
945                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
946         adev->gfx.rlc.reg_restore_list_size =
947                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
948         adev->gfx.rlc.reg_list_format_start =
949                         le32_to_cpu(rlc_hdr->reg_list_format_start);
950         adev->gfx.rlc.reg_list_format_separate_start =
951                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
952         adev->gfx.rlc.starting_offsets_start =
953                         le32_to_cpu(rlc_hdr->starting_offsets_start);
954         adev->gfx.rlc.reg_list_format_size_bytes =
955                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
956         adev->gfx.rlc.reg_list_size_bytes =
957                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
958
959         adev->gfx.rlc.register_list_format =
960                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
961                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
962
963         if (!adev->gfx.rlc.register_list_format) {
964                 err = -ENOMEM;
965                 goto out;
966         }
967
968         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
969                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
970         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
971                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
972
973         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
974
975         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
976                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
977         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
978                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
979
980         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
981         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
982         if (err)
983                 goto out;
984         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
985         if (err)
986                 goto out;
987         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
988         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
989         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
990
991         if ((adev->asic_type != CHIP_STONEY) &&
992             (adev->asic_type != CHIP_TOPAZ)) {
993                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
994                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
995                 if (!err) {
996                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
997                         if (err)
998                                 goto out;
999                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1000                                 adev->gfx.mec2_fw->data;
1001                         adev->gfx.mec2_fw_version =
1002                                 le32_to_cpu(cp_hdr->header.ucode_version);
1003                         adev->gfx.mec2_feature_version =
1004                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1005                 } else {
1006                         err = 0;
1007                         adev->gfx.mec2_fw = NULL;
1008                 }
1009         }
1010
1011         if (adev->firmware.smu_load) {
1012                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1013                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1014                 info->fw = adev->gfx.pfp_fw;
1015                 header = (const struct common_firmware_header *)info->fw->data;
1016                 adev->firmware.fw_size +=
1017                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1018
1019                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1020                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1021                 info->fw = adev->gfx.me_fw;
1022                 header = (const struct common_firmware_header *)info->fw->data;
1023                 adev->firmware.fw_size +=
1024                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1025
1026                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1027                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1028                 info->fw = adev->gfx.ce_fw;
1029                 header = (const struct common_firmware_header *)info->fw->data;
1030                 adev->firmware.fw_size +=
1031                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1032
1033                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1034                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1035                 info->fw = adev->gfx.rlc_fw;
1036                 header = (const struct common_firmware_header *)info->fw->data;
1037                 adev->firmware.fw_size +=
1038                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1039
1040                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1041                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1042                 info->fw = adev->gfx.mec_fw;
1043                 header = (const struct common_firmware_header *)info->fw->data;
1044                 adev->firmware.fw_size +=
1045                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1046
1047                 if (adev->gfx.mec2_fw) {
1048                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1049                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1050                         info->fw = adev->gfx.mec2_fw;
1051                         header = (const struct common_firmware_header *)info->fw->data;
1052                         adev->firmware.fw_size +=
1053                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1054                 }
1055
1056         }
1057
1058 out:
1059         if (err) {
1060                 dev_err(adev->dev,
1061                         "gfx8: Failed to load firmware \"%s\"\n",
1062                         fw_name);
1063                 release_firmware(adev->gfx.pfp_fw);
1064                 adev->gfx.pfp_fw = NULL;
1065                 release_firmware(adev->gfx.me_fw);
1066                 adev->gfx.me_fw = NULL;
1067                 release_firmware(adev->gfx.ce_fw);
1068                 adev->gfx.ce_fw = NULL;
1069                 release_firmware(adev->gfx.rlc_fw);
1070                 adev->gfx.rlc_fw = NULL;
1071                 release_firmware(adev->gfx.mec_fw);
1072                 adev->gfx.mec_fw = NULL;
1073                 release_firmware(adev->gfx.mec2_fw);
1074                 adev->gfx.mec2_fw = NULL;
1075         }
1076         return err;
1077 }
1078
1079 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1080                                     volatile u32 *buffer)
1081 {
1082         u32 count = 0, i;
1083         const struct cs_section_def *sect = NULL;
1084         const struct cs_extent_def *ext = NULL;
1085
1086         if (adev->gfx.rlc.cs_data == NULL)
1087                 return;
1088         if (buffer == NULL)
1089                 return;
1090
1091         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1092         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1093
1094         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1095         buffer[count++] = cpu_to_le32(0x80000000);
1096         buffer[count++] = cpu_to_le32(0x80000000);
1097
1098         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1099                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1100                         if (sect->id == SECT_CONTEXT) {
1101                                 buffer[count++] =
1102                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1103                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1104                                                 PACKET3_SET_CONTEXT_REG_START);
1105                                 for (i = 0; i < ext->reg_count; i++)
1106                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1107                         } else {
1108                                 return;
1109                         }
1110                 }
1111         }
1112
1113         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1114         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1115                         PACKET3_SET_CONTEXT_REG_START);
1116         switch (adev->asic_type) {
1117         case CHIP_TONGA:
1118         case CHIP_POLARIS10:
1119                 buffer[count++] = cpu_to_le32(0x16000012);
1120                 buffer[count++] = cpu_to_le32(0x0000002A);
1121                 break;
1122         case CHIP_POLARIS11:
1123                 buffer[count++] = cpu_to_le32(0x16000012);
1124                 buffer[count++] = cpu_to_le32(0x00000000);
1125                 break;
1126         case CHIP_FIJI:
1127                 buffer[count++] = cpu_to_le32(0x3a00161a);
1128                 buffer[count++] = cpu_to_le32(0x0000002e);
1129                 break;
1130         case CHIP_TOPAZ:
1131         case CHIP_CARRIZO:
1132                 buffer[count++] = cpu_to_le32(0x00000002);
1133                 buffer[count++] = cpu_to_le32(0x00000000);
1134                 break;
1135         case CHIP_STONEY:
1136                 buffer[count++] = cpu_to_le32(0x00000000);
1137                 buffer[count++] = cpu_to_le32(0x00000000);
1138                 break;
1139         default:
1140                 buffer[count++] = cpu_to_le32(0x00000000);
1141                 buffer[count++] = cpu_to_le32(0x00000000);
1142                 break;
1143         }
1144
1145         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1146         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1147
1148         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1149         buffer[count++] = cpu_to_le32(0);
1150 }
1151
1152 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1153 {
1154         int r;
1155
1156         /* clear state block */
1157         if (adev->gfx.rlc.clear_state_obj) {
1158                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1159                 if (unlikely(r != 0))
1160                         dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1161                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1162                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1163
1164                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1165                 adev->gfx.rlc.clear_state_obj = NULL;
1166         }
1167 }
1168
1169 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1170 {
1171         volatile u32 *dst_ptr;
1172         u32 dws;
1173         const struct cs_section_def *cs_data;
1174         int r;
1175
1176         adev->gfx.rlc.cs_data = vi_cs_data;
1177
1178         cs_data = adev->gfx.rlc.cs_data;
1179
1180         if (cs_data) {
1181                 /* clear state block */
1182                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1183
1184                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1185                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1186                                              AMDGPU_GEM_DOMAIN_VRAM,
1187                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1188                                              NULL, NULL,
1189                                              &adev->gfx.rlc.clear_state_obj);
1190                         if (r) {
1191                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1192                                 gfx_v8_0_rlc_fini(adev);
1193                                 return r;
1194                         }
1195                 }
1196                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1197                 if (unlikely(r != 0)) {
1198                         gfx_v8_0_rlc_fini(adev);
1199                         return r;
1200                 }
1201                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1202                                   &adev->gfx.rlc.clear_state_gpu_addr);
1203                 if (r) {
1204                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1205                         dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
1206                         gfx_v8_0_rlc_fini(adev);
1207                         return r;
1208                 }
1209
1210                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1211                 if (r) {
1212                         dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
1213                         gfx_v8_0_rlc_fini(adev);
1214                         return r;
1215                 }
1216                 /* set up the cs buffer */
1217                 dst_ptr = adev->gfx.rlc.cs_ptr;
1218                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1219                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1220                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1221         }
1222
1223         return 0;
1224 }
1225
1226 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1227 {
1228         int r;
1229
1230         if (adev->gfx.mec.hpd_eop_obj) {
1231                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1232                 if (unlikely(r != 0))
1233                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1234                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1235                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1236
1237                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1238                 adev->gfx.mec.hpd_eop_obj = NULL;
1239         }
1240 }
1241
1242 #define MEC_HPD_SIZE 2048
1243
1244 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1245 {
1246         int r;
1247         u32 *hpd;
1248
1249         /*
1250          * we assign only 1 pipe because all other pipes will
1251          * be handled by KFD
1252          */
1253         adev->gfx.mec.num_mec = 1;
1254         adev->gfx.mec.num_pipe = 1;
1255         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1256
1257         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1258                 r = amdgpu_bo_create(adev,
1259                                      adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1260                                      PAGE_SIZE, true,
1261                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1262                                      &adev->gfx.mec.hpd_eop_obj);
1263                 if (r) {
1264                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1265                         return r;
1266                 }
1267         }
1268
1269         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1270         if (unlikely(r != 0)) {
1271                 gfx_v8_0_mec_fini(adev);
1272                 return r;
1273         }
1274         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1275                           &adev->gfx.mec.hpd_eop_gpu_addr);
1276         if (r) {
1277                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1278                 gfx_v8_0_mec_fini(adev);
1279                 return r;
1280         }
1281         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1282         if (r) {
1283                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1284                 gfx_v8_0_mec_fini(adev);
1285                 return r;
1286         }
1287
1288         memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1289
1290         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1291         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1292
1293         return 0;
1294 }
1295
/*
 * Table of 66 raw 32-bit words.
 *
 * NOTE(review): judging by the name this is presumably pre-assembled GPU
 * compute shader code used to initialize the VGPRs; neither the consumer
 * nor the instruction encoding is visible in this part of the file -
 * confirm before changing any word.
 */
static const u32 vgpr_init_compute_shader[] =
{
        0x7e000209, 0x7e020208,
        0x7e040207, 0x7e060206,
        0x7e080205, 0x7e0a0204,
        0x7e0c0203, 0x7e0e0202,
        0x7e100201, 0x7e120200,
        0x7e140209, 0x7e160208,
        0x7e180207, 0x7e1a0206,
        0x7e1c0205, 0x7e1e0204,
        0x7e200203, 0x7e220202,
        0x7e240201, 0x7e260200,
        0x7e280209, 0x7e2a0208,
        0x7e2c0207, 0x7e2e0206,
        0x7e300205, 0x7e320204,
        0x7e340203, 0x7e360202,
        0x7e380201, 0x7e3a0200,
        0x7e3c0209, 0x7e3e0208,
        0x7e400207, 0x7e420206,
        0x7e440205, 0x7e460204,
        0x7e480203, 0x7e4a0202,
        0x7e4c0201, 0x7e4e0200,
        0x7e500209, 0x7e520208,
        0x7e540207, 0x7e560206,
        0x7e580205, 0x7e5a0204,
        0x7e5c0203, 0x7e5e0202,
        0x7e600201, 0x7e620200,
        0x7e640209, 0x7e660208,
        0x7e680207, 0x7e6a0206,
        0x7e6c0205, 0x7e6e0204,
        0x7e700203, 0x7e720202,
        0x7e740201, 0x7e760200,
        0x7e780209, 0x7e7a0208,
        0x7e7c0207, 0x7e7e0206,
        0xbf8a0000, 0xbf810000,
};
1332
1333 static const u32 sgpr_init_compute_shader[] =
1334 {
1335         0xbe8a0100, 0xbe8c0102,
1336         0xbe8e0104, 0xbe900106,
1337         0xbe920108, 0xbe940100,
1338         0xbe960102, 0xbe980104,
1339         0xbe9a0106, 0xbe9c0108,
1340         0xbe9e0100, 0xbea00102,
1341         0xbea20104, 0xbea40106,
1342         0xbea60108, 0xbea80100,
1343         0xbeaa0102, 0xbeac0104,
1344         0xbeae0106, 0xbeb00108,
1345         0xbeb20100, 0xbeb40102,
1346         0xbeb60104, 0xbeb80106,
1347         0xbeba0108, 0xbebc0100,
1348         0xbebe0102, 0xbec00104,
1349         0xbec20106, 0xbec40108,
1350         0xbec60100, 0xbec80102,
1351         0xbee60004, 0xbee70005,
1352         0xbeea0006, 0xbeeb0007,
1353         0xbee80008, 0xbee90009,
1354         0xbefc0000, 0xbf8a0000,
1355         0xbf810000, 0x00000000,
1356 };
1357
1358 static const u32 vgpr_init_regs[] =
1359 {
1360         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1361         mmCOMPUTE_RESOURCE_LIMITS, 0,
1362         mmCOMPUTE_NUM_THREAD_X, 256*4,
1363         mmCOMPUTE_NUM_THREAD_Y, 1,
1364         mmCOMPUTE_NUM_THREAD_Z, 1,
1365         mmCOMPUTE_PGM_RSRC2, 20,
1366         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1367         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1368         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1369         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1370         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1371         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1372         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1373         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1374         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1375         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1376 };
1377
1378 static const u32 sgpr1_init_regs[] =
1379 {
1380         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1381         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1382         mmCOMPUTE_NUM_THREAD_X, 256*5,
1383         mmCOMPUTE_NUM_THREAD_Y, 1,
1384         mmCOMPUTE_NUM_THREAD_Z, 1,
1385         mmCOMPUTE_PGM_RSRC2, 20,
1386         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1387         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1388         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1389         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1390         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1391         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1392         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1393         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1394         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1395         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1396 };
1397
1398 static const u32 sgpr2_init_regs[] =
1399 {
1400         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1401         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1402         mmCOMPUTE_NUM_THREAD_X, 256*5,
1403         mmCOMPUTE_NUM_THREAD_Y, 1,
1404         mmCOMPUTE_NUM_THREAD_Z, 1,
1405         mmCOMPUTE_PGM_RSRC2, 20,
1406         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1407         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1408         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1409         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1410         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1411         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1412         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1413         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1414         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1415         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1416 };
1417
1418 static const u32 sec_ded_counter_registers[] =
1419 {
1420         mmCPC_EDC_ATC_CNT,
1421         mmCPC_EDC_SCRATCH_CNT,
1422         mmCPC_EDC_UCODE_CNT,
1423         mmCPF_EDC_ATC_CNT,
1424         mmCPF_EDC_ROQ_CNT,
1425         mmCPF_EDC_TAG_CNT,
1426         mmCPG_EDC_ATC_CNT,
1427         mmCPG_EDC_DMA_CNT,
1428         mmCPG_EDC_TAG_CNT,
1429         mmDC_EDC_CSINVOC_CNT,
1430         mmDC_EDC_RESTORE_CNT,
1431         mmDC_EDC_STATE_CNT,
1432         mmGDS_EDC_CNT,
1433         mmGDS_EDC_GRBM_CNT,
1434         mmGDS_EDC_OA_DED,
1435         mmSPI_EDC_CNT,
1436         mmSQC_ATC_EDC_GATCL1_CNT,
1437         mmSQC_EDC_CNT,
1438         mmSQ_EDC_DED_CNT,
1439         mmSQ_EDC_INFO,
1440         mmSQ_EDC_SEC_CNT,
1441         mmTCC_EDC_CNT,
1442         mmTCP_ATC_EDC_GATCL1_CNT,
1443         mmTCP_EDC_CNT,
1444         mmTD_EDC_CNT
1445 };
1446
1447 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1448 {
1449         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1450         struct amdgpu_ib ib;
1451         struct fence *f = NULL;
1452         int r, i;
1453         u32 tmp;
1454         unsigned total_size, vgpr_offset, sgpr_offset;
1455         u64 gpu_addr;
1456
1457         /* only supported on CZ */
1458         if (adev->asic_type != CHIP_CARRIZO)
1459                 return 0;
1460
1461         /* bail if the compute ring is not ready */
1462         if (!ring->ready)
1463                 return 0;
1464
1465         tmp = RREG32(mmGB_EDC_MODE);
1466         WREG32(mmGB_EDC_MODE, 0);
1467
1468         total_size =
1469                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1470         total_size +=
1471                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1472         total_size +=
1473                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1474         total_size = ALIGN(total_size, 256);
1475         vgpr_offset = total_size;
1476         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1477         sgpr_offset = total_size;
1478         total_size += sizeof(sgpr_init_compute_shader);
1479
1480         /* allocate an indirect buffer to put the commands in */
1481         memset(&ib, 0, sizeof(ib));
1482         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1483         if (r) {
1484                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1485                 return r;
1486         }
1487
1488         /* load the compute shaders */
1489         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1490                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1491
1492         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1493                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1494
1495         /* init the ib length to 0 */
1496         ib.length_dw = 0;
1497
1498         /* VGPR */
1499         /* write the register state for the compute dispatch */
1500         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1501                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1502                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1503                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1504         }
1505         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1506         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1507         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1508         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1509         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1510         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1511
1512         /* write dispatch packet */
1513         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1514         ib.ptr[ib.length_dw++] = 8; /* x */
1515         ib.ptr[ib.length_dw++] = 1; /* y */
1516         ib.ptr[ib.length_dw++] = 1; /* z */
1517         ib.ptr[ib.length_dw++] =
1518                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1519
1520         /* write CS partial flush packet */
1521         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1522         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1523
1524         /* SGPR1 */
1525         /* write the register state for the compute dispatch */
1526         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1527                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1528                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1529                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1530         }
1531         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1532         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1533         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1534         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1535         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1536         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1537
1538         /* write dispatch packet */
1539         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1540         ib.ptr[ib.length_dw++] = 8; /* x */
1541         ib.ptr[ib.length_dw++] = 1; /* y */
1542         ib.ptr[ib.length_dw++] = 1; /* z */
1543         ib.ptr[ib.length_dw++] =
1544                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1545
1546         /* write CS partial flush packet */
1547         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1548         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1549
1550         /* SGPR2 */
1551         /* write the register state for the compute dispatch */
1552         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1553                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1554                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1555                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1556         }
1557         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1558         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1559         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1560         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1561         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1562         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1563
1564         /* write dispatch packet */
1565         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1566         ib.ptr[ib.length_dw++] = 8; /* x */
1567         ib.ptr[ib.length_dw++] = 1; /* y */
1568         ib.ptr[ib.length_dw++] = 1; /* z */
1569         ib.ptr[ib.length_dw++] =
1570                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1571
1572         /* write CS partial flush packet */
1573         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1574         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1575
1576         /* shedule the ib on the ring */
1577         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1578         if (r) {
1579                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1580                 goto fail;
1581         }
1582
1583         /* wait for the GPU to finish processing the IB */
1584         r = fence_wait(f, false);
1585         if (r) {
1586                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1587                 goto fail;
1588         }
1589
1590         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1591         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1592         WREG32(mmGB_EDC_MODE, tmp);
1593
1594         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1595         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1596         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1597
1598
1599         /* read back registers to clear the counters */
1600         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1601                 RREG32(sec_ded_counter_registers[i]);
1602
1603 fail:
1604         fence_put(f);
1605         amdgpu_ib_free(adev, &ib, NULL);
1606         fence_put(f);
1607
1608         return r;
1609 }
1610
1611 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1612 {
1613         u32 gb_addr_config;
1614         u32 mc_shared_chmap, mc_arb_ramcfg;
1615         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1616         u32 tmp;
1617         int ret;
1618
1619         switch (adev->asic_type) {
1620         case CHIP_TOPAZ:
1621                 adev->gfx.config.max_shader_engines = 1;
1622                 adev->gfx.config.max_tile_pipes = 2;
1623                 adev->gfx.config.max_cu_per_sh = 6;
1624                 adev->gfx.config.max_sh_per_se = 1;
1625                 adev->gfx.config.max_backends_per_se = 2;
1626                 adev->gfx.config.max_texture_channel_caches = 2;
1627                 adev->gfx.config.max_gprs = 256;
1628                 adev->gfx.config.max_gs_threads = 32;
1629                 adev->gfx.config.max_hw_contexts = 8;
1630
1631                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1632                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1633                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1634                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1635                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1636                 break;
1637         case CHIP_FIJI:
1638                 adev->gfx.config.max_shader_engines = 4;
1639                 adev->gfx.config.max_tile_pipes = 16;
1640                 adev->gfx.config.max_cu_per_sh = 16;
1641                 adev->gfx.config.max_sh_per_se = 1;
1642                 adev->gfx.config.max_backends_per_se = 4;
1643                 adev->gfx.config.max_texture_channel_caches = 16;
1644                 adev->gfx.config.max_gprs = 256;
1645                 adev->gfx.config.max_gs_threads = 32;
1646                 adev->gfx.config.max_hw_contexts = 8;
1647
1648                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1649                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1650                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1651                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1652                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1653                 break;
1654         case CHIP_POLARIS11:
1655                 ret = amdgpu_atombios_get_gfx_info(adev);
1656                 if (ret)
1657                         return ret;
1658                 adev->gfx.config.max_gprs = 256;
1659                 adev->gfx.config.max_gs_threads = 32;
1660                 adev->gfx.config.max_hw_contexts = 8;
1661
1662                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1663                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1664                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1665                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1666                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1667                 break;
1668         case CHIP_POLARIS10:
1669                 ret = amdgpu_atombios_get_gfx_info(adev);
1670                 if (ret)
1671                         return ret;
1672                 adev->gfx.config.max_gprs = 256;
1673                 adev->gfx.config.max_gs_threads = 32;
1674                 adev->gfx.config.max_hw_contexts = 8;
1675
1676                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1677                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1678                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1679                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1680                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1681                 break;
1682         case CHIP_TONGA:
1683                 adev->gfx.config.max_shader_engines = 4;
1684                 adev->gfx.config.max_tile_pipes = 8;
1685                 adev->gfx.config.max_cu_per_sh = 8;
1686                 adev->gfx.config.max_sh_per_se = 1;
1687                 adev->gfx.config.max_backends_per_se = 2;
1688                 adev->gfx.config.max_texture_channel_caches = 8;
1689                 adev->gfx.config.max_gprs = 256;
1690                 adev->gfx.config.max_gs_threads = 32;
1691                 adev->gfx.config.max_hw_contexts = 8;
1692
1693                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1694                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1695                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1696                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1697                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1698                 break;
1699         case CHIP_CARRIZO:
1700                 adev->gfx.config.max_shader_engines = 1;
1701                 adev->gfx.config.max_tile_pipes = 2;
1702                 adev->gfx.config.max_sh_per_se = 1;
1703                 adev->gfx.config.max_backends_per_se = 2;
1704
1705                 switch (adev->pdev->revision) {
1706                 case 0xc4:
1707                 case 0x84:
1708                 case 0xc8:
1709                 case 0xcc:
1710                 case 0xe1:
1711                 case 0xe3:
1712                         /* B10 */
1713                         adev->gfx.config.max_cu_per_sh = 8;
1714                         break;
1715                 case 0xc5:
1716                 case 0x81:
1717                 case 0x85:
1718                 case 0xc9:
1719                 case 0xcd:
1720                 case 0xe2:
1721                 case 0xe4:
1722                         /* B8 */
1723                         adev->gfx.config.max_cu_per_sh = 6;
1724                         break;
1725                 case 0xc6:
1726                 case 0xca:
1727                 case 0xce:
1728                 case 0x88:
1729                         /* B6 */
1730                         adev->gfx.config.max_cu_per_sh = 6;
1731                         break;
1732                 case 0xc7:
1733                 case 0x87:
1734                 case 0xcb:
1735                 case 0xe5:
1736                 case 0x89:
1737                 default:
1738                         /* B4 */
1739                         adev->gfx.config.max_cu_per_sh = 4;
1740                         break;
1741                 }
1742
1743                 adev->gfx.config.max_texture_channel_caches = 2;
1744                 adev->gfx.config.max_gprs = 256;
1745                 adev->gfx.config.max_gs_threads = 32;
1746                 adev->gfx.config.max_hw_contexts = 8;
1747
1748                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1749                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1750                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1751                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1752                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1753                 break;
1754         case CHIP_STONEY:
1755                 adev->gfx.config.max_shader_engines = 1;
1756                 adev->gfx.config.max_tile_pipes = 2;
1757                 adev->gfx.config.max_sh_per_se = 1;
1758                 adev->gfx.config.max_backends_per_se = 1;
1759
1760                 switch (adev->pdev->revision) {
1761                 case 0xc0:
1762                 case 0xc1:
1763                 case 0xc2:
1764                 case 0xc4:
1765                 case 0xc8:
1766                 case 0xc9:
1767                         adev->gfx.config.max_cu_per_sh = 3;
1768                         break;
1769                 case 0xd0:
1770                 case 0xd1:
1771                 case 0xd2:
1772                 default:
1773                         adev->gfx.config.max_cu_per_sh = 2;
1774                         break;
1775                 }
1776
1777                 adev->gfx.config.max_texture_channel_caches = 2;
1778                 adev->gfx.config.max_gprs = 256;
1779                 adev->gfx.config.max_gs_threads = 16;
1780                 adev->gfx.config.max_hw_contexts = 8;
1781
1782                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1783                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1784                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1785                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1786                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1787                 break;
1788         default:
1789                 adev->gfx.config.max_shader_engines = 2;
1790                 adev->gfx.config.max_tile_pipes = 4;
1791                 adev->gfx.config.max_cu_per_sh = 2;
1792                 adev->gfx.config.max_sh_per_se = 1;
1793                 adev->gfx.config.max_backends_per_se = 2;
1794                 adev->gfx.config.max_texture_channel_caches = 4;
1795                 adev->gfx.config.max_gprs = 256;
1796                 adev->gfx.config.max_gs_threads = 32;
1797                 adev->gfx.config.max_hw_contexts = 8;
1798
1799                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1800                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1801                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1802                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1803                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1804                 break;
1805         }
1806
1807         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1808         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1809         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1810
1811         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1812         adev->gfx.config.mem_max_burst_length_bytes = 256;
1813         if (adev->flags & AMD_IS_APU) {
1814                 /* Get memory bank mapping mode. */
1815                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1816                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1817                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1818
1819                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1820                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1821                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1822
1823                 /* Validate settings in case only one DIMM installed. */
1824                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1825                         dimm00_addr_map = 0;
1826                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1827                         dimm01_addr_map = 0;
1828                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1829                         dimm10_addr_map = 0;
1830                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1831                         dimm11_addr_map = 0;
1832
1833                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1834                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1835                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1836                         adev->gfx.config.mem_row_size_in_kb = 2;
1837                 else
1838                         adev->gfx.config.mem_row_size_in_kb = 1;
1839         } else {
1840                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1841                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1842                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1843                         adev->gfx.config.mem_row_size_in_kb = 4;
1844         }
1845
1846         adev->gfx.config.shader_engine_tile_size = 32;
1847         adev->gfx.config.num_gpus = 1;
1848         adev->gfx.config.multi_gpu_tile_size = 64;
1849
1850         /* fix up row size */
1851         switch (adev->gfx.config.mem_row_size_in_kb) {
1852         case 1:
1853         default:
1854                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1855                 break;
1856         case 2:
1857                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1858                 break;
1859         case 4:
1860                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1861                 break;
1862         }
1863         adev->gfx.config.gb_addr_config = gb_addr_config;
1864
1865         return 0;
1866 }
1867
1868 static int gfx_v8_0_sw_init(void *handle)
1869 {
1870         int i, r;
1871         struct amdgpu_ring *ring;
1872         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1873
1874         /* EOP Event */
1875         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1876         if (r)
1877                 return r;
1878
1879         /* Privileged reg */
1880         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1881         if (r)
1882                 return r;
1883
1884         /* Privileged inst */
1885         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1886         if (r)
1887                 return r;
1888
1889         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1890
1891         gfx_v8_0_scratch_init(adev);
1892
1893         r = gfx_v8_0_init_microcode(adev);
1894         if (r) {
1895                 DRM_ERROR("Failed to load gfx firmware!\n");
1896                 return r;
1897         }
1898
1899         r = gfx_v8_0_rlc_init(adev);
1900         if (r) {
1901                 DRM_ERROR("Failed to init rlc BOs!\n");
1902                 return r;
1903         }
1904
1905         r = gfx_v8_0_mec_init(adev);
1906         if (r) {
1907                 DRM_ERROR("Failed to init MEC BOs!\n");
1908                 return r;
1909         }
1910
1911         /* set up the gfx ring */
1912         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1913                 ring = &adev->gfx.gfx_ring[i];
1914                 ring->ring_obj = NULL;
1915                 sprintf(ring->name, "gfx");
1916                 /* no gfx doorbells on iceland */
1917                 if (adev->asic_type != CHIP_TOPAZ) {
1918                         ring->use_doorbell = true;
1919                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1920                 }
1921
1922                 r = amdgpu_ring_init(adev, ring, 1024,
1923                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1924                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1925                                      AMDGPU_RING_TYPE_GFX);
1926                 if (r)
1927                         return r;
1928         }
1929
1930         /* set up the compute queues */
1931         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1932                 unsigned irq_type;
1933
1934                 /* max 32 queues per MEC */
1935                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1936                         DRM_ERROR("Too many (%d) compute rings!\n", i);
1937                         break;
1938                 }
1939                 ring = &adev->gfx.compute_ring[i];
1940                 ring->ring_obj = NULL;
1941                 ring->use_doorbell = true;
1942                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1943                 ring->me = 1; /* first MEC */
1944                 ring->pipe = i / 8;
1945                 ring->queue = i % 8;
1946                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1947                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1948                 /* type-2 packets are deprecated on MEC, use type-3 instead */
1949                 r = amdgpu_ring_init(adev, ring, 1024,
1950                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1951                                      &adev->gfx.eop_irq, irq_type,
1952                                      AMDGPU_RING_TYPE_COMPUTE);
1953                 if (r)
1954                         return r;
1955         }
1956
1957         /* reserve GDS, GWS and OA resource for gfx */
1958         r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1959                         PAGE_SIZE, true,
1960                         AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1961                         NULL, &adev->gds.gds_gfx_bo);
1962         if (r)
1963                 return r;
1964
1965         r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1966                 PAGE_SIZE, true,
1967                 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1968                 NULL, &adev->gds.gws_gfx_bo);
1969         if (r)
1970                 return r;
1971
1972         r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1973                         PAGE_SIZE, true,
1974                         AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1975                         NULL, &adev->gds.oa_gfx_bo);
1976         if (r)
1977                 return r;
1978
1979         adev->gfx.ce_ram_size = 0x8000;
1980
1981         r = gfx_v8_0_gpu_early_init(adev);
1982         if (r)
1983                 return r;
1984
1985         return 0;
1986 }
1987
1988 static int gfx_v8_0_sw_fini(void *handle)
1989 {
1990         int i;
1991         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1992
1993         amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1994         amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1995         amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1996
1997         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1998                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1999         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2000                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2001
2002         gfx_v8_0_mec_fini(adev);
2003
2004         gfx_v8_0_rlc_fini(adev);
2005
2006         gfx_v8_0_free_microcode(adev);
2007
2008         return 0;
2009 }
2010
2011 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2012 {
2013         uint32_t *modearray, *mod2array;
2014         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2015         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2016         u32 reg_offset;
2017
2018         modearray = adev->gfx.config.tile_mode_array;
2019         mod2array = adev->gfx.config.macrotile_mode_array;
2020
2021         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2022                 modearray[reg_offset] = 0;
2023
2024         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2025                 mod2array[reg_offset] = 0;
2026
2027         switch (adev->asic_type) {
2028         case CHIP_TOPAZ:
2029                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2030                                 PIPE_CONFIG(ADDR_SURF_P2) |
2031                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2032                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2033                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2034                                 PIPE_CONFIG(ADDR_SURF_P2) |
2035                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2036                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2037                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2038                                 PIPE_CONFIG(ADDR_SURF_P2) |
2039                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2040                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2041                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2042                                 PIPE_CONFIG(ADDR_SURF_P2) |
2043                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2044                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2045                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2046                                 PIPE_CONFIG(ADDR_SURF_P2) |
2047                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2048                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2049                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2050                                 PIPE_CONFIG(ADDR_SURF_P2) |
2051                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2052                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2053                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2054                                 PIPE_CONFIG(ADDR_SURF_P2) |
2055                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2056                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2057                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2058                                 PIPE_CONFIG(ADDR_SURF_P2));
2059                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2060                                 PIPE_CONFIG(ADDR_SURF_P2) |
2061                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2062                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2063                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2064                                  PIPE_CONFIG(ADDR_SURF_P2) |
2065                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2066                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2067                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2068                                  PIPE_CONFIG(ADDR_SURF_P2) |
2069                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2070                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2071                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2072                                  PIPE_CONFIG(ADDR_SURF_P2) |
2073                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2074                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2075                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2076                                  PIPE_CONFIG(ADDR_SURF_P2) |
2077                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2079                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2080                                  PIPE_CONFIG(ADDR_SURF_P2) |
2081                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2082                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2083                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2084                                  PIPE_CONFIG(ADDR_SURF_P2) |
2085                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2086                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2087                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2088                                  PIPE_CONFIG(ADDR_SURF_P2) |
2089                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2091                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2092                                  PIPE_CONFIG(ADDR_SURF_P2) |
2093                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2094                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2095                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2096                                  PIPE_CONFIG(ADDR_SURF_P2) |
2097                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2098                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2099                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2100                                  PIPE_CONFIG(ADDR_SURF_P2) |
2101                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2102                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2103                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2104                                  PIPE_CONFIG(ADDR_SURF_P2) |
2105                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2106                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2107                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2108                                  PIPE_CONFIG(ADDR_SURF_P2) |
2109                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2110                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2111                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2112                                  PIPE_CONFIG(ADDR_SURF_P2) |
2113                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2114                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2115                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2116                                  PIPE_CONFIG(ADDR_SURF_P2) |
2117                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2118                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2119                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2120                                  PIPE_CONFIG(ADDR_SURF_P2) |
2121                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2122                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2123                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2124                                  PIPE_CONFIG(ADDR_SURF_P2) |
2125                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2126                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2127                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2128                                  PIPE_CONFIG(ADDR_SURF_P2) |
2129                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2130                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2131
2132                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2133                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2134                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2135                                 NUM_BANKS(ADDR_SURF_8_BANK));
2136                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2137                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2138                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2139                                 NUM_BANKS(ADDR_SURF_8_BANK));
2140                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2141                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2142                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2143                                 NUM_BANKS(ADDR_SURF_8_BANK));
2144                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2145                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2146                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2147                                 NUM_BANKS(ADDR_SURF_8_BANK));
2148                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2149                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2150                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2151                                 NUM_BANKS(ADDR_SURF_8_BANK));
2152                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2153                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2154                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2155                                 NUM_BANKS(ADDR_SURF_8_BANK));
2156                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2157                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2158                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2159                                 NUM_BANKS(ADDR_SURF_8_BANK));
2160                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2161                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2162                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2163                                 NUM_BANKS(ADDR_SURF_16_BANK));
2164                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2165                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2166                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2167                                 NUM_BANKS(ADDR_SURF_16_BANK));
2168                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2169                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2170                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2171                                  NUM_BANKS(ADDR_SURF_16_BANK));
2172                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2173                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2174                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2175                                  NUM_BANKS(ADDR_SURF_16_BANK));
2176                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2177                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2178                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2179                                  NUM_BANKS(ADDR_SURF_16_BANK));
2180                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2181                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2182                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2183                                  NUM_BANKS(ADDR_SURF_16_BANK));
2184                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2185                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2186                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2187                                  NUM_BANKS(ADDR_SURF_8_BANK));
2188
2189                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2190                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2191                             reg_offset != 23)
2192                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2193
2194                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2195                         if (reg_offset != 7)
2196                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2197
2198                 break;
2199         case CHIP_FIJI:
2200                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2201                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2202                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2203                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2204                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2205                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2206                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2207                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2208                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2209                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2210                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2211                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2212                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2213                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2214                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2215                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2216                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2217                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2218                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2219                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2220                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2221                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2222                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2223                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2224                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2225                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2226                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2227                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2228                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2229                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2230                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2231                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2232                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2233                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2234                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2235                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2236                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2237                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2238                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2239                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2240                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2241                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2242                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2243                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2244                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2245                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2246                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2247                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2248                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2249                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2250                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2251                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2252                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2253                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2254                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2255                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2256                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2257                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2258                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2259                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2260                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2261                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2262                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2263                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2264                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2265                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2266                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2267                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2268                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2269                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2270                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2271                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2272                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2273                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2274                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2275                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2276                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2277                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2278                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2279                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2280                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2281                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2282                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2283                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2284                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2285                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2286                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2287                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2289                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2290                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2291                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2292                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2293                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2295                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2297                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2299                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2301                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2302                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2303                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2305                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2307                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2309                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2310                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2311                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2313                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2314                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2315                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2317                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2318                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2319                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2320                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2321                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2322
2323                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2324                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2325                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2326                                 NUM_BANKS(ADDR_SURF_8_BANK));
2327                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2329                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2330                                 NUM_BANKS(ADDR_SURF_8_BANK));
2331                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2333                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2334                                 NUM_BANKS(ADDR_SURF_8_BANK));
2335                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2337                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2338                                 NUM_BANKS(ADDR_SURF_8_BANK));
2339                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2341                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2342                                 NUM_BANKS(ADDR_SURF_8_BANK));
2343                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2345                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2346                                 NUM_BANKS(ADDR_SURF_8_BANK));
2347                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2349                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2350                                 NUM_BANKS(ADDR_SURF_8_BANK));
2351                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2353                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354                                 NUM_BANKS(ADDR_SURF_8_BANK));
2355                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2357                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358                                 NUM_BANKS(ADDR_SURF_8_BANK));
2359                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2360                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2361                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2362                                  NUM_BANKS(ADDR_SURF_8_BANK));
2363                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2364                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2365                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2366                                  NUM_BANKS(ADDR_SURF_8_BANK));
2367                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2369                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2370                                  NUM_BANKS(ADDR_SURF_8_BANK));
2371                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2373                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2374                                  NUM_BANKS(ADDR_SURF_8_BANK));
2375                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2377                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2378                                  NUM_BANKS(ADDR_SURF_4_BANK));
2379
2380                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2381                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2382
2383                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2384                         if (reg_offset != 7)
2385                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2386
2387                 break;
2388         case CHIP_TONGA:
2389                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2391                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2392                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2393                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2395                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2396                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2397                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2399                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2400                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2401                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2403                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2404                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2405                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2407                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2408                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2409                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2410                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2411                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2412                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2413                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2414                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2415                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2416                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2417                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2418                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2419                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2420                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2421                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2422                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2423                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2424                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2425                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2426                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2428                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2429                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2430                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2432                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2433                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2434                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2435                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2436                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2437                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2438                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2439                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2440                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2441                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2443                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2444                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2445                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2446                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2447                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2448                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2449                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2450                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2451                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2452                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2453                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2454                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2455                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2456                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2457                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2458                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2459                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2460                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2461                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2462                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2463                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2464                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2465                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2466                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2467                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2468                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2469                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2470                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2471                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2472                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2473                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2474                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2475                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2476                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2477                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2478                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2479                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2480                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2481                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2482                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2483                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2484                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2485                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2486                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2487                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2488                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2490                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2491                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2492                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2494                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2495                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2496                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2498                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2499                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2500                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2502                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2503                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2504                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2506                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2507                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2508                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2509                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2510                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2511
2512                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2514                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2515                                 NUM_BANKS(ADDR_SURF_16_BANK));
2516                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2518                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2519                                 NUM_BANKS(ADDR_SURF_16_BANK));
2520                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2523                                 NUM_BANKS(ADDR_SURF_16_BANK));
2524                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2525                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2526                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2527                                 NUM_BANKS(ADDR_SURF_16_BANK));
2528                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2530                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2531                                 NUM_BANKS(ADDR_SURF_16_BANK));
2532                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2534                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2535                                 NUM_BANKS(ADDR_SURF_16_BANK));
2536                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2538                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2539                                 NUM_BANKS(ADDR_SURF_16_BANK));
2540                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2541                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2542                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2543                                 NUM_BANKS(ADDR_SURF_16_BANK));
2544                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2546                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2547                                 NUM_BANKS(ADDR_SURF_16_BANK));
2548                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2550                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2551                                  NUM_BANKS(ADDR_SURF_16_BANK));
2552                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2553                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2554                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2555                                  NUM_BANKS(ADDR_SURF_16_BANK));
2556                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2558                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2559                                  NUM_BANKS(ADDR_SURF_8_BANK));
2560                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2562                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2563                                  NUM_BANKS(ADDR_SURF_4_BANK));
2564                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2565                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2566                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2567                                  NUM_BANKS(ADDR_SURF_4_BANK));
2568
2569                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2570                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2571
2572                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2573                         if (reg_offset != 7)
2574                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2575
2576                 break;
2577         case CHIP_POLARIS11:
2578                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2580                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2581                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2582                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2584                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2585                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2586                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2587                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2588                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2589                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2590                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2592                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2593                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2594                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2596                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2597                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2598                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2599                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2600                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2601                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2602                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2603                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2604                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2605                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2606                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2607                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2608                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2609                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2610                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2611                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2612                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2613                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2614                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2615                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2616                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2617                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2618                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2619                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2621                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2622                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2623                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2624                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2626                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2627                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2628                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2629                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2630                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2631                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2632                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2633                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2634                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2635                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2636                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2637                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2638                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2639                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2640                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2641                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2642                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2643                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2644                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2645                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2646                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2647                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2648                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2649                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2650                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2651                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2652                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2653                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2654                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2655                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2656                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2657                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2658                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2659                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2660                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2661                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2662                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2663                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2664                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2665                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2666                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2667                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2668                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2669                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2670                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2671                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2672                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2673                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2674                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2675                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2676                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2677                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2678                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2679                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2680                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2681                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2682                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2683                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2684                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2685                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2686                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2687                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2688                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2690                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2691                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2693                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2695                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2696                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2697                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2699                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2700
2701                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2702                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2703                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2704                                 NUM_BANKS(ADDR_SURF_16_BANK));
2705
2706                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2707                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2708                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2709                                 NUM_BANKS(ADDR_SURF_16_BANK));
2710
2711                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2712                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2713                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2714                                 NUM_BANKS(ADDR_SURF_16_BANK));
2715
2716                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2717                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2718                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2719                                 NUM_BANKS(ADDR_SURF_16_BANK));
2720
2721                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2723                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2724                                 NUM_BANKS(ADDR_SURF_16_BANK));
2725
2726                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2729                                 NUM_BANKS(ADDR_SURF_16_BANK));
2730
2731                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2733                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2734                                 NUM_BANKS(ADDR_SURF_16_BANK));
2735
2736                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2737                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2738                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2739                                 NUM_BANKS(ADDR_SURF_16_BANK));
2740
2741                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2742                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2743                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2744                                 NUM_BANKS(ADDR_SURF_16_BANK));
2745
2746                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2748                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2749                                 NUM_BANKS(ADDR_SURF_16_BANK));
2750
2751                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2753                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2754                                 NUM_BANKS(ADDR_SURF_16_BANK));
2755
2756                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2759                                 NUM_BANKS(ADDR_SURF_16_BANK));
2760
2761                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2763                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2764                                 NUM_BANKS(ADDR_SURF_8_BANK));
2765
2766                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2767                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2768                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2769                                 NUM_BANKS(ADDR_SURF_4_BANK));
2770
2771                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2772                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2773
2774                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2775                         if (reg_offset != 7)
2776                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2777
2778                 break;
2779         case CHIP_POLARIS10:
2780                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2782                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2783                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2784                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2786                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2787                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2788                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2790                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2791                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2792                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2793                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2794                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2795                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2796                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2798                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2799                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2800                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2802                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2803                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2804                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2805                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2806                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2807                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2808                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2809                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2810                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2811                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2812                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2813                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2814                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2815                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2816                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2817                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2818                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2819                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2820                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2821                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2823                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2824                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2825                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2826                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2827                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2828                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2829                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2830                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2831                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2832                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2833                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2834                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2835                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2836                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2837                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2838                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2839                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2840                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2841                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2842                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2843                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2844                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2845                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2846                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2847                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2848                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2849                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2850                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2851                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2852                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2853                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2854                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2855                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2856                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2857                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2858                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2859                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2860                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2861                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2862                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2863                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2864                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2865                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2866                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2867                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2868                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2869                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2870                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2871                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2872                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2873                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2874                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2875                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2876                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2877                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2878                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2879                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2880                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2881                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2882                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2883                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2884                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2885                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2886                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2887                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2888                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2889                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2890                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2891                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2892                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2893                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2894                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2895                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2896                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2897                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2898                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2899                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2900                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2901                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2902
2903                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2904                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2905                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2906                                 NUM_BANKS(ADDR_SURF_16_BANK));
2907
2908                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2909                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2910                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2911                                 NUM_BANKS(ADDR_SURF_16_BANK));
2912
2913                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2914                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2915                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2916                                 NUM_BANKS(ADDR_SURF_16_BANK));
2917
2918                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2919                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2920                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2921                                 NUM_BANKS(ADDR_SURF_16_BANK));
2922
2923                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2924                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2925                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2926                                 NUM_BANKS(ADDR_SURF_16_BANK));
2927
2928                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2929                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2930                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2931                                 NUM_BANKS(ADDR_SURF_16_BANK));
2932
2933                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2934                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2935                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2936                                 NUM_BANKS(ADDR_SURF_16_BANK));
2937
2938                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2940                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2941                                 NUM_BANKS(ADDR_SURF_16_BANK));
2942
2943                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2944                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2945                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2946                                 NUM_BANKS(ADDR_SURF_16_BANK));
2947
2948                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2949                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2950                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2951                                 NUM_BANKS(ADDR_SURF_16_BANK));
2952
2953                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2954                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2955                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2956                                 NUM_BANKS(ADDR_SURF_16_BANK));
2957
2958                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2959                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2960                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2961                                 NUM_BANKS(ADDR_SURF_8_BANK));
2962
2963                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2964                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2965                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2966                                 NUM_BANKS(ADDR_SURF_4_BANK));
2967
2968                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2970                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2971                                 NUM_BANKS(ADDR_SURF_4_BANK));
2972
2973                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2974                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2975
2976                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2977                         if (reg_offset != 7)
2978                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2979
2980                 break;
2981         case CHIP_STONEY:
2982                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2983                                 PIPE_CONFIG(ADDR_SURF_P2) |
2984                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2985                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2986                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2987                                 PIPE_CONFIG(ADDR_SURF_P2) |
2988                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2989                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2990                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2991                                 PIPE_CONFIG(ADDR_SURF_P2) |
2992                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2993                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2994                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2995                                 PIPE_CONFIG(ADDR_SURF_P2) |
2996                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2997                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2998                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2999                                 PIPE_CONFIG(ADDR_SURF_P2) |
3000                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3001                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3002                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3003                                 PIPE_CONFIG(ADDR_SURF_P2) |
3004                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3005                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3006                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3007                                 PIPE_CONFIG(ADDR_SURF_P2) |
3008                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3009                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3010                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3011                                 PIPE_CONFIG(ADDR_SURF_P2));
3012                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3013                                 PIPE_CONFIG(ADDR_SURF_P2) |
3014                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3015                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3016                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3017                                  PIPE_CONFIG(ADDR_SURF_P2) |
3018                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3019                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3020                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3021                                  PIPE_CONFIG(ADDR_SURF_P2) |
3022                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3023                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3024                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3025                                  PIPE_CONFIG(ADDR_SURF_P2) |
3026                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3027                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3028                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3029                                  PIPE_CONFIG(ADDR_SURF_P2) |
3030                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3031                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3032                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3033                                  PIPE_CONFIG(ADDR_SURF_P2) |
3034                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3035                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3036                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3037                                  PIPE_CONFIG(ADDR_SURF_P2) |
3038                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3039                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3040                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3041                                  PIPE_CONFIG(ADDR_SURF_P2) |
3042                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3043                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3044                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3045                                  PIPE_CONFIG(ADDR_SURF_P2) |
3046                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3047                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3048                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3049                                  PIPE_CONFIG(ADDR_SURF_P2) |
3050                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3051                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3052                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3053                                  PIPE_CONFIG(ADDR_SURF_P2) |
3054                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3055                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3056                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3057                                  PIPE_CONFIG(ADDR_SURF_P2) |
3058                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3059                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3060                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3061                                  PIPE_CONFIG(ADDR_SURF_P2) |
3062                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3063                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3064                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3065                                  PIPE_CONFIG(ADDR_SURF_P2) |
3066                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3067                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3068                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3069                                  PIPE_CONFIG(ADDR_SURF_P2) |
3070                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3071                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3072                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3073                                  PIPE_CONFIG(ADDR_SURF_P2) |
3074                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3075                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3076                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3077                                  PIPE_CONFIG(ADDR_SURF_P2) |
3078                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3079                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3080                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3081                                  PIPE_CONFIG(ADDR_SURF_P2) |
3082                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3083                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3084
3085                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3086                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3087                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3088                                 NUM_BANKS(ADDR_SURF_8_BANK));
3089                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3090                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3091                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3092                                 NUM_BANKS(ADDR_SURF_8_BANK));
3093                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3095                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3096                                 NUM_BANKS(ADDR_SURF_8_BANK));
3097                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3098                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3099                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3100                                 NUM_BANKS(ADDR_SURF_8_BANK));
3101                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3102                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3103                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3104                                 NUM_BANKS(ADDR_SURF_8_BANK));
3105                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3106                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3107                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3108                                 NUM_BANKS(ADDR_SURF_8_BANK));
3109                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3110                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3111                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3112                                 NUM_BANKS(ADDR_SURF_8_BANK));
3113                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3114                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3115                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3116                                 NUM_BANKS(ADDR_SURF_16_BANK));
3117                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3118                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3119                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3120                                 NUM_BANKS(ADDR_SURF_16_BANK));
3121                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3122                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3123                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3124                                  NUM_BANKS(ADDR_SURF_16_BANK));
3125                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3126                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3127                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3128                                  NUM_BANKS(ADDR_SURF_16_BANK));
3129                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3130                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3131                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3132                                  NUM_BANKS(ADDR_SURF_16_BANK));
3133                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3134                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3135                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3136                                  NUM_BANKS(ADDR_SURF_16_BANK));
3137                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3138                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3139                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3140                                  NUM_BANKS(ADDR_SURF_8_BANK));
3141
3142                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3143                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3144                             reg_offset != 23)
3145                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3146
3147                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3148                         if (reg_offset != 7)
3149                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3150
3151                 break;
3152         default:
3153                 dev_warn(adev->dev,
3154                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3155                          adev->asic_type);
3156
3157         case CHIP_CARRIZO:
3158                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3159                                 PIPE_CONFIG(ADDR_SURF_P2) |
3160                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3161                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3162                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3163                                 PIPE_CONFIG(ADDR_SURF_P2) |
3164                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3165                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3166                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3167                                 PIPE_CONFIG(ADDR_SURF_P2) |
3168                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3169                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3170                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3171                                 PIPE_CONFIG(ADDR_SURF_P2) |
3172                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3173                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3174                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3175                                 PIPE_CONFIG(ADDR_SURF_P2) |
3176                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3177                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3178                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3179                                 PIPE_CONFIG(ADDR_SURF_P2) |
3180                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3181                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3182                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3183                                 PIPE_CONFIG(ADDR_SURF_P2) |
3184                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3185                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3186                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3187                                 PIPE_CONFIG(ADDR_SURF_P2));
3188                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3189                                 PIPE_CONFIG(ADDR_SURF_P2) |
3190                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3191                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3192                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3193                                  PIPE_CONFIG(ADDR_SURF_P2) |
3194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3196                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3197                                  PIPE_CONFIG(ADDR_SURF_P2) |
3198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3200                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3201                                  PIPE_CONFIG(ADDR_SURF_P2) |
3202                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3203                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3204                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3205                                  PIPE_CONFIG(ADDR_SURF_P2) |
3206                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3207                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3208                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3209                                  PIPE_CONFIG(ADDR_SURF_P2) |
3210                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3211                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3212                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3213                                  PIPE_CONFIG(ADDR_SURF_P2) |
3214                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3215                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3216                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3217                                  PIPE_CONFIG(ADDR_SURF_P2) |
3218                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3219                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3220                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3221                                  PIPE_CONFIG(ADDR_SURF_P2) |
3222                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3223                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3224                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3225                                  PIPE_CONFIG(ADDR_SURF_P2) |
3226                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3227                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3228                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3229                                  PIPE_CONFIG(ADDR_SURF_P2) |
3230                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3231                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3232                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3233                                  PIPE_CONFIG(ADDR_SURF_P2) |
3234                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3235                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3236                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3237                                  PIPE_CONFIG(ADDR_SURF_P2) |
3238                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3239                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3240                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3241                                  PIPE_CONFIG(ADDR_SURF_P2) |
3242                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3243                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3244                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3245                                  PIPE_CONFIG(ADDR_SURF_P2) |
3246                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3247                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3248                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3249                                  PIPE_CONFIG(ADDR_SURF_P2) |
3250                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3251                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3252                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3253                                  PIPE_CONFIG(ADDR_SURF_P2) |
3254                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3255                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3256                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3257                                  PIPE_CONFIG(ADDR_SURF_P2) |
3258                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3259                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3260
3261                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3262                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3263                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3264                                 NUM_BANKS(ADDR_SURF_8_BANK));
3265                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3266                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3267                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3268                                 NUM_BANKS(ADDR_SURF_8_BANK));
3269                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3270                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3271                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3272                                 NUM_BANKS(ADDR_SURF_8_BANK));
3273                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3274                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3275                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3276                                 NUM_BANKS(ADDR_SURF_8_BANK));
3277                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3278                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3279                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3280                                 NUM_BANKS(ADDR_SURF_8_BANK));
3281                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3282                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3283                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3284                                 NUM_BANKS(ADDR_SURF_8_BANK));
3285                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3286                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3287                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3288                                 NUM_BANKS(ADDR_SURF_8_BANK));
3289                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3290                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3291                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3292                                 NUM_BANKS(ADDR_SURF_16_BANK));
3293                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3294                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3295                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3296                                 NUM_BANKS(ADDR_SURF_16_BANK));
3297                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3298                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3299                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3300                                  NUM_BANKS(ADDR_SURF_16_BANK));
3301                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3302                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3303                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3304                                  NUM_BANKS(ADDR_SURF_16_BANK));
3305                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3306                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3307                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3308                                  NUM_BANKS(ADDR_SURF_16_BANK));
3309                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3310                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3311                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3312                                  NUM_BANKS(ADDR_SURF_16_BANK));
3313                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3314                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3315                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3316                                  NUM_BANKS(ADDR_SURF_8_BANK));
3317
3318                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3319                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3320                             reg_offset != 23)
3321                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3322
3323                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3324                         if (reg_offset != 7)
3325                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3326
3327                 break;
3328         }
3329 }
3330
3331 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
3332 {
3333         u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3334
3335         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3336                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3337                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3338         } else if (se_num == 0xffffffff) {
3339                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3340                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3341         } else if (sh_num == 0xffffffff) {
3342                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3343                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3344         } else {
3345                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3346                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3347         }
3348         WREG32(mmGRBM_GFX_INDEX, data);
3349 }
3350
3351 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3352 {
3353         return (u32)((1ULL << bit_width) - 1);
3354 }
3355
3356 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3357 {
3358         u32 data, mask;
3359
3360         data = RREG32(mmCC_RB_BACKEND_DISABLE);
3361         data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3362
3363         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3364         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3365
3366         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3367                                        adev->gfx.config.max_sh_per_se);
3368
3369         return (~data) & mask;
3370 }
3371
3372 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3373 {
3374         int i, j;
3375         u32 data;
3376         u32 active_rbs = 0;
3377         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3378                                         adev->gfx.config.max_sh_per_se;
3379
3380         mutex_lock(&adev->grbm_idx_mutex);
3381         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3382                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3383                         gfx_v8_0_select_se_sh(adev, i, j);
3384                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3385                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3386                                                rb_bitmap_width_per_sh);
3387                 }
3388         }
3389         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3390         mutex_unlock(&adev->grbm_idx_mutex);
3391
3392         adev->gfx.config.backend_enable_mask = active_rbs;
3393         adev->gfx.config.num_rbs = hweight32(active_rbs);
3394 }
3395
3396 /**
3397  * gfx_v8_0_init_compute_vmid - gart enable
3398  *
3399  * @rdev: amdgpu_device pointer
3400  *
3401  * Initialize compute vmid sh_mem registers
3402  *
3403  */
3404 #define DEFAULT_SH_MEM_BASES    (0x6000)
3405 #define FIRST_COMPUTE_VMID      (8)
3406 #define LAST_COMPUTE_VMID       (16)
3407 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3408 {
3409         int i;
3410         uint32_t sh_mem_config;
3411         uint32_t sh_mem_bases;
3412
3413         /*
3414          * Configure apertures:
3415          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3416          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3417          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3418          */
3419         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3420
3421         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3422                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3423                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3424                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3425                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3426                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3427
3428         mutex_lock(&adev->srbm_mutex);
3429         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3430                 vi_srbm_select(adev, 0, 0, 0, i);
3431                 /* CP and shaders */
3432                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3433                 WREG32(mmSH_MEM_APE1_BASE, 1);
3434                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3435                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3436         }
3437         vi_srbm_select(adev, 0, 0, 0, 0);
3438         mutex_unlock(&adev->srbm_mutex);
3439 }
3440
3441 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3442 {
3443         u32 tmp;
3444         int i;
3445
3446         tmp = RREG32(mmGRBM_CNTL);
3447         tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3448         WREG32(mmGRBM_CNTL, tmp);
3449
3450         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3451         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3452         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3453
3454         gfx_v8_0_tiling_mode_table_init(adev);
3455
3456         gfx_v8_0_setup_rb(adev);
3457         gfx_v8_0_get_cu_info(adev);
3458
3459         /* XXX SH_MEM regs */
3460         /* where to put LDS, scratch, GPUVM in FSA64 space */
3461         mutex_lock(&adev->srbm_mutex);
3462         for (i = 0; i < 16; i++) {
3463                 vi_srbm_select(adev, 0, 0, 0, i);
3464                 /* CP and shaders */
3465                 if (i == 0) {
3466                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3467                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3468                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3469                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3470                         WREG32(mmSH_MEM_CONFIG, tmp);
3471                 } else {
3472                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3473                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3474                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3475                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3476                         WREG32(mmSH_MEM_CONFIG, tmp);
3477                 }
3478
3479                 WREG32(mmSH_MEM_APE1_BASE, 1);
3480                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3481                 WREG32(mmSH_MEM_BASES, 0);
3482         }
3483         vi_srbm_select(adev, 0, 0, 0, 0);
3484         mutex_unlock(&adev->srbm_mutex);
3485
3486         gfx_v8_0_init_compute_vmid(adev);
3487
3488         mutex_lock(&adev->grbm_idx_mutex);
3489         /*
3490          * making sure that the following register writes will be broadcasted
3491          * to all the shaders
3492          */
3493         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3494
3495         WREG32(mmPA_SC_FIFO_SIZE,
3496                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3497                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3498                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3499                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3500                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3501                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3502                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3503                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3504         mutex_unlock(&adev->grbm_idx_mutex);
3505
3506 }
3507
3508 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3509 {
3510         u32 i, j, k;
3511         u32 mask;
3512
3513         mutex_lock(&adev->grbm_idx_mutex);
3514         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3515                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3516                         gfx_v8_0_select_se_sh(adev, i, j);
3517                         for (k = 0; k < adev->usec_timeout; k++) {
3518                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3519                                         break;
3520                                 udelay(1);
3521                         }
3522                 }
3523         }
3524         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3525         mutex_unlock(&adev->grbm_idx_mutex);
3526
3527         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3528                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3529                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3530                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3531         for (k = 0; k < adev->usec_timeout; k++) {
3532                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3533                         break;
3534                 udelay(1);
3535         }
3536 }
3537
3538 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3539                                                bool enable)
3540 {
3541         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3542
3543         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3544         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3545         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3546         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3547
3548         WREG32(mmCP_INT_CNTL_RING0, tmp);
3549 }
3550
3551 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3552 {
3553         /* csib */
3554         WREG32(mmRLC_CSIB_ADDR_HI,
3555                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3556         WREG32(mmRLC_CSIB_ADDR_LO,
3557                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3558         WREG32(mmRLC_CSIB_LENGTH,
3559                         adev->gfx.rlc.clear_state_size);
3560 }
3561
/*
 * Walk the indirect part of an RLC register list and rewrite it in place.
 *
 * @register_list_format: list to parse; modified in place
 * @ind_offset: index of the first entry to look at
 * @list_size: number of entries in @register_list_format
 * @unique_indices: out, distinct index values found so far
 * @indices_count: in/out, number of used slots in @unique_indices
 * @max_indices: capacity of @unique_indices
 * @ind_start_offsets: out, start offset of every block found
 * @offset_count: in/out, number of used slots in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets
 *
 * Blocks are terminated by an 0xFFFFFFFF entry.  Within a block the walk
 * advances three entries at a time; the third entry of each step is
 * looked up in @unique_indices (and appended when it is new) and is then
 * replaced by its slot number in that table.
 *
 * BUG()s when either output table becomes full.  A record that would
 * extend past @list_size ends the walk instead of being accessed out of
 * bounds.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
                                int ind_offset,
                                int list_size,
                                int *unique_indices,
                                int *indices_count,
                                int max_indices,
                                int *ind_start_offsets,
                                int *offset_count,
                                int max_offset)
{
        int indices;
        bool new_entry = true;

        for (; ind_offset < list_size; ind_offset++) {

                /* first entry after a terminator (or the very first one) */
                if (new_entry) {
                        new_entry = false;
                        ind_start_offsets[*offset_count] = ind_offset;
                        *offset_count = *offset_count + 1;
                        BUG_ON(*offset_count >= max_offset);
                }

                /* block terminator: the next entry starts a new block */
                if (register_list_format[ind_offset] == 0xFFFFFFFF) {
                        new_entry = true;
                        continue;
                }

                ind_offset += 2;

                /* truncated record: do not touch memory past the list */
                if (ind_offset >= list_size)
                        break;

                /* look for the matching index */
                for (indices = 0;
                        indices < *indices_count;
                        indices++) {
                        if (unique_indices[indices] ==
                                register_list_format[ind_offset])
                                break;
                }

                /* not seen before: append it to the table */
                if (indices >= *indices_count) {
                        unique_indices[*indices_count] =
                                register_list_format[ind_offset];
                        indices = *indices_count;
                        *indices_count = *indices_count + 1;
                        BUG_ON(*indices_count >= max_indices);
                }

                /* replace the raw value by its slot in unique_indices */
                register_list_format[ind_offset] = indices;
        }
}
3611
3612 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3613 {
3614         int i, temp, data;
3615         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3616         int indices_count = 0;
3617         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3618         int offset_count = 0;
3619
3620         int list_size;
3621         unsigned int *register_list_format =
3622                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3623         if (register_list_format == NULL)
3624                 return -ENOMEM;
3625         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3626                         adev->gfx.rlc.reg_list_format_size_bytes);
3627
3628         gfx_v8_0_parse_ind_reg_list(register_list_format,
3629                                 RLC_FormatDirectRegListLength,
3630                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3631                                 unique_indices,
3632                                 &indices_count,
3633                                 sizeof(unique_indices) / sizeof(int),
3634                                 indirect_start_offsets,
3635                                 &offset_count,
3636                                 sizeof(indirect_start_offsets)/sizeof(int));
3637
3638         /* save and restore list */
3639         temp = RREG32(mmRLC_SRM_CNTL);
3640         temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
3641         WREG32(mmRLC_SRM_CNTL, temp);
3642
3643         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3644         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3645                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3646
3647         /* indirect list */
3648         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3649         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3650                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3651
3652         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3653         list_size = list_size >> 1;
3654         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3655         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3656
3657         /* starting offsets starts */
3658         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3659                 adev->gfx.rlc.starting_offsets_start);
3660         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3661                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3662                                 indirect_start_offsets[i]);
3663
3664         /* unique indices */
3665         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3666         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3667         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3668                 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3669                 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3670         }
3671         kfree(register_list_format);
3672
3673         return 0;
3674 }
3675
3676 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3677 {
3678         uint32_t data;
3679
3680         data = RREG32(mmRLC_SRM_CNTL);
3681         data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3682         WREG32(mmRLC_SRM_CNTL, data);
3683 }
3684
3685 static void polaris11_init_power_gating(struct amdgpu_device *adev)
3686 {
3687         uint32_t data;
3688
3689         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3690                         AMD_PG_SUPPORT_GFX_SMG |
3691                         AMD_PG_SUPPORT_GFX_DMG)) {
3692                 data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3693                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3694                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3695                 WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3696
3697                 data = 0;
3698                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
3699                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
3700                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
3701                 data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
3702                 WREG32(mmRLC_PG_DELAY, data);
3703
3704                 data = RREG32(mmRLC_PG_DELAY_2);
3705                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
3706                 data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
3707                 WREG32(mmRLC_PG_DELAY_2, data);
3708
3709                 data = RREG32(mmRLC_AUTO_PG_CTRL);
3710                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3711                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3712                 WREG32(mmRLC_AUTO_PG_CTRL, data);
3713         }
3714 }
3715
3716 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3717 {
3718         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3719                               AMD_PG_SUPPORT_GFX_SMG |
3720                               AMD_PG_SUPPORT_GFX_DMG |
3721                               AMD_PG_SUPPORT_CP |
3722                               AMD_PG_SUPPORT_GDS |
3723                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
3724                 gfx_v8_0_init_csb(adev);
3725                 gfx_v8_0_init_save_restore_list(adev);
3726                 gfx_v8_0_enable_save_restore_machine(adev);
3727
3728                 if (adev->asic_type == CHIP_POLARIS11)
3729                         polaris11_init_power_gating(adev);
3730         }
3731 }
3732
3733 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3734 {
3735         u32 tmp = RREG32(mmRLC_CNTL);
3736
3737         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3738         WREG32(mmRLC_CNTL, tmp);
3739
3740         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3741
3742         gfx_v8_0_wait_for_rlc_serdes(adev);
3743 }
3744
3745 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3746 {
3747         u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3748
3749         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3750         WREG32(mmGRBM_SOFT_RESET, tmp);
3751         udelay(50);
3752         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3753         WREG32(mmGRBM_SOFT_RESET, tmp);
3754         udelay(50);
3755 }
3756
3757 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3758 {
3759         u32 tmp = RREG32(mmRLC_CNTL);
3760
3761         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3762         WREG32(mmRLC_CNTL, tmp);
3763
3764         /* carrizo do enable cp interrupt after cp inited */
3765         if (!(adev->flags & AMD_IS_APU))
3766                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3767
3768         udelay(50);
3769 }
3770
3771 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3772 {
3773         const struct rlc_firmware_header_v2_0 *hdr;
3774         const __le32 *fw_data;
3775         unsigned i, fw_size;
3776
3777         if (!adev->gfx.rlc_fw)
3778                 return -EINVAL;
3779
3780         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3781         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3782
3783         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3784                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3785         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3786
3787         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3788         for (i = 0; i < fw_size; i++)
3789                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3790         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3791
3792         return 0;
3793 }
3794
3795 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3796 {
3797         int r;
3798
3799         gfx_v8_0_rlc_stop(adev);
3800
3801         /* disable CG */
3802         WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
3803         if (adev->asic_type == CHIP_POLARIS11 ||
3804                 adev->asic_type == CHIP_POLARIS10)
3805                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
3806
3807         /* disable PG */
3808         WREG32(mmRLC_PG_CNTL, 0);
3809
3810         gfx_v8_0_rlc_reset(adev);
3811
3812         gfx_v8_0_init_pg(adev);
3813
3814         if (!adev->pp_enabled) {
3815                 if (!adev->firmware.smu_load) {
3816                         /* legacy rlc firmware loading */
3817                         r = gfx_v8_0_rlc_load_microcode(adev);
3818                         if (r)
3819                                 return r;
3820                 } else {
3821                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3822                                                         AMDGPU_UCODE_ID_RLC_G);
3823                         if (r)
3824                                 return -EINVAL;
3825                 }
3826         }
3827
3828         gfx_v8_0_rlc_start(adev);
3829
3830         return 0;
3831 }
3832
3833 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3834 {
3835         int i;
3836         u32 tmp = RREG32(mmCP_ME_CNTL);
3837
3838         if (enable) {
3839                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
3840                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
3841                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
3842         } else {
3843                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
3844                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
3845                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
3846                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3847                         adev->gfx.gfx_ring[i].ready = false;
3848         }
3849         WREG32(mmCP_ME_CNTL, tmp);
3850         udelay(50);
3851 }
3852
3853 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3854 {
3855         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3856         const struct gfx_firmware_header_v1_0 *ce_hdr;
3857         const struct gfx_firmware_header_v1_0 *me_hdr;
3858         const __le32 *fw_data;
3859         unsigned i, fw_size;
3860
3861         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3862                 return -EINVAL;
3863
3864         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3865                 adev->gfx.pfp_fw->data;
3866         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3867                 adev->gfx.ce_fw->data;
3868         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3869                 adev->gfx.me_fw->data;
3870
3871         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3872         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3873         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3874
3875         gfx_v8_0_cp_gfx_enable(adev, false);
3876
3877         /* PFP */
3878         fw_data = (const __le32 *)
3879                 (adev->gfx.pfp_fw->data +
3880                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3881         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3882         WREG32(mmCP_PFP_UCODE_ADDR, 0);
3883         for (i = 0; i < fw_size; i++)
3884                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3885         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3886
3887         /* CE */
3888         fw_data = (const __le32 *)
3889                 (adev->gfx.ce_fw->data +
3890                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3891         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3892         WREG32(mmCP_CE_UCODE_ADDR, 0);
3893         for (i = 0; i < fw_size; i++)
3894                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3895         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3896
3897         /* ME */
3898         fw_data = (const __le32 *)
3899                 (adev->gfx.me_fw->data +
3900                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3901         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3902         WREG32(mmCP_ME_RAM_WADDR, 0);
3903         for (i = 0; i < fw_size; i++)
3904                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3905         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3906
3907         return 0;
3908 }
3909
3910 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3911 {
3912         u32 count = 0;
3913         const struct cs_section_def *sect = NULL;
3914         const struct cs_extent_def *ext = NULL;
3915
3916         /* begin clear state */
3917         count += 2;
3918         /* context control state */
3919         count += 3;
3920
3921         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3922                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3923                         if (sect->id == SECT_CONTEXT)
3924                                 count += 2 + ext->reg_count;
3925                         else
3926                                 return 0;
3927                 }
3928         }
3929         /* pa_sc_raster_config/pa_sc_raster_config1 */
3930         count += 4;
3931         /* end clear state */
3932         count += 2;
3933         /* clear state */
3934         count += 2;
3935
3936         return count;
3937 }
3938
3939 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
3940 {
3941         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3942         const struct cs_section_def *sect = NULL;
3943         const struct cs_extent_def *ext = NULL;
3944         int r, i;
3945
3946         /* init the CP */
3947         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3948         WREG32(mmCP_ENDIAN_SWAP, 0);
3949         WREG32(mmCP_DEVICE_ID, 1);
3950
3951         gfx_v8_0_cp_gfx_enable(adev, true);
3952
3953         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
3954         if (r) {
3955                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3956                 return r;
3957         }
3958
3959         /* clear state buffer */
3960         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3961         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3962
3963         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3964         amdgpu_ring_write(ring, 0x80000000);
3965         amdgpu_ring_write(ring, 0x80000000);
3966
3967         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3968                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3969                         if (sect->id == SECT_CONTEXT) {
3970                                 amdgpu_ring_write(ring,
3971                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3972                                                ext->reg_count));
3973                                 amdgpu_ring_write(ring,
3974                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3975                                 for (i = 0; i < ext->reg_count; i++)
3976                                         amdgpu_ring_write(ring, ext->extent[i]);
3977                         }
3978                 }
3979         }
3980
3981         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3982         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3983         switch (adev->asic_type) {
3984         case CHIP_TONGA:
3985         case CHIP_POLARIS10:
3986                 amdgpu_ring_write(ring, 0x16000012);
3987                 amdgpu_ring_write(ring, 0x0000002A);
3988                 break;
3989         case CHIP_POLARIS11:
3990                 amdgpu_ring_write(ring, 0x16000012);
3991                 amdgpu_ring_write(ring, 0x00000000);
3992                 break;
3993         case CHIP_FIJI:
3994                 amdgpu_ring_write(ring, 0x3a00161a);
3995                 amdgpu_ring_write(ring, 0x0000002e);
3996                 break;
3997         case CHIP_CARRIZO:
3998                 amdgpu_ring_write(ring, 0x00000002);
3999                 amdgpu_ring_write(ring, 0x00000000);
4000                 break;
4001         case CHIP_TOPAZ:
4002                 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4003                                 0x00000000 : 0x00000002);
4004                 amdgpu_ring_write(ring, 0x00000000);
4005                 break;
4006         case CHIP_STONEY:
4007                 amdgpu_ring_write(ring, 0x00000000);
4008                 amdgpu_ring_write(ring, 0x00000000);
4009                 break;
4010         default:
4011                 BUG();
4012         }
4013
4014         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4015         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4016
4017         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4018         amdgpu_ring_write(ring, 0);
4019
4020         /* init the CE partitions */
4021         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4022         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4023         amdgpu_ring_write(ring, 0x8000);
4024         amdgpu_ring_write(ring, 0x8000);
4025
4026         amdgpu_ring_commit(ring);
4027
4028         return 0;
4029 }
4030
4031 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4032 {
4033         struct amdgpu_ring *ring;
4034         u32 tmp;
4035         u32 rb_bufsz;
4036         u64 rb_addr, rptr_addr;
4037         int r;
4038
4039         /* Set the write pointer delay */
4040         WREG32(mmCP_RB_WPTR_DELAY, 0);
4041
4042         /* set the RB to use vmid 0 */
4043         WREG32(mmCP_RB_VMID, 0);
4044
4045         /* Set ring buffer size */
4046         ring = &adev->gfx.gfx_ring[0];
4047         rb_bufsz = order_base_2(ring->ring_size / 8);
4048         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4049         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4050         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4051         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4052 #ifdef __BIG_ENDIAN
4053         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4054 #endif
4055         WREG32(mmCP_RB0_CNTL, tmp);
4056
4057         /* Initialize the ring buffer's read and write pointers */
4058         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4059         ring->wptr = 0;
4060         WREG32(mmCP_RB0_WPTR, ring->wptr);
4061
4062         /* set the wb address wether it's enabled or not */
4063         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4064         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4065         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4066
4067         mdelay(1);
4068         WREG32(mmCP_RB0_CNTL, tmp);
4069
4070         rb_addr = ring->gpu_addr >> 8;
4071         WREG32(mmCP_RB0_BASE, rb_addr);
4072         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4073
4074         /* no gfx doorbells on iceland */
4075         if (adev->asic_type != CHIP_TOPAZ) {
4076                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4077                 if (ring->use_doorbell) {
4078                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4079                                             DOORBELL_OFFSET, ring->doorbell_index);
4080                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4081                                             DOORBELL_HIT, 0);
4082                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4083                                             DOORBELL_EN, 1);
4084                 } else {
4085                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4086                                             DOORBELL_EN, 0);
4087                 }
4088                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4089
4090                 if (adev->asic_type == CHIP_TONGA) {
4091                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4092                                             DOORBELL_RANGE_LOWER,
4093                                             AMDGPU_DOORBELL_GFX_RING0);
4094                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4095
4096                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4097                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4098                 }
4099
4100         }
4101
4102         /* start the ring */
4103         gfx_v8_0_cp_gfx_start(adev);
4104         ring->ready = true;
4105         r = amdgpu_ring_test_ring(ring);
4106         if (r) {
4107                 ring->ready = false;
4108                 return r;
4109         }
4110
4111         return 0;
4112 }
4113
4114 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4115 {
4116         int i;
4117
4118         if (enable) {
4119                 WREG32(mmCP_MEC_CNTL, 0);
4120         } else {
4121                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4122                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4123                         adev->gfx.compute_ring[i].ready = false;
4124         }
4125         udelay(50);
4126 }
4127
4128 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4129 {
4130         const struct gfx_firmware_header_v1_0 *mec_hdr;
4131         const __le32 *fw_data;
4132         unsigned i, fw_size;
4133
4134         if (!adev->gfx.mec_fw)
4135                 return -EINVAL;
4136
4137         gfx_v8_0_cp_compute_enable(adev, false);
4138
4139         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4140         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4141
4142         fw_data = (const __le32 *)
4143                 (adev->gfx.mec_fw->data +
4144                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4145         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4146
4147         /* MEC1 */
4148         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4149         for (i = 0; i < fw_size; i++)
4150                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4151         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4152
4153         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4154         if (adev->gfx.mec2_fw) {
4155                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4156
4157                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4158                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4159
4160                 fw_data = (const __le32 *)
4161                         (adev->gfx.mec2_fw->data +
4162                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4163                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4164
4165                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4166                 for (i = 0; i < fw_size; i++)
4167                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4168                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4169         }
4170
4171         return 0;
4172 }
4173
/*
 * Memory queue descriptor (MQD) image used for the VI compute queues.
 *
 * The image is a flat sequence of 32-bit words.  The "ordinal" noted next to
 * a member (or range of members) is its dword index from the start of the
 * structure, i.e. byte offset = ordinal * 4.  Member order defines the
 * layout, so members must not be added, removed or reordered.
 */
struct vi_mqd {
        uint32_t header;                                /* ordinal 0 */

        /* compute dispatch state */
        uint32_t compute_dispatch_initiator;            /* ordinal 1 */
        uint32_t compute_dim_x, compute_dim_y,
                 compute_dim_z;                         /* ordinals 2-4 */
        uint32_t compute_start_x, compute_start_y,
                 compute_start_z;                       /* ordinals 5-7 */
        uint32_t compute_num_thread_x, compute_num_thread_y,
                 compute_num_thread_z;                  /* ordinals 8-10 */
        uint32_t compute_pipelinestat_enable;           /* ordinal 11 */
        uint32_t compute_perfcount_enable;              /* ordinal 12 */
        uint32_t compute_pgm_lo;                        /* ordinal 13 */
        uint32_t compute_pgm_hi;                        /* ordinal 14 */
        uint32_t compute_tba_lo;                        /* ordinal 15 */
        uint32_t compute_tba_hi;                        /* ordinal 16 */
        uint32_t compute_tma_lo;                        /* ordinal 17 */
        uint32_t compute_tma_hi;                        /* ordinal 18 */
        uint32_t compute_pgm_rsrc1;                     /* ordinal 19 */
        uint32_t compute_pgm_rsrc2;                     /* ordinal 20 */
        uint32_t compute_vmid;                          /* ordinal 21 */
        uint32_t compute_resource_limits;               /* ordinal 22 */
        uint32_t compute_static_thread_mgmt_se0;        /* ordinal 23 */
        uint32_t compute_static_thread_mgmt_se1;        /* ordinal 24 */
        uint32_t compute_tmpring_size;                  /* ordinal 25 */
        uint32_t compute_static_thread_mgmt_se2;        /* ordinal 26 */
        uint32_t compute_static_thread_mgmt_se3;        /* ordinal 27 */
        uint32_t compute_restart_x, compute_restart_y,
                 compute_restart_z;                     /* ordinals 28-30 */
        uint32_t compute_thread_trace_enable;           /* ordinal 31 */
        uint32_t compute_misc_reserved;                 /* ordinal 32 */
        uint32_t compute_dispatch_id;                   /* ordinal 33 */
        uint32_t compute_threadgroup_id;                /* ordinal 34 */
        uint32_t compute_relaunch;                      /* ordinal 35 */
        uint32_t compute_wave_restore_addr_lo;          /* ordinal 36 */
        uint32_t compute_wave_restore_addr_hi;          /* ordinal 37 */
        uint32_t compute_wave_restore_control;          /* ordinal 38 */

        /* ordinals 39-64 */
        uint32_t reserved9, reserved10, reserved11, reserved12,
                 reserved13, reserved14, reserved15, reserved16,
                 reserved17, reserved18, reserved19, reserved20,
                 reserved21, reserved22, reserved23, reserved24,
                 reserved25, reserved26, reserved27, reserved28,
                 reserved29, reserved30, reserved31, reserved32,
                 reserved33, reserved34;

        /* ordinals 65-80 */
        uint32_t compute_user_data_0, compute_user_data_1,
                 compute_user_data_2, compute_user_data_3,
                 compute_user_data_4, compute_user_data_5,
                 compute_user_data_6, compute_user_data_7,
                 compute_user_data_8, compute_user_data_9,
                 compute_user_data_10, compute_user_data_11,
                 compute_user_data_12, compute_user_data_13,
                 compute_user_data_14, compute_user_data_15;

        uint32_t cp_compute_csinvoc_count_lo;           /* ordinal 81 */
        uint32_t cp_compute_csinvoc_count_hi;           /* ordinal 82 */
        uint32_t reserved35, reserved36, reserved37;    /* ordinals 83-85 */

        /* MQD timing / connect bookkeeping */
        uint32_t cp_mqd_query_time_lo;                  /* ordinal 86 */
        uint32_t cp_mqd_query_time_hi;                  /* ordinal 87 */
        uint32_t cp_mqd_connect_start_time_lo;          /* ordinal 88 */
        uint32_t cp_mqd_connect_start_time_hi;          /* ordinal 89 */
        uint32_t cp_mqd_connect_end_time_lo;            /* ordinal 90 */
        uint32_t cp_mqd_connect_end_time_hi;            /* ordinal 91 */
        uint32_t cp_mqd_connect_end_wf_count;           /* ordinal 92 */
        uint32_t cp_mqd_connect_end_pq_rptr;            /* ordinal 93 */
        uint32_t cp_mqd_connect_end_pq_wptr;            /* ordinal 94 */
        uint32_t cp_mqd_connect_end_ib_rptr;            /* ordinal 95 */
        uint32_t reserved38, reserved39;                /* ordinals 96-97 */
        uint32_t cp_mqd_save_start_time_lo;             /* ordinal 98 */
        uint32_t cp_mqd_save_start_time_hi;             /* ordinal 99 */
        uint32_t cp_mqd_save_end_time_lo;               /* ordinal 100 */
        uint32_t cp_mqd_save_end_time_hi;               /* ordinal 101 */
        uint32_t cp_mqd_restore_start_time_lo;          /* ordinal 102 */
        uint32_t cp_mqd_restore_start_time_hi;          /* ordinal 103 */
        uint32_t cp_mqd_restore_end_time_lo;            /* ordinal 104 */
        uint32_t cp_mqd_restore_end_time_hi;            /* ordinal 105 */
        uint32_t reserved40, reserved41;                /* ordinals 106-107 */

        uint32_t gds_cs_ctxsw_cnt0, gds_cs_ctxsw_cnt1,
                 gds_cs_ctxsw_cnt2, gds_cs_ctxsw_cnt3;  /* ordinals 108-111 */
        uint32_t reserved42, reserved43;                /* ordinals 112-113 */

        uint32_t cp_pq_exe_status_lo;                   /* ordinal 114 */
        uint32_t cp_pq_exe_status_hi;                   /* ordinal 115 */
        uint32_t cp_packet_id_lo;                       /* ordinal 116 */
        uint32_t cp_packet_id_hi;                       /* ordinal 117 */
        uint32_t cp_packet_exe_status_lo;               /* ordinal 118 */
        uint32_t cp_packet_exe_status_hi;               /* ordinal 119 */
        uint32_t gds_save_base_addr_lo;                 /* ordinal 120 */
        uint32_t gds_save_base_addr_hi;                 /* ordinal 121 */
        uint32_t gds_save_mask_lo;                      /* ordinal 122 */
        uint32_t gds_save_mask_hi;                      /* ordinal 123 */
        uint32_t ctx_save_base_addr_lo;                 /* ordinal 124 */
        uint32_t ctx_save_base_addr_hi;                 /* ordinal 125 */
        uint32_t reserved44, reserved45;                /* ordinals 126-127 */

        /* values mirrored into the CP_MQD_* and CP_HQD_* registers */
        uint32_t cp_mqd_base_addr_lo;                   /* ordinal 128 */
        uint32_t cp_mqd_base_addr_hi;                   /* ordinal 129 */
        uint32_t cp_hqd_active;                         /* ordinal 130 */
        uint32_t cp_hqd_vmid;                           /* ordinal 131 */
        uint32_t cp_hqd_persistent_state;               /* ordinal 132 */
        uint32_t cp_hqd_pipe_priority;                  /* ordinal 133 */
        uint32_t cp_hqd_queue_priority;                 /* ordinal 134 */
        uint32_t cp_hqd_quantum;                        /* ordinal 135 */
        uint32_t cp_hqd_pq_base_lo;                     /* ordinal 136 */
        uint32_t cp_hqd_pq_base_hi;                     /* ordinal 137 */
        uint32_t cp_hqd_pq_rptr;                        /* ordinal 138 */
        uint32_t cp_hqd_pq_rptr_report_addr_lo;         /* ordinal 139 */
        uint32_t cp_hqd_pq_rptr_report_addr_hi;         /* ordinal 140 */
        uint32_t cp_hqd_pq_wptr_poll_addr;              /* ordinal 141 */
        uint32_t cp_hqd_pq_wptr_poll_addr_hi;           /* ordinal 142 */
        uint32_t cp_hqd_pq_doorbell_control;            /* ordinal 143 */
        uint32_t cp_hqd_pq_wptr;                        /* ordinal 144 */
        uint32_t cp_hqd_pq_control;                     /* ordinal 145 */
        uint32_t cp_hqd_ib_base_addr_lo;                /* ordinal 146 */
        uint32_t cp_hqd_ib_base_addr_hi;                /* ordinal 147 */
        uint32_t cp_hqd_ib_rptr;                        /* ordinal 148 */
        uint32_t cp_hqd_ib_control;                     /* ordinal 149 */
        uint32_t cp_hqd_iq_timer;                       /* ordinal 150 */
        uint32_t cp_hqd_iq_rptr;                        /* ordinal 151 */
        uint32_t cp_hqd_dequeue_request;                /* ordinal 152 */
        uint32_t cp_hqd_dma_offload;                    /* ordinal 153 */
        uint32_t cp_hqd_sema_cmd;                       /* ordinal 154 */
        uint32_t cp_hqd_msg_type;                       /* ordinal 155 */
        uint32_t cp_hqd_atomic0_preop_lo;               /* ordinal 156 */
        uint32_t cp_hqd_atomic0_preop_hi;               /* ordinal 157 */
        uint32_t cp_hqd_atomic1_preop_lo;               /* ordinal 158 */
        uint32_t cp_hqd_atomic1_preop_hi;               /* ordinal 159 */
        uint32_t cp_hqd_hq_status0;                     /* ordinal 160 */
        uint32_t cp_hqd_hq_control0;                    /* ordinal 161 */
        uint32_t cp_mqd_control;                        /* ordinal 162 */
        uint32_t cp_hqd_hq_status1;                     /* ordinal 163 */
        uint32_t cp_hqd_hq_control1;                    /* ordinal 164 */
        uint32_t cp_hqd_eop_base_addr_lo;               /* ordinal 165 */
        uint32_t cp_hqd_eop_base_addr_hi;               /* ordinal 166 */
        uint32_t cp_hqd_eop_control;                    /* ordinal 167 */
        uint32_t cp_hqd_eop_rptr;                       /* ordinal 168 */
        uint32_t cp_hqd_eop_wptr;                       /* ordinal 169 */
        uint32_t cp_hqd_eop_done_events;                /* ordinal 170 */
        uint32_t cp_hqd_ctx_save_base_addr_lo;          /* ordinal 171 */
        uint32_t cp_hqd_ctx_save_base_addr_hi;          /* ordinal 172 */
        uint32_t cp_hqd_ctx_save_control;               /* ordinal 173 */
        uint32_t cp_hqd_cntl_stack_offset;              /* ordinal 174 */
        uint32_t cp_hqd_cntl_stack_size;                /* ordinal 175 */
        uint32_t cp_hqd_wg_state_offset;                /* ordinal 176 */
        uint32_t cp_hqd_ctx_save_size;                  /* ordinal 177 */
        uint32_t cp_hqd_gds_resource_state;             /* ordinal 178 */
        uint32_t cp_hqd_error;                          /* ordinal 179 */
        uint32_t cp_hqd_eop_wptr_mem;                   /* ordinal 180 */
        uint32_t cp_hqd_eop_dones;                      /* ordinal 181 */

        /* ordinals 182-191 */
        uint32_t reserved46, reserved47, reserved48, reserved49,
                 reserved50, reserved51, reserved52, reserved53,
                 reserved54, reserved55;

        /* IQ timer packet */
        uint32_t iqtimer_pkt_header;                    /* ordinal 192 */
        /* ordinals 193-224 */
        uint32_t iqtimer_pkt_dw0, iqtimer_pkt_dw1,
                 iqtimer_pkt_dw2, iqtimer_pkt_dw3,
                 iqtimer_pkt_dw4, iqtimer_pkt_dw5,
                 iqtimer_pkt_dw6, iqtimer_pkt_dw7,
                 iqtimer_pkt_dw8, iqtimer_pkt_dw9,
                 iqtimer_pkt_dw10, iqtimer_pkt_dw11,
                 iqtimer_pkt_dw12, iqtimer_pkt_dw13,
                 iqtimer_pkt_dw14, iqtimer_pkt_dw15,
                 iqtimer_pkt_dw16, iqtimer_pkt_dw17,
                 iqtimer_pkt_dw18, iqtimer_pkt_dw19,
                 iqtimer_pkt_dw20, iqtimer_pkt_dw21,
                 iqtimer_pkt_dw22, iqtimer_pkt_dw23,
                 iqtimer_pkt_dw24, iqtimer_pkt_dw25,
                 iqtimer_pkt_dw26, iqtimer_pkt_dw27,
                 iqtimer_pkt_dw28, iqtimer_pkt_dw29,
                 iqtimer_pkt_dw30, iqtimer_pkt_dw31;
        uint32_t reserved56, reserved57, reserved58;    /* ordinals 225-227 */

        /* SET_RESOURCES packet */
        uint32_t set_resources_header;                  /* ordinal 228 */
        /* ordinals 229-235 */
        uint32_t set_resources_dw1, set_resources_dw2,
                 set_resources_dw3, set_resources_dw4,
                 set_resources_dw5, set_resources_dw6,
                 set_resources_dw7;

        /* ordinals 236-255 */
        uint32_t reserved59, reserved60, reserved61, reserved62,
                 reserved63, reserved64, reserved65, reserved66,
                 reserved67, reserved68, reserved69, reserved70,
                 reserved71, reserved72, reserved73, reserved74,
                 reserved75, reserved76, reserved77, reserved78;

        uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};
4434
4435 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4436 {
4437         int i, r;
4438
4439         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4440                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4441
4442                 if (ring->mqd_obj) {
4443                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4444                         if (unlikely(r != 0))
4445                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4446
4447                         amdgpu_bo_unpin(ring->mqd_obj);
4448                         amdgpu_bo_unreserve(ring->mqd_obj);
4449
4450                         amdgpu_bo_unref(&ring->mqd_obj);
4451                         ring->mqd_obj = NULL;
4452                 }
4453         }
4454 }
4455
4456 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4457 {
4458         int r, i, j;
4459         u32 tmp;
4460         bool use_doorbell = true;
4461         u64 hqd_gpu_addr;
4462         u64 mqd_gpu_addr;
4463         u64 eop_gpu_addr;
4464         u64 wb_gpu_addr;
4465         u32 *buf;
4466         struct vi_mqd *mqd;
4467
4468         /* init the pipes */
4469         mutex_lock(&adev->srbm_mutex);
4470         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4471                 int me = (i < 4) ? 1 : 2;
4472                 int pipe = (i < 4) ? i : (i - 4);
4473
4474                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4475                 eop_gpu_addr >>= 8;
4476
4477                 vi_srbm_select(adev, me, pipe, 0, 0);
4478
4479                 /* write the EOP addr */
4480                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4481                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4482
4483                 /* set the VMID assigned */
4484                 WREG32(mmCP_HQD_VMID, 0);
4485
4486                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4487                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4488                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4489                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
4490                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4491         }
4492         vi_srbm_select(adev, 0, 0, 0, 0);
4493         mutex_unlock(&adev->srbm_mutex);
4494
4495         /* init the queues.  Just two for now. */
4496         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4497                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4498
4499                 if (ring->mqd_obj == NULL) {
4500                         r = amdgpu_bo_create(adev,
4501                                              sizeof(struct vi_mqd),
4502                                              PAGE_SIZE, true,
4503                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4504                                              NULL, &ring->mqd_obj);
4505                         if (r) {
4506                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4507                                 return r;
4508                         }
4509                 }
4510
4511                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4512                 if (unlikely(r != 0)) {
4513                         gfx_v8_0_cp_compute_fini(adev);
4514                         return r;
4515                 }
4516                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4517                                   &mqd_gpu_addr);
4518                 if (r) {
4519                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4520                         gfx_v8_0_cp_compute_fini(adev);
4521                         return r;
4522                 }
4523                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4524                 if (r) {
4525                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4526                         gfx_v8_0_cp_compute_fini(adev);
4527                         return r;
4528                 }
4529
4530                 /* init the mqd struct */
4531                 memset(buf, 0, sizeof(struct vi_mqd));
4532
4533                 mqd = (struct vi_mqd *)buf;
4534                 mqd->header = 0xC0310800;
4535                 mqd->compute_pipelinestat_enable = 0x00000001;
4536                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4537                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4538                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4539                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4540                 mqd->compute_misc_reserved = 0x00000003;
4541
4542                 mutex_lock(&adev->srbm_mutex);
4543                 vi_srbm_select(adev, ring->me,
4544                                ring->pipe,
4545                                ring->queue, 0);
4546
4547                 /* disable wptr polling */
4548                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4549                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4550                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4551
4552                 mqd->cp_hqd_eop_base_addr_lo =
4553                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
4554                 mqd->cp_hqd_eop_base_addr_hi =
4555                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4556
4557                 /* enable doorbell? */
4558                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4559                 if (use_doorbell) {
4560                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4561                 } else {
4562                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4563                 }
4564                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4565                 mqd->cp_hqd_pq_doorbell_control = tmp;
4566
4567                 /* disable the queue if it's active */
4568                 mqd->cp_hqd_dequeue_request = 0;
4569                 mqd->cp_hqd_pq_rptr = 0;
4570                 mqd->cp_hqd_pq_wptr= 0;
4571                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4572                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4573                         for (j = 0; j < adev->usec_timeout; j++) {
4574                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4575                                         break;
4576                                 udelay(1);
4577                         }
4578                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4579                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4580                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4581                 }
4582
4583                 /* set the pointer to the MQD */
4584                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4585                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4586                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4587                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4588
4589                 /* set MQD vmid to 0 */
4590                 tmp = RREG32(mmCP_MQD_CONTROL);
4591                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4592                 WREG32(mmCP_MQD_CONTROL, tmp);
4593                 mqd->cp_mqd_control = tmp;
4594
4595                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4596                 hqd_gpu_addr = ring->gpu_addr >> 8;
4597                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4598                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4599                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4600                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4601
4602                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4603                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4604                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4605                                     (order_base_2(ring->ring_size / 4) - 1));
4606                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4607                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4608 #ifdef __BIG_ENDIAN
4609                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4610 #endif
4611                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4612                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4613                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4614                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4615                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4616                 mqd->cp_hqd_pq_control = tmp;
4617
4618                 /* set the wb address wether it's enabled or not */
4619                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4620                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4621                 mqd->cp_hqd_pq_rptr_report_addr_hi =
4622                         upper_32_bits(wb_gpu_addr) & 0xffff;
4623                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4624                        mqd->cp_hqd_pq_rptr_report_addr_lo);
4625                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4626                        mqd->cp_hqd_pq_rptr_report_addr_hi);
4627
4628                 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4629                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4630                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4631                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4632                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4633                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4634                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
4635
4636                 /* enable the doorbell if requested */
4637                 if (use_doorbell) {
4638                         if ((adev->asic_type == CHIP_CARRIZO) ||
4639                             (adev->asic_type == CHIP_FIJI) ||
4640                             (adev->asic_type == CHIP_STONEY) ||
4641                             (adev->asic_type == CHIP_POLARIS11) ||
4642                             (adev->asic_type == CHIP_POLARIS10)) {
4643                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4644                                        AMDGPU_DOORBELL_KIQ << 2);
4645                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4646                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
4647                         }
4648                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4649                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4650                                             DOORBELL_OFFSET, ring->doorbell_index);
4651                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4652                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4653                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4654                         mqd->cp_hqd_pq_doorbell_control = tmp;
4655
4656                 } else {
4657                         mqd->cp_hqd_pq_doorbell_control = 0;
4658                 }
4659                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4660                        mqd->cp_hqd_pq_doorbell_control);
4661
4662                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4663                 ring->wptr = 0;
4664                 mqd->cp_hqd_pq_wptr = ring->wptr;
4665                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4666                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4667
4668                 /* set the vmid for the queue */
4669                 mqd->cp_hqd_vmid = 0;
4670                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4671
4672                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4673                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4674                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4675                 mqd->cp_hqd_persistent_state = tmp;
4676                 if (adev->asic_type == CHIP_STONEY ||
4677                         adev->asic_type == CHIP_POLARIS11 ||
4678                         adev->asic_type == CHIP_POLARIS10) {
4679                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4680                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4681                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4682                 }
4683
4684                 /* activate the queue */
4685                 mqd->cp_hqd_active = 1;
4686                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4687
4688                 vi_srbm_select(adev, 0, 0, 0, 0);
4689                 mutex_unlock(&adev->srbm_mutex);
4690
4691                 amdgpu_bo_kunmap(ring->mqd_obj);
4692                 amdgpu_bo_unreserve(ring->mqd_obj);
4693         }
4694
4695         if (use_doorbell) {
4696                 tmp = RREG32(mmCP_PQ_STATUS);
4697                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4698                 WREG32(mmCP_PQ_STATUS, tmp);
4699         }
4700
4701         gfx_v8_0_cp_compute_enable(adev, true);
4702
4703         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4704                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4705
4706                 ring->ready = true;
4707                 r = amdgpu_ring_test_ring(ring);
4708                 if (r)
4709                         ring->ready = false;
4710         }
4711
4712         return 0;
4713 }
4714
4715 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4716 {
4717         int r;
4718
4719         if (!(adev->flags & AMD_IS_APU))
4720                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4721
4722         if (!adev->pp_enabled) {
4723                 if (!adev->firmware.smu_load) {
4724                         /* legacy firmware loading */
4725                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
4726                         if (r)
4727                                 return r;
4728
4729                         r = gfx_v8_0_cp_compute_load_microcode(adev);
4730                         if (r)
4731                                 return r;
4732                 } else {
4733                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4734                                                         AMDGPU_UCODE_ID_CP_CE);
4735                         if (r)
4736                                 return -EINVAL;
4737
4738                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4739                                                         AMDGPU_UCODE_ID_CP_PFP);
4740                         if (r)
4741                                 return -EINVAL;
4742
4743                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4744                                                         AMDGPU_UCODE_ID_CP_ME);
4745                         if (r)
4746                                 return -EINVAL;
4747
4748                         if (adev->asic_type == CHIP_TOPAZ) {
4749                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4750                                 if (r)
4751                                         return r;
4752                         } else {
4753                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4754                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
4755                                 if (r)
4756                                         return -EINVAL;
4757                         }
4758                 }
4759         }
4760
4761         r = gfx_v8_0_cp_gfx_resume(adev);
4762         if (r)
4763                 return r;
4764
4765         r = gfx_v8_0_cp_compute_resume(adev);
4766         if (r)
4767                 return r;
4768
4769         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4770
4771         return 0;
4772 }
4773
/*
 * gfx_v8_0_cp_enable - enable or disable both command processors
 * @adev: amdgpu_device pointer
 * @enable: forwarded unchanged to both helpers
 *
 * Calls gfx_v8_0_cp_gfx_enable() first and gfx_v8_0_cp_compute_enable()
 * second, with the same @enable value.
 */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
        gfx_v8_0_cp_gfx_enable(adev, enable);
        gfx_v8_0_cp_compute_enable(adev, enable);
}
4779
/*
 * gfx_v8_0_hw_init - hardware init callback for the GFX block
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Programs the golden registers, runs the GPU init, resumes the RLC and
 * finally resumes the CP.
 *
 * Returns 0 on success or the error code of the RLC/CP resume step.
 */
static int gfx_v8_0_hw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        gfx_v8_0_init_golden_registers(adev);
        gfx_v8_0_gpu_init(adev);

        r = gfx_v8_0_rlc_resume(adev);
        if (r)
                return r;

        /* the CP resume result is the overall result */
        return gfx_v8_0_cp_resume(adev);
}
4799
/*
 * gfx_v8_0_hw_fini - hardware teardown callback for the GFX block
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Drops the references on the priv_reg and priv_inst interrupt sources
 * (gfx_v8_0_late_init() takes them), disables the CP, stops the RLC, runs
 * the compute CP fini and requests AMD_PG_STATE_UNGATE for the GFX IP block.
 *
 * Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
        gfx_v8_0_cp_enable(adev, false);
        gfx_v8_0_rlc_stop(adev);
        gfx_v8_0_cp_compute_fini(adev);

        amdgpu_set_powergating_state(adev,
                        AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

        return 0;
}
4815
/*
 * gfx_v8_0_suspend - suspend callback for the GFX block
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Suspend is implemented as a plain hardware teardown.
 * Returns the result of gfx_v8_0_hw_fini().
 */
static int gfx_v8_0_suspend(void *handle)
{
        /* hw_fini takes the same opaque handle, so pass it straight through */
        return gfx_v8_0_hw_fini(handle);
}
4822
/*
 * gfx_v8_0_resume - resume callback for the GFX block
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Resume is implemented as a plain hardware init.
 * Returns the result of gfx_v8_0_hw_init().
 */
static int gfx_v8_0_resume(void *handle)
{
        /* hw_init takes the same opaque handle, so pass it straight through */
        return gfx_v8_0_hw_init(handle);
}
4829
4830 static bool gfx_v8_0_is_idle(void *handle)
4831 {
4832         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4833
4834         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4835                 return false;
4836         else
4837                 return true;
4838 }
4839
4840 static int gfx_v8_0_wait_for_idle(void *handle)
4841 {
4842         unsigned i;
4843         u32 tmp;
4844         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4845
4846         for (i = 0; i < adev->usec_timeout; i++) {
4847                 /* read MC_STATUS */
4848                 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4849
4850                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4851                         return 0;
4852                 udelay(1);
4853         }
4854         return -ETIMEDOUT;
4855 }
4856
4857 static int gfx_v8_0_soft_reset(void *handle)
4858 {
4859         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4860         u32 tmp;
4861         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4862
4863         /* GRBM_STATUS */
4864         tmp = RREG32(mmGRBM_STATUS);
4865         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4866                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4867                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4868                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4869                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4870                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4871                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4872                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4873                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4874                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4875         }
4876
4877         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4878                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4879                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4880                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4881                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4882         }
4883
4884         /* GRBM_STATUS2 */
4885         tmp = RREG32(mmGRBM_STATUS2);
4886         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4887                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4888                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4889
4890         /* SRBM_STATUS */
4891         tmp = RREG32(mmSRBM_STATUS);
4892         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4893                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4894                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4895
4896         if (grbm_soft_reset || srbm_soft_reset) {
4897                 /* stop the rlc */
4898                 gfx_v8_0_rlc_stop(adev);
4899
4900                 /* Disable GFX parsing/prefetching */
4901                 gfx_v8_0_cp_gfx_enable(adev, false);
4902
4903                 /* Disable MEC parsing/prefetching */
4904                 gfx_v8_0_cp_compute_enable(adev, false);
4905
4906                 if (grbm_soft_reset || srbm_soft_reset) {
4907                         tmp = RREG32(mmGMCON_DEBUG);
4908                         tmp = REG_SET_FIELD(tmp,
4909                                             GMCON_DEBUG, GFX_STALL, 1);
4910                         tmp = REG_SET_FIELD(tmp,
4911                                             GMCON_DEBUG, GFX_CLEAR, 1);
4912                         WREG32(mmGMCON_DEBUG, tmp);
4913
4914                         udelay(50);
4915                 }
4916
4917                 if (grbm_soft_reset) {
4918                         tmp = RREG32(mmGRBM_SOFT_RESET);
4919                         tmp |= grbm_soft_reset;
4920                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4921                         WREG32(mmGRBM_SOFT_RESET, tmp);
4922                         tmp = RREG32(mmGRBM_SOFT_RESET);
4923
4924                         udelay(50);
4925
4926                         tmp &= ~grbm_soft_reset;
4927                         WREG32(mmGRBM_SOFT_RESET, tmp);
4928                         tmp = RREG32(mmGRBM_SOFT_RESET);
4929                 }
4930
4931                 if (srbm_soft_reset) {
4932                         tmp = RREG32(mmSRBM_SOFT_RESET);
4933                         tmp |= srbm_soft_reset;
4934                         dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4935                         WREG32(mmSRBM_SOFT_RESET, tmp);
4936                         tmp = RREG32(mmSRBM_SOFT_RESET);
4937
4938                         udelay(50);
4939
4940                         tmp &= ~srbm_soft_reset;
4941                         WREG32(mmSRBM_SOFT_RESET, tmp);
4942                         tmp = RREG32(mmSRBM_SOFT_RESET);
4943                 }
4944
4945                 if (grbm_soft_reset || srbm_soft_reset) {
4946                         tmp = RREG32(mmGMCON_DEBUG);
4947                         tmp = REG_SET_FIELD(tmp,
4948                                             GMCON_DEBUG, GFX_STALL, 0);
4949                         tmp = REG_SET_FIELD(tmp,
4950                                             GMCON_DEBUG, GFX_CLEAR, 0);
4951                         WREG32(mmGMCON_DEBUG, tmp);
4952                 }
4953
4954                 /* Wait a little for things to settle down */
4955                 udelay(50);
4956         }
4957         return 0;
4958 }
4959
4960 /**
4961  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4962  *
4963  * @adev: amdgpu_device pointer
4964  *
4965  * Fetches a GPU clock counter snapshot.
4966  * Returns the 64 bit clock counter snapshot.
4967  */
4968 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4969 {
4970         uint64_t clock;
4971
4972         mutex_lock(&adev->gfx.gpu_clock_mutex);
4973         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4974         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4975                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4976         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4977         return clock;
4978 }
4979
4980 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4981                                           uint32_t vmid,
4982                                           uint32_t gds_base, uint32_t gds_size,
4983                                           uint32_t gws_base, uint32_t gws_size,
4984                                           uint32_t oa_base, uint32_t oa_size)
4985 {
4986         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4987         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4988
4989         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4990         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4991
4992         oa_base = oa_base >> AMDGPU_OA_SHIFT;
4993         oa_size = oa_size >> AMDGPU_OA_SHIFT;
4994
4995         /* GDS Base */
4996         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4997         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4998                                 WRITE_DATA_DST_SEL(0)));
4999         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5000         amdgpu_ring_write(ring, 0);
5001         amdgpu_ring_write(ring, gds_base);
5002
5003         /* GDS Size */
5004         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5005         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5006                                 WRITE_DATA_DST_SEL(0)));
5007         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5008         amdgpu_ring_write(ring, 0);
5009         amdgpu_ring_write(ring, gds_size);
5010
5011         /* GWS */
5012         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5013         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5014                                 WRITE_DATA_DST_SEL(0)));
5015         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5016         amdgpu_ring_write(ring, 0);
5017         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5018
5019         /* OA */
5020         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5021         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5022                                 WRITE_DATA_DST_SEL(0)));
5023         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5024         amdgpu_ring_write(ring, 0);
5025         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5026 }
5027
/*
 * gfx_v8_0_early_init - early init callback for the GFX block
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Records the number of gfx and compute rings, then installs the ring, irq,
 * GDS and RLC function tables for this device.
 *
 * Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* ring counts are set before the setup helpers below run */
        adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
        adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
        gfx_v8_0_set_ring_funcs(adev);
        gfx_v8_0_set_irq_funcs(adev);
        gfx_v8_0_set_gds_init(adev);
        gfx_v8_0_set_rlc_funcs(adev);

        return 0;
}
5041
/*
 * gfx_v8_0_late_init - late init callback for the GFX block
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Takes a reference on the priv_reg and priv_inst interrupt sources, runs
 * the EDC GPR workarounds and then requests AMD_PG_STATE_GATE for the GFX
 * IP block.
 *
 * Returns 0 on success or the first non-zero result of the steps above.
 * NOTE(review): interrupt references already taken are not dropped here when
 * a later step fails -- confirm the caller's error path accounts for that.
 */
static int gfx_v8_0_late_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
        if (r)
                return r;

        r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
        if (r)
                return r;

        /* requires IBs so do in late init after IB pool is initialized */
        r = gfx_v8_0_do_edc_gpr_workarounds(adev);
        if (r)
                return r;

        amdgpu_set_powergating_state(adev,
                        AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

        return 0;
}
5065
5066 static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5067                 bool enable)
5068 {
5069         uint32_t data, temp;
5070
5071         /* Send msg to SMU via Powerplay */
5072         amdgpu_set_powergating_state(adev,
5073                         AMD_IP_BLOCK_TYPE_SMC,
5074                         enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5075
5076         if (enable) {
5077                 /* Enable static MGPG */
5078                 temp = data = RREG32(mmRLC_PG_CNTL);
5079                 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5080
5081                 if (temp != data)
5082                         WREG32(mmRLC_PG_CNTL, data);
5083         } else {
5084                 temp = data = RREG32(mmRLC_PG_CNTL);
5085                 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5086
5087                 if (temp != data)
5088                         WREG32(mmRLC_PG_CNTL, data);
5089         }
5090 }
5091
5092 static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5093                 bool enable)
5094 {
5095         uint32_t data, temp;
5096
5097         if (enable) {
5098                 /* Enable dynamic MGPG */
5099                 temp = data = RREG32(mmRLC_PG_CNTL);
5100                 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5101
5102                 if (temp != data)
5103                         WREG32(mmRLC_PG_CNTL, data);
5104         } else {
5105                 temp = data = RREG32(mmRLC_PG_CNTL);
5106                 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5107
5108                 if (temp != data)
5109                         WREG32(mmRLC_PG_CNTL, data);
5110         }
5111 }
5112
5113 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5114                 bool enable)
5115 {
5116         uint32_t data, temp;
5117
5118         if (enable) {
5119                 /* Enable quick PG */
5120                 temp = data = RREG32(mmRLC_PG_CNTL);
5121                 data |= 0x100000;
5122
5123                 if (temp != data)
5124                         WREG32(mmRLC_PG_CNTL, data);
5125         } else {
5126                 temp = data = RREG32(mmRLC_PG_CNTL);
5127                 data &= ~0x100000;
5128
5129                 if (temp != data)
5130                         WREG32(mmRLC_PG_CNTL, data);
5131         }
5132 }
5133
5134 static int gfx_v8_0_set_powergating_state(void *handle,
5135                                           enum amd_powergating_state state)
5136 {
5137         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5138
5139         if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5140                 return 0;
5141
5142         switch (adev->asic_type) {
5143         case CHIP_POLARIS11:
5144                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG)
5145                         polaris11_enable_gfx_static_mg_power_gating(adev,
5146                                         state == AMD_PG_STATE_GATE ? true : false);
5147                 else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)
5148                         polaris11_enable_gfx_dynamic_mg_power_gating(adev,
5149                                         state == AMD_PG_STATE_GATE ? true : false);
5150                 else
5151                         polaris11_enable_gfx_quick_mg_power_gating(adev,
5152                                         state == AMD_PG_STATE_GATE ? true : false);
5153                 break;
5154         default:
5155                 break;
5156         }
5157
5158         return 0;
5159 }
5160
5161 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5162                                      uint32_t reg_addr, uint32_t cmd)
5163 {
5164         uint32_t data;
5165
5166         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5167
5168         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5169         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5170
5171         data = RREG32(mmRLC_SERDES_WR_CTRL);
5172         if (adev->asic_type == CHIP_STONEY)
5173                         data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5174                         RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5175                         RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5176                         RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5177                         RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5178                         RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5179                         RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5180                         RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5181                         RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5182         else
5183                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5184                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5185                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5186                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5187                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5188                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5189                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5190                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5191                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5192                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5193                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5194         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5195                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5196                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5197                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5198
5199         WREG32(mmRLC_SERDES_WR_CTRL, data);
5200 }
5201
/*
 * Safe-mode request encoding written to mmRLC_GPR_REG2 by the cz_* safe-mode
 * helpers: bit 0 is the request flag (polled until it reads back as zero)
 * and bits 1-4 carry the message, 1 to enter and 0 to exit safe mode.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0

/* field layout of the request word */
#define RLC_GPR_REG2__REQ_MASK           0x00000001
#define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
5208
5209 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5210 {
5211         u32 data = 0;
5212         unsigned i;
5213
5214         data = RREG32(mmRLC_CNTL);
5215         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5216                 return;
5217
5218         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5219             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5220                                AMD_PG_SUPPORT_GFX_DMG))) {
5221                 data |= RLC_GPR_REG2__REQ_MASK;
5222                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5223                 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5224                 WREG32(mmRLC_GPR_REG2, data);
5225
5226                 for (i = 0; i < adev->usec_timeout; i++) {
5227                         if ((RREG32(mmRLC_GPM_STAT) &
5228                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5229                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5230                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5231                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5232                                 break;
5233                         udelay(1);
5234                 }
5235
5236                 for (i = 0; i < adev->usec_timeout; i++) {
5237                         if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5238                                 break;
5239                         udelay(1);
5240                 }
5241                 adev->gfx.rlc.in_safe_mode = true;
5242         }
5243 }
5244
5245 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5246 {
5247         u32 data;
5248         unsigned i;
5249
5250         data = RREG32(mmRLC_CNTL);
5251         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5252                 return;
5253
5254         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5255             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5256                                AMD_PG_SUPPORT_GFX_DMG))) {
5257                 data |= RLC_GPR_REG2__REQ_MASK;
5258                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5259                 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5260                 WREG32(mmRLC_GPR_REG2, data);
5261                 adev->gfx.rlc.in_safe_mode = false;
5262         }
5263
5264         for (i = 0; i < adev->usec_timeout; i++) {
5265                 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5266                         break;
5267                 udelay(1);
5268         }
5269 }
5270
5271 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5272 {
5273         u32 data;
5274         unsigned i;
5275
5276         data = RREG32(mmRLC_CNTL);
5277         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5278                 return;
5279
5280         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5281                 data |= RLC_SAFE_MODE__CMD_MASK;
5282                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5283                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5284                 WREG32(mmRLC_SAFE_MODE, data);
5285
5286                 for (i = 0; i < adev->usec_timeout; i++) {
5287                         if ((RREG32(mmRLC_GPM_STAT) &
5288                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5289                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5290                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5291                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5292                                 break;
5293                         udelay(1);
5294                 }
5295
5296                 for (i = 0; i < adev->usec_timeout; i++) {
5297                         if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5298                                 break;
5299                         udelay(1);
5300                 }
5301                 adev->gfx.rlc.in_safe_mode = true;
5302         }
5303 }
5304
5305 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5306 {
5307         u32 data = 0;
5308         unsigned i;
5309
5310         data = RREG32(mmRLC_CNTL);
5311         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5312                 return;
5313
5314         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5315                 if (adev->gfx.rlc.in_safe_mode) {
5316                         data |= RLC_SAFE_MODE__CMD_MASK;
5317                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5318                         WREG32(mmRLC_SAFE_MODE, data);
5319                         adev->gfx.rlc.in_safe_mode = false;
5320                 }
5321         }
5322
5323         for (i = 0; i < adev->usec_timeout; i++) {
5324                 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5325                         break;
5326                 udelay(1);
5327         }
5328 }
5329
/*
 * No-op safe-mode entry: touches no registers and only records
 * in_safe_mode = true in the driver state.
 */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
        adev->gfx.rlc.in_safe_mode = true;
}
5334
/*
 * No-op safe-mode exit: touches no registers and only records
 * in_safe_mode = false in the driver state.
 */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
        adev->gfx.rlc.in_safe_mode = false;
}
5339
5340 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5341         .enter_safe_mode = cz_enter_rlc_safe_mode,
5342         .exit_safe_mode = cz_exit_rlc_safe_mode
5343 };
5344
5345 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5346         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5347         .exit_safe_mode = iceland_exit_rlc_safe_mode
5348 };
5349
5350 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5351         .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5352         .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5353 };
5354
/*
 * gfx_v8_0_update_medium_grain_clock_gating - program MGCG/MGLS
 * @adev: amdgpu_device pointer
 * @enable: request to enable (true) or disable (false) medium grain gating
 *
 * The whole sequence runs between the RLC enter_safe_mode/exit_safe_mode
 * callbacks.  The enable sequence is only used when @enable is true AND
 * AMD_CG_SUPPORT_GFX_MGCG is set; in every other case (including
 * @enable == true without MGCG support) the disable sequence is run.
 * Registers are read first and only written back when the value changes.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, data;

        adev->gfx.rlc.funcs->enter_safe_mode(adev);

        /* It is disabled by HW by default */
        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
                /* memory light sleep is optional and gated per sub-block */
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
                                /* 1 - RLC memory Light sleep */
                                temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
                                data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
                                if (temp != data)
                                        WREG32(mmRLC_MEM_SLP_CNTL, data);
                        }

                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
                                /* 2 - CP memory Light sleep */
                                temp = data = RREG32(mmCP_MEM_SLP_CNTL);
                                data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
                                if (temp != data)
                                        WREG32(mmCP_MEM_SLP_CNTL, data);
                        }
                }

                /* 3 - RLC_CGTT_MGCG_OVERRIDE */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                /* APUs leave the GRBM override bit untouched here */
                if (adev->flags & AMD_IS_APU)
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
                else
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 5 - clear mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
                        /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
                        temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                        data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
                        data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
                        data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
                        data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
                        /* the LS override is only lifted when both MGLS and CGTS_LS are supported */
                        if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
                            (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
                        data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
                        data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
                        if (temp != data)
                                WREG32(mmCGTS_SM_CTRL_REG, data);
                }
                udelay(50);

                /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        } else {
                /* disable sequence: set all overrides, turn light sleep off */
                /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 2 - disable MGLS in RLC */
                data = RREG32(mmRLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
                        data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
                        WREG32(mmRLC_MEM_SLP_CNTL, data);
                }

                /* 3 - disable MGLS in CP */
                data = RREG32(mmCP_MEM_SLP_CNTL);
                if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
                        data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
                        WREG32(mmCP_MEM_SLP_CNTL, data);
                }

                /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
                temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
                                CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
                if (temp != data)
                        WREG32(mmCGTS_SM_CTRL_REG, data);

                /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 6 - set mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                udelay(50);

                /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        }

        adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5467
5468 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5469                                                       bool enable)
5470 {
5471         uint32_t temp, temp1, data, data1;
5472
5473         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5474
5475         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5476
5477         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5478                 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
5479                  * Cmp_busy/GFX_Idle interrupts
5480                  */
5481                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5482
5483                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5484                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5485                 if (temp1 != data1)
5486                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5487
5488                 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5489                 gfx_v8_0_wait_for_rlc_serdes(adev);
5490
5491                 /* 3 - clear cgcg override */
5492                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5493
5494                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5495                 gfx_v8_0_wait_for_rlc_serdes(adev);
5496
5497                 /* 4 - write cmd to set CGLS */
5498                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5499
5500                 /* 5 - enable cgcg */
5501                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5502
5503                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5504                         /* enable cgls*/
5505                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5506
5507                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5508                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5509
5510                         if (temp1 != data1)
5511                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5512                 } else {
5513                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5514                 }
5515
5516                 if (temp != data)
5517                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5518         } else {
5519                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5520                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5521
5522                 /* TEST CGCG */
5523                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5524                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5525                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5526                 if (temp1 != data1)
5527                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5528
5529                 /* read gfx register to wake up cgcg */
5530                 RREG32(mmCB_CGTT_SCLK_CTRL);
5531                 RREG32(mmCB_CGTT_SCLK_CTRL);
5532                 RREG32(mmCB_CGTT_SCLK_CTRL);
5533                 RREG32(mmCB_CGTT_SCLK_CTRL);
5534
5535                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5536                 gfx_v8_0_wait_for_rlc_serdes(adev);
5537
5538                 /* write cmd to Set CGCG Overrride */
5539                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5540
5541                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5542                 gfx_v8_0_wait_for_rlc_serdes(adev);
5543
5544                 /* write cmd to Clear CGLS */
5545                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5546
5547                 /* disable cgcg, cgls should be disabled too. */
5548                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5549                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5550                 if (temp != data)
5551                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5552         }
5553
5554         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5555 }
5556 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5557                                             bool enable)
5558 {
5559         if (enable) {
5560                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5561                  * ===  MGCG + MGLS + TS(CG/LS) ===
5562                  */
5563                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5564                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5565         } else {
5566                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5567                  * ===  CGCG + CGLS ===
5568                  */
5569                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5570                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5571         }
5572         return 0;
5573 }
5574
5575 static int gfx_v8_0_set_clockgating_state(void *handle,
5576                                           enum amd_clockgating_state state)
5577 {
5578         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5579
5580         switch (adev->asic_type) {
5581         case CHIP_FIJI:
5582         case CHIP_CARRIZO:
5583         case CHIP_STONEY:
5584                 gfx_v8_0_update_gfx_clock_gating(adev,
5585                                                  state == AMD_CG_STATE_GATE ? true : false);
5586                 break;
5587         default:
5588                 break;
5589         }
5590         return 0;
5591 }
5592
5593 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5594 {
5595         u32 rptr;
5596
5597         rptr = ring->adev->wb.wb[ring->rptr_offs];
5598
5599         return rptr;
5600 }
5601
5602 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5603 {
5604         struct amdgpu_device *adev = ring->adev;
5605         u32 wptr;
5606
5607         if (ring->use_doorbell)
5608                 /* XXX check if swapping is necessary on BE */
5609                 wptr = ring->adev->wb.wb[ring->wptr_offs];
5610         else
5611                 wptr = RREG32(mmCP_RB0_WPTR);
5612
5613         return wptr;
5614 }
5615
5616 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5617 {
5618         struct amdgpu_device *adev = ring->adev;
5619
5620         if (ring->use_doorbell) {
5621                 /* XXX check if swapping is necessary on BE */
5622                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5623                 WDOORBELL32(ring->doorbell_index, ring->wptr);
5624         } else {
5625                 WREG32(mmCP_RB0_WPTR, ring->wptr);
5626                 (void)RREG32(mmCP_RB0_WPTR);
5627         }
5628 }
5629
5630 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5631 {
5632         u32 ref_and_mask, reg_mem_engine;
5633
5634         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
5635                 switch (ring->me) {
5636                 case 1:
5637                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5638                         break;
5639                 case 2:
5640                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5641                         break;
5642                 default:
5643                         return;
5644                 }
5645                 reg_mem_engine = 0;
5646         } else {
5647                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
5648                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
5649         }
5650
5651         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5652         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
5653                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
5654                                  reg_mem_engine));
5655         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
5656         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
5657         amdgpu_ring_write(ring, ref_and_mask);
5658         amdgpu_ring_write(ring, ref_and_mask);
5659         amdgpu_ring_write(ring, 0x20); /* poll interval */
5660 }
5661
5662 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
5663 {
5664         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5665         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5666                                  WRITE_DATA_DST_SEL(0) |
5667                                  WR_CONFIRM));
5668         amdgpu_ring_write(ring, mmHDP_DEBUG0);
5669         amdgpu_ring_write(ring, 0);
5670         amdgpu_ring_write(ring, 1);
5671
5672 }
5673
5674 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5675                                       struct amdgpu_ib *ib,
5676                                       unsigned vm_id, bool ctx_switch)
5677 {
5678         u32 header, control = 0;
5679         u32 next_rptr = ring->wptr + 5;
5680
5681         if (ctx_switch)
5682                 next_rptr += 2;
5683
5684         next_rptr += 4;
5685         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5686         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5687         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5688         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5689         amdgpu_ring_write(ring, next_rptr);
5690
5691         /* insert SWITCH_BUFFER packet before first IB in the ring frame */
5692         if (ctx_switch) {
5693                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5694                 amdgpu_ring_write(ring, 0);
5695         }
5696
5697         if (ib->flags & AMDGPU_IB_FLAG_CE)
5698                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5699         else
5700                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5701
5702         control |= ib->length_dw | (vm_id << 24);
5703
5704         amdgpu_ring_write(ring, header);
5705         amdgpu_ring_write(ring,
5706 #ifdef __BIG_ENDIAN
5707                           (2 << 0) |
5708 #endif
5709                           (ib->gpu_addr & 0xFFFFFFFC));
5710         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5711         amdgpu_ring_write(ring, control);
5712 }
5713
5714 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5715                                           struct amdgpu_ib *ib,
5716                                           unsigned vm_id, bool ctx_switch)
5717 {
5718         u32 header, control = 0;
5719         u32 next_rptr = ring->wptr + 5;
5720
5721         control |= INDIRECT_BUFFER_VALID;
5722
5723         next_rptr += 4;
5724         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5725         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5726         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5727         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5728         amdgpu_ring_write(ring, next_rptr);
5729
5730         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5731
5732         control |= ib->length_dw | (vm_id << 24);
5733
5734         amdgpu_ring_write(ring, header);
5735         amdgpu_ring_write(ring,
5736 #ifdef __BIG_ENDIAN
5737                                           (2 << 0) |
5738 #endif
5739                                           (ib->gpu_addr & 0xFFFFFFFC));
5740         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5741         amdgpu_ring_write(ring, control);
5742 }
5743
5744 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
5745                                          u64 seq, unsigned flags)
5746 {
5747         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5748         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5749
5750         /* EVENT_WRITE_EOP - flush caches, send int */
5751         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
5752         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5753                                  EOP_TC_ACTION_EN |
5754                                  EOP_TC_WB_ACTION_EN |
5755                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5756                                  EVENT_INDEX(5)));
5757         amdgpu_ring_write(ring, addr & 0xfffffffc);
5758         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
5759                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5760         amdgpu_ring_write(ring, lower_32_bits(seq));
5761         amdgpu_ring_write(ring, upper_32_bits(seq));
5762
5763 }
5764
5765 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5766 {
5767         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5768         uint32_t seq = ring->fence_drv.sync_seq;
5769         uint64_t addr = ring->fence_drv.gpu_addr;
5770
5771         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5772         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
5773                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
5774                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
5775         amdgpu_ring_write(ring, addr & 0xfffffffc);
5776         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
5777         amdgpu_ring_write(ring, seq);
5778         amdgpu_ring_write(ring, 0xffffffff);
5779         amdgpu_ring_write(ring, 4); /* poll interval */
5780
5781         if (usepfp) {
5782                 /* synce CE with ME to prevent CE fetch CEIB before context switch done */
5783                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5784                 amdgpu_ring_write(ring, 0);
5785                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5786                 amdgpu_ring_write(ring, 0);
5787         }
5788 }
5789
5790 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5791                                         unsigned vm_id, uint64_t pd_addr)
5792 {
5793         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5794
5795         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5796         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5797                                  WRITE_DATA_DST_SEL(0)) |
5798                                  WR_CONFIRM);
5799         if (vm_id < 8) {
5800                 amdgpu_ring_write(ring,
5801                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
5802         } else {
5803                 amdgpu_ring_write(ring,
5804                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
5805         }
5806         amdgpu_ring_write(ring, 0);
5807         amdgpu_ring_write(ring, pd_addr >> 12);
5808
5809         /* bits 0-15 are the VM contexts0-15 */
5810         /* invalidate the cache */
5811         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5812         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5813                                  WRITE_DATA_DST_SEL(0)));
5814         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5815         amdgpu_ring_write(ring, 0);
5816         amdgpu_ring_write(ring, 1 << vm_id);
5817
5818         /* wait for the invalidate to complete */
5819         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5820         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5821                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
5822                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5823         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5824         amdgpu_ring_write(ring, 0);
5825         amdgpu_ring_write(ring, 0); /* ref */
5826         amdgpu_ring_write(ring, 0); /* mask */
5827         amdgpu_ring_write(ring, 0x20); /* poll interval */
5828
5829         /* compute doesn't have PFP */
5830         if (usepfp) {
5831                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5832                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5833                 amdgpu_ring_write(ring, 0x0);
5834                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5835                 amdgpu_ring_write(ring, 0);
5836                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5837                 amdgpu_ring_write(ring, 0);
5838         }
5839 }
5840
5841 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5842 {
5843         return ring->adev->wb.wb[ring->rptr_offs];
5844 }
5845
5846 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5847 {
5848         return ring->adev->wb.wb[ring->wptr_offs];
5849 }
5850
5851 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5852 {
5853         struct amdgpu_device *adev = ring->adev;
5854
5855         /* XXX check if swapping is necessary on BE */
5856         adev->wb.wb[ring->wptr_offs] = ring->wptr;
5857         WDOORBELL32(ring->doorbell_index, ring->wptr);
5858 }
5859
5860 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
5861                                              u64 addr, u64 seq,
5862                                              unsigned flags)
5863 {
5864         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5865         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5866
5867         /* RELEASE_MEM - flush caches, send int */
5868         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
5869         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5870                                  EOP_TC_ACTION_EN |
5871                                  EOP_TC_WB_ACTION_EN |
5872                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5873                                  EVENT_INDEX(5)));
5874         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5875         amdgpu_ring_write(ring, addr & 0xfffffffc);
5876         amdgpu_ring_write(ring, upper_32_bits(addr));
5877         amdgpu_ring_write(ring, lower_32_bits(seq));
5878         amdgpu_ring_write(ring, upper_32_bits(seq));
5879 }
5880
5881 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5882                                                  enum amdgpu_interrupt_state state)
5883 {
5884         u32 cp_int_cntl;
5885
5886         switch (state) {
5887         case AMDGPU_IRQ_STATE_DISABLE:
5888                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5889                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5890                                             TIME_STAMP_INT_ENABLE, 0);
5891                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5892                 break;
5893         case AMDGPU_IRQ_STATE_ENABLE:
5894                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5895                 cp_int_cntl =
5896                         REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5897                                       TIME_STAMP_INT_ENABLE, 1);
5898                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5899                 break;
5900         default:
5901                 break;
5902         }
5903 }
5904
5905 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5906                                                      int me, int pipe,
5907                                                      enum amdgpu_interrupt_state state)
5908 {
5909         u32 mec_int_cntl, mec_int_cntl_reg;
5910
5911         /*
5912          * amdgpu controls only pipe 0 of MEC1. That's why this function only
5913          * handles the setting of interrupts for this specific pipe. All other
5914          * pipes' interrupts are set by amdkfd.
5915          */
5916
5917         if (me == 1) {
5918                 switch (pipe) {
5919                 case 0:
5920                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
5921                         break;
5922                 default:
5923                         DRM_DEBUG("invalid pipe %d\n", pipe);
5924                         return;
5925                 }
5926         } else {
5927                 DRM_DEBUG("invalid me %d\n", me);
5928                 return;
5929         }
5930
5931         switch (state) {
5932         case AMDGPU_IRQ_STATE_DISABLE:
5933                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5934                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5935                                              TIME_STAMP_INT_ENABLE, 0);
5936                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5937                 break;
5938         case AMDGPU_IRQ_STATE_ENABLE:
5939                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5940                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5941                                              TIME_STAMP_INT_ENABLE, 1);
5942                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5943                 break;
5944         default:
5945                 break;
5946         }
5947 }
5948
5949 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5950                                              struct amdgpu_irq_src *source,
5951                                              unsigned type,
5952                                              enum amdgpu_interrupt_state state)
5953 {
5954         u32 cp_int_cntl;
5955
5956         switch (state) {
5957         case AMDGPU_IRQ_STATE_DISABLE:
5958                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5959                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5960                                             PRIV_REG_INT_ENABLE, 0);
5961                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5962                 break;
5963         case AMDGPU_IRQ_STATE_ENABLE:
5964                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5965                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5966                                             PRIV_REG_INT_ENABLE, 1);
5967                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5968                 break;
5969         default:
5970                 break;
5971         }
5972
5973         return 0;
5974 }
5975
5976 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5977                                               struct amdgpu_irq_src *source,
5978                                               unsigned type,
5979                                               enum amdgpu_interrupt_state state)
5980 {
5981         u32 cp_int_cntl;
5982
5983         switch (state) {
5984         case AMDGPU_IRQ_STATE_DISABLE:
5985                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5986                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5987                                             PRIV_INSTR_INT_ENABLE, 0);
5988                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5989                 break;
5990         case AMDGPU_IRQ_STATE_ENABLE:
5991                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5992                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5993                                             PRIV_INSTR_INT_ENABLE, 1);
5994                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5995                 break;
5996         default:
5997                 break;
5998         }
5999
6000         return 0;
6001 }
6002
6003 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6004                                             struct amdgpu_irq_src *src,
6005                                             unsigned type,
6006                                             enum amdgpu_interrupt_state state)
6007 {
6008         switch (type) {
6009         case AMDGPU_CP_IRQ_GFX_EOP:
6010                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6011                 break;
6012         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6013                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6014                 break;
6015         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6016                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6017                 break;
6018         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6019                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6020                 break;
6021         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6022                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6023                 break;
6024         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6025                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6026                 break;
6027         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6028                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6029                 break;
6030         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6031                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6032                 break;
6033         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6034                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6035                 break;
6036         default:
6037                 break;
6038         }
6039         return 0;
6040 }
6041
6042 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6043                             struct amdgpu_irq_src *source,
6044                             struct amdgpu_iv_entry *entry)
6045 {
6046         int i;
6047         u8 me_id, pipe_id, queue_id;
6048         struct amdgpu_ring *ring;
6049
6050         DRM_DEBUG("IH: CP EOP\n");
6051         me_id = (entry->ring_id & 0x0c) >> 2;
6052         pipe_id = (entry->ring_id & 0x03) >> 0;
6053         queue_id = (entry->ring_id & 0x70) >> 4;
6054
6055         switch (me_id) {
6056         case 0:
6057                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6058                 break;
6059         case 1:
6060         case 2:
6061                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6062                         ring = &adev->gfx.compute_ring[i];
6063                         /* Per-queue interrupt is supported for MEC starting from VI.
6064                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6065                           */
6066                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6067                                 amdgpu_fence_process(ring);
6068                 }
6069                 break;
6070         }
6071         return 0;
6072 }
6073
6074 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6075                                  struct amdgpu_irq_src *source,
6076                                  struct amdgpu_iv_entry *entry)
6077 {
6078         DRM_ERROR("Illegal register access in command stream\n");
6079         schedule_work(&adev->reset_work);
6080         return 0;
6081 }
6082
6083 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6084                                   struct amdgpu_irq_src *source,
6085                                   struct amdgpu_iv_entry *entry)
6086 {
6087         DRM_ERROR("Illegal instruction in command stream\n");
6088         schedule_work(&adev->reset_work);
6089         return 0;
6090 }
6091
6092 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6093         .name = "gfx_v8_0",
6094         .early_init = gfx_v8_0_early_init,
6095         .late_init = gfx_v8_0_late_init,
6096         .sw_init = gfx_v8_0_sw_init,
6097         .sw_fini = gfx_v8_0_sw_fini,
6098         .hw_init = gfx_v8_0_hw_init,
6099         .hw_fini = gfx_v8_0_hw_fini,
6100         .suspend = gfx_v8_0_suspend,
6101         .resume = gfx_v8_0_resume,
6102         .is_idle = gfx_v8_0_is_idle,
6103         .wait_for_idle = gfx_v8_0_wait_for_idle,
6104         .soft_reset = gfx_v8_0_soft_reset,
6105         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6106         .set_powergating_state = gfx_v8_0_set_powergating_state,
6107 };
6108
6109 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6110         .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
6111         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6112         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6113         .parse_cs = NULL,
6114         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6115         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6116         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6117         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6118         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6119         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6120         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6121         .test_ring = gfx_v8_0_ring_test_ring,
6122         .test_ib = gfx_v8_0_ring_test_ib,
6123         .insert_nop = amdgpu_ring_insert_nop,
6124         .pad_ib = amdgpu_ring_generic_pad_ib,
6125 };
6126
6127 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6128         .get_rptr = gfx_v8_0_ring_get_rptr_compute,
6129         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6130         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6131         .parse_cs = NULL,
6132         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6133         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6134         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6135         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6136         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6137         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6138         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6139         .test_ring = gfx_v8_0_ring_test_ring,
6140         .test_ib = gfx_v8_0_ring_test_ib,
6141         .insert_nop = amdgpu_ring_insert_nop,
6142         .pad_ib = amdgpu_ring_generic_pad_ib,
6143 };
6144
6145 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6146 {
6147         int i;
6148
6149         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6150                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6151
6152         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6153                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6154 }
6155
6156 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6157         .set = gfx_v8_0_set_eop_interrupt_state,
6158         .process = gfx_v8_0_eop_irq,
6159 };
6160
6161 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6162         .set = gfx_v8_0_set_priv_reg_fault_state,
6163         .process = gfx_v8_0_priv_reg_irq,
6164 };
6165
6166 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6167         .set = gfx_v8_0_set_priv_inst_fault_state,
6168         .process = gfx_v8_0_priv_inst_irq,
6169 };
6170
6171 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6172 {
6173         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6174         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6175
6176         adev->gfx.priv_reg_irq.num_types = 1;
6177         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6178
6179         adev->gfx.priv_inst_irq.num_types = 1;
6180         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6181 }
6182
6183 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6184 {
6185         switch (adev->asic_type) {
6186         case CHIP_TOPAZ:
6187         case CHIP_STONEY:
6188                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6189                 break;
6190         case CHIP_CARRIZO:
6191                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6192                 break;
6193         default:
6194                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6195                 break;
6196         }
6197 }
6198
6199 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6200 {
6201         /* init asci gds info */
6202         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6203         adev->gds.gws.total_size = 64;
6204         adev->gds.oa.total_size = 16;
6205
6206         if (adev->gds.mem.total_size == 64 * 1024) {
6207                 adev->gds.mem.gfx_partition_size = 4096;
6208                 adev->gds.mem.cs_partition_size = 4096;
6209
6210                 adev->gds.gws.gfx_partition_size = 4;
6211                 adev->gds.gws.cs_partition_size = 4;
6212
6213                 adev->gds.oa.gfx_partition_size = 4;
6214                 adev->gds.oa.cs_partition_size = 1;
6215         } else {
6216                 adev->gds.mem.gfx_partition_size = 1024;
6217                 adev->gds.mem.cs_partition_size = 1024;
6218
6219                 adev->gds.gws.gfx_partition_size = 16;
6220                 adev->gds.gws.cs_partition_size = 16;
6221
6222                 adev->gds.oa.gfx_partition_size = 4;
6223                 adev->gds.oa.cs_partition_size = 4;
6224         }
6225 }
6226
6227 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6228 {
6229         u32 data, mask;
6230
6231         data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
6232         data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6233
6234         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6235         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6236
6237         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6238
6239         return (~data) & mask;
6240 }
6241
6242 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6243 {
6244         int i, j, k, counter, active_cu_number = 0;
6245         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6246         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6247
6248         memset(cu_info, 0, sizeof(*cu_info));
6249
6250         mutex_lock(&adev->grbm_idx_mutex);
6251         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6252                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6253                         mask = 1;
6254                         ao_bitmap = 0;
6255                         counter = 0;
6256                         gfx_v8_0_select_se_sh(adev, i, j);
6257                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6258                         cu_info->bitmap[i][j] = bitmap;
6259
6260                         for (k = 0; k < 16; k ++) {
6261                                 if (bitmap & mask) {
6262                                         if (counter < 2)
6263                                                 ao_bitmap |= mask;
6264                                         counter ++;
6265                                 }
6266                                 mask <<= 1;
6267                         }
6268                         active_cu_number += counter;
6269                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6270                 }
6271         }
6272         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
6273         mutex_unlock(&adev->grbm_idx_mutex);
6274
6275         cu_info->number = active_cu_number;
6276         cu_info->ao_cu_mask = ao_cu_mask;
6277 }