PM / runtime: Asynchronous "idle" in pm_runtime_allow()
[cascardo/linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "clearstate_vi.h"
32
33 #include "gmc/gmc_8_2_d.h"
34 #include "gmc/gmc_8_2_sh_mask.h"
35
36 #include "oss/oss_3_0_d.h"
37 #include "oss/oss_3_0_sh_mask.h"
38
39 #include "bif/bif_5_0_d.h"
40 #include "bif/bif_5_0_sh_mask.h"
41
42 #include "gca/gfx_8_0_d.h"
43 #include "gca/gfx_8_0_enum.h"
44 #include "gca/gfx_8_0_sh_mask.h"
45 #include "gca/gfx_8_0_enum.h"
46
47 #include "dce/dce_10_0_d.h"
48 #include "dce/dce_10_0_sh_mask.h"
49
/* Ring counts used by this file: 1 graphics ring and 8 compute rings. */
50 #define GFX8_NUM_GFX_RINGS     1
51 #define GFX8_NUM_COMPUTE_RINGS 8
52
/*
 * Per-ASIC GB_ADDR_CONFIG "golden" values.  The same numbers appear as the
 * mmGB_ADDR_CONFIG rows of the *_golden_common_all tables in this file
 * (iceland and cz: 0x22010001, polaris11: 0x22011002, tonga: 0x22011003).
 * Where these macros themselves are consumed is outside this view.
 */
53 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
54 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
55 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
56 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
57
/*
 * Field-placement helpers: each macro shifts x left by the __SHIFT constant of
 * the named GB_TILE_MODE0 / GB_MACROTILE_MODE0 field.  No mask is applied, so
 * x must already fit within the target field.
 */
58 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
59 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
60 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
61 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
62 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
63 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
64 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
65 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
66 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
67
/*
 * Single-bit masks for RLC_CGTT_MGCG_OVERRIDE, occupying bits 0 through 5 in
 * the order CPF, RLC, MGCG, CGCG, CGLS, GRBM.
 */
68 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
69 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
70 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
71 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
72 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
73 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
74
75 /* BPM SERDES command values: SET = 1, CLE = 0 ("CLE" presumably abbreviates "clear" -- confirm) */
76 #define SET_BPM_SERDES_CMD    1
77 #define CLE_BPM_SERDES_CMD    0
78
79 /* BPM register addresses: consecutive indices starting at 0 */
80 enum {
81         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
82         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
83         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
84         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
85         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
86         BPM_REG_FGCG_MAX            /* = 5; presumably an end marker/count, not a real register -- confirm */
87 };
88
/* Length constant (14) for an RLC register list.  NOTE(review): units and use site are not visible in this part of the file. */
89 #define RLC_FormatDirectRegListLength        14
90
/*
 * Firmware images declared by this module, grouped per ASIC: carrizo, stoney,
 * tonga, topaz, fiji, polaris11, polaris10.  Every group lists ce, pfp, me,
 * mec and rlc images; stoney and topaz have no mec2 entry, the others do.
 */
91 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
92 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
94 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
97
98 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
99 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
100 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
101 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
103
104 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
105 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
107 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
110
111 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
112 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
113 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
114 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
116
117 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
118 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
119 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
120 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
123
124 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
125 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
126 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
127 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
130
131 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
132 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
133 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
137
/*
 * GDS register constants per VMID: 16 entries, index i holding the
 * mmGDS_VMIDi_BASE, mmGDS_VMIDi_SIZE, mmGDS_GWS_VMIDi and mmGDS_OA_VMIDi
 * constants, in that order.  The initializers are positional; the field names
 * of struct amdgpu_gds_reg_offset are declared outside this file view.
 */
138 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
139 {
140         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
141         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
142         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
143         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
144         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
145         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
146         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
147         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
148         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
149         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
150         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
151         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
152         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
153         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
154         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
155         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
156 };
157
/*
 * Tonga golden register settings.  Flat u32 list, three words per row: an mm*
 * register constant followed by two 32-bit words (presumably mask and value
 * for amdgpu_program_register_sequence() -- confirm against that helper).
 * The use site for this table is outside this view.
 */
158 static const u32 golden_settings_tonga_a11[] =
159 {
160         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
161         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
162         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
163         mmGB_GPU_ID, 0x0000000f, 0x00000000,
164         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
165         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
166         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
167         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
168         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
169         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
170         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
171         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
172         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
173         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
174         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
175 };
176
/*
 * Tonga common golden settings.  Three u32 words per row: mm* register
 * constant, then two 32-bit words (presumably mask and value -- confirm
 * against amdgpu_program_register_sequence()).  The mmGB_ADDR_CONFIG value
 * equals TONGA_GB_ADDR_CONFIG_GOLDEN (0x22011003).
 */
177 static const u32 tonga_golden_common_all[] =
178 {
179         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
180         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
181         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
182         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
183         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
184         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
185         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
186         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
187 };
188
/*
 * Tonga MGCG/CGCG (clock gating) init sequence.  Three u32 words per row: mm*
 * register constant, then two 32-bit words (presumably mask and value --
 * confirm against amdgpu_program_register_sequence()).
 *
 * Row order: RLC_CGTT_MGCG_OVERRIDE first, then the CGTT_* / *_CGTT_* clock
 * control registers, then per-CU CGTS_CUn_* rows for CU0 through CU7, and
 * finally CGTS_SM_CTRL_REG, CP_RB_WPTR_POLL_CNTL, RLC_CGCG_CGLS_CTRL and
 * CP_MEM_SLP_CNTL.  mmGRBM_GFX_INDEX is written before each of the two main
 * groups.
 */
189 static const u32 tonga_mgcg_cgcg_init[] =
190 {
191         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
192         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
193         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
194         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
195         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
196         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
197         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
198         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
199         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
200         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
201         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
202         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
203         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
204         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
205         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
206         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
207         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
208         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
209         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
210         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
211         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
212         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
213         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
214         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
215         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
216         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
217         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
218         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
219         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
220         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
221         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
222         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
223         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
224         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
225         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
226         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
227         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
228         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
229         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
230         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
231         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
232         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
233         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
234         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
235         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
236         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
237         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
238         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
239         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
240         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
241         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
242         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
243         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
244         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
245         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
246         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
247         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
248         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
249         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
250         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
251         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
252         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
253         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
254         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
255         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
256         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
257         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
258         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
259         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
260         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
261         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
262         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
263         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
264         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
265         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
266 };
267
/*
 * Polaris11 golden register settings.  Three u32 words per row: mm* register
 * constant, then two 32-bit words (presumably mask and value -- confirm
 * against amdgpu_program_register_sequence()).  The use site for this table
 * is outside this view.
 */
268 static const u32 golden_settings_polaris11_a11[] =
269 {
270         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
271         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
272         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
273         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
274         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
275         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
276         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
277         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
278         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
279         mmSQ_CONFIG, 0x07f80000, 0x07180000,
280         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
281         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
282         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
283         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
284         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
285 };
286
/*
 * Polaris11 common golden settings.  Three u32 words per row: mm* register
 * constant, then two 32-bit words (presumably mask and value -- confirm
 * against amdgpu_program_register_sequence()).  The mmGB_ADDR_CONFIG value
 * equals POLARIS11_GB_ADDR_CONFIG_GOLDEN (0x22011002).  Unlike the other
 * *_golden_common_all tables in this file, there are no PA_SC_RASTER_CONFIG
 * rows here.
 */
287 static const u32 polaris11_golden_common_all[] =
288 {
289         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
290         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
291         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
292         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
293         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
294         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
295 };
296
/*
 * Polaris10 golden register settings.  Three u32 words per row: mm* register
 * constant, then two 32-bit words (presumably mask and value -- confirm
 * against amdgpu_program_register_sequence()).  The use site for this table
 * is outside this view.
 *
 * NOTE(review): the mmCB_HW_CONTROL_2 row has 0 as its second word, unlike
 * every other row, and the table has a TCP_CHAN_STEER_HI row but no
 * TCP_CHAN_STEER_LO row -- confirm both are intended.
 */
297 static const u32 golden_settings_polaris10_a11[] =
298 {
299         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
300         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
301         mmCB_HW_CONTROL_2, 0, 0x0f000000,
302         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
303         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
304         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
305         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
306         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
307         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
308         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
309         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
310         mmSQ_CONFIG, 0x07f80000, 0x07180000,
311         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
312         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
313         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
314         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
315 };
316
/*
 * Polaris10 common golden settings.  Three u32 words per row: mm* register
 * constant, then two 32-bit words (presumably mask and value -- confirm
 * against amdgpu_program_register_sequence()).  The use site for this table
 * is outside this view.
 */
317 static const u32 polaris10_golden_common_all[] =
318 {
319         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
320         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
321         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
322         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
323         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
324         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
325         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
326         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
327 };
328
/*
 * Fiji common golden settings.  Three u32 words per row: mm* register
 * constant, then two 32-bit words (presumably mask and value -- confirm
 * against amdgpu_program_register_sequence()).  mmGRBM_GFX_INDEX is written
 * a second time before the trailing mmSPI_CONFIG_CNTL_1 row.  The use site
 * for this table is outside this view.
 */
329 static const u32 fiji_golden_common_all[] =
330 {
331         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
332         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
333         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
334         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
335         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
336         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
337         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
338         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
339         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
340         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
341 };
342
/*
 * Fiji golden register settings.  Three u32 words per row: mm* register
 * constant, then two 32-bit words (presumably mask and value -- confirm
 * against amdgpu_program_register_sequence()).  Passed to
 * amdgpu_program_register_sequence() in the CHIP_FIJI case of
 * gfx_v8_0_init_golden_registers(), after fiji_mgcg_cgcg_init.
 */
343 static const u32 golden_settings_fiji_a10[] =
344 {
345         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
346         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
347         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
348         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
349         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
350         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
351         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
352         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
353         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
354         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
355         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
356 };
357
/*
 * Fiji MGCG/CGCG (clock gating) init sequence.  Three u32 words per row: mm*
 * register constant, then two 32-bit words (presumably mask and value --
 * confirm against amdgpu_program_register_sequence()).  This is the first
 * table passed to amdgpu_program_register_sequence() in the CHIP_FIJI case of
 * gfx_v8_0_init_golden_registers().
 *
 * This table has no per-CU CGTS_CUn_* rows: after the second
 * mmGRBM_GFX_INDEX write it goes straight to CGTS_SM_CTRL_REG.
 */
358 static const u32 fiji_mgcg_cgcg_init[] =
359 {
360         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
361         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
362         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
363         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
364         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
365         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
366         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
367         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
368         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
369         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
370         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
371         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
372         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
373         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
374         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
375         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
376         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
377         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
378         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
379         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
380         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
381         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
382         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
383         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
384         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
385         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
386         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
387         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
388         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
389         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
390         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
391         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
392         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
393         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
394         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
395 };
396
/*
 * Iceland golden register settings.  Three u32 words per row: mm* register
 * constant, then two 32-bit words (presumably mask and value -- confirm
 * against amdgpu_program_register_sequence()).  This is the second table
 * programmed in the CHIP_TOPAZ case of gfx_v8_0_init_golden_registers().
 */
397 static const u32 golden_settings_iceland_a11[] =
398 {
399         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
400         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
401         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
402         mmGB_GPU_ID, 0x0000000f, 0x00000000,
403         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
404         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
405         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
406         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
407         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
408         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
409         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
410         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
411         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
412         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
413         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
414 };
415
/*
 * Iceland common golden settings.  Three u32 words per row: mm* register
 * constant, then two 32-bit words (presumably mask and value -- confirm
 * against amdgpu_program_register_sequence()).  This is the third table
 * programmed in the CHIP_TOPAZ case of gfx_v8_0_init_golden_registers().
 * The mmGB_ADDR_CONFIG value equals TOPAZ_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
416 static const u32 iceland_golden_common_all[] =
417 {
418         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
419         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
420         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
421         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
422         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
423         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
424         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
425         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
426 };
427
/*
 * Iceland MGCG/CGCG (clock gating) init sequence.  Three u32 words per row:
 * mm* register constant, then two 32-bit words (presumably mask and value --
 * confirm against amdgpu_program_register_sequence()).  This is the first
 * table programmed in the CHIP_TOPAZ case of
 * gfx_v8_0_init_golden_registers().
 *
 * Per-CU CGTS_CUn_* rows cover CU0 through CU5 only, and the table ends at
 * RLC_CGCG_CGLS_CTRL with no CP_MEM_SLP_CNTL row.
 */
428 static const u32 iceland_mgcg_cgcg_init[] =
429 {
430         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
431         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
432         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
433         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
434         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
435         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
436         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
437         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
439         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
441         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
448         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
449         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
450         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
452         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
453         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
454         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
455         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
456         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
457         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
458         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
459         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
460         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
461         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
462         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
463         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
464         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
465         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
466         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
467         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
468         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
469         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
470         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
471         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
472         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
473         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
474         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
475         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
476         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
477         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
478         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
479         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
480         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
481         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
482         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
483         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
484         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
485         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
486         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
487         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
488         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
489         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
490         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
491         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
492         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
493         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
494 };
495
/*
 * Carrizo ("cz") golden register settings.  Three u32 words per row: mm*
 * register constant, then two 32-bit words (presumably mask and value --
 * confirm against amdgpu_program_register_sequence()).  The use site for
 * this table is outside this view.
 */
496 static const u32 cz_golden_settings_a11[] =
497 {
498         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
499         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
500         mmGB_GPU_ID, 0x0000000f, 0x00000000,
501         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
502         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
503         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
504         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
505         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
506         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
507         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
508 };
509
/*
 * Carrizo ("cz") common golden settings.  Three u32 words per row: mm*
 * register constant, then two 32-bit words (presumably mask and value --
 * confirm against amdgpu_program_register_sequence()).  The mmGB_ADDR_CONFIG
 * value equals CARRIZO_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
510 static const u32 cz_golden_common_all[] =
511 {
512         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
513         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
514         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
515         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
516         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
517         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
518         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
519         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
520 };
521
/*
 * Carrizo ("cz") MGCG/CGCG (clock gating) init sequence.  Three u32 words per
 * row: mm* register constant, then two 32-bit words (presumably mask and
 * value -- confirm against amdgpu_program_register_sequence()).
 *
 * Row order: RLC_CGTT_MGCG_OVERRIDE first, then the CGTT_* / *_CGTT_* clock
 * control registers, then per-CU CGTS_CUn_* rows for CU0 through CU7, and
 * finally CGTS_SM_CTRL_REG, CP_RB_WPTR_POLL_CNTL, RLC_CGCG_CGLS_CTRL and
 * CP_MEM_SLP_CNTL.  The use site for this table is outside this view.
 */
522 static const u32 cz_mgcg_cgcg_init[] =
523 {
524         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
525         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
526         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
527         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
528         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
529         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
530         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
531         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
532         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
533         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
534         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
535         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
536         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
537         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
538         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
539         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
540         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
541         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
542         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
543         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
544         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
545         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
546         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
547         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
548         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
549         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
550         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
551         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
552         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
553         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
554         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
555         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
556         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
557         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
558         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
559         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
560         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
561         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
562         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
563         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
564         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
565         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
566         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
567         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
568         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
569         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
570         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
571         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
572         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
573         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
574         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
575         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
576         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
577         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
578         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
579         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
580         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
581         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
582         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
583         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
584         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
585         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
586         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
587         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
588         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
589         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
590         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
591         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
592         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
593         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
594         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
595         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
596         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
597         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
598         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
599 };
600
/*
 * Stoney golden register settings.  Three u32 words per row: mm* register
 * constant, then two 32-bit words (presumably mask and value -- confirm
 * against amdgpu_program_register_sequence()).  The use site for this table
 * is outside this view.
 */
601 static const u32 stoney_golden_settings_a11[] =
602 {
603         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
604         mmGB_GPU_ID, 0x0000000f, 0x00000000,
605         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
606         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
607         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
608         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
609         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
610         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
611         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
612         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
613 };
614
/*
 * Stoney common golden settings.  Three u32 words per row: mm* register
 * constant, then two 32-bit words (presumably mask and value -- confirm
 * against amdgpu_program_register_sequence()).  The mmGB_ADDR_CONFIG value
 * here (0x12010001) has no matching *_GB_ADDR_CONFIG_GOLDEN macro among those
 * defined near the top of this file.
 */
615 static const u32 stoney_golden_common_all[] =
616 {
617         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
618         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
619         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
620         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
621         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
622         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
623         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
624         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
625 };
626
/*
 * Stoney MGCG/CGCG (clock gating) init sequence.  Three u32 words per row:
 * mm* register constant, then two 32-bit words (presumably mask and value --
 * confirm against amdgpu_program_register_sequence()).  Much shorter than the
 * other *_mgcg_cgcg_init tables in this file: it has no RLC_CGTT_MGCG_OVERRIDE
 * row and no CGTT_* or per-CU CGTS_CUn_* rows.
 */
627 static const u32 stoney_mgcg_cgcg_init[] =
628 {
629         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
630         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
631         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
632         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
633         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
634         mmATC_MISC_CG, 0xffffffff, 0x000c0200,
635 };
636
637 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
638 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
639 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
640 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
641 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
642 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
643
644 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
645 {
646         switch (adev->asic_type) {
647         case CHIP_TOPAZ:
648                 amdgpu_program_register_sequence(adev,
649                                                  iceland_mgcg_cgcg_init,
650                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
651                 amdgpu_program_register_sequence(adev,
652                                                  golden_settings_iceland_a11,
653                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
654                 amdgpu_program_register_sequence(adev,
655                                                  iceland_golden_common_all,
656                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
657                 break;
658         case CHIP_FIJI:
659                 amdgpu_program_register_sequence(adev,
660                                                  fiji_mgcg_cgcg_init,
661                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
662                 amdgpu_program_register_sequence(adev,
663                                                  golden_settings_fiji_a10,
664                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
665                 amdgpu_program_register_sequence(adev,
666                                                  fiji_golden_common_all,
667                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
668                 break;
669
670         case CHIP_TONGA:
671                 amdgpu_program_register_sequence(adev,
672                                                  tonga_mgcg_cgcg_init,
673                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
674                 amdgpu_program_register_sequence(adev,
675                                                  golden_settings_tonga_a11,
676                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
677                 amdgpu_program_register_sequence(adev,
678                                                  tonga_golden_common_all,
679                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
680                 break;
681         case CHIP_POLARIS11:
682                 amdgpu_program_register_sequence(adev,
683                                                  golden_settings_polaris11_a11,
684                                                  (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
685                 amdgpu_program_register_sequence(adev,
686                                                  polaris11_golden_common_all,
687                                                  (const u32)ARRAY_SIZE(polaris11_golden_common_all));
688                 break;
689         case CHIP_POLARIS10:
690                 amdgpu_program_register_sequence(adev,
691                                                  golden_settings_polaris10_a11,
692                                                  (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
693                 amdgpu_program_register_sequence(adev,
694                                                  polaris10_golden_common_all,
695                                                  (const u32)ARRAY_SIZE(polaris10_golden_common_all));
696                 break;
697         case CHIP_CARRIZO:
698                 amdgpu_program_register_sequence(adev,
699                                                  cz_mgcg_cgcg_init,
700                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
701                 amdgpu_program_register_sequence(adev,
702                                                  cz_golden_settings_a11,
703                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
704                 amdgpu_program_register_sequence(adev,
705                                                  cz_golden_common_all,
706                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
707                 break;
708         case CHIP_STONEY:
709                 amdgpu_program_register_sequence(adev,
710                                                  stoney_mgcg_cgcg_init,
711                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
712                 amdgpu_program_register_sequence(adev,
713                                                  stoney_golden_settings_a11,
714                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
715                 amdgpu_program_register_sequence(adev,
716                                                  stoney_golden_common_all,
717                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
718                 break;
719         default:
720                 break;
721         }
722 }
723
724 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
725 {
726         int i;
727
728         adev->gfx.scratch.num_reg = 7;
729         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
730         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
731                 adev->gfx.scratch.free[i] = true;
732                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
733         }
734 }
735
736 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
737 {
738         struct amdgpu_device *adev = ring->adev;
739         uint32_t scratch;
740         uint32_t tmp = 0;
741         unsigned i;
742         int r;
743
744         r = amdgpu_gfx_scratch_get(adev, &scratch);
745         if (r) {
746                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
747                 return r;
748         }
749         WREG32(scratch, 0xCAFEDEAD);
750         r = amdgpu_ring_alloc(ring, 3);
751         if (r) {
752                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
753                           ring->idx, r);
754                 amdgpu_gfx_scratch_free(adev, scratch);
755                 return r;
756         }
757         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
758         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
759         amdgpu_ring_write(ring, 0xDEADBEEF);
760         amdgpu_ring_commit(ring);
761
762         for (i = 0; i < adev->usec_timeout; i++) {
763                 tmp = RREG32(scratch);
764                 if (tmp == 0xDEADBEEF)
765                         break;
766                 DRM_UDELAY(1);
767         }
768         if (i < adev->usec_timeout) {
769                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
770                          ring->idx, i);
771         } else {
772                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
773                           ring->idx, scratch, tmp);
774                 r = -EINVAL;
775         }
776         amdgpu_gfx_scratch_free(adev, scratch);
777         return r;
778 }
779
780 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
781 {
782         struct amdgpu_device *adev = ring->adev;
783         struct amdgpu_ib ib;
784         struct fence *f = NULL;
785         uint32_t scratch;
786         uint32_t tmp = 0;
787         unsigned i;
788         int r;
789
790         r = amdgpu_gfx_scratch_get(adev, &scratch);
791         if (r) {
792                 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
793                 return r;
794         }
795         WREG32(scratch, 0xCAFEDEAD);
796         memset(&ib, 0, sizeof(ib));
797         r = amdgpu_ib_get(adev, NULL, 256, &ib);
798         if (r) {
799                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
800                 goto err1;
801         }
802         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
803         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
804         ib.ptr[2] = 0xDEADBEEF;
805         ib.length_dw = 3;
806
807         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
808         if (r)
809                 goto err2;
810
811         r = fence_wait(f, false);
812         if (r) {
813                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
814                 goto err2;
815         }
816         for (i = 0; i < adev->usec_timeout; i++) {
817                 tmp = RREG32(scratch);
818                 if (tmp == 0xDEADBEEF)
819                         break;
820                 DRM_UDELAY(1);
821         }
822         if (i < adev->usec_timeout) {
823                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
824                          ring->idx, i);
825                 goto err2;
826         } else {
827                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
828                           scratch, tmp);
829                 r = -EINVAL;
830         }
831 err2:
832         fence_put(f);
833         amdgpu_ib_free(adev, &ib, NULL);
834         fence_put(f);
835 err1:
836         amdgpu_gfx_scratch_free(adev, scratch);
837         return r;
838 }
839
840
841 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
842         release_firmware(adev->gfx.pfp_fw);
843         adev->gfx.pfp_fw = NULL;
844         release_firmware(adev->gfx.me_fw);
845         adev->gfx.me_fw = NULL;
846         release_firmware(adev->gfx.ce_fw);
847         adev->gfx.ce_fw = NULL;
848         release_firmware(adev->gfx.rlc_fw);
849         adev->gfx.rlc_fw = NULL;
850         release_firmware(adev->gfx.mec_fw);
851         adev->gfx.mec_fw = NULL;
852         if ((adev->asic_type != CHIP_STONEY) &&
853             (adev->asic_type != CHIP_TOPAZ))
854                 release_firmware(adev->gfx.mec2_fw);
855         adev->gfx.mec2_fw = NULL;
856
857         kfree(adev->gfx.rlc.register_list_format);
858 }
859
860 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
861 {
862         const char *chip_name;
863         char fw_name[30];
864         int err;
865         struct amdgpu_firmware_info *info = NULL;
866         const struct common_firmware_header *header = NULL;
867         const struct gfx_firmware_header_v1_0 *cp_hdr;
868         const struct rlc_firmware_header_v2_0 *rlc_hdr;
869         unsigned int *tmp = NULL, i;
870
871         DRM_DEBUG("\n");
872
873         switch (adev->asic_type) {
874         case CHIP_TOPAZ:
875                 chip_name = "topaz";
876                 break;
877         case CHIP_TONGA:
878                 chip_name = "tonga";
879                 break;
880         case CHIP_CARRIZO:
881                 chip_name = "carrizo";
882                 break;
883         case CHIP_FIJI:
884                 chip_name = "fiji";
885                 break;
886         case CHIP_POLARIS11:
887                 chip_name = "polaris11";
888                 break;
889         case CHIP_POLARIS10:
890                 chip_name = "polaris10";
891                 break;
892         case CHIP_STONEY:
893                 chip_name = "stoney";
894                 break;
895         default:
896                 BUG();
897         }
898
899         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
900         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
901         if (err)
902                 goto out;
903         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
904         if (err)
905                 goto out;
906         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
907         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
908         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
909
910         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
911         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
912         if (err)
913                 goto out;
914         err = amdgpu_ucode_validate(adev->gfx.me_fw);
915         if (err)
916                 goto out;
917         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
918         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
919         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
920
921         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
922         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
923         if (err)
924                 goto out;
925         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
926         if (err)
927                 goto out;
928         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
929         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
930         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
931
932         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
933         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
934         if (err)
935                 goto out;
936         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
937         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
938         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
939         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
940
941         adev->gfx.rlc.save_and_restore_offset =
942                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
943         adev->gfx.rlc.clear_state_descriptor_offset =
944                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
945         adev->gfx.rlc.avail_scratch_ram_locations =
946                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
947         adev->gfx.rlc.reg_restore_list_size =
948                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
949         adev->gfx.rlc.reg_list_format_start =
950                         le32_to_cpu(rlc_hdr->reg_list_format_start);
951         adev->gfx.rlc.reg_list_format_separate_start =
952                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
953         adev->gfx.rlc.starting_offsets_start =
954                         le32_to_cpu(rlc_hdr->starting_offsets_start);
955         adev->gfx.rlc.reg_list_format_size_bytes =
956                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
957         adev->gfx.rlc.reg_list_size_bytes =
958                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
959
960         adev->gfx.rlc.register_list_format =
961                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
962                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
963
964         if (!adev->gfx.rlc.register_list_format) {
965                 err = -ENOMEM;
966                 goto out;
967         }
968
969         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
970                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
971         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
972                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
973
974         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
975
976         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
977                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
978         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
979                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
980
981         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
982         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
983         if (err)
984                 goto out;
985         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
986         if (err)
987                 goto out;
988         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
989         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
990         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
991
992         if ((adev->asic_type != CHIP_STONEY) &&
993             (adev->asic_type != CHIP_TOPAZ)) {
994                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
995                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
996                 if (!err) {
997                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
998                         if (err)
999                                 goto out;
1000                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1001                                 adev->gfx.mec2_fw->data;
1002                         adev->gfx.mec2_fw_version =
1003                                 le32_to_cpu(cp_hdr->header.ucode_version);
1004                         adev->gfx.mec2_feature_version =
1005                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1006                 } else {
1007                         err = 0;
1008                         adev->gfx.mec2_fw = NULL;
1009                 }
1010         }
1011
1012         if (adev->firmware.smu_load) {
1013                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1014                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1015                 info->fw = adev->gfx.pfp_fw;
1016                 header = (const struct common_firmware_header *)info->fw->data;
1017                 adev->firmware.fw_size +=
1018                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1019
1020                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1021                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1022                 info->fw = adev->gfx.me_fw;
1023                 header = (const struct common_firmware_header *)info->fw->data;
1024                 adev->firmware.fw_size +=
1025                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1026
1027                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1028                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1029                 info->fw = adev->gfx.ce_fw;
1030                 header = (const struct common_firmware_header *)info->fw->data;
1031                 adev->firmware.fw_size +=
1032                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1033
1034                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1035                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1036                 info->fw = adev->gfx.rlc_fw;
1037                 header = (const struct common_firmware_header *)info->fw->data;
1038                 adev->firmware.fw_size +=
1039                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1040
1041                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1042                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1043                 info->fw = adev->gfx.mec_fw;
1044                 header = (const struct common_firmware_header *)info->fw->data;
1045                 adev->firmware.fw_size +=
1046                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1047
1048                 if (adev->gfx.mec2_fw) {
1049                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1050                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1051                         info->fw = adev->gfx.mec2_fw;
1052                         header = (const struct common_firmware_header *)info->fw->data;
1053                         adev->firmware.fw_size +=
1054                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1055                 }
1056
1057         }
1058
1059 out:
1060         if (err) {
1061                 dev_err(adev->dev,
1062                         "gfx8: Failed to load firmware \"%s\"\n",
1063                         fw_name);
1064                 release_firmware(adev->gfx.pfp_fw);
1065                 adev->gfx.pfp_fw = NULL;
1066                 release_firmware(adev->gfx.me_fw);
1067                 adev->gfx.me_fw = NULL;
1068                 release_firmware(adev->gfx.ce_fw);
1069                 adev->gfx.ce_fw = NULL;
1070                 release_firmware(adev->gfx.rlc_fw);
1071                 adev->gfx.rlc_fw = NULL;
1072                 release_firmware(adev->gfx.mec_fw);
1073                 adev->gfx.mec_fw = NULL;
1074                 release_firmware(adev->gfx.mec2_fw);
1075                 adev->gfx.mec2_fw = NULL;
1076         }
1077         return err;
1078 }
1079
1080 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1081                                     volatile u32 *buffer)
1082 {
1083         u32 count = 0, i;
1084         const struct cs_section_def *sect = NULL;
1085         const struct cs_extent_def *ext = NULL;
1086
1087         if (adev->gfx.rlc.cs_data == NULL)
1088                 return;
1089         if (buffer == NULL)
1090                 return;
1091
1092         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1093         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1094
1095         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1096         buffer[count++] = cpu_to_le32(0x80000000);
1097         buffer[count++] = cpu_to_le32(0x80000000);
1098
1099         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1100                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1101                         if (sect->id == SECT_CONTEXT) {
1102                                 buffer[count++] =
1103                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1104                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1105                                                 PACKET3_SET_CONTEXT_REG_START);
1106                                 for (i = 0; i < ext->reg_count; i++)
1107                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1108                         } else {
1109                                 return;
1110                         }
1111                 }
1112         }
1113
1114         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1115         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1116                         PACKET3_SET_CONTEXT_REG_START);
1117         switch (adev->asic_type) {
1118         case CHIP_TONGA:
1119         case CHIP_POLARIS10:
1120                 buffer[count++] = cpu_to_le32(0x16000012);
1121                 buffer[count++] = cpu_to_le32(0x0000002A);
1122                 break;
1123         case CHIP_POLARIS11:
1124                 buffer[count++] = cpu_to_le32(0x16000012);
1125                 buffer[count++] = cpu_to_le32(0x00000000);
1126                 break;
1127         case CHIP_FIJI:
1128                 buffer[count++] = cpu_to_le32(0x3a00161a);
1129                 buffer[count++] = cpu_to_le32(0x0000002e);
1130                 break;
1131         case CHIP_TOPAZ:
1132         case CHIP_CARRIZO:
1133                 buffer[count++] = cpu_to_le32(0x00000002);
1134                 buffer[count++] = cpu_to_le32(0x00000000);
1135                 break;
1136         case CHIP_STONEY:
1137                 buffer[count++] = cpu_to_le32(0x00000000);
1138                 buffer[count++] = cpu_to_le32(0x00000000);
1139                 break;
1140         default:
1141                 buffer[count++] = cpu_to_le32(0x00000000);
1142                 buffer[count++] = cpu_to_le32(0x00000000);
1143                 break;
1144         }
1145
1146         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1147         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1148
1149         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1150         buffer[count++] = cpu_to_le32(0);
1151 }
1152
1153 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1154 {
1155         int r;
1156
1157         /* clear state block */
1158         if (adev->gfx.rlc.clear_state_obj) {
1159                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1160                 if (unlikely(r != 0))
1161                         dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1162                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1163                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1164
1165                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1166                 adev->gfx.rlc.clear_state_obj = NULL;
1167         }
1168 }
1169
1170 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1171 {
1172         volatile u32 *dst_ptr;
1173         u32 dws;
1174         const struct cs_section_def *cs_data;
1175         int r;
1176
1177         adev->gfx.rlc.cs_data = vi_cs_data;
1178
1179         cs_data = adev->gfx.rlc.cs_data;
1180
1181         if (cs_data) {
1182                 /* clear state block */
1183                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1184
1185                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1186                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1187                                              AMDGPU_GEM_DOMAIN_VRAM,
1188                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1189                                              NULL, NULL,
1190                                              &adev->gfx.rlc.clear_state_obj);
1191                         if (r) {
1192                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1193                                 gfx_v8_0_rlc_fini(adev);
1194                                 return r;
1195                         }
1196                 }
1197                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1198                 if (unlikely(r != 0)) {
1199                         gfx_v8_0_rlc_fini(adev);
1200                         return r;
1201                 }
1202                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1203                                   &adev->gfx.rlc.clear_state_gpu_addr);
1204                 if (r) {
1205                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1206                         dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
1207                         gfx_v8_0_rlc_fini(adev);
1208                         return r;
1209                 }
1210
1211                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1212                 if (r) {
1213                         dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
1214                         gfx_v8_0_rlc_fini(adev);
1215                         return r;
1216                 }
1217                 /* set up the cs buffer */
1218                 dst_ptr = adev->gfx.rlc.cs_ptr;
1219                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1220                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1221                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1222         }
1223
1224         return 0;
1225 }
1226
1227 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1228 {
1229         int r;
1230
1231         if (adev->gfx.mec.hpd_eop_obj) {
1232                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1233                 if (unlikely(r != 0))
1234                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1235                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1236                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1237
1238                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1239                 adev->gfx.mec.hpd_eop_obj = NULL;
1240         }
1241 }
1242
1243 #define MEC_HPD_SIZE 2048
1244
1245 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1246 {
1247         int r;
1248         u32 *hpd;
1249
1250         /*
1251          * we assign only 1 pipe because all other pipes will
1252          * be handled by KFD
1253          */
1254         adev->gfx.mec.num_mec = 1;
1255         adev->gfx.mec.num_pipe = 1;
1256         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1257
1258         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1259                 r = amdgpu_bo_create(adev,
1260                                      adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1261                                      PAGE_SIZE, true,
1262                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1263                                      &adev->gfx.mec.hpd_eop_obj);
1264                 if (r) {
1265                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1266                         return r;
1267                 }
1268         }
1269
1270         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1271         if (unlikely(r != 0)) {
1272                 gfx_v8_0_mec_fini(adev);
1273                 return r;
1274         }
1275         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1276                           &adev->gfx.mec.hpd_eop_gpu_addr);
1277         if (r) {
1278                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1279                 gfx_v8_0_mec_fini(adev);
1280                 return r;
1281         }
1282         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1283         if (r) {
1284                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1285                 gfx_v8_0_mec_fini(adev);
1286                 return r;
1287         }
1288
1289         memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1290
1291         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1292         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1293
1294         return 0;
1295 }
1296
1297 static const u32 vgpr_init_compute_shader[] =
1298 {
1299         0x7e000209, 0x7e020208,
1300         0x7e040207, 0x7e060206,
1301         0x7e080205, 0x7e0a0204,
1302         0x7e0c0203, 0x7e0e0202,
1303         0x7e100201, 0x7e120200,
1304         0x7e140209, 0x7e160208,
1305         0x7e180207, 0x7e1a0206,
1306         0x7e1c0205, 0x7e1e0204,
1307         0x7e200203, 0x7e220202,
1308         0x7e240201, 0x7e260200,
1309         0x7e280209, 0x7e2a0208,
1310         0x7e2c0207, 0x7e2e0206,
1311         0x7e300205, 0x7e320204,
1312         0x7e340203, 0x7e360202,
1313         0x7e380201, 0x7e3a0200,
1314         0x7e3c0209, 0x7e3e0208,
1315         0x7e400207, 0x7e420206,
1316         0x7e440205, 0x7e460204,
1317         0x7e480203, 0x7e4a0202,
1318         0x7e4c0201, 0x7e4e0200,
1319         0x7e500209, 0x7e520208,
1320         0x7e540207, 0x7e560206,
1321         0x7e580205, 0x7e5a0204,
1322         0x7e5c0203, 0x7e5e0202,
1323         0x7e600201, 0x7e620200,
1324         0x7e640209, 0x7e660208,
1325         0x7e680207, 0x7e6a0206,
1326         0x7e6c0205, 0x7e6e0204,
1327         0x7e700203, 0x7e720202,
1328         0x7e740201, 0x7e760200,
1329         0x7e780209, 0x7e7a0208,
1330         0x7e7c0207, 0x7e7e0206,
1331         0xbf8a0000, 0xbf810000,
1332 };
1333
1334 static const u32 sgpr_init_compute_shader[] =
1335 {
1336         0xbe8a0100, 0xbe8c0102,
1337         0xbe8e0104, 0xbe900106,
1338         0xbe920108, 0xbe940100,
1339         0xbe960102, 0xbe980104,
1340         0xbe9a0106, 0xbe9c0108,
1341         0xbe9e0100, 0xbea00102,
1342         0xbea20104, 0xbea40106,
1343         0xbea60108, 0xbea80100,
1344         0xbeaa0102, 0xbeac0104,
1345         0xbeae0106, 0xbeb00108,
1346         0xbeb20100, 0xbeb40102,
1347         0xbeb60104, 0xbeb80106,
1348         0xbeba0108, 0xbebc0100,
1349         0xbebe0102, 0xbec00104,
1350         0xbec20106, 0xbec40108,
1351         0xbec60100, 0xbec80102,
1352         0xbee60004, 0xbee70005,
1353         0xbeea0006, 0xbeeb0007,
1354         0xbee80008, 0xbee90009,
1355         0xbefc0000, 0xbf8a0000,
1356         0xbf810000, 0x00000000,
1357 };
1358
1359 static const u32 vgpr_init_regs[] =
1360 {
1361         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1362         mmCOMPUTE_RESOURCE_LIMITS, 0,
1363         mmCOMPUTE_NUM_THREAD_X, 256*4,
1364         mmCOMPUTE_NUM_THREAD_Y, 1,
1365         mmCOMPUTE_NUM_THREAD_Z, 1,
1366         mmCOMPUTE_PGM_RSRC2, 20,
1367         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1368         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1369         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1370         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1371         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1372         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1373         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1374         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1375         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1376         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1377 };
1378
1379 static const u32 sgpr1_init_regs[] =
1380 {
1381         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1382         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1383         mmCOMPUTE_NUM_THREAD_X, 256*5,
1384         mmCOMPUTE_NUM_THREAD_Y, 1,
1385         mmCOMPUTE_NUM_THREAD_Z, 1,
1386         mmCOMPUTE_PGM_RSRC2, 20,
1387         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1388         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1389         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1390         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1391         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1392         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1393         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1394         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1395         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1396         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1397 };
1398
1399 static const u32 sgpr2_init_regs[] =
1400 {
1401         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1402         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1403         mmCOMPUTE_NUM_THREAD_X, 256*5,
1404         mmCOMPUTE_NUM_THREAD_Y, 1,
1405         mmCOMPUTE_NUM_THREAD_Z, 1,
1406         mmCOMPUTE_PGM_RSRC2, 20,
1407         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1408         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1409         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1410         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1411         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1412         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1413         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1414         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1415         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1416         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1417 };
1418
1419 static const u32 sec_ded_counter_registers[] =
1420 {
1421         mmCPC_EDC_ATC_CNT,
1422         mmCPC_EDC_SCRATCH_CNT,
1423         mmCPC_EDC_UCODE_CNT,
1424         mmCPF_EDC_ATC_CNT,
1425         mmCPF_EDC_ROQ_CNT,
1426         mmCPF_EDC_TAG_CNT,
1427         mmCPG_EDC_ATC_CNT,
1428         mmCPG_EDC_DMA_CNT,
1429         mmCPG_EDC_TAG_CNT,
1430         mmDC_EDC_CSINVOC_CNT,
1431         mmDC_EDC_RESTORE_CNT,
1432         mmDC_EDC_STATE_CNT,
1433         mmGDS_EDC_CNT,
1434         mmGDS_EDC_GRBM_CNT,
1435         mmGDS_EDC_OA_DED,
1436         mmSPI_EDC_CNT,
1437         mmSQC_ATC_EDC_GATCL1_CNT,
1438         mmSQC_EDC_CNT,
1439         mmSQ_EDC_DED_CNT,
1440         mmSQ_EDC_INFO,
1441         mmSQ_EDC_SEC_CNT,
1442         mmTCC_EDC_CNT,
1443         mmTCP_ATC_EDC_GATCL1_CNT,
1444         mmTCP_EDC_CNT,
1445         mmTD_EDC_CNT
1446 };
1447
1448 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1449 {
1450         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1451         struct amdgpu_ib ib;
1452         struct fence *f = NULL;
1453         int r, i;
1454         u32 tmp;
1455         unsigned total_size, vgpr_offset, sgpr_offset;
1456         u64 gpu_addr;
1457
1458         /* only supported on CZ */
1459         if (adev->asic_type != CHIP_CARRIZO)
1460                 return 0;
1461
1462         /* bail if the compute ring is not ready */
1463         if (!ring->ready)
1464                 return 0;
1465
1466         tmp = RREG32(mmGB_EDC_MODE);
1467         WREG32(mmGB_EDC_MODE, 0);
1468
1469         total_size =
1470                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1471         total_size +=
1472                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1473         total_size +=
1474                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1475         total_size = ALIGN(total_size, 256);
1476         vgpr_offset = total_size;
1477         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1478         sgpr_offset = total_size;
1479         total_size += sizeof(sgpr_init_compute_shader);
1480
1481         /* allocate an indirect buffer to put the commands in */
1482         memset(&ib, 0, sizeof(ib));
1483         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1484         if (r) {
1485                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1486                 return r;
1487         }
1488
1489         /* load the compute shaders */
1490         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1491                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1492
1493         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1494                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1495
1496         /* init the ib length to 0 */
1497         ib.length_dw = 0;
1498
1499         /* VGPR */
1500         /* write the register state for the compute dispatch */
1501         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1502                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1503                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1504                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1505         }
1506         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1507         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1508         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1509         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1510         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1511         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1512
1513         /* write dispatch packet */
1514         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1515         ib.ptr[ib.length_dw++] = 8; /* x */
1516         ib.ptr[ib.length_dw++] = 1; /* y */
1517         ib.ptr[ib.length_dw++] = 1; /* z */
1518         ib.ptr[ib.length_dw++] =
1519                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1520
1521         /* write CS partial flush packet */
1522         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1523         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1524
1525         /* SGPR1 */
1526         /* write the register state for the compute dispatch */
1527         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1528                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1529                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1530                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1531         }
1532         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1533         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1534         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1535         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1536         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1537         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1538
1539         /* write dispatch packet */
1540         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1541         ib.ptr[ib.length_dw++] = 8; /* x */
1542         ib.ptr[ib.length_dw++] = 1; /* y */
1543         ib.ptr[ib.length_dw++] = 1; /* z */
1544         ib.ptr[ib.length_dw++] =
1545                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1546
1547         /* write CS partial flush packet */
1548         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1549         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1550
1551         /* SGPR2 */
1552         /* write the register state for the compute dispatch */
1553         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1554                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1555                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1556                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1557         }
1558         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1559         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1560         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1561         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1562         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1563         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1564
1565         /* write dispatch packet */
1566         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1567         ib.ptr[ib.length_dw++] = 8; /* x */
1568         ib.ptr[ib.length_dw++] = 1; /* y */
1569         ib.ptr[ib.length_dw++] = 1; /* z */
1570         ib.ptr[ib.length_dw++] =
1571                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1572
1573         /* write CS partial flush packet */
1574         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1575         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1576
1577         /* shedule the ib on the ring */
1578         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1579         if (r) {
1580                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1581                 goto fail;
1582         }
1583
1584         /* wait for the GPU to finish processing the IB */
1585         r = fence_wait(f, false);
1586         if (r) {
1587                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1588                 goto fail;
1589         }
1590
1591         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1592         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1593         WREG32(mmGB_EDC_MODE, tmp);
1594
1595         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1596         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1597         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1598
1599
1600         /* read back registers to clear the counters */
1601         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1602                 RREG32(sec_ded_counter_registers[i]);
1603
1604 fail:
1605         fence_put(f);
1606         amdgpu_ib_free(adev, &ib, NULL);
1607         fence_put(f);
1608
1609         return r;
1610 }
1611
1612 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1613 {
1614         u32 gb_addr_config;
1615         u32 mc_shared_chmap, mc_arb_ramcfg;
1616         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1617         u32 tmp;
1618         int ret;
1619
1620         switch (adev->asic_type) {
1621         case CHIP_TOPAZ:
1622                 adev->gfx.config.max_shader_engines = 1;
1623                 adev->gfx.config.max_tile_pipes = 2;
1624                 adev->gfx.config.max_cu_per_sh = 6;
1625                 adev->gfx.config.max_sh_per_se = 1;
1626                 adev->gfx.config.max_backends_per_se = 2;
1627                 adev->gfx.config.max_texture_channel_caches = 2;
1628                 adev->gfx.config.max_gprs = 256;
1629                 adev->gfx.config.max_gs_threads = 32;
1630                 adev->gfx.config.max_hw_contexts = 8;
1631
1632                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1633                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1634                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1635                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1636                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1637                 break;
1638         case CHIP_FIJI:
1639                 adev->gfx.config.max_shader_engines = 4;
1640                 adev->gfx.config.max_tile_pipes = 16;
1641                 adev->gfx.config.max_cu_per_sh = 16;
1642                 adev->gfx.config.max_sh_per_se = 1;
1643                 adev->gfx.config.max_backends_per_se = 4;
1644                 adev->gfx.config.max_texture_channel_caches = 16;
1645                 adev->gfx.config.max_gprs = 256;
1646                 adev->gfx.config.max_gs_threads = 32;
1647                 adev->gfx.config.max_hw_contexts = 8;
1648
1649                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1650                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1651                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1652                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1653                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1654                 break;
1655         case CHIP_POLARIS11:
1656                 ret = amdgpu_atombios_get_gfx_info(adev);
1657                 if (ret)
1658                         return ret;
1659                 adev->gfx.config.max_gprs = 256;
1660                 adev->gfx.config.max_gs_threads = 32;
1661                 adev->gfx.config.max_hw_contexts = 8;
1662
1663                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1664                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1665                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1666                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1667                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1668                 break;
1669         case CHIP_POLARIS10:
1670                 ret = amdgpu_atombios_get_gfx_info(adev);
1671                 if (ret)
1672                         return ret;
1673                 adev->gfx.config.max_gprs = 256;
1674                 adev->gfx.config.max_gs_threads = 32;
1675                 adev->gfx.config.max_hw_contexts = 8;
1676
1677                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1678                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1679                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1680                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1681                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1682                 break;
1683         case CHIP_TONGA:
1684                 adev->gfx.config.max_shader_engines = 4;
1685                 adev->gfx.config.max_tile_pipes = 8;
1686                 adev->gfx.config.max_cu_per_sh = 8;
1687                 adev->gfx.config.max_sh_per_se = 1;
1688                 adev->gfx.config.max_backends_per_se = 2;
1689                 adev->gfx.config.max_texture_channel_caches = 8;
1690                 adev->gfx.config.max_gprs = 256;
1691                 adev->gfx.config.max_gs_threads = 32;
1692                 adev->gfx.config.max_hw_contexts = 8;
1693
1694                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1695                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1696                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1697                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1698                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1699                 break;
1700         case CHIP_CARRIZO:
1701                 adev->gfx.config.max_shader_engines = 1;
1702                 adev->gfx.config.max_tile_pipes = 2;
1703                 adev->gfx.config.max_sh_per_se = 1;
1704                 adev->gfx.config.max_backends_per_se = 2;
1705
1706                 switch (adev->pdev->revision) {
1707                 case 0xc4:
1708                 case 0x84:
1709                 case 0xc8:
1710                 case 0xcc:
1711                 case 0xe1:
1712                 case 0xe3:
1713                         /* B10 */
1714                         adev->gfx.config.max_cu_per_sh = 8;
1715                         break;
1716                 case 0xc5:
1717                 case 0x81:
1718                 case 0x85:
1719                 case 0xc9:
1720                 case 0xcd:
1721                 case 0xe2:
1722                 case 0xe4:
1723                         /* B8 */
1724                         adev->gfx.config.max_cu_per_sh = 6;
1725                         break;
1726                 case 0xc6:
1727                 case 0xca:
1728                 case 0xce:
1729                 case 0x88:
1730                         /* B6 */
1731                         adev->gfx.config.max_cu_per_sh = 6;
1732                         break;
1733                 case 0xc7:
1734                 case 0x87:
1735                 case 0xcb:
1736                 case 0xe5:
1737                 case 0x89:
1738                 default:
1739                         /* B4 */
1740                         adev->gfx.config.max_cu_per_sh = 4;
1741                         break;
1742                 }
1743
1744                 adev->gfx.config.max_texture_channel_caches = 2;
1745                 adev->gfx.config.max_gprs = 256;
1746                 adev->gfx.config.max_gs_threads = 32;
1747                 adev->gfx.config.max_hw_contexts = 8;
1748
1749                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1750                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1751                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1752                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1753                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1754                 break;
1755         case CHIP_STONEY:
1756                 adev->gfx.config.max_shader_engines = 1;
1757                 adev->gfx.config.max_tile_pipes = 2;
1758                 adev->gfx.config.max_sh_per_se = 1;
1759                 adev->gfx.config.max_backends_per_se = 1;
1760
1761                 switch (adev->pdev->revision) {
1762                 case 0xc0:
1763                 case 0xc1:
1764                 case 0xc2:
1765                 case 0xc4:
1766                 case 0xc8:
1767                 case 0xc9:
1768                         adev->gfx.config.max_cu_per_sh = 3;
1769                         break;
1770                 case 0xd0:
1771                 case 0xd1:
1772                 case 0xd2:
1773                 default:
1774                         adev->gfx.config.max_cu_per_sh = 2;
1775                         break;
1776                 }
1777
1778                 adev->gfx.config.max_texture_channel_caches = 2;
1779                 adev->gfx.config.max_gprs = 256;
1780                 adev->gfx.config.max_gs_threads = 16;
1781                 adev->gfx.config.max_hw_contexts = 8;
1782
1783                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1784                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1785                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1786                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1787                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1788                 break;
1789         default:
1790                 adev->gfx.config.max_shader_engines = 2;
1791                 adev->gfx.config.max_tile_pipes = 4;
1792                 adev->gfx.config.max_cu_per_sh = 2;
1793                 adev->gfx.config.max_sh_per_se = 1;
1794                 adev->gfx.config.max_backends_per_se = 2;
1795                 adev->gfx.config.max_texture_channel_caches = 4;
1796                 adev->gfx.config.max_gprs = 256;
1797                 adev->gfx.config.max_gs_threads = 32;
1798                 adev->gfx.config.max_hw_contexts = 8;
1799
1800                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1801                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1802                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1803                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1804                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1805                 break;
1806         }
1807
1808         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1809         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1810         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1811
1812         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1813         adev->gfx.config.mem_max_burst_length_bytes = 256;
1814         if (adev->flags & AMD_IS_APU) {
1815                 /* Get memory bank mapping mode. */
1816                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1817                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1818                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1819
1820                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1821                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1822                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1823
1824                 /* Validate settings in case only one DIMM installed. */
1825                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1826                         dimm00_addr_map = 0;
1827                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1828                         dimm01_addr_map = 0;
1829                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1830                         dimm10_addr_map = 0;
1831                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1832                         dimm11_addr_map = 0;
1833
1834                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1835                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1836                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1837                         adev->gfx.config.mem_row_size_in_kb = 2;
1838                 else
1839                         adev->gfx.config.mem_row_size_in_kb = 1;
1840         } else {
1841                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1842                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1843                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1844                         adev->gfx.config.mem_row_size_in_kb = 4;
1845         }
1846
1847         adev->gfx.config.shader_engine_tile_size = 32;
1848         adev->gfx.config.num_gpus = 1;
1849         adev->gfx.config.multi_gpu_tile_size = 64;
1850
1851         /* fix up row size */
1852         switch (adev->gfx.config.mem_row_size_in_kb) {
1853         case 1:
1854         default:
1855                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1856                 break;
1857         case 2:
1858                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1859                 break;
1860         case 4:
1861                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1862                 break;
1863         }
1864         adev->gfx.config.gb_addr_config = gb_addr_config;
1865
1866         return 0;
1867 }
1868
1869 static int gfx_v8_0_sw_init(void *handle)
1870 {
1871         int i, r;
1872         struct amdgpu_ring *ring;
1873         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1874
1875         /* EOP Event */
1876         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1877         if (r)
1878                 return r;
1879
1880         /* Privileged reg */
1881         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1882         if (r)
1883                 return r;
1884
1885         /* Privileged inst */
1886         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1887         if (r)
1888                 return r;
1889
1890         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1891
1892         gfx_v8_0_scratch_init(adev);
1893
1894         r = gfx_v8_0_init_microcode(adev);
1895         if (r) {
1896                 DRM_ERROR("Failed to load gfx firmware!\n");
1897                 return r;
1898         }
1899
1900         r = gfx_v8_0_rlc_init(adev);
1901         if (r) {
1902                 DRM_ERROR("Failed to init rlc BOs!\n");
1903                 return r;
1904         }
1905
1906         r = gfx_v8_0_mec_init(adev);
1907         if (r) {
1908                 DRM_ERROR("Failed to init MEC BOs!\n");
1909                 return r;
1910         }
1911
1912         /* set up the gfx ring */
1913         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1914                 ring = &adev->gfx.gfx_ring[i];
1915                 ring->ring_obj = NULL;
1916                 sprintf(ring->name, "gfx");
1917                 /* no gfx doorbells on iceland */
1918                 if (adev->asic_type != CHIP_TOPAZ) {
1919                         ring->use_doorbell = true;
1920                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1921                 }
1922
1923                 r = amdgpu_ring_init(adev, ring, 1024,
1924                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1925                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1926                                      AMDGPU_RING_TYPE_GFX);
1927                 if (r)
1928                         return r;
1929         }
1930
1931         /* set up the compute queues */
1932         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1933                 unsigned irq_type;
1934
1935                 /* max 32 queues per MEC */
1936                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1937                         DRM_ERROR("Too many (%d) compute rings!\n", i);
1938                         break;
1939                 }
1940                 ring = &adev->gfx.compute_ring[i];
1941                 ring->ring_obj = NULL;
1942                 ring->use_doorbell = true;
1943                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1944                 ring->me = 1; /* first MEC */
1945                 ring->pipe = i / 8;
1946                 ring->queue = i % 8;
1947                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1948                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1949                 /* type-2 packets are deprecated on MEC, use type-3 instead */
1950                 r = amdgpu_ring_init(adev, ring, 1024,
1951                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1952                                      &adev->gfx.eop_irq, irq_type,
1953                                      AMDGPU_RING_TYPE_COMPUTE);
1954                 if (r)
1955                         return r;
1956         }
1957
1958         /* reserve GDS, GWS and OA resource for gfx */
1959         r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1960                         PAGE_SIZE, true,
1961                         AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1962                         NULL, &adev->gds.gds_gfx_bo);
1963         if (r)
1964                 return r;
1965
1966         r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1967                 PAGE_SIZE, true,
1968                 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1969                 NULL, &adev->gds.gws_gfx_bo);
1970         if (r)
1971                 return r;
1972
1973         r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1974                         PAGE_SIZE, true,
1975                         AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1976                         NULL, &adev->gds.oa_gfx_bo);
1977         if (r)
1978                 return r;
1979
1980         adev->gfx.ce_ram_size = 0x8000;
1981
1982         r = gfx_v8_0_gpu_early_init(adev);
1983         if (r)
1984                 return r;
1985
1986         return 0;
1987 }
1988
1989 static int gfx_v8_0_sw_fini(void *handle)
1990 {
1991         int i;
1992         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1993
1994         amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1995         amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1996         amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1997
1998         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1999                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2000         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2001                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2002
2003         gfx_v8_0_mec_fini(adev);
2004
2005         gfx_v8_0_rlc_fini(adev);
2006
2007         gfx_v8_0_free_microcode(adev);
2008
2009         return 0;
2010 }
2011
2012 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2013 {
2014         uint32_t *modearray, *mod2array;
2015         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2016         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2017         u32 reg_offset;
2018
2019         modearray = adev->gfx.config.tile_mode_array;
2020         mod2array = adev->gfx.config.macrotile_mode_array;
2021
2022         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2023                 modearray[reg_offset] = 0;
2024
2025         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2026                 mod2array[reg_offset] = 0;
2027
2028         switch (adev->asic_type) {
2029         case CHIP_TOPAZ:
2030                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2031                                 PIPE_CONFIG(ADDR_SURF_P2) |
2032                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2033                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2034                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2035                                 PIPE_CONFIG(ADDR_SURF_P2) |
2036                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2037                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2038                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2039                                 PIPE_CONFIG(ADDR_SURF_P2) |
2040                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2041                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2042                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2043                                 PIPE_CONFIG(ADDR_SURF_P2) |
2044                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2045                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2046                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2047                                 PIPE_CONFIG(ADDR_SURF_P2) |
2048                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2049                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2050                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2051                                 PIPE_CONFIG(ADDR_SURF_P2) |
2052                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2053                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2054                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2055                                 PIPE_CONFIG(ADDR_SURF_P2) |
2056                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2057                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2058                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2059                                 PIPE_CONFIG(ADDR_SURF_P2));
2060                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2061                                 PIPE_CONFIG(ADDR_SURF_P2) |
2062                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2063                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2064                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2065                                  PIPE_CONFIG(ADDR_SURF_P2) |
2066                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2067                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2068                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2069                                  PIPE_CONFIG(ADDR_SURF_P2) |
2070                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2071                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2072                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2073                                  PIPE_CONFIG(ADDR_SURF_P2) |
2074                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2075                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2076                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2077                                  PIPE_CONFIG(ADDR_SURF_P2) |
2078                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2079                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2081                                  PIPE_CONFIG(ADDR_SURF_P2) |
2082                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2083                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2084                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2085                                  PIPE_CONFIG(ADDR_SURF_P2) |
2086                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2087                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2088                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2089                                  PIPE_CONFIG(ADDR_SURF_P2) |
2090                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2091                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2092                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2093                                  PIPE_CONFIG(ADDR_SURF_P2) |
2094                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2095                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2096                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2097                                  PIPE_CONFIG(ADDR_SURF_P2) |
2098                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2099                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2100                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2101                                  PIPE_CONFIG(ADDR_SURF_P2) |
2102                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2103                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2104                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2105                                  PIPE_CONFIG(ADDR_SURF_P2) |
2106                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2107                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2108                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2109                                  PIPE_CONFIG(ADDR_SURF_P2) |
2110                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2111                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2112                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2113                                  PIPE_CONFIG(ADDR_SURF_P2) |
2114                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2115                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2116                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2117                                  PIPE_CONFIG(ADDR_SURF_P2) |
2118                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2119                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2120                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2121                                  PIPE_CONFIG(ADDR_SURF_P2) |
2122                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2123                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2124                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2125                                  PIPE_CONFIG(ADDR_SURF_P2) |
2126                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2127                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2128                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2129                                  PIPE_CONFIG(ADDR_SURF_P2) |
2130                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2131                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2132
2133                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2134                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2135                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2136                                 NUM_BANKS(ADDR_SURF_8_BANK));
2137                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2138                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2139                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2140                                 NUM_BANKS(ADDR_SURF_8_BANK));
2141                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2142                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2143                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2144                                 NUM_BANKS(ADDR_SURF_8_BANK));
2145                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2146                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2147                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2148                                 NUM_BANKS(ADDR_SURF_8_BANK));
2149                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2150                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2151                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2152                                 NUM_BANKS(ADDR_SURF_8_BANK));
2153                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2154                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2155                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2156                                 NUM_BANKS(ADDR_SURF_8_BANK));
2157                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2158                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2159                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2160                                 NUM_BANKS(ADDR_SURF_8_BANK));
2161                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2162                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2163                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2164                                 NUM_BANKS(ADDR_SURF_16_BANK));
2165                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2166                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2167                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2168                                 NUM_BANKS(ADDR_SURF_16_BANK));
2169                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2170                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2171                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2172                                  NUM_BANKS(ADDR_SURF_16_BANK));
2173                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2174                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2176                                  NUM_BANKS(ADDR_SURF_16_BANK));
2177                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2178                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2179                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2180                                  NUM_BANKS(ADDR_SURF_16_BANK));
2181                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2182                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2183                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2184                                  NUM_BANKS(ADDR_SURF_16_BANK));
2185                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2188                                  NUM_BANKS(ADDR_SURF_8_BANK));
2189
2190                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2191                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2192                             reg_offset != 23)
2193                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2194
2195                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2196                         if (reg_offset != 7)
2197                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2198
2199                 break;
2200         case CHIP_FIJI:
2201                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2202                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2203                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2204                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2205                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2206                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2207                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2208                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2209                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2210                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2211                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2212                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2213                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2214                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2215                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2216                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2217                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2218                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2219                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2220                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2221                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2222                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2223                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2224                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2225                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2226                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2227                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2228                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2229                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2230                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2231                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2232                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2233                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2234                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2235                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2236                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2237                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2238                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2239                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2240                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2241                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2242                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2243                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2244                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2245                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2246                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2247                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2248                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2249                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2250                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2251                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2252                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2253                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2254                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2255                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2256                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2257                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2258                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2259                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2260                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2261                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2262                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2263                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2264                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2265                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2266                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2267                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2268                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2269                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2270                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2271                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2272                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2273                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2274                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2275                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2276                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2277                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2278                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2279                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2280                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2281                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2282                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2283                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2284                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2285                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2286                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2287                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2288                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2289                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2290                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2291                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2292                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2295                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2296                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2299                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2300                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2303                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2304                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2307                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2308                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2311                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2312                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2315                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2316                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2319                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2320                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2323
2324                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2325                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2326                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2327                                 NUM_BANKS(ADDR_SURF_8_BANK));
2328                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2329                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2330                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2331                                 NUM_BANKS(ADDR_SURF_8_BANK));
2332                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2333                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2334                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2335                                 NUM_BANKS(ADDR_SURF_8_BANK));
2336                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2337                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2338                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2339                                 NUM_BANKS(ADDR_SURF_8_BANK));
2340                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2341                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2342                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2343                                 NUM_BANKS(ADDR_SURF_8_BANK));
2344                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2345                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2346                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2347                                 NUM_BANKS(ADDR_SURF_8_BANK));
2348                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2349                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2350                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2351                                 NUM_BANKS(ADDR_SURF_8_BANK));
2352                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2353                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2354                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2355                                 NUM_BANKS(ADDR_SURF_8_BANK));
2356                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2357                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2358                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2359                                 NUM_BANKS(ADDR_SURF_8_BANK));
2360                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2361                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2362                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2363                                  NUM_BANKS(ADDR_SURF_8_BANK));
2364                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2366                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2367                                  NUM_BANKS(ADDR_SURF_8_BANK));
2368                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2370                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2371                                  NUM_BANKS(ADDR_SURF_8_BANK));
2372                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2373                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2374                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2375                                  NUM_BANKS(ADDR_SURF_8_BANK));
2376                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2377                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2378                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2379                                  NUM_BANKS(ADDR_SURF_4_BANK));
2380
2381                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2382                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2383
2384                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2385                         if (reg_offset != 7)
2386                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2387
2388                 break;
2389         case CHIP_TONGA:
2390                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2391                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2392                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2393                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2394                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2395                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2396                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2397                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2398                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2399                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2400                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2401                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2402                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2404                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2405                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2406                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2408                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2409                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2410                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2411                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2412                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2413                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2414                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2415                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2416                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2417                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2418                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2419                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2420                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2421                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2422                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2423                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2424                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2425                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2426                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2427                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2428                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2430                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2431                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2433                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2434                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2435                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2436                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2438                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2439                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2440                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2441                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2442                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2443                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2446                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2447                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2449                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2450                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2451                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2452                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2453                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2454                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2455                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2456                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2457                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2458                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2459                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2460                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2461                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2462                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2463                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2464                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2465                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2466                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2467                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2468                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2469                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2470                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2471                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2472                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2473                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2474                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2475                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2476                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2477                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2478                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2479                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2480                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2481                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2482                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2483                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2484                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2485                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2486                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2487                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2488                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2489                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2490                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2491                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2492                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2493                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2494                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2495                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2496                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2497                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2498                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2499                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2500                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2501                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2502                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2503                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2504                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2505                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2506                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2507                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2508                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2509                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2510                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2511                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2512
2513                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2514                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2515                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2516                                 NUM_BANKS(ADDR_SURF_16_BANK));
2517                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2519                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2520                                 NUM_BANKS(ADDR_SURF_16_BANK));
2521                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2523                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2524                                 NUM_BANKS(ADDR_SURF_16_BANK));
2525                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2527                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2528                                 NUM_BANKS(ADDR_SURF_16_BANK));
2529                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2531                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2532                                 NUM_BANKS(ADDR_SURF_16_BANK));
2533                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2535                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2536                                 NUM_BANKS(ADDR_SURF_16_BANK));
2537                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2539                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2540                                 NUM_BANKS(ADDR_SURF_16_BANK));
2541                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2542                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2543                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2544                                 NUM_BANKS(ADDR_SURF_16_BANK));
2545                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2547                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2548                                 NUM_BANKS(ADDR_SURF_16_BANK));
2549                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2551                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2552                                  NUM_BANKS(ADDR_SURF_16_BANK));
2553                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2555                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2556                                  NUM_BANKS(ADDR_SURF_16_BANK));
2557                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2560                                  NUM_BANKS(ADDR_SURF_8_BANK));
2561                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2562                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2563                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2564                                  NUM_BANKS(ADDR_SURF_4_BANK));
2565                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2567                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2568                                  NUM_BANKS(ADDR_SURF_4_BANK));
2569
2570                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2571                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2572
2573                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2574                         if (reg_offset != 7)
2575                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2576
2577                 break;
2578         case CHIP_POLARIS11:
2579                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2581                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2582                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2583                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2585                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2586                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2587                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2588                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2589                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2590                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2591                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2592                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2593                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2594                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2595                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2597                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2598                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2599                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2600                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2601                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2602                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2603                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2604                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2605                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2606                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2607                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2608                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2609                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2610                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2611                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2612                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2613                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2614                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2615                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2616                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2617                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2619                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2620                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2621                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2622                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2623                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2624                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2625                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2626                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2627                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2628                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2629                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2630                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2631                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2632                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2633                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2634                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2635                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2636                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2637                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2638                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2639                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2640                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2641                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2642                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2643                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2644                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2645                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2646                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2647                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2648                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2649                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2650                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2651                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2652                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2653                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2654                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2655                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2656                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2657                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2658                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2660                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2661                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2662                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2664                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2665                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2666                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2667                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2668                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2669                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2670                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2672                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2673                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2674                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2676                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2677                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2678                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2680                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2681                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2682                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2684                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2685                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2686                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2688                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2689                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2692                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2693                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2694                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2696                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2697                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2700                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2701
2702                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2704                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2705                                 NUM_BANKS(ADDR_SURF_16_BANK));
2706
2707                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2708                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2709                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2710                                 NUM_BANKS(ADDR_SURF_16_BANK));
2711
2712                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2713                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2714                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2715                                 NUM_BANKS(ADDR_SURF_16_BANK));
2716
2717                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2718                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2719                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2720                                 NUM_BANKS(ADDR_SURF_16_BANK));
2721
2722                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2723                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2724                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2725                                 NUM_BANKS(ADDR_SURF_16_BANK));
2726
2727                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2729                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2730                                 NUM_BANKS(ADDR_SURF_16_BANK));
2731
2732                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2734                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2735                                 NUM_BANKS(ADDR_SURF_16_BANK));
2736
2737                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2738                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2739                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2740                                 NUM_BANKS(ADDR_SURF_16_BANK));
2741
2742                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2743                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2744                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2745                                 NUM_BANKS(ADDR_SURF_16_BANK));
2746
2747                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2748                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2749                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2750                                 NUM_BANKS(ADDR_SURF_16_BANK));
2751
2752                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2753                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2754                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2755                                 NUM_BANKS(ADDR_SURF_16_BANK));
2756
2757                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2758                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2759                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2760                                 NUM_BANKS(ADDR_SURF_16_BANK));
2761
2762                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2765                                 NUM_BANKS(ADDR_SURF_8_BANK));
2766
2767                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2769                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2770                                 NUM_BANKS(ADDR_SURF_4_BANK));
2771
2772                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2773                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2774
2775                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2776                         if (reg_offset != 7)
2777                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2778
2779                 break;
2780         case CHIP_POLARIS10:
2781                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2782                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2783                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2784                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2785                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2786                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2787                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2788                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2789                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2790                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2791                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2792                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2793                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2794                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2795                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2796                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2797                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2799                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2800                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2801                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2802                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2803                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2804                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2805                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2806                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2807                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2808                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2809                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2810                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2812                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2813                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2814                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2815                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2816                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2817                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2818                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2819                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2820                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2821                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2822                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2823                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2824                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2825                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2826                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2827                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2828                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2829                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2830                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2831                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2832                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2833                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2834                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2835                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2836                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2837                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2838                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2839                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2840                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2841                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2842                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2843                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2844                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2845                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2846                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2847                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2848                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2850                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2851                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2852                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2853                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2854                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2855                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2856                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2857                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2858                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2859                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2860                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2861                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2862                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2863                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2864                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2865                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2866                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2867                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2868                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2869                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2870                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2871                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2872                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2873                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2874                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2875                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2876                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2877                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2878                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2879                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2880                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2882                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2883                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2884                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2885                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2886                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2887                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2888                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2890                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2891                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2894                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2895                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2896                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2898                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2899                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2900                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2902                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2903
2904                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2905                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2906                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2907                                 NUM_BANKS(ADDR_SURF_16_BANK));
2908
2909                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2910                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2911                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2912                                 NUM_BANKS(ADDR_SURF_16_BANK));
2913
2914                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2915                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2916                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2917                                 NUM_BANKS(ADDR_SURF_16_BANK));
2918
2919                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2920                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2921                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2922                                 NUM_BANKS(ADDR_SURF_16_BANK));
2923
2924                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2925                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2926                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2927                                 NUM_BANKS(ADDR_SURF_16_BANK));
2928
2929                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2930                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2931                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2932                                 NUM_BANKS(ADDR_SURF_16_BANK));
2933
2934                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2935                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2936                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2937                                 NUM_BANKS(ADDR_SURF_16_BANK));
2938
2939                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2940                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2941                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2942                                 NUM_BANKS(ADDR_SURF_16_BANK));
2943
2944                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2945                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2946                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2947                                 NUM_BANKS(ADDR_SURF_16_BANK));
2948
2949                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2950                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2951                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2952                                 NUM_BANKS(ADDR_SURF_16_BANK));
2953
2954                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2955                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2956                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2957                                 NUM_BANKS(ADDR_SURF_16_BANK));
2958
2959                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2960                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2961                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2962                                 NUM_BANKS(ADDR_SURF_8_BANK));
2963
2964                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2965                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2966                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2967                                 NUM_BANKS(ADDR_SURF_4_BANK));
2968
2969                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2970                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2971                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2972                                 NUM_BANKS(ADDR_SURF_4_BANK));
2973
2974                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2975                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2976
2977                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2978                         if (reg_offset != 7)
2979                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2980
2981                 break;
2982         case CHIP_STONEY:
2983                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2984                                 PIPE_CONFIG(ADDR_SURF_P2) |
2985                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2986                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2987                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2988                                 PIPE_CONFIG(ADDR_SURF_P2) |
2989                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2990                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2991                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2992                                 PIPE_CONFIG(ADDR_SURF_P2) |
2993                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2994                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2995                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2996                                 PIPE_CONFIG(ADDR_SURF_P2) |
2997                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2998                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2999                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3000                                 PIPE_CONFIG(ADDR_SURF_P2) |
3001                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3002                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3003                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3004                                 PIPE_CONFIG(ADDR_SURF_P2) |
3005                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3006                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3007                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3008                                 PIPE_CONFIG(ADDR_SURF_P2) |
3009                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3010                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3011                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3012                                 PIPE_CONFIG(ADDR_SURF_P2));
3013                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3014                                 PIPE_CONFIG(ADDR_SURF_P2) |
3015                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3016                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3018                                  PIPE_CONFIG(ADDR_SURF_P2) |
3019                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3020                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3021                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3022                                  PIPE_CONFIG(ADDR_SURF_P2) |
3023                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3024                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3025                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3026                                  PIPE_CONFIG(ADDR_SURF_P2) |
3027                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3028                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3029                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3030                                  PIPE_CONFIG(ADDR_SURF_P2) |
3031                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3032                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3033                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3034                                  PIPE_CONFIG(ADDR_SURF_P2) |
3035                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3036                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3037                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3038                                  PIPE_CONFIG(ADDR_SURF_P2) |
3039                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3040                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3041                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3042                                  PIPE_CONFIG(ADDR_SURF_P2) |
3043                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3044                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3045                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3046                                  PIPE_CONFIG(ADDR_SURF_P2) |
3047                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3048                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3049                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3050                                  PIPE_CONFIG(ADDR_SURF_P2) |
3051                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3052                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3053                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3054                                  PIPE_CONFIG(ADDR_SURF_P2) |
3055                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3056                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3057                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3058                                  PIPE_CONFIG(ADDR_SURF_P2) |
3059                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3060                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3061                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3062                                  PIPE_CONFIG(ADDR_SURF_P2) |
3063                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3064                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3065                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3066                                  PIPE_CONFIG(ADDR_SURF_P2) |
3067                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3068                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3069                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3070                                  PIPE_CONFIG(ADDR_SURF_P2) |
3071                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3072                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3073                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3074                                  PIPE_CONFIG(ADDR_SURF_P2) |
3075                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3076                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3077                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3078                                  PIPE_CONFIG(ADDR_SURF_P2) |
3079                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3080                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3081                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3082                                  PIPE_CONFIG(ADDR_SURF_P2) |
3083                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3084                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3085
3086                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3087                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3088                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3089                                 NUM_BANKS(ADDR_SURF_8_BANK));
3090                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3091                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3092                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3093                                 NUM_BANKS(ADDR_SURF_8_BANK));
3094                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3095                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3096                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3097                                 NUM_BANKS(ADDR_SURF_8_BANK));
3098                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3099                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3100                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3101                                 NUM_BANKS(ADDR_SURF_8_BANK));
3102                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3103                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3104                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3105                                 NUM_BANKS(ADDR_SURF_8_BANK));
3106                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3107                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3108                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3109                                 NUM_BANKS(ADDR_SURF_8_BANK));
3110                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3111                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3112                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3113                                 NUM_BANKS(ADDR_SURF_8_BANK));
3114                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3115                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3116                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3117                                 NUM_BANKS(ADDR_SURF_16_BANK));
3118                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3119                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3120                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3121                                 NUM_BANKS(ADDR_SURF_16_BANK));
3122                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3123                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3124                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3125                                  NUM_BANKS(ADDR_SURF_16_BANK));
3126                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3127                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3128                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3129                                  NUM_BANKS(ADDR_SURF_16_BANK));
3130                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3131                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3132                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3133                                  NUM_BANKS(ADDR_SURF_16_BANK));
3134                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3135                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3136                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3137                                  NUM_BANKS(ADDR_SURF_16_BANK));
3138                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3139                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3140                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3141                                  NUM_BANKS(ADDR_SURF_8_BANK));
3142
3143                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3144                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3145                             reg_offset != 23)
3146                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3147
3148                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3149                         if (reg_offset != 7)
3150                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3151
3152                 break;
3153         default:
3154                 dev_warn(adev->dev,
3155                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3156                          adev->asic_type);
3157
3158         case CHIP_CARRIZO:
3159                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3160                                 PIPE_CONFIG(ADDR_SURF_P2) |
3161                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3162                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3163                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3164                                 PIPE_CONFIG(ADDR_SURF_P2) |
3165                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3166                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3167                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3168                                 PIPE_CONFIG(ADDR_SURF_P2) |
3169                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3170                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3171                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3172                                 PIPE_CONFIG(ADDR_SURF_P2) |
3173                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3174                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3175                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3176                                 PIPE_CONFIG(ADDR_SURF_P2) |
3177                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3178                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3179                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3180                                 PIPE_CONFIG(ADDR_SURF_P2) |
3181                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3182                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3183                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3184                                 PIPE_CONFIG(ADDR_SURF_P2) |
3185                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3186                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3187                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3188                                 PIPE_CONFIG(ADDR_SURF_P2));
3189                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3190                                 PIPE_CONFIG(ADDR_SURF_P2) |
3191                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3192                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3193                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3194                                  PIPE_CONFIG(ADDR_SURF_P2) |
3195                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3196                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3197                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3198                                  PIPE_CONFIG(ADDR_SURF_P2) |
3199                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3200                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3201                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3202                                  PIPE_CONFIG(ADDR_SURF_P2) |
3203                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3204                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3205                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3206                                  PIPE_CONFIG(ADDR_SURF_P2) |
3207                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3208                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3209                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3210                                  PIPE_CONFIG(ADDR_SURF_P2) |
3211                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3212                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3213                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3214                                  PIPE_CONFIG(ADDR_SURF_P2) |
3215                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3216                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3217                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3218                                  PIPE_CONFIG(ADDR_SURF_P2) |
3219                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3220                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3221                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3222                                  PIPE_CONFIG(ADDR_SURF_P2) |
3223                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3224                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3225                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3226                                  PIPE_CONFIG(ADDR_SURF_P2) |
3227                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3228                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3229                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3230                                  PIPE_CONFIG(ADDR_SURF_P2) |
3231                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3232                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3233                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3234                                  PIPE_CONFIG(ADDR_SURF_P2) |
3235                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3236                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3237                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3238                                  PIPE_CONFIG(ADDR_SURF_P2) |
3239                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3240                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3241                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3242                                  PIPE_CONFIG(ADDR_SURF_P2) |
3243                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3244                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3245                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3246                                  PIPE_CONFIG(ADDR_SURF_P2) |
3247                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3248                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3249                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3250                                  PIPE_CONFIG(ADDR_SURF_P2) |
3251                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3252                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3253                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3254                                  PIPE_CONFIG(ADDR_SURF_P2) |
3255                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3256                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3257                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3258                                  PIPE_CONFIG(ADDR_SURF_P2) |
3259                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3260                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3261
3262                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3263                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3264                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3265                                 NUM_BANKS(ADDR_SURF_8_BANK));
3266                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3267                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3268                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3269                                 NUM_BANKS(ADDR_SURF_8_BANK));
3270                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3271                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3272                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3273                                 NUM_BANKS(ADDR_SURF_8_BANK));
3274                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3275                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3276                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3277                                 NUM_BANKS(ADDR_SURF_8_BANK));
3278                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3279                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3280                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3281                                 NUM_BANKS(ADDR_SURF_8_BANK));
3282                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3283                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3284                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3285                                 NUM_BANKS(ADDR_SURF_8_BANK));
3286                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3287                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3288                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3289                                 NUM_BANKS(ADDR_SURF_8_BANK));
3290                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3291                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3292                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3293                                 NUM_BANKS(ADDR_SURF_16_BANK));
3294                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3295                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3296                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3297                                 NUM_BANKS(ADDR_SURF_16_BANK));
3298                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3299                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3300                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3301                                  NUM_BANKS(ADDR_SURF_16_BANK));
3302                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3303                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3304                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3305                                  NUM_BANKS(ADDR_SURF_16_BANK));
3306                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3307                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3308                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3309                                  NUM_BANKS(ADDR_SURF_16_BANK));
3310                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3311                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3312                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3313                                  NUM_BANKS(ADDR_SURF_16_BANK));
3314                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3315                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3316                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3317                                  NUM_BANKS(ADDR_SURF_8_BANK));
3318
3319                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3320                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3321                             reg_offset != 23)
3322                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3323
3324                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3325                         if (reg_offset != 7)
3326                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3327
3328                 break;
3329         }
3330 }
3331
3332 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
3333 {
3334         u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3335
3336         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3337                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3338                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3339         } else if (se_num == 0xffffffff) {
3340                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3341                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3342         } else if (sh_num == 0xffffffff) {
3343                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3344                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3345         } else {
3346                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3347                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3348         }
3349         WREG32(mmGRBM_GFX_INDEX, data);
3350 }
3351
3352 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3353 {
3354         return (u32)((1ULL << bit_width) - 1);
3355 }
3356
3357 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3358 {
3359         u32 data, mask;
3360
3361         data = RREG32(mmCC_RB_BACKEND_DISABLE);
3362         data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3363
3364         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3365         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3366
3367         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3368                                        adev->gfx.config.max_sh_per_se);
3369
3370         return (~data) & mask;
3371 }
3372
3373 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3374 {
3375         int i, j;
3376         u32 data;
3377         u32 active_rbs = 0;
3378         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3379                                         adev->gfx.config.max_sh_per_se;
3380
3381         mutex_lock(&adev->grbm_idx_mutex);
3382         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3383                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3384                         gfx_v8_0_select_se_sh(adev, i, j);
3385                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3386                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3387                                                rb_bitmap_width_per_sh);
3388                 }
3389         }
3390         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3391         mutex_unlock(&adev->grbm_idx_mutex);
3392
3393         adev->gfx.config.backend_enable_mask = active_rbs;
3394         adev->gfx.config.num_rbs = hweight32(active_rbs);
3395 }
3396
3397 /**
3398  * gfx_v8_0_init_compute_vmid - gart enable
3399  *
3400  * @rdev: amdgpu_device pointer
3401  *
3402  * Initialize compute vmid sh_mem registers
3403  *
3404  */
3405 #define DEFAULT_SH_MEM_BASES    (0x6000)
3406 #define FIRST_COMPUTE_VMID      (8)
3407 #define LAST_COMPUTE_VMID       (16)
3408 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3409 {
3410         int i;
3411         uint32_t sh_mem_config;
3412         uint32_t sh_mem_bases;
3413
3414         /*
3415          * Configure apertures:
3416          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3417          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3418          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3419          */
3420         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3421
3422         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3423                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3424                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3425                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3426                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3427                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3428
3429         mutex_lock(&adev->srbm_mutex);
3430         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3431                 vi_srbm_select(adev, 0, 0, 0, i);
3432                 /* CP and shaders */
3433                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3434                 WREG32(mmSH_MEM_APE1_BASE, 1);
3435                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3436                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3437         }
3438         vi_srbm_select(adev, 0, 0, 0, 0);
3439         mutex_unlock(&adev->srbm_mutex);
3440 }
3441
3442 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3443 {
3444         u32 tmp;
3445         int i;
3446
3447         tmp = RREG32(mmGRBM_CNTL);
3448         tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3449         WREG32(mmGRBM_CNTL, tmp);
3450
3451         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3452         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3453         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3454
3455         gfx_v8_0_tiling_mode_table_init(adev);
3456
3457         gfx_v8_0_setup_rb(adev);
3458         gfx_v8_0_get_cu_info(adev);
3459
3460         /* XXX SH_MEM regs */
3461         /* where to put LDS, scratch, GPUVM in FSA64 space */
3462         mutex_lock(&adev->srbm_mutex);
3463         for (i = 0; i < 16; i++) {
3464                 vi_srbm_select(adev, 0, 0, 0, i);
3465                 /* CP and shaders */
3466                 if (i == 0) {
3467                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3468                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3469                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3470                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3471                         WREG32(mmSH_MEM_CONFIG, tmp);
3472                 } else {
3473                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3474                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3475                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3476                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3477                         WREG32(mmSH_MEM_CONFIG, tmp);
3478                 }
3479
3480                 WREG32(mmSH_MEM_APE1_BASE, 1);
3481                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3482                 WREG32(mmSH_MEM_BASES, 0);
3483         }
3484         vi_srbm_select(adev, 0, 0, 0, 0);
3485         mutex_unlock(&adev->srbm_mutex);
3486
3487         gfx_v8_0_init_compute_vmid(adev);
3488
3489         mutex_lock(&adev->grbm_idx_mutex);
3490         /*
3491          * making sure that the following register writes will be broadcasted
3492          * to all the shaders
3493          */
3494         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3495
3496         WREG32(mmPA_SC_FIFO_SIZE,
3497                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3498                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3499                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3500                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3501                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3502                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3503                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3504                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3505         mutex_unlock(&adev->grbm_idx_mutex);
3506
3507 }
3508
3509 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3510 {
3511         u32 i, j, k;
3512         u32 mask;
3513
3514         mutex_lock(&adev->grbm_idx_mutex);
3515         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3516                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3517                         gfx_v8_0_select_se_sh(adev, i, j);
3518                         for (k = 0; k < adev->usec_timeout; k++) {
3519                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3520                                         break;
3521                                 udelay(1);
3522                         }
3523                 }
3524         }
3525         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3526         mutex_unlock(&adev->grbm_idx_mutex);
3527
3528         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3529                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3530                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3531                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3532         for (k = 0; k < adev->usec_timeout; k++) {
3533                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3534                         break;
3535                 udelay(1);
3536         }
3537 }
3538
3539 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3540                                                bool enable)
3541 {
3542         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3543
3544         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3545         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3546         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3547         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3548
3549         WREG32(mmCP_INT_CNTL_RING0, tmp);
3550 }
3551
3552 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3553 {
3554         /* csib */
3555         WREG32(mmRLC_CSIB_ADDR_HI,
3556                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3557         WREG32(mmRLC_CSIB_ADDR_LO,
3558                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3559         WREG32(mmRLC_CSIB_LENGTH,
3560                         adev->gfx.rlc.clear_state_size);
3561 }
3562
3563 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3564                                 int ind_offset,
3565                                 int list_size,
3566                                 int *unique_indices,
3567                                 int *indices_count,
3568                                 int max_indices,
3569                                 int *ind_start_offsets,
3570                                 int *offset_count,
3571                                 int max_offset)
3572 {
3573         int indices;
3574         bool new_entry = true;
3575
3576         for (; ind_offset < list_size; ind_offset++) {
3577
3578                 if (new_entry) {
3579                         new_entry = false;
3580                         ind_start_offsets[*offset_count] = ind_offset;
3581                         *offset_count = *offset_count + 1;
3582                         BUG_ON(*offset_count >= max_offset);
3583                 }
3584
3585                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3586                         new_entry = true;
3587                         continue;
3588                 }
3589
3590                 ind_offset += 2;
3591
3592                 /* look for the matching indice */
3593                 for (indices = 0;
3594                         indices < *indices_count;
3595                         indices++) {
3596                         if (unique_indices[indices] ==
3597                                 register_list_format[ind_offset])
3598                                 break;
3599                 }
3600
3601                 if (indices >= *indices_count) {
3602                         unique_indices[*indices_count] =
3603                                 register_list_format[ind_offset];
3604                         indices = *indices_count;
3605                         *indices_count = *indices_count + 1;
3606                         BUG_ON(*indices_count >= max_indices);
3607                 }
3608
3609                 register_list_format[ind_offset] = indices;
3610         }
3611 }
3612
3613 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3614 {
3615         int i, temp, data;
3616         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3617         int indices_count = 0;
3618         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3619         int offset_count = 0;
3620
3621         int list_size;
3622         unsigned int *register_list_format =
3623                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3624         if (register_list_format == NULL)
3625                 return -ENOMEM;
3626         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3627                         adev->gfx.rlc.reg_list_format_size_bytes);
3628
3629         gfx_v8_0_parse_ind_reg_list(register_list_format,
3630                                 RLC_FormatDirectRegListLength,
3631                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3632                                 unique_indices,
3633                                 &indices_count,
3634                                 sizeof(unique_indices) / sizeof(int),
3635                                 indirect_start_offsets,
3636                                 &offset_count,
3637                                 sizeof(indirect_start_offsets)/sizeof(int));
3638
3639         /* save and restore list */
3640         temp = RREG32(mmRLC_SRM_CNTL);
3641         temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
3642         WREG32(mmRLC_SRM_CNTL, temp);
3643
3644         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3645         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3646                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3647
3648         /* indirect list */
3649         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3650         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3651                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3652
3653         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3654         list_size = list_size >> 1;
3655         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3656         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3657
3658         /* starting offsets starts */
3659         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3660                 adev->gfx.rlc.starting_offsets_start);
3661         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3662                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3663                                 indirect_start_offsets[i]);
3664
3665         /* unique indices */
3666         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3667         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3668         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3669                 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3670                 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3671         }
3672         kfree(register_list_format);
3673
3674         return 0;
3675 }
3676
3677 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3678 {
3679         uint32_t data;
3680
3681         data = RREG32(mmRLC_SRM_CNTL);
3682         data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3683         WREG32(mmRLC_SRM_CNTL, data);
3684 }
3685
3686 static void polaris11_init_power_gating(struct amdgpu_device *adev)
3687 {
3688         uint32_t data;
3689
3690         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3691                         AMD_PG_SUPPORT_GFX_SMG |
3692                         AMD_PG_SUPPORT_GFX_DMG)) {
3693                 data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3694                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3695                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3696                 WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3697
3698                 data = 0;
3699                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
3700                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
3701                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
3702                 data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
3703                 WREG32(mmRLC_PG_DELAY, data);
3704
3705                 data = RREG32(mmRLC_PG_DELAY_2);
3706                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
3707                 data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
3708                 WREG32(mmRLC_PG_DELAY_2, data);
3709
3710                 data = RREG32(mmRLC_AUTO_PG_CTRL);
3711                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3712                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3713                 WREG32(mmRLC_AUTO_PG_CTRL, data);
3714         }
3715 }
3716
3717 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3718 {
3719         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3720                               AMD_PG_SUPPORT_GFX_SMG |
3721                               AMD_PG_SUPPORT_GFX_DMG |
3722                               AMD_PG_SUPPORT_CP |
3723                               AMD_PG_SUPPORT_GDS |
3724                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
3725                 gfx_v8_0_init_csb(adev);
3726                 gfx_v8_0_init_save_restore_list(adev);
3727                 gfx_v8_0_enable_save_restore_machine(adev);
3728
3729                 if (adev->asic_type == CHIP_POLARIS11)
3730                         polaris11_init_power_gating(adev);
3731         }
3732 }
3733
3734 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3735 {
3736         u32 tmp = RREG32(mmRLC_CNTL);
3737
3738         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3739         WREG32(mmRLC_CNTL, tmp);
3740
3741         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3742
3743         gfx_v8_0_wait_for_rlc_serdes(adev);
3744 }
3745
3746 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3747 {
3748         u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3749
3750         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3751         WREG32(mmGRBM_SOFT_RESET, tmp);
3752         udelay(50);
3753         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3754         WREG32(mmGRBM_SOFT_RESET, tmp);
3755         udelay(50);
3756 }
3757
3758 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3759 {
3760         u32 tmp = RREG32(mmRLC_CNTL);
3761
3762         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3763         WREG32(mmRLC_CNTL, tmp);
3764
3765         /* carrizo do enable cp interrupt after cp inited */
3766         if (!(adev->flags & AMD_IS_APU))
3767                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3768
3769         udelay(50);
3770 }
3771
3772 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3773 {
3774         const struct rlc_firmware_header_v2_0 *hdr;
3775         const __le32 *fw_data;
3776         unsigned i, fw_size;
3777
3778         if (!adev->gfx.rlc_fw)
3779                 return -EINVAL;
3780
3781         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3782         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3783
3784         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3785                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3786         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3787
3788         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3789         for (i = 0; i < fw_size; i++)
3790                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3791         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3792
3793         return 0;
3794 }
3795
3796 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3797 {
3798         int r;
3799
3800         gfx_v8_0_rlc_stop(adev);
3801
3802         /* disable CG */
3803         WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
3804         if (adev->asic_type == CHIP_POLARIS11 ||
3805                 adev->asic_type == CHIP_POLARIS10)
3806                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
3807
3808         /* disable PG */
3809         WREG32(mmRLC_PG_CNTL, 0);
3810
3811         gfx_v8_0_rlc_reset(adev);
3812
3813         gfx_v8_0_init_pg(adev);
3814
3815         if (!adev->pp_enabled) {
3816                 if (!adev->firmware.smu_load) {
3817                         /* legacy rlc firmware loading */
3818                         r = gfx_v8_0_rlc_load_microcode(adev);
3819                         if (r)
3820                                 return r;
3821                 } else {
3822                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3823                                                         AMDGPU_UCODE_ID_RLC_G);
3824                         if (r)
3825                                 return -EINVAL;
3826                 }
3827         }
3828
3829         gfx_v8_0_rlc_start(adev);
3830
3831         return 0;
3832 }
3833
3834 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3835 {
3836         int i;
3837         u32 tmp = RREG32(mmCP_ME_CNTL);
3838
3839         if (enable) {
3840                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
3841                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
3842                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
3843         } else {
3844                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
3845                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
3846                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
3847                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3848                         adev->gfx.gfx_ring[i].ready = false;
3849         }
3850         WREG32(mmCP_ME_CNTL, tmp);
3851         udelay(50);
3852 }
3853
3854 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3855 {
3856         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3857         const struct gfx_firmware_header_v1_0 *ce_hdr;
3858         const struct gfx_firmware_header_v1_0 *me_hdr;
3859         const __le32 *fw_data;
3860         unsigned i, fw_size;
3861
3862         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3863                 return -EINVAL;
3864
3865         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3866                 adev->gfx.pfp_fw->data;
3867         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3868                 adev->gfx.ce_fw->data;
3869         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3870                 adev->gfx.me_fw->data;
3871
3872         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3873         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3874         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3875
3876         gfx_v8_0_cp_gfx_enable(adev, false);
3877
3878         /* PFP */
3879         fw_data = (const __le32 *)
3880                 (adev->gfx.pfp_fw->data +
3881                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3882         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3883         WREG32(mmCP_PFP_UCODE_ADDR, 0);
3884         for (i = 0; i < fw_size; i++)
3885                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3886         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3887
3888         /* CE */
3889         fw_data = (const __le32 *)
3890                 (adev->gfx.ce_fw->data +
3891                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3892         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3893         WREG32(mmCP_CE_UCODE_ADDR, 0);
3894         for (i = 0; i < fw_size; i++)
3895                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3896         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3897
3898         /* ME */
3899         fw_data = (const __le32 *)
3900                 (adev->gfx.me_fw->data +
3901                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3902         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3903         WREG32(mmCP_ME_RAM_WADDR, 0);
3904         for (i = 0; i < fw_size; i++)
3905                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3906         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3907
3908         return 0;
3909 }
3910
3911 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3912 {
3913         u32 count = 0;
3914         const struct cs_section_def *sect = NULL;
3915         const struct cs_extent_def *ext = NULL;
3916
3917         /* begin clear state */
3918         count += 2;
3919         /* context control state */
3920         count += 3;
3921
3922         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3923                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3924                         if (sect->id == SECT_CONTEXT)
3925                                 count += 2 + ext->reg_count;
3926                         else
3927                                 return 0;
3928                 }
3929         }
3930         /* pa_sc_raster_config/pa_sc_raster_config1 */
3931         count += 4;
3932         /* end clear state */
3933         count += 2;
3934         /* clear state */
3935         count += 2;
3936
3937         return count;
3938 }
3939
3940 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
3941 {
3942         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3943         const struct cs_section_def *sect = NULL;
3944         const struct cs_extent_def *ext = NULL;
3945         int r, i;
3946
3947         /* init the CP */
3948         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3949         WREG32(mmCP_ENDIAN_SWAP, 0);
3950         WREG32(mmCP_DEVICE_ID, 1);
3951
3952         gfx_v8_0_cp_gfx_enable(adev, true);
3953
3954         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
3955         if (r) {
3956                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3957                 return r;
3958         }
3959
3960         /* clear state buffer */
3961         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3962         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3963
3964         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3965         amdgpu_ring_write(ring, 0x80000000);
3966         amdgpu_ring_write(ring, 0x80000000);
3967
3968         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3969                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3970                         if (sect->id == SECT_CONTEXT) {
3971                                 amdgpu_ring_write(ring,
3972                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3973                                                ext->reg_count));
3974                                 amdgpu_ring_write(ring,
3975                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3976                                 for (i = 0; i < ext->reg_count; i++)
3977                                         amdgpu_ring_write(ring, ext->extent[i]);
3978                         }
3979                 }
3980         }
3981
3982         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3983         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3984         switch (adev->asic_type) {
3985         case CHIP_TONGA:
3986         case CHIP_POLARIS10:
3987                 amdgpu_ring_write(ring, 0x16000012);
3988                 amdgpu_ring_write(ring, 0x0000002A);
3989                 break;
3990         case CHIP_POLARIS11:
3991                 amdgpu_ring_write(ring, 0x16000012);
3992                 amdgpu_ring_write(ring, 0x00000000);
3993                 break;
3994         case CHIP_FIJI:
3995                 amdgpu_ring_write(ring, 0x3a00161a);
3996                 amdgpu_ring_write(ring, 0x0000002e);
3997                 break;
3998         case CHIP_CARRIZO:
3999                 amdgpu_ring_write(ring, 0x00000002);
4000                 amdgpu_ring_write(ring, 0x00000000);
4001                 break;
4002         case CHIP_TOPAZ:
4003                 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4004                                 0x00000000 : 0x00000002);
4005                 amdgpu_ring_write(ring, 0x00000000);
4006                 break;
4007         case CHIP_STONEY:
4008                 amdgpu_ring_write(ring, 0x00000000);
4009                 amdgpu_ring_write(ring, 0x00000000);
4010                 break;
4011         default:
4012                 BUG();
4013         }
4014
4015         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4016         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4017
4018         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4019         amdgpu_ring_write(ring, 0);
4020
4021         /* init the CE partitions */
4022         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4023         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4024         amdgpu_ring_write(ring, 0x8000);
4025         amdgpu_ring_write(ring, 0x8000);
4026
4027         amdgpu_ring_commit(ring);
4028
4029         return 0;
4030 }
4031
4032 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4033 {
4034         struct amdgpu_ring *ring;
4035         u32 tmp;
4036         u32 rb_bufsz;
4037         u64 rb_addr, rptr_addr;
4038         int r;
4039
4040         /* Set the write pointer delay */
4041         WREG32(mmCP_RB_WPTR_DELAY, 0);
4042
4043         /* set the RB to use vmid 0 */
4044         WREG32(mmCP_RB_VMID, 0);
4045
4046         /* Set ring buffer size */
4047         ring = &adev->gfx.gfx_ring[0];
4048         rb_bufsz = order_base_2(ring->ring_size / 8);
4049         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4050         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4051         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4052         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4053 #ifdef __BIG_ENDIAN
4054         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4055 #endif
4056         WREG32(mmCP_RB0_CNTL, tmp);
4057
4058         /* Initialize the ring buffer's read and write pointers */
4059         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4060         ring->wptr = 0;
4061         WREG32(mmCP_RB0_WPTR, ring->wptr);
4062
4063         /* set the wb address wether it's enabled or not */
4064         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4065         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4066         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4067
4068         mdelay(1);
4069         WREG32(mmCP_RB0_CNTL, tmp);
4070
4071         rb_addr = ring->gpu_addr >> 8;
4072         WREG32(mmCP_RB0_BASE, rb_addr);
4073         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4074
4075         /* no gfx doorbells on iceland */
4076         if (adev->asic_type != CHIP_TOPAZ) {
4077                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4078                 if (ring->use_doorbell) {
4079                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4080                                             DOORBELL_OFFSET, ring->doorbell_index);
4081                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4082                                             DOORBELL_HIT, 0);
4083                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4084                                             DOORBELL_EN, 1);
4085                 } else {
4086                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4087                                             DOORBELL_EN, 0);
4088                 }
4089                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4090
4091                 if (adev->asic_type == CHIP_TONGA) {
4092                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4093                                             DOORBELL_RANGE_LOWER,
4094                                             AMDGPU_DOORBELL_GFX_RING0);
4095                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4096
4097                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4098                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4099                 }
4100
4101         }
4102
4103         /* start the ring */
4104         gfx_v8_0_cp_gfx_start(adev);
4105         ring->ready = true;
4106         r = amdgpu_ring_test_ring(ring);
4107         if (r) {
4108                 ring->ready = false;
4109                 return r;
4110         }
4111
4112         return 0;
4113 }
4114
4115 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4116 {
4117         int i;
4118
4119         if (enable) {
4120                 WREG32(mmCP_MEC_CNTL, 0);
4121         } else {
4122                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4123                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4124                         adev->gfx.compute_ring[i].ready = false;
4125         }
4126         udelay(50);
4127 }
4128
4129 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4130 {
4131         const struct gfx_firmware_header_v1_0 *mec_hdr;
4132         const __le32 *fw_data;
4133         unsigned i, fw_size;
4134
4135         if (!adev->gfx.mec_fw)
4136                 return -EINVAL;
4137
4138         gfx_v8_0_cp_compute_enable(adev, false);
4139
4140         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4141         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4142
4143         fw_data = (const __le32 *)
4144                 (adev->gfx.mec_fw->data +
4145                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4146         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4147
4148         /* MEC1 */
4149         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4150         for (i = 0; i < fw_size; i++)
4151                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4152         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4153
4154         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4155         if (adev->gfx.mec2_fw) {
4156                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4157
4158                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4159                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4160
4161                 fw_data = (const __le32 *)
4162                         (adev->gfx.mec2_fw->data +
4163                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4164                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4165
4166                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4167                 for (i = 0; i < fw_size; i++)
4168                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4169                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4170         }
4171
4172         return 0;
4173 }
4174
/*
 * struct vi_mqd - in-memory descriptor (MQD) for one compute queue.
 *
 * Every named member is a single dword; the "ordinalN" comment on each
 * member is its dword index within the descriptor (0..255).  A further
 * 256 dwords follow at the end for use by the microcode.
 *
 * gfx_v8_0_cp_compute_resume() allocates one buffer object of
 * sizeof(struct vi_mqd) per compute ring, zeroes it, and stores in the
 * cp_hqd_* / cp_mqd_* members values it also programs into the matching
 * CP_HQD_* / CP_MQD_* registers.
 *
 * NOTE(review): the layout is presumably dictated by the CP microcode, so
 * members should not be reordered, resized or removed - confirm against
 * the hardware documentation before changing anything here.
 */
4175 struct vi_mqd {
4176         uint32_t header;  /* ordinal0 */
4177         uint32_t compute_dispatch_initiator;  /* ordinal1 */
4178         uint32_t compute_dim_x;  /* ordinal2 */
4179         uint32_t compute_dim_y;  /* ordinal3 */
4180         uint32_t compute_dim_z;  /* ordinal4 */
4181         uint32_t compute_start_x;  /* ordinal5 */
4182         uint32_t compute_start_y;  /* ordinal6 */
4183         uint32_t compute_start_z;  /* ordinal7 */
4184         uint32_t compute_num_thread_x;  /* ordinal8 */
4185         uint32_t compute_num_thread_y;  /* ordinal9 */
4186         uint32_t compute_num_thread_z;  /* ordinal10 */
4187         uint32_t compute_pipelinestat_enable;  /* ordinal11 */
4188         uint32_t compute_perfcount_enable;  /* ordinal12 */
4189         uint32_t compute_pgm_lo;  /* ordinal13 */
4190         uint32_t compute_pgm_hi;  /* ordinal14 */
4191         uint32_t compute_tba_lo;  /* ordinal15 */
4192         uint32_t compute_tba_hi;  /* ordinal16 */
4193         uint32_t compute_tma_lo;  /* ordinal17 */
4194         uint32_t compute_tma_hi;  /* ordinal18 */
4195         uint32_t compute_pgm_rsrc1;  /* ordinal19 */
4196         uint32_t compute_pgm_rsrc2;  /* ordinal20 */
4197         uint32_t compute_vmid;  /* ordinal21 */
4198         uint32_t compute_resource_limits;  /* ordinal22 */
4199         uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
4200         uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
4201         uint32_t compute_tmpring_size;  /* ordinal25 */
4202         uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
4203         uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
4204         uint32_t compute_restart_x;  /* ordinal28 */
4205         uint32_t compute_restart_y;  /* ordinal29 */
4206         uint32_t compute_restart_z;  /* ordinal30 */
4207         uint32_t compute_thread_trace_enable;  /* ordinal31 */
4208         uint32_t compute_misc_reserved;  /* ordinal32 */
4209         uint32_t compute_dispatch_id;  /* ordinal33 */
4210         uint32_t compute_threadgroup_id;  /* ordinal34 */
4211         uint32_t compute_relaunch;  /* ordinal35 */
4212         uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
4213         uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
4214         uint32_t compute_wave_restore_control;  /* ordinal38 */
4215         uint32_t reserved9;  /* ordinal39 */
4216         uint32_t reserved10;  /* ordinal40 */
4217         uint32_t reserved11;  /* ordinal41 */
4218         uint32_t reserved12;  /* ordinal42 */
4219         uint32_t reserved13;  /* ordinal43 */
4220         uint32_t reserved14;  /* ordinal44 */
4221         uint32_t reserved15;  /* ordinal45 */
4222         uint32_t reserved16;  /* ordinal46 */
4223         uint32_t reserved17;  /* ordinal47 */
4224         uint32_t reserved18;  /* ordinal48 */
4225         uint32_t reserved19;  /* ordinal49 */
4226         uint32_t reserved20;  /* ordinal50 */
4227         uint32_t reserved21;  /* ordinal51 */
4228         uint32_t reserved22;  /* ordinal52 */
4229         uint32_t reserved23;  /* ordinal53 */
4230         uint32_t reserved24;  /* ordinal54 */
4231         uint32_t reserved25;  /* ordinal55 */
4232         uint32_t reserved26;  /* ordinal56 */
4233         uint32_t reserved27;  /* ordinal57 */
4234         uint32_t reserved28;  /* ordinal58 */
4235         uint32_t reserved29;  /* ordinal59 */
4236         uint32_t reserved30;  /* ordinal60 */
4237         uint32_t reserved31;  /* ordinal61 */
4238         uint32_t reserved32;  /* ordinal62 */
4239         uint32_t reserved33;  /* ordinal63 */
4240         uint32_t reserved34;  /* ordinal64 */
4241         uint32_t compute_user_data_0;  /* ordinal65 */
4242         uint32_t compute_user_data_1;  /* ordinal66 */
4243         uint32_t compute_user_data_2;  /* ordinal67 */
4244         uint32_t compute_user_data_3;  /* ordinal68 */
4245         uint32_t compute_user_data_4;  /* ordinal69 */
4246         uint32_t compute_user_data_5;  /* ordinal70 */
4247         uint32_t compute_user_data_6;  /* ordinal71 */
4248         uint32_t compute_user_data_7;  /* ordinal72 */
4249         uint32_t compute_user_data_8;  /* ordinal73 */
4250         uint32_t compute_user_data_9;  /* ordinal74 */
4251         uint32_t compute_user_data_10;  /* ordinal75 */
4252         uint32_t compute_user_data_11;  /* ordinal76 */
4253         uint32_t compute_user_data_12;  /* ordinal77 */
4254         uint32_t compute_user_data_13;  /* ordinal78 */
4255         uint32_t compute_user_data_14;  /* ordinal79 */
4256         uint32_t compute_user_data_15;  /* ordinal80 */
4257         uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
4258         uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
4259         uint32_t reserved35;  /* ordinal83 */
4260         uint32_t reserved36;  /* ordinal84 */
4261         uint32_t reserved37;  /* ordinal85 */
4262         uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
4263         uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
4264         uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
4265         uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
4266         uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
4267         uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
4268         uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
4269         uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
4270         uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
4271         uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
4272         uint32_t reserved38;  /* ordinal96 */
4273         uint32_t reserved39;  /* ordinal97 */
4274         uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
4275         uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
4276         uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
4277         uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
4278         uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
4279         uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
4280         uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
4281         uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
4282         uint32_t reserved40;  /* ordinal106 */
4283         uint32_t reserved41;  /* ordinal107 */
4284         uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
4285         uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
4286         uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
4287         uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
4288         uint32_t reserved42;  /* ordinal112 */
4289         uint32_t reserved43;  /* ordinal113 */
4290         uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
4291         uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
4292         uint32_t cp_packet_id_lo;  /* ordinal116 */
4293         uint32_t cp_packet_id_hi;  /* ordinal117 */
4294         uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
4295         uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
4296         uint32_t gds_save_base_addr_lo;  /* ordinal120 */
4297         uint32_t gds_save_base_addr_hi;  /* ordinal121 */
4298         uint32_t gds_save_mask_lo;  /* ordinal122 */
4299         uint32_t gds_save_mask_hi;  /* ordinal123 */
4300         uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
4301         uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
4302         uint32_t reserved44;  /* ordinal126 */
4303         uint32_t reserved45;  /* ordinal127 */
4304         uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
4305         uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
4306         uint32_t cp_hqd_active;  /* ordinal130 */
4307         uint32_t cp_hqd_vmid;  /* ordinal131 */
4308         uint32_t cp_hqd_persistent_state;  /* ordinal132 */
4309         uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
4310         uint32_t cp_hqd_queue_priority;  /* ordinal134 */
4311         uint32_t cp_hqd_quantum;  /* ordinal135 */
4312         uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
4313         uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
4314         uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
4315         uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
4316         uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
4317         uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
4318         uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
4319         uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
4320         uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
4321         uint32_t cp_hqd_pq_control;  /* ordinal145 */
4322         uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
4323         uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
4324         uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
4325         uint32_t cp_hqd_ib_control;  /* ordinal149 */
4326         uint32_t cp_hqd_iq_timer;  /* ordinal150 */
4327         uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
4328         uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
4329         uint32_t cp_hqd_dma_offload;  /* ordinal153 */
4330         uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
4331         uint32_t cp_hqd_msg_type;  /* ordinal155 */
4332         uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
4333         uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
4334         uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
4335         uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
4336         uint32_t cp_hqd_hq_status0;  /* ordinal160 */
4337         uint32_t cp_hqd_hq_control0;  /* ordinal161 */
4338         uint32_t cp_mqd_control;  /* ordinal162 */
4339         uint32_t cp_hqd_hq_status1;  /* ordinal163 */
4340         uint32_t cp_hqd_hq_control1;  /* ordinal164 */
4341         uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
4342         uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
4343         uint32_t cp_hqd_eop_control;  /* ordinal167 */
4344         uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
4345         uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
4346         uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
4347         uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
4348         uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
4349         uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
4350         uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
4351         uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
4352         uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
4353         uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
4354         uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
4355         uint32_t cp_hqd_error;  /* ordinal179 */
4356         uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
4357         uint32_t cp_hqd_eop_dones;  /* ordinal181 */
4358         uint32_t reserved46;  /* ordinal182 */
4359         uint32_t reserved47;  /* ordinal183 */
4360         uint32_t reserved48;  /* ordinal184 */
4361         uint32_t reserved49;  /* ordinal185 */
4362         uint32_t reserved50;  /* ordinal186 */
4363         uint32_t reserved51;  /* ordinal187 */
4364         uint32_t reserved52;  /* ordinal188 */
4365         uint32_t reserved53;  /* ordinal189 */
4366         uint32_t reserved54;  /* ordinal190 */
4367         uint32_t reserved55;  /* ordinal191 */
4368         uint32_t iqtimer_pkt_header;  /* ordinal192 */
4369         uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
4370         uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
4371         uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
4372         uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
4373         uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
4374         uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
4375         uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
4376         uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
4377         uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
4378         uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
4379         uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
4380         uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
4381         uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
4382         uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
4383         uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
4384         uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
4385         uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
4386         uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
4387         uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
4388         uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
4389         uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
4390         uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
4391         uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
4392         uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
4393         uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
4394         uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
4395         uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
4396         uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
4397         uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
4398         uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
4399         uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
4400         uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
4401         uint32_t reserved56;  /* ordinal225 */
4402         uint32_t reserved57;  /* ordinal226 */
4403         uint32_t reserved58;  /* ordinal227 */
4404         uint32_t set_resources_header;  /* ordinal228 */
4405         uint32_t set_resources_dw1;  /* ordinal229 */
4406         uint32_t set_resources_dw2;  /* ordinal230 */
4407         uint32_t set_resources_dw3;  /* ordinal231 */
4408         uint32_t set_resources_dw4;  /* ordinal232 */
4409         uint32_t set_resources_dw5;  /* ordinal233 */
4410         uint32_t set_resources_dw6;  /* ordinal234 */
4411         uint32_t set_resources_dw7;  /* ordinal235 */
4412         uint32_t reserved59;  /* ordinal236 */
4413         uint32_t reserved60;  /* ordinal237 */
4414         uint32_t reserved61;  /* ordinal238 */
4415         uint32_t reserved62;  /* ordinal239 */
4416         uint32_t reserved63;  /* ordinal240 */
4417         uint32_t reserved64;  /* ordinal241 */
4418         uint32_t reserved65;  /* ordinal242 */
4419         uint32_t reserved66;  /* ordinal243 */
4420         uint32_t reserved67;  /* ordinal244 */
4421         uint32_t reserved68;  /* ordinal245 */
4422         uint32_t reserved69;  /* ordinal246 */
4423         uint32_t reserved70;  /* ordinal247 */
4424         uint32_t reserved71;  /* ordinal248 */
4425         uint32_t reserved72;  /* ordinal249 */
4426         uint32_t reserved73;  /* ordinal250 */
4427         uint32_t reserved74;  /* ordinal251 */
4428         uint32_t reserved75;  /* ordinal252 */
4429         uint32_t reserved76;  /* ordinal253 */
4430         uint32_t reserved77;  /* ordinal254 */
4431         uint32_t reserved78;  /* ordinal255 */
4432
4433         uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4434 };
4435
4436 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4437 {
4438         int i, r;
4439
4440         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4441                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4442
4443                 if (ring->mqd_obj) {
4444                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4445                         if (unlikely(r != 0))
4446                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4447
4448                         amdgpu_bo_unpin(ring->mqd_obj);
4449                         amdgpu_bo_unreserve(ring->mqd_obj);
4450
4451                         amdgpu_bo_unref(&ring->mqd_obj);
4452                         ring->mqd_obj = NULL;
4453                 }
4454         }
4455 }
4456
4457 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4458 {
4459         int r, i, j;
4460         u32 tmp;
4461         bool use_doorbell = true;
4462         u64 hqd_gpu_addr;
4463         u64 mqd_gpu_addr;
4464         u64 eop_gpu_addr;
4465         u64 wb_gpu_addr;
4466         u32 *buf;
4467         struct vi_mqd *mqd;
4468
4469         /* init the pipes */
4470         mutex_lock(&adev->srbm_mutex);
4471         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4472                 int me = (i < 4) ? 1 : 2;
4473                 int pipe = (i < 4) ? i : (i - 4);
4474
4475                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4476                 eop_gpu_addr >>= 8;
4477
4478                 vi_srbm_select(adev, me, pipe, 0, 0);
4479
4480                 /* write the EOP addr */
4481                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4482                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4483
4484                 /* set the VMID assigned */
4485                 WREG32(mmCP_HQD_VMID, 0);
4486
4487                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4488                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4489                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4490                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
4491                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4492         }
4493         vi_srbm_select(adev, 0, 0, 0, 0);
4494         mutex_unlock(&adev->srbm_mutex);
4495
4496         /* init the queues.  Just two for now. */
4497         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4498                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4499
4500                 if (ring->mqd_obj == NULL) {
4501                         r = amdgpu_bo_create(adev,
4502                                              sizeof(struct vi_mqd),
4503                                              PAGE_SIZE, true,
4504                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4505                                              NULL, &ring->mqd_obj);
4506                         if (r) {
4507                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4508                                 return r;
4509                         }
4510                 }
4511
4512                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4513                 if (unlikely(r != 0)) {
4514                         gfx_v8_0_cp_compute_fini(adev);
4515                         return r;
4516                 }
4517                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4518                                   &mqd_gpu_addr);
4519                 if (r) {
4520                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4521                         gfx_v8_0_cp_compute_fini(adev);
4522                         return r;
4523                 }
4524                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4525                 if (r) {
4526                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4527                         gfx_v8_0_cp_compute_fini(adev);
4528                         return r;
4529                 }
4530
4531                 /* init the mqd struct */
4532                 memset(buf, 0, sizeof(struct vi_mqd));
4533
4534                 mqd = (struct vi_mqd *)buf;
4535                 mqd->header = 0xC0310800;
4536                 mqd->compute_pipelinestat_enable = 0x00000001;
4537                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4538                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4539                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4540                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4541                 mqd->compute_misc_reserved = 0x00000003;
4542
4543                 mutex_lock(&adev->srbm_mutex);
4544                 vi_srbm_select(adev, ring->me,
4545                                ring->pipe,
4546                                ring->queue, 0);
4547
4548                 /* disable wptr polling */
4549                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4550                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4551                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4552
4553                 mqd->cp_hqd_eop_base_addr_lo =
4554                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
4555                 mqd->cp_hqd_eop_base_addr_hi =
4556                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4557
4558                 /* enable doorbell? */
4559                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4560                 if (use_doorbell) {
4561                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4562                 } else {
4563                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4564                 }
4565                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4566                 mqd->cp_hqd_pq_doorbell_control = tmp;
4567
4568                 /* disable the queue if it's active */
4569                 mqd->cp_hqd_dequeue_request = 0;
4570                 mqd->cp_hqd_pq_rptr = 0;
4571                 mqd->cp_hqd_pq_wptr= 0;
4572                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4573                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4574                         for (j = 0; j < adev->usec_timeout; j++) {
4575                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4576                                         break;
4577                                 udelay(1);
4578                         }
4579                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4580                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4581                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4582                 }
4583
4584                 /* set the pointer to the MQD */
4585                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4586                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4587                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4588                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4589
4590                 /* set MQD vmid to 0 */
4591                 tmp = RREG32(mmCP_MQD_CONTROL);
4592                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4593                 WREG32(mmCP_MQD_CONTROL, tmp);
4594                 mqd->cp_mqd_control = tmp;
4595
4596                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4597                 hqd_gpu_addr = ring->gpu_addr >> 8;
4598                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4599                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4600                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4601                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4602
4603                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4604                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4605                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4606                                     (order_base_2(ring->ring_size / 4) - 1));
4607                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4608                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4609 #ifdef __BIG_ENDIAN
4610                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4611 #endif
4612                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4613                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4614                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4615                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4616                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4617                 mqd->cp_hqd_pq_control = tmp;
4618
4619                 /* set the wb address wether it's enabled or not */
4620                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4621                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4622                 mqd->cp_hqd_pq_rptr_report_addr_hi =
4623                         upper_32_bits(wb_gpu_addr) & 0xffff;
4624                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4625                        mqd->cp_hqd_pq_rptr_report_addr_lo);
4626                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4627                        mqd->cp_hqd_pq_rptr_report_addr_hi);
4628
4629                 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4630                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4631                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4632                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4633                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4634                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4635                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
4636
4637                 /* enable the doorbell if requested */
4638                 if (use_doorbell) {
4639                         if ((adev->asic_type == CHIP_CARRIZO) ||
4640                             (adev->asic_type == CHIP_FIJI) ||
4641                             (adev->asic_type == CHIP_STONEY) ||
4642                             (adev->asic_type == CHIP_POLARIS11) ||
4643                             (adev->asic_type == CHIP_POLARIS10)) {
4644                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4645                                        AMDGPU_DOORBELL_KIQ << 2);
4646                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4647                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
4648                         }
4649                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4650                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4651                                             DOORBELL_OFFSET, ring->doorbell_index);
4652                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4653                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4654                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4655                         mqd->cp_hqd_pq_doorbell_control = tmp;
4656
4657                 } else {
4658                         mqd->cp_hqd_pq_doorbell_control = 0;
4659                 }
4660                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4661                        mqd->cp_hqd_pq_doorbell_control);
4662
4663                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4664                 ring->wptr = 0;
4665                 mqd->cp_hqd_pq_wptr = ring->wptr;
4666                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4667                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4668
4669                 /* set the vmid for the queue */
4670                 mqd->cp_hqd_vmid = 0;
4671                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4672
4673                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4674                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4675                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4676                 mqd->cp_hqd_persistent_state = tmp;
4677                 if (adev->asic_type == CHIP_STONEY ||
4678                         adev->asic_type == CHIP_POLARIS11 ||
4679                         adev->asic_type == CHIP_POLARIS10) {
4680                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4681                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4682                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4683                 }
4684
4685                 /* activate the queue */
4686                 mqd->cp_hqd_active = 1;
4687                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4688
4689                 vi_srbm_select(adev, 0, 0, 0, 0);
4690                 mutex_unlock(&adev->srbm_mutex);
4691
4692                 amdgpu_bo_kunmap(ring->mqd_obj);
4693                 amdgpu_bo_unreserve(ring->mqd_obj);
4694         }
4695
4696         if (use_doorbell) {
4697                 tmp = RREG32(mmCP_PQ_STATUS);
4698                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4699                 WREG32(mmCP_PQ_STATUS, tmp);
4700         }
4701
4702         gfx_v8_0_cp_compute_enable(adev, true);
4703
4704         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4705                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4706
4707                 ring->ready = true;
4708                 r = amdgpu_ring_test_ring(ring);
4709                 if (r)
4710                         ring->ready = false;
4711         }
4712
4713         return 0;
4714 }
4715
4716 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4717 {
4718         int r;
4719
4720         if (!(adev->flags & AMD_IS_APU))
4721                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4722
4723         if (!adev->pp_enabled) {
4724                 if (!adev->firmware.smu_load) {
4725                         /* legacy firmware loading */
4726                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
4727                         if (r)
4728                                 return r;
4729
4730                         r = gfx_v8_0_cp_compute_load_microcode(adev);
4731                         if (r)
4732                                 return r;
4733                 } else {
4734                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4735                                                         AMDGPU_UCODE_ID_CP_CE);
4736                         if (r)
4737                                 return -EINVAL;
4738
4739                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4740                                                         AMDGPU_UCODE_ID_CP_PFP);
4741                         if (r)
4742                                 return -EINVAL;
4743
4744                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4745                                                         AMDGPU_UCODE_ID_CP_ME);
4746                         if (r)
4747                                 return -EINVAL;
4748
4749                         if (adev->asic_type == CHIP_TOPAZ) {
4750                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4751                                 if (r)
4752                                         return r;
4753                         } else {
4754                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4755                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
4756                                 if (r)
4757                                         return -EINVAL;
4758                         }
4759                 }
4760         }
4761
4762         r = gfx_v8_0_cp_gfx_resume(adev);
4763         if (r)
4764                 return r;
4765
4766         r = gfx_v8_0_cp_compute_resume(adev);
4767         if (r)
4768                 return r;
4769
4770         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4771
4772         return 0;
4773 }
4774
4775 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4776 {
4777         gfx_v8_0_cp_gfx_enable(adev, enable);
4778         gfx_v8_0_cp_compute_enable(adev, enable);
4779 }
4780
/*
 * Hardware init for the GFX block: program golden registers, run the
 * generic GPU init, then resume the RLC followed by the CP.
 *
 * @handle: amdgpu_device pointer passed as void *
 *
 * Returns 0 on success or the first error from the RLC/CP resume.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int ret;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* the CP is only resumed once the RLC came up cleanly */
	ret = gfx_v8_0_rlc_resume(adev);
	if (!ret)
		ret = gfx_v8_0_cp_resume(adev);

	return ret;
}
4800
4801 static int gfx_v8_0_hw_fini(void *handle)
4802 {
4803         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4804
4805         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4806         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4807         gfx_v8_0_cp_enable(adev, false);
4808         gfx_v8_0_rlc_stop(adev);
4809         gfx_v8_0_cp_compute_fini(adev);
4810
4811         amdgpu_set_powergating_state(adev,
4812                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
4813
4814         return 0;
4815 }
4816
/* Suspend is a plain hardware teardown; see gfx_v8_0_hw_fini(). */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = handle;

	return gfx_v8_0_hw_fini(adev);
}
4823
/* Resume is a plain hardware init; see gfx_v8_0_hw_init(). */
static int gfx_v8_0_resume(void *handle)
{
	struct amdgpu_device *adev = handle;

	return gfx_v8_0_hw_init(adev);
}
4830
4831 static bool gfx_v8_0_is_idle(void *handle)
4832 {
4833         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4834
4835         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4836                 return false;
4837         else
4838                 return true;
4839 }
4840
4841 static int gfx_v8_0_wait_for_idle(void *handle)
4842 {
4843         unsigned i;
4844         u32 tmp;
4845         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4846
4847         for (i = 0; i < adev->usec_timeout; i++) {
4848                 /* read MC_STATUS */
4849                 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4850
4851                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4852                         return 0;
4853                 udelay(1);
4854         }
4855         return -ETIMEDOUT;
4856 }
4857
4858 static int gfx_v8_0_soft_reset(void *handle)
4859 {
4860         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4861         u32 tmp;
4862         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4863
4864         /* GRBM_STATUS */
4865         tmp = RREG32(mmGRBM_STATUS);
4866         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4867                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4868                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4869                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4870                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4871                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4872                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4873                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4874                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4875                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4876         }
4877
4878         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4879                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4880                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4881                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4882                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4883         }
4884
4885         /* GRBM_STATUS2 */
4886         tmp = RREG32(mmGRBM_STATUS2);
4887         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4888                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4889                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4890
4891         /* SRBM_STATUS */
4892         tmp = RREG32(mmSRBM_STATUS);
4893         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4894                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4895                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4896
4897         if (grbm_soft_reset || srbm_soft_reset) {
4898                 /* stop the rlc */
4899                 gfx_v8_0_rlc_stop(adev);
4900
4901                 /* Disable GFX parsing/prefetching */
4902                 gfx_v8_0_cp_gfx_enable(adev, false);
4903
4904                 /* Disable MEC parsing/prefetching */
4905                 gfx_v8_0_cp_compute_enable(adev, false);
4906
4907                 if (grbm_soft_reset || srbm_soft_reset) {
4908                         tmp = RREG32(mmGMCON_DEBUG);
4909                         tmp = REG_SET_FIELD(tmp,
4910                                             GMCON_DEBUG, GFX_STALL, 1);
4911                         tmp = REG_SET_FIELD(tmp,
4912                                             GMCON_DEBUG, GFX_CLEAR, 1);
4913                         WREG32(mmGMCON_DEBUG, tmp);
4914
4915                         udelay(50);
4916                 }
4917
4918                 if (grbm_soft_reset) {
4919                         tmp = RREG32(mmGRBM_SOFT_RESET);
4920                         tmp |= grbm_soft_reset;
4921                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4922                         WREG32(mmGRBM_SOFT_RESET, tmp);
4923                         tmp = RREG32(mmGRBM_SOFT_RESET);
4924
4925                         udelay(50);
4926
4927                         tmp &= ~grbm_soft_reset;
4928                         WREG32(mmGRBM_SOFT_RESET, tmp);
4929                         tmp = RREG32(mmGRBM_SOFT_RESET);
4930                 }
4931
4932                 if (srbm_soft_reset) {
4933                         tmp = RREG32(mmSRBM_SOFT_RESET);
4934                         tmp |= srbm_soft_reset;
4935                         dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4936                         WREG32(mmSRBM_SOFT_RESET, tmp);
4937                         tmp = RREG32(mmSRBM_SOFT_RESET);
4938
4939                         udelay(50);
4940
4941                         tmp &= ~srbm_soft_reset;
4942                         WREG32(mmSRBM_SOFT_RESET, tmp);
4943                         tmp = RREG32(mmSRBM_SOFT_RESET);
4944                 }
4945
4946                 if (grbm_soft_reset || srbm_soft_reset) {
4947                         tmp = RREG32(mmGMCON_DEBUG);
4948                         tmp = REG_SET_FIELD(tmp,
4949                                             GMCON_DEBUG, GFX_STALL, 0);
4950                         tmp = REG_SET_FIELD(tmp,
4951                                             GMCON_DEBUG, GFX_CLEAR, 0);
4952                         WREG32(mmGMCON_DEBUG, tmp);
4953                 }
4954
4955                 /* Wait a little for things to settle down */
4956                 udelay(50);
4957         }
4958         return 0;
4959 }
4960
4961 /**
4962  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4963  *
4964  * @adev: amdgpu_device pointer
4965  *
4966  * Fetches a GPU clock counter snapshot.
4967  * Returns the 64 bit clock counter snapshot.
4968  */
4969 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4970 {
4971         uint64_t clock;
4972
4973         mutex_lock(&adev->gfx.gpu_clock_mutex);
4974         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4975         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4976                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4977         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4978         return clock;
4979 }
4980
4981 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4982                                           uint32_t vmid,
4983                                           uint32_t gds_base, uint32_t gds_size,
4984                                           uint32_t gws_base, uint32_t gws_size,
4985                                           uint32_t oa_base, uint32_t oa_size)
4986 {
4987         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4988         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4989
4990         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4991         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4992
4993         oa_base = oa_base >> AMDGPU_OA_SHIFT;
4994         oa_size = oa_size >> AMDGPU_OA_SHIFT;
4995
4996         /* GDS Base */
4997         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4998         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4999                                 WRITE_DATA_DST_SEL(0)));
5000         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5001         amdgpu_ring_write(ring, 0);
5002         amdgpu_ring_write(ring, gds_base);
5003
5004         /* GDS Size */
5005         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5006         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5007                                 WRITE_DATA_DST_SEL(0)));
5008         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5009         amdgpu_ring_write(ring, 0);
5010         amdgpu_ring_write(ring, gds_size);
5011
5012         /* GWS */
5013         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5014         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5015                                 WRITE_DATA_DST_SEL(0)));
5016         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5017         amdgpu_ring_write(ring, 0);
5018         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5019
5020         /* OA */
5021         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5022         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5023                                 WRITE_DATA_DST_SEL(0)));
5024         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5025         amdgpu_ring_write(ring, 0);
5026         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5027 }
5028
5029 static int gfx_v8_0_early_init(void *handle)
5030 {
5031         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5032
5033         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5034         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5035         gfx_v8_0_set_ring_funcs(adev);
5036         gfx_v8_0_set_irq_funcs(adev);
5037         gfx_v8_0_set_gds_init(adev);
5038         gfx_v8_0_set_rlc_funcs(adev);
5039
5040         return 0;
5041 }
5042
5043 static int gfx_v8_0_late_init(void *handle)
5044 {
5045         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5046         int r;
5047
5048         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5049         if (r)
5050                 return r;
5051
5052         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5053         if (r)
5054                 return r;
5055
5056         /* requires IBs so do in late init after IB pool is initialized */
5057         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5058         if (r)
5059                 return r;
5060
5061         amdgpu_set_powergating_state(adev,
5062                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5063
5064         return 0;
5065 }
5066
5067 static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5068                 bool enable)
5069 {
5070         uint32_t data, temp;
5071
5072         /* Send msg to SMU via Powerplay */
5073         amdgpu_set_powergating_state(adev,
5074                         AMD_IP_BLOCK_TYPE_SMC,
5075                         enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5076
5077         if (enable) {
5078                 /* Enable static MGPG */
5079                 temp = data = RREG32(mmRLC_PG_CNTL);
5080                 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5081
5082                 if (temp != data)
5083                         WREG32(mmRLC_PG_CNTL, data);
5084         } else {
5085                 temp = data = RREG32(mmRLC_PG_CNTL);
5086                 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5087
5088                 if (temp != data)
5089                         WREG32(mmRLC_PG_CNTL, data);
5090         }
5091 }
5092
5093 static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5094                 bool enable)
5095 {
5096         uint32_t data, temp;
5097
5098         if (enable) {
5099                 /* Enable dynamic MGPG */
5100                 temp = data = RREG32(mmRLC_PG_CNTL);
5101                 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5102
5103                 if (temp != data)
5104                         WREG32(mmRLC_PG_CNTL, data);
5105         } else {
5106                 temp = data = RREG32(mmRLC_PG_CNTL);
5107                 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5108
5109                 if (temp != data)
5110                         WREG32(mmRLC_PG_CNTL, data);
5111         }
5112 }
5113
5114 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5115                 bool enable)
5116 {
5117         uint32_t data, temp;
5118
5119         if (enable) {
5120                 /* Enable quick PG */
5121                 temp = data = RREG32(mmRLC_PG_CNTL);
5122                 data |= 0x100000;
5123
5124                 if (temp != data)
5125                         WREG32(mmRLC_PG_CNTL, data);
5126         } else {
5127                 temp = data = RREG32(mmRLC_PG_CNTL);
5128                 data &= ~0x100000;
5129
5130                 if (temp != data)
5131                         WREG32(mmRLC_PG_CNTL, data);
5132         }
5133 }
5134
5135 static int gfx_v8_0_set_powergating_state(void *handle,
5136                                           enum amd_powergating_state state)
5137 {
5138         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5139
5140         if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5141                 return 0;
5142
5143         switch (adev->asic_type) {
5144         case CHIP_POLARIS11:
5145                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG)
5146                         polaris11_enable_gfx_static_mg_power_gating(adev,
5147                                         state == AMD_PG_STATE_GATE ? true : false);
5148                 else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)
5149                         polaris11_enable_gfx_dynamic_mg_power_gating(adev,
5150                                         state == AMD_PG_STATE_GATE ? true : false);
5151                 else
5152                         polaris11_enable_gfx_quick_mg_power_gating(adev,
5153                                         state == AMD_PG_STATE_GATE ? true : false);
5154                 break;
5155         default:
5156                 break;
5157         }
5158
5159         return 0;
5160 }
5161
5162 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5163                                      uint32_t reg_addr, uint32_t cmd)
5164 {
5165         uint32_t data;
5166
5167         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5168
5169         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5170         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5171
5172         data = RREG32(mmRLC_SERDES_WR_CTRL);
5173         if (adev->asic_type == CHIP_STONEY)
5174                         data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5175                         RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5176                         RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5177                         RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5178                         RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5179                         RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5180                         RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5181                         RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5182                         RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5183         else
5184                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5185                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5186                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5187                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5188                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5189                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5190                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5191                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5192                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5193                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5194                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5195         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5196                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5197                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5198                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5199
5200         WREG32(mmRLC_SERDES_WR_CTRL, data);
5201 }
5202
/* Message codes placed in the MESSAGE field of RLC_GPR_REG2 */
#define MSG_EXIT_RLC_SAFE_MODE      0
#define MSG_ENTER_RLC_SAFE_MODE     1

/* RLC_GPR_REG2 layout: REQ is bit 0, MESSAGE occupies bits 4:1 */
#define RLC_GPR_REG2__REQ_MASK           0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
#define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
5209
5210 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5211 {
5212         u32 data = 0;
5213         unsigned i;
5214
5215         data = RREG32(mmRLC_CNTL);
5216         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5217                 return;
5218
5219         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5220             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5221                                AMD_PG_SUPPORT_GFX_DMG))) {
5222                 data |= RLC_GPR_REG2__REQ_MASK;
5223                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5224                 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5225                 WREG32(mmRLC_GPR_REG2, data);
5226
5227                 for (i = 0; i < adev->usec_timeout; i++) {
5228                         if ((RREG32(mmRLC_GPM_STAT) &
5229                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5230                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5231                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5232                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5233                                 break;
5234                         udelay(1);
5235                 }
5236
5237                 for (i = 0; i < adev->usec_timeout; i++) {
5238                         if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5239                                 break;
5240                         udelay(1);
5241                 }
5242                 adev->gfx.rlc.in_safe_mode = true;
5243         }
5244 }
5245
5246 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5247 {
5248         u32 data;
5249         unsigned i;
5250
5251         data = RREG32(mmRLC_CNTL);
5252         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5253                 return;
5254
5255         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5256             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5257                                AMD_PG_SUPPORT_GFX_DMG))) {
5258                 data |= RLC_GPR_REG2__REQ_MASK;
5259                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5260                 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5261                 WREG32(mmRLC_GPR_REG2, data);
5262                 adev->gfx.rlc.in_safe_mode = false;
5263         }
5264
5265         for (i = 0; i < adev->usec_timeout; i++) {
5266                 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5267                         break;
5268                 udelay(1);
5269         }
5270 }
5271
5272 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5273 {
5274         u32 data;
5275         unsigned i;
5276
5277         data = RREG32(mmRLC_CNTL);
5278         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5279                 return;
5280
5281         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5282                 data |= RLC_SAFE_MODE__CMD_MASK;
5283                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5284                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5285                 WREG32(mmRLC_SAFE_MODE, data);
5286
5287                 for (i = 0; i < adev->usec_timeout; i++) {
5288                         if ((RREG32(mmRLC_GPM_STAT) &
5289                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5290                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5291                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5292                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5293                                 break;
5294                         udelay(1);
5295                 }
5296
5297                 for (i = 0; i < adev->usec_timeout; i++) {
5298                         if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5299                                 break;
5300                         udelay(1);
5301                 }
5302                 adev->gfx.rlc.in_safe_mode = true;
5303         }
5304 }
5305
5306 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5307 {
5308         u32 data = 0;
5309         unsigned i;
5310
5311         data = RREG32(mmRLC_CNTL);
5312         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5313                 return;
5314
5315         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5316                 if (adev->gfx.rlc.in_safe_mode) {
5317                         data |= RLC_SAFE_MODE__CMD_MASK;
5318                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5319                         WREG32(mmRLC_SAFE_MODE, data);
5320                         adev->gfx.rlc.in_safe_mode = false;
5321                 }
5322         }
5323
5324         for (i = 0; i < adev->usec_timeout; i++) {
5325                 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5326                         break;
5327                 udelay(1);
5328         }
5329 }
5330
5331 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5332 {
5333         adev->gfx.rlc.in_safe_mode = true;
5334 }
5335
5336 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5337 {
5338         adev->gfx.rlc.in_safe_mode = false;
5339 }
5340
5341 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5342         .enter_safe_mode = cz_enter_rlc_safe_mode,
5343         .exit_safe_mode = cz_exit_rlc_safe_mode
5344 };
5345
5346 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5347         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5348         .exit_safe_mode = iceland_exit_rlc_safe_mode
5349 };
5350
5351 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5352         .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5353         .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5354 };
5355
5356 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5357                                                       bool enable)
5358 {
5359         uint32_t temp, data;
5360
5361         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5362
5363         /* It is disabled by HW by default */
5364         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5365                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5366                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5367                                 /* 1 - RLC memory Light sleep */
5368                                 temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
5369                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5370                                 if (temp != data)
5371                                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5372                         }
5373
5374                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5375                                 /* 2 - CP memory Light sleep */
5376                                 temp = data = RREG32(mmCP_MEM_SLP_CNTL);
5377                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5378                                 if (temp != data)
5379                                         WREG32(mmCP_MEM_SLP_CNTL, data);
5380                         }
5381                 }
5382
5383                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5384                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5385                 if (adev->flags & AMD_IS_APU)
5386                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5387                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5388                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5389                 else
5390                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5391                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5392                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5393                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5394
5395                 if (temp != data)
5396                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5397
5398                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5399                 gfx_v8_0_wait_for_rlc_serdes(adev);
5400
5401                 /* 5 - clear mgcg override */
5402                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5403
5404                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5405                         /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5406                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5407                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5408                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5409                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5410                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5411                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5412                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5413                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5414                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5415                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5416                         if (temp != data)
5417                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5418                 }
5419                 udelay(50);
5420
5421                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5422                 gfx_v8_0_wait_for_rlc_serdes(adev);
5423         } else {
5424                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5425                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5426                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5427                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5428                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5429                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5430                 if (temp != data)
5431                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5432
5433                 /* 2 - disable MGLS in RLC */
5434                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5435                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5436                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5437                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5438                 }
5439
5440                 /* 3 - disable MGLS in CP */
5441                 data = RREG32(mmCP_MEM_SLP_CNTL);
5442                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5443                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5444                         WREG32(mmCP_MEM_SLP_CNTL, data);
5445                 }
5446
5447                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5448                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5449                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5450                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5451                 if (temp != data)
5452                         WREG32(mmCGTS_SM_CTRL_REG, data);
5453
5454                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5455                 gfx_v8_0_wait_for_rlc_serdes(adev);
5456
5457                 /* 6 - set mgcg override */
5458                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5459
5460                 udelay(50);
5461
5462                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5463                 gfx_v8_0_wait_for_rlc_serdes(adev);
5464         }
5465
5466         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5467 }
5468
5469 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5470                                                       bool enable)
5471 {
5472         uint32_t temp, temp1, data, data1;
5473
5474         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5475
5476         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5477
5478         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5479                 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
5480                  * Cmp_busy/GFX_Idle interrupts
5481                  */
5482                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5483
5484                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5485                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5486                 if (temp1 != data1)
5487                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5488
5489                 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5490                 gfx_v8_0_wait_for_rlc_serdes(adev);
5491
5492                 /* 3 - clear cgcg override */
5493                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5494
5495                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5496                 gfx_v8_0_wait_for_rlc_serdes(adev);
5497
5498                 /* 4 - write cmd to set CGLS */
5499                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5500
5501                 /* 5 - enable cgcg */
5502                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5503
5504                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5505                         /* enable cgls*/
5506                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5507
5508                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5509                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5510
5511                         if (temp1 != data1)
5512                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5513                 } else {
5514                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5515                 }
5516
5517                 if (temp != data)
5518                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5519         } else {
5520                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5521                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5522
5523                 /* TEST CGCG */
5524                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5525                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5526                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5527                 if (temp1 != data1)
5528                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5529
5530                 /* read gfx register to wake up cgcg */
5531                 RREG32(mmCB_CGTT_SCLK_CTRL);
5532                 RREG32(mmCB_CGTT_SCLK_CTRL);
5533                 RREG32(mmCB_CGTT_SCLK_CTRL);
5534                 RREG32(mmCB_CGTT_SCLK_CTRL);
5535
5536                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5537                 gfx_v8_0_wait_for_rlc_serdes(adev);
5538
5539                 /* write cmd to Set CGCG Overrride */
5540                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5541
5542                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5543                 gfx_v8_0_wait_for_rlc_serdes(adev);
5544
5545                 /* write cmd to Clear CGLS */
5546                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5547
5548                 /* disable cgcg, cgls should be disabled too. */
5549                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5550                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5551                 if (temp != data)
5552                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5553         }
5554
5555         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5556 }
5557 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5558                                             bool enable)
5559 {
5560         if (enable) {
5561                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5562                  * ===  MGCG + MGLS + TS(CG/LS) ===
5563                  */
5564                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5565                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5566         } else {
5567                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5568                  * ===  CGCG + CGLS ===
5569                  */
5570                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5571                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5572         }
5573         return 0;
5574 }
5575
5576 static int gfx_v8_0_set_clockgating_state(void *handle,
5577                                           enum amd_clockgating_state state)
5578 {
5579         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5580
5581         switch (adev->asic_type) {
5582         case CHIP_FIJI:
5583         case CHIP_CARRIZO:
5584         case CHIP_STONEY:
5585                 gfx_v8_0_update_gfx_clock_gating(adev,
5586                                                  state == AMD_CG_STATE_GATE ? true : false);
5587                 break;
5588         default:
5589                 break;
5590         }
5591         return 0;
5592 }
5593
5594 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5595 {
5596         u32 rptr;
5597
5598         rptr = ring->adev->wb.wb[ring->rptr_offs];
5599
5600         return rptr;
5601 }
5602
5603 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5604 {
5605         struct amdgpu_device *adev = ring->adev;
5606         u32 wptr;
5607
5608         if (ring->use_doorbell)
5609                 /* XXX check if swapping is necessary on BE */
5610                 wptr = ring->adev->wb.wb[ring->wptr_offs];
5611         else
5612                 wptr = RREG32(mmCP_RB0_WPTR);
5613
5614         return wptr;
5615 }
5616
5617 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5618 {
5619         struct amdgpu_device *adev = ring->adev;
5620
5621         if (ring->use_doorbell) {
5622                 /* XXX check if swapping is necessary on BE */
5623                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5624                 WDOORBELL32(ring->doorbell_index, ring->wptr);
5625         } else {
5626                 WREG32(mmCP_RB0_WPTR, ring->wptr);
5627                 (void)RREG32(mmCP_RB0_WPTR);
5628         }
5629 }
5630
5631 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5632 {
5633         u32 ref_and_mask, reg_mem_engine;
5634
5635         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
5636                 switch (ring->me) {
5637                 case 1:
5638                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5639                         break;
5640                 case 2:
5641                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5642                         break;
5643                 default:
5644                         return;
5645                 }
5646                 reg_mem_engine = 0;
5647         } else {
5648                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
5649                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
5650         }
5651
5652         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5653         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
5654                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
5655                                  reg_mem_engine));
5656         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
5657         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
5658         amdgpu_ring_write(ring, ref_and_mask);
5659         amdgpu_ring_write(ring, ref_and_mask);
5660         amdgpu_ring_write(ring, 0x20); /* poll interval */
5661 }
5662
5663 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
5664 {
5665         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5666         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5667                                  WRITE_DATA_DST_SEL(0) |
5668                                  WR_CONFIRM));
5669         amdgpu_ring_write(ring, mmHDP_DEBUG0);
5670         amdgpu_ring_write(ring, 0);
5671         amdgpu_ring_write(ring, 1);
5672
5673 }
5674
5675 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5676                                       struct amdgpu_ib *ib,
5677                                       unsigned vm_id, bool ctx_switch)
5678 {
5679         u32 header, control = 0;
5680         u32 next_rptr = ring->wptr + 5;
5681
5682         if (ctx_switch)
5683                 next_rptr += 2;
5684
5685         next_rptr += 4;
5686         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5687         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5688         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5689         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5690         amdgpu_ring_write(ring, next_rptr);
5691
5692         /* insert SWITCH_BUFFER packet before first IB in the ring frame */
5693         if (ctx_switch) {
5694                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5695                 amdgpu_ring_write(ring, 0);
5696         }
5697
5698         if (ib->flags & AMDGPU_IB_FLAG_CE)
5699                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5700         else
5701                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5702
5703         control |= ib->length_dw | (vm_id << 24);
5704
5705         amdgpu_ring_write(ring, header);
5706         amdgpu_ring_write(ring,
5707 #ifdef __BIG_ENDIAN
5708                           (2 << 0) |
5709 #endif
5710                           (ib->gpu_addr & 0xFFFFFFFC));
5711         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5712         amdgpu_ring_write(ring, control);
5713 }
5714
5715 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5716                                           struct amdgpu_ib *ib,
5717                                           unsigned vm_id, bool ctx_switch)
5718 {
5719         u32 header, control = 0;
5720         u32 next_rptr = ring->wptr + 5;
5721
5722         control |= INDIRECT_BUFFER_VALID;
5723
5724         next_rptr += 4;
5725         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5726         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5727         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5728         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5729         amdgpu_ring_write(ring, next_rptr);
5730
5731         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5732
5733         control |= ib->length_dw | (vm_id << 24);
5734
5735         amdgpu_ring_write(ring, header);
5736         amdgpu_ring_write(ring,
5737 #ifdef __BIG_ENDIAN
5738                                           (2 << 0) |
5739 #endif
5740                                           (ib->gpu_addr & 0xFFFFFFFC));
5741         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5742         amdgpu_ring_write(ring, control);
5743 }
5744
5745 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
5746                                          u64 seq, unsigned flags)
5747 {
5748         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5749         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5750
5751         /* EVENT_WRITE_EOP - flush caches, send int */
5752         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
5753         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5754                                  EOP_TC_ACTION_EN |
5755                                  EOP_TC_WB_ACTION_EN |
5756                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5757                                  EVENT_INDEX(5)));
5758         amdgpu_ring_write(ring, addr & 0xfffffffc);
5759         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
5760                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5761         amdgpu_ring_write(ring, lower_32_bits(seq));
5762         amdgpu_ring_write(ring, upper_32_bits(seq));
5763
5764 }
5765
5766 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5767 {
5768         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5769         uint32_t seq = ring->fence_drv.sync_seq;
5770         uint64_t addr = ring->fence_drv.gpu_addr;
5771
5772         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5773         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
5774                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
5775                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
5776         amdgpu_ring_write(ring, addr & 0xfffffffc);
5777         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
5778         amdgpu_ring_write(ring, seq);
5779         amdgpu_ring_write(ring, 0xffffffff);
5780         amdgpu_ring_write(ring, 4); /* poll interval */
5781
5782         if (usepfp) {
5783                 /* synce CE with ME to prevent CE fetch CEIB before context switch done */
5784                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5785                 amdgpu_ring_write(ring, 0);
5786                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5787                 amdgpu_ring_write(ring, 0);
5788         }
5789 }
5790
5791 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5792                                         unsigned vm_id, uint64_t pd_addr)
5793 {
5794         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5795
5796         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5797         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5798                                  WRITE_DATA_DST_SEL(0)) |
5799                                  WR_CONFIRM);
5800         if (vm_id < 8) {
5801                 amdgpu_ring_write(ring,
5802                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
5803         } else {
5804                 amdgpu_ring_write(ring,
5805                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
5806         }
5807         amdgpu_ring_write(ring, 0);
5808         amdgpu_ring_write(ring, pd_addr >> 12);
5809
5810         /* bits 0-15 are the VM contexts0-15 */
5811         /* invalidate the cache */
5812         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5813         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5814                                  WRITE_DATA_DST_SEL(0)));
5815         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5816         amdgpu_ring_write(ring, 0);
5817         amdgpu_ring_write(ring, 1 << vm_id);
5818
5819         /* wait for the invalidate to complete */
5820         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5821         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5822                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
5823                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5824         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5825         amdgpu_ring_write(ring, 0);
5826         amdgpu_ring_write(ring, 0); /* ref */
5827         amdgpu_ring_write(ring, 0); /* mask */
5828         amdgpu_ring_write(ring, 0x20); /* poll interval */
5829
5830         /* compute doesn't have PFP */
5831         if (usepfp) {
5832                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5833                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5834                 amdgpu_ring_write(ring, 0x0);
5835                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5836                 amdgpu_ring_write(ring, 0);
5837                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5838                 amdgpu_ring_write(ring, 0);
5839         }
5840 }
5841
5842 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5843 {
5844         return ring->adev->wb.wb[ring->rptr_offs];
5845 }
5846
5847 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5848 {
5849         return ring->adev->wb.wb[ring->wptr_offs];
5850 }
5851
5852 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5853 {
5854         struct amdgpu_device *adev = ring->adev;
5855
5856         /* XXX check if swapping is necessary on BE */
5857         adev->wb.wb[ring->wptr_offs] = ring->wptr;
5858         WDOORBELL32(ring->doorbell_index, ring->wptr);
5859 }
5860
5861 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
5862                                              u64 addr, u64 seq,
5863                                              unsigned flags)
5864 {
5865         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5866         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5867
5868         /* RELEASE_MEM - flush caches, send int */
5869         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
5870         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5871                                  EOP_TC_ACTION_EN |
5872                                  EOP_TC_WB_ACTION_EN |
5873                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5874                                  EVENT_INDEX(5)));
5875         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5876         amdgpu_ring_write(ring, addr & 0xfffffffc);
5877         amdgpu_ring_write(ring, upper_32_bits(addr));
5878         amdgpu_ring_write(ring, lower_32_bits(seq));
5879         amdgpu_ring_write(ring, upper_32_bits(seq));
5880 }
5881
5882 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5883                                                  enum amdgpu_interrupt_state state)
5884 {
5885         u32 cp_int_cntl;
5886
5887         switch (state) {
5888         case AMDGPU_IRQ_STATE_DISABLE:
5889                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5890                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5891                                             TIME_STAMP_INT_ENABLE, 0);
5892                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5893                 break;
5894         case AMDGPU_IRQ_STATE_ENABLE:
5895                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5896                 cp_int_cntl =
5897                         REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5898                                       TIME_STAMP_INT_ENABLE, 1);
5899                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5900                 break;
5901         default:
5902                 break;
5903         }
5904 }
5905
5906 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5907                                                      int me, int pipe,
5908                                                      enum amdgpu_interrupt_state state)
5909 {
5910         u32 mec_int_cntl, mec_int_cntl_reg;
5911
5912         /*
5913          * amdgpu controls only pipe 0 of MEC1. That's why this function only
5914          * handles the setting of interrupts for this specific pipe. All other
5915          * pipes' interrupts are set by amdkfd.
5916          */
5917
5918         if (me == 1) {
5919                 switch (pipe) {
5920                 case 0:
5921                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
5922                         break;
5923                 default:
5924                         DRM_DEBUG("invalid pipe %d\n", pipe);
5925                         return;
5926                 }
5927         } else {
5928                 DRM_DEBUG("invalid me %d\n", me);
5929                 return;
5930         }
5931
5932         switch (state) {
5933         case AMDGPU_IRQ_STATE_DISABLE:
5934                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5935                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5936                                              TIME_STAMP_INT_ENABLE, 0);
5937                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5938                 break;
5939         case AMDGPU_IRQ_STATE_ENABLE:
5940                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5941                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5942                                              TIME_STAMP_INT_ENABLE, 1);
5943                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5944                 break;
5945         default:
5946                 break;
5947         }
5948 }
5949
5950 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5951                                              struct amdgpu_irq_src *source,
5952                                              unsigned type,
5953                                              enum amdgpu_interrupt_state state)
5954 {
5955         u32 cp_int_cntl;
5956
5957         switch (state) {
5958         case AMDGPU_IRQ_STATE_DISABLE:
5959                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5960                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5961                                             PRIV_REG_INT_ENABLE, 0);
5962                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5963                 break;
5964         case AMDGPU_IRQ_STATE_ENABLE:
5965                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5966                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5967                                             PRIV_REG_INT_ENABLE, 1);
5968                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5969                 break;
5970         default:
5971                 break;
5972         }
5973
5974         return 0;
5975 }
5976
5977 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5978                                               struct amdgpu_irq_src *source,
5979                                               unsigned type,
5980                                               enum amdgpu_interrupt_state state)
5981 {
5982         u32 cp_int_cntl;
5983
5984         switch (state) {
5985         case AMDGPU_IRQ_STATE_DISABLE:
5986                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5987                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5988                                             PRIV_INSTR_INT_ENABLE, 0);
5989                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5990                 break;
5991         case AMDGPU_IRQ_STATE_ENABLE:
5992                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5993                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5994                                             PRIV_INSTR_INT_ENABLE, 1);
5995                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5996                 break;
5997         default:
5998                 break;
5999         }
6000
6001         return 0;
6002 }
6003
6004 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6005                                             struct amdgpu_irq_src *src,
6006                                             unsigned type,
6007                                             enum amdgpu_interrupt_state state)
6008 {
6009         switch (type) {
6010         case AMDGPU_CP_IRQ_GFX_EOP:
6011                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6012                 break;
6013         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6014                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6015                 break;
6016         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6017                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6018                 break;
6019         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6020                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6021                 break;
6022         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6023                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6024                 break;
6025         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6026                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6027                 break;
6028         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6029                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6030                 break;
6031         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6032                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6033                 break;
6034         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6035                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6036                 break;
6037         default:
6038                 break;
6039         }
6040         return 0;
6041 }
6042
6043 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6044                             struct amdgpu_irq_src *source,
6045                             struct amdgpu_iv_entry *entry)
6046 {
6047         int i;
6048         u8 me_id, pipe_id, queue_id;
6049         struct amdgpu_ring *ring;
6050
6051         DRM_DEBUG("IH: CP EOP\n");
6052         me_id = (entry->ring_id & 0x0c) >> 2;
6053         pipe_id = (entry->ring_id & 0x03) >> 0;
6054         queue_id = (entry->ring_id & 0x70) >> 4;
6055
6056         switch (me_id) {
6057         case 0:
6058                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6059                 break;
6060         case 1:
6061         case 2:
6062                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6063                         ring = &adev->gfx.compute_ring[i];
6064                         /* Per-queue interrupt is supported for MEC starting from VI.
6065                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6066                           */
6067                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6068                                 amdgpu_fence_process(ring);
6069                 }
6070                 break;
6071         }
6072         return 0;
6073 }
6074
6075 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6076                                  struct amdgpu_irq_src *source,
6077                                  struct amdgpu_iv_entry *entry)
6078 {
6079         DRM_ERROR("Illegal register access in command stream\n");
6080         schedule_work(&adev->reset_work);
6081         return 0;
6082 }
6083
6084 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6085                                   struct amdgpu_irq_src *source,
6086                                   struct amdgpu_iv_entry *entry)
6087 {
6088         DRM_ERROR("Illegal instruction in command stream\n");
6089         schedule_work(&adev->reset_work);
6090         return 0;
6091 }
6092
6093 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6094         .name = "gfx_v8_0",
6095         .early_init = gfx_v8_0_early_init,
6096         .late_init = gfx_v8_0_late_init,
6097         .sw_init = gfx_v8_0_sw_init,
6098         .sw_fini = gfx_v8_0_sw_fini,
6099         .hw_init = gfx_v8_0_hw_init,
6100         .hw_fini = gfx_v8_0_hw_fini,
6101         .suspend = gfx_v8_0_suspend,
6102         .resume = gfx_v8_0_resume,
6103         .is_idle = gfx_v8_0_is_idle,
6104         .wait_for_idle = gfx_v8_0_wait_for_idle,
6105         .soft_reset = gfx_v8_0_soft_reset,
6106         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6107         .set_powergating_state = gfx_v8_0_set_powergating_state,
6108 };
6109
6110 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6111         .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
6112         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6113         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6114         .parse_cs = NULL,
6115         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6116         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6117         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6118         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6119         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6120         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6121         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6122         .test_ring = gfx_v8_0_ring_test_ring,
6123         .test_ib = gfx_v8_0_ring_test_ib,
6124         .insert_nop = amdgpu_ring_insert_nop,
6125         .pad_ib = amdgpu_ring_generic_pad_ib,
6126 };
6127
6128 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6129         .get_rptr = gfx_v8_0_ring_get_rptr_compute,
6130         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6131         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6132         .parse_cs = NULL,
6133         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6134         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6135         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6136         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6137         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6138         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6139         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6140         .test_ring = gfx_v8_0_ring_test_ring,
6141         .test_ib = gfx_v8_0_ring_test_ib,
6142         .insert_nop = amdgpu_ring_insert_nop,
6143         .pad_ib = amdgpu_ring_generic_pad_ib,
6144 };
6145
6146 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6147 {
6148         int i;
6149
6150         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6151                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6152
6153         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6154                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6155 }
6156
6157 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6158         .set = gfx_v8_0_set_eop_interrupt_state,
6159         .process = gfx_v8_0_eop_irq,
6160 };
6161
6162 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6163         .set = gfx_v8_0_set_priv_reg_fault_state,
6164         .process = gfx_v8_0_priv_reg_irq,
6165 };
6166
6167 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6168         .set = gfx_v8_0_set_priv_inst_fault_state,
6169         .process = gfx_v8_0_priv_inst_irq,
6170 };
6171
6172 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6173 {
6174         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6175         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6176
6177         adev->gfx.priv_reg_irq.num_types = 1;
6178         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6179
6180         adev->gfx.priv_inst_irq.num_types = 1;
6181         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6182 }
6183
6184 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6185 {
6186         switch (adev->asic_type) {
6187         case CHIP_TOPAZ:
6188         case CHIP_STONEY:
6189                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6190                 break;
6191         case CHIP_CARRIZO:
6192                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6193                 break;
6194         default:
6195                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6196                 break;
6197         }
6198 }
6199
6200 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6201 {
6202         /* init asci gds info */
6203         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6204         adev->gds.gws.total_size = 64;
6205         adev->gds.oa.total_size = 16;
6206
6207         if (adev->gds.mem.total_size == 64 * 1024) {
6208                 adev->gds.mem.gfx_partition_size = 4096;
6209                 adev->gds.mem.cs_partition_size = 4096;
6210
6211                 adev->gds.gws.gfx_partition_size = 4;
6212                 adev->gds.gws.cs_partition_size = 4;
6213
6214                 adev->gds.oa.gfx_partition_size = 4;
6215                 adev->gds.oa.cs_partition_size = 1;
6216         } else {
6217                 adev->gds.mem.gfx_partition_size = 1024;
6218                 adev->gds.mem.cs_partition_size = 1024;
6219
6220                 adev->gds.gws.gfx_partition_size = 16;
6221                 adev->gds.gws.cs_partition_size = 16;
6222
6223                 adev->gds.oa.gfx_partition_size = 4;
6224                 adev->gds.oa.cs_partition_size = 4;
6225         }
6226 }
6227
6228 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6229 {
6230         u32 data, mask;
6231
6232         data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
6233         data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6234
6235         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6236         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6237
6238         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6239
6240         return (~data) & mask;
6241 }
6242
6243 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6244 {
6245         int i, j, k, counter, active_cu_number = 0;
6246         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6247         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6248
6249         memset(cu_info, 0, sizeof(*cu_info));
6250
6251         mutex_lock(&adev->grbm_idx_mutex);
6252         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6253                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6254                         mask = 1;
6255                         ao_bitmap = 0;
6256                         counter = 0;
6257                         gfx_v8_0_select_se_sh(adev, i, j);
6258                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6259                         cu_info->bitmap[i][j] = bitmap;
6260
6261                         for (k = 0; k < 16; k ++) {
6262                                 if (bitmap & mask) {
6263                                         if (counter < 2)
6264                                                 ao_bitmap |= mask;
6265                                         counter ++;
6266                                 }
6267                                 mask <<= 1;
6268                         }
6269                         active_cu_number += counter;
6270                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6271                 }
6272         }
6273         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
6274         mutex_unlock(&adev->grbm_idx_mutex);
6275
6276         cu_info->number = active_cu_number;
6277         cu_info->ao_cu_mask = ao_cu_mask;
6278 }