drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "clearstate_vi.h"
32
33 #include "gmc/gmc_8_2_d.h"
34 #include "gmc/gmc_8_2_sh_mask.h"
35
36 #include "oss/oss_3_0_d.h"
37 #include "oss/oss_3_0_sh_mask.h"
38
39 #include "bif/bif_5_0_d.h"
40 #include "bif/bif_5_0_sh_mask.h"
41
42 #include "gca/gfx_8_0_d.h"
43 #include "gca/gfx_8_0_enum.h"
44 #include "gca/gfx_8_0_sh_mask.h"
45 #include "gca/gfx_8_0_enum.h"
46
47 #include "dce/dce_10_0_d.h"
48 #include "dce/dce_10_0_sh_mask.h"
49
50 #define GFX8_NUM_GFX_RINGS     1
51 #define GFX8_NUM_COMPUTE_RINGS 8
52
53 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
54 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
55 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
56 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
57
58 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
59 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
60 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
61 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
62 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
63 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
64 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
65 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
66 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
67
68 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
69 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
70 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
71 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
72 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
73 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
74
75 /* BPM SERDES CMD */
76 #define SET_BPM_SERDES_CMD    1
77 #define CLE_BPM_SERDES_CMD    0
78
79 /* BPM Register Address */
80 enum {
81         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
82         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
83         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
84         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
85         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
86         BPM_REG_FGCG_MAX
87 };
88
89 #define RLC_FormatDirectRegListLength        14
90
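/* Firmware images requested per ASIC by gfx_v8_0_init_microcode() below. */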
91 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
92 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
94 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
97
98 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
99 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
100 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
101 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
103
104 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
105 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
107 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
110
111 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
112 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
113 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
114 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
116
117 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
118 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
119 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
120 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
123
124 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
125 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
126 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
127 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
130
131 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
132 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
133 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
137
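/* Per-VMID GDS base/size, GWS and OA register offsets, indexed by VMID 0-15. */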
138 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
139 {
140         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
141         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
142         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
143         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
144         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
145         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
146         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
147         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
148         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
149         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
150         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
151         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
152         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
153         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
154         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
155         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
156 };
157
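/*
 * Golden register tables: flat lists of {register, AND mask, OR value}
 * triplets consumed by amdgpu_program_register_sequence() from
 * gfx_v8_0_init_golden_registers() below.
 */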
158 static const u32 golden_settings_tonga_a11[] =
159 {
160         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
161         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
162         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
163         mmGB_GPU_ID, 0x0000000f, 0x00000000,
164         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
165         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
166         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
167         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
168         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
169         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
170         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
171         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
172         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
173         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
174         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
175 };
176
177 static const u32 tonga_golden_common_all[] =
178 {
179         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
180         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
181         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
182         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
183         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
184         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
185         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
186         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
187 };
188
189 static const u32 tonga_mgcg_cgcg_init[] =
190 {
191         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
192         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
193         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
194         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
195         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
196         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
197         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
198         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
199         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
200         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
201         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
202         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
203         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
204         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
205         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
206         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
207         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
208         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
209         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
210         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
211         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
212         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
213         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
214         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
215         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
216         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
217         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
218         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
219         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
220         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
221         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
222         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
223         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
224         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
225         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
226         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
227         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
228         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
229         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
230         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
231         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
232         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
233         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
234         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
235         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
236         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
237         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
238         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
239         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
240         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
241         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
242         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
243         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
244         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
245         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
246         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
247         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
248         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
249         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
250         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
251         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
252         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
253         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
254         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
255         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
256         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
257         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
258         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
259         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
260         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
261         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
262         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
263         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
264         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
265         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
266 };
267
268 static const u32 golden_settings_polaris11_a11[] =
269 {
270         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
271         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
272         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
273         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
274         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
275         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
276         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
277         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
278         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
279         mmSQ_CONFIG, 0x07f80000, 0x07180000,
280         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
281         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
282         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
283         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
284         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
285 };
286
287 static const u32 polaris11_golden_common_all[] =
288 {
289         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
290         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
291         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
292         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
293         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
294         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
295 };
296
297 static const u32 golden_settings_polaris10_a11[] =
298 {
299         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
300         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
301         mmCB_HW_CONTROL_2, 0, 0x0f000000,
302         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
303         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
304         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
305         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
306         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
307         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
308         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
309         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
310         mmSQ_CONFIG, 0x07f80000, 0x07180000,
311         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
312         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
313         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
314         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
315 };
316
317 static const u32 polaris10_golden_common_all[] =
318 {
319         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
320         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
321         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
322         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
323         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
324         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
325         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
326         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
327 };
328
329 static const u32 fiji_golden_common_all[] =
330 {
331         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
332         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
333         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
334         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
335         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
336         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
337         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
338         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
339         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
340         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
341 };
342
343 static const u32 golden_settings_fiji_a10[] =
344 {
345         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
346         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
347         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
348         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
349         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
350         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
351         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
352         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
353         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
354         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
355         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
356 };
357
358 static const u32 fiji_mgcg_cgcg_init[] =
359 {
360         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
361         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
362         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
363         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
364         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
365         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
366         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
367         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
368         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
369         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
370         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
371         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
372         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
373         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
374         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
375         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
376         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
377         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
378         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
379         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
380         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
381         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
382         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
383         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
384         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
385         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
386         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
387         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
388         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
389         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
390         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
391         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
392         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
393         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
394         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
395 };
396
397 static const u32 golden_settings_iceland_a11[] =
398 {
399         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
400         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
401         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
402         mmGB_GPU_ID, 0x0000000f, 0x00000000,
403         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
404         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
405         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
406         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
407         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
408         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
409         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
410         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
411         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
412         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
413         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
414 };
415
416 static const u32 iceland_golden_common_all[] =
417 {
418         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
419         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
420         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
421         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
422         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
423         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
424         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
425         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
426 };
427
428 static const u32 iceland_mgcg_cgcg_init[] =
429 {
430         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
431         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
432         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
433         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
434         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
435         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
436         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
437         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
439         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
441         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
448         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
449         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
450         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
452         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
453         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
454         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
455         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
456         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
457         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
458         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
459         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
460         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
461         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
462         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
463         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
464         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
465         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
466         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
467         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
468         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
469         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
470         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
471         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
472         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
473         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
474         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
475         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
476         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
477         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
478         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
479         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
480         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
481         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
482         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
483         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
484         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
485         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
486         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
487         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
488         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
489         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
490         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
491         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
492         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
493         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
494 };
495
496 static const u32 cz_golden_settings_a11[] =
497 {
498         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
499         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
500         mmGB_GPU_ID, 0x0000000f, 0x00000000,
501         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
502         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
503         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
504         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
505         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
506         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
507         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
508 };
509
510 static const u32 cz_golden_common_all[] =
511 {
512         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
513         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
514         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
515         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
516         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
517         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
518         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
519         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
520 };
521
522 static const u32 cz_mgcg_cgcg_init[] =
523 {
524         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
525         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
526         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
527         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
528         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
529         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
530         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
531         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
532         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
533         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
534         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
535         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
536         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
537         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
538         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
539         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
540         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
541         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
542         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
543         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
544         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
545         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
546         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
547         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
548         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
549         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
550         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
551         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
552         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
553         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
554         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
555         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
556         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
557         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
558         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
559         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
560         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
561         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
562         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
563         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
564         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
565         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
566         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
567         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
568         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
569         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
570         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
571         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
572         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
573         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
574         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
575         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
576         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
577         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
578         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
579         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
580         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
581         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
582         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
583         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
584         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
585         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
586         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
587         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
588         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
589         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
590         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
591         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
592         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
593         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
594         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
595         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
596         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
597         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
598         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
599 };
600
601 static const u32 stoney_golden_settings_a11[] =
602 {
603         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
604         mmGB_GPU_ID, 0x0000000f, 0x00000000,
605         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
606         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
607         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
608         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
609         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
610         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
611         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
612         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
613 };
614
615 static const u32 stoney_golden_common_all[] =
616 {
617         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
618         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
619         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
620         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
621         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
622         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
623         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
624         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
625 };
626
627 static const u32 stoney_mgcg_cgcg_init[] =
628 {
629         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
630         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
631         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
632         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
633         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
634         mmATC_MISC_CG, 0xffffffff, 0x000c0200,
635 };
636
637 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
638 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
639 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
640 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
641 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
642 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
643
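/*
 * gfx_v8_0_init_golden_registers - apply per-ASIC golden register settings
 *
 * Applies the ASIC-specific register tables defined above (clock-gating
 * init, golden settings and common settings) for the detected VI chip;
 * unknown ASICs are left untouched.
 */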
644 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
645 {
646         switch (adev->asic_type) {
647         case CHIP_TOPAZ:
648                 amdgpu_program_register_sequence(adev,
649                                                  iceland_mgcg_cgcg_init,
650                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
651                 amdgpu_program_register_sequence(adev,
652                                                  golden_settings_iceland_a11,
653                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
654                 amdgpu_program_register_sequence(adev,
655                                                  iceland_golden_common_all,
656                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
657                 break;
658         case CHIP_FIJI:
659                 amdgpu_program_register_sequence(adev,
660                                                  fiji_mgcg_cgcg_init,
661                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
662                 amdgpu_program_register_sequence(adev,
663                                                  golden_settings_fiji_a10,
664                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
665                 amdgpu_program_register_sequence(adev,
666                                                  fiji_golden_common_all,
667                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
668                 break;
669
670         case CHIP_TONGA:
671                 amdgpu_program_register_sequence(adev,
672                                                  tonga_mgcg_cgcg_init,
673                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
674                 amdgpu_program_register_sequence(adev,
675                                                  golden_settings_tonga_a11,
676                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
677                 amdgpu_program_register_sequence(adev,
678                                                  tonga_golden_common_all,
679                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
680                 break;
681         case CHIP_POLARIS11:
682                 amdgpu_program_register_sequence(adev,
683                                                  golden_settings_polaris11_a11,
684                                                  (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
685                 amdgpu_program_register_sequence(adev,
686                                                  polaris11_golden_common_all,
687                                                  (const u32)ARRAY_SIZE(polaris11_golden_common_all));
688                 break;
689         case CHIP_POLARIS10:
690                 amdgpu_program_register_sequence(adev,
691                                                  golden_settings_polaris10_a11,
692                                                  (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
693                 amdgpu_program_register_sequence(adev,
694                                                  polaris10_golden_common_all,
695                                                  (const u32)ARRAY_SIZE(polaris10_golden_common_all));
696                 break;
697         case CHIP_CARRIZO:
698                 amdgpu_program_register_sequence(adev,
699                                                  cz_mgcg_cgcg_init,
700                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
701                 amdgpu_program_register_sequence(adev,
702                                                  cz_golden_settings_a11,
703                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
704                 amdgpu_program_register_sequence(adev,
705                                                  cz_golden_common_all,
706                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
707                 break;
708         case CHIP_STONEY:
709                 amdgpu_program_register_sequence(adev,
710                                                  stoney_mgcg_cgcg_init,
711                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
712                 amdgpu_program_register_sequence(adev,
713                                                  stoney_golden_settings_a11,
714                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
715                 amdgpu_program_register_sequence(adev,
716                                                  stoney_golden_common_all,
717                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
718                 break;
719         default:
720                 break;
721         }
722 }
723
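/*
 * gfx_v8_0_scratch_init - set up the CP scratch register pool
 *
 * Exposes seven scratch registers starting at mmSCRATCH_REG0 and marks
 * them all free; they are handed out by amdgpu_gfx_scratch_get() for the
 * ring and IB tests below.
 */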
724 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
725 {
726         int i;
727
728         adev->gfx.scratch.num_reg = 7;
729         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
730         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
731                 adev->gfx.scratch.free[i] = true;
732                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
733         }
734 }
735
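/*
 * gfx_v8_0_ring_test_ring - basic ring sanity test
 *
 * Seeds a scratch register with 0xCAFEDEAD, submits a SET_UCONFIG_REG
 * packet that writes 0xDEADBEEF to it, and polls up to adev->usec_timeout
 * microseconds for the value to appear.  Returns 0 on success or a
 * negative error code on failure or timeout.
 */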
736 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
737 {
738         struct amdgpu_device *adev = ring->adev;
739         uint32_t scratch;
740         uint32_t tmp = 0;
741         unsigned i;
742         int r;
743
744         r = amdgpu_gfx_scratch_get(adev, &scratch);
745         if (r) {
746                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
747                 return r;
748         }
749         WREG32(scratch, 0xCAFEDEAD);
750         r = amdgpu_ring_alloc(ring, 3);
751         if (r) {
752                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
753                           ring->idx, r);
754                 amdgpu_gfx_scratch_free(adev, scratch);
755                 return r;
756         }
757         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
758         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
759         amdgpu_ring_write(ring, 0xDEADBEEF);
760         amdgpu_ring_commit(ring);
761
762         for (i = 0; i < adev->usec_timeout; i++) {
763                 tmp = RREG32(scratch);
764                 if (tmp == 0xDEADBEEF)
765                         break;
766                 DRM_UDELAY(1);
767         }
768         if (i < adev->usec_timeout) {
769                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
770                          ring->idx, i);
771         } else {
772                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
773                           ring->idx, scratch, tmp);
774                 r = -EINVAL;
775         }
776         amdgpu_gfx_scratch_free(adev, scratch);
777         return r;
778 }
779
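/*
 * gfx_v8_0_ring_test_ib - indirect buffer submission test
 *
 * Same scratch register handshake as the ring test, but the
 * SET_UCONFIG_REG write is placed in a small IB, scheduled on the ring,
 * and the resulting fence is waited on before the scratch value is
 * polled.
 */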
780 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
781 {
782         struct amdgpu_device *adev = ring->adev;
783         struct amdgpu_ib ib;
784         struct fence *f = NULL;
785         uint32_t scratch;
786         uint32_t tmp = 0;
787         unsigned i;
788         int r;
789
790         r = amdgpu_gfx_scratch_get(adev, &scratch);
791         if (r) {
792                 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
793                 return r;
794         }
795         WREG32(scratch, 0xCAFEDEAD);
796         memset(&ib, 0, sizeof(ib));
797         r = amdgpu_ib_get(adev, NULL, 256, &ib);
798         if (r) {
799                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
800                 goto err1;
801         }
802         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
803         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
804         ib.ptr[2] = 0xDEADBEEF;
805         ib.length_dw = 3;
806
807         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
808         if (r)
809                 goto err2;
810
811         r = fence_wait(f, false);
812         if (r) {
813                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
814                 goto err2;
815         }
816         for (i = 0; i < adev->usec_timeout; i++) {
817                 tmp = RREG32(scratch);
818                 if (tmp == 0xDEADBEEF)
819                         break;
820                 DRM_UDELAY(1);
821         }
822         if (i < adev->usec_timeout) {
823                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
824                          ring->idx, i);
825                 goto err2;
826         } else {
827                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
828                           scratch, tmp);
829                 r = -EINVAL;
830         }
831 err2:
833         amdgpu_ib_free(adev, &ib, NULL);
834         fence_put(f);
835 err1:
836         amdgpu_gfx_scratch_free(adev, scratch);
837         return r;
838 }
839
840
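/* Drop all GFX firmware references and the RLC register list copy. */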
841 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
842         release_firmware(adev->gfx.pfp_fw);
843         adev->gfx.pfp_fw = NULL;
844         release_firmware(adev->gfx.me_fw);
845         adev->gfx.me_fw = NULL;
846         release_firmware(adev->gfx.ce_fw);
847         adev->gfx.ce_fw = NULL;
848         release_firmware(adev->gfx.rlc_fw);
849         adev->gfx.rlc_fw = NULL;
850         release_firmware(adev->gfx.mec_fw);
851         adev->gfx.mec_fw = NULL;
852         if ((adev->asic_type != CHIP_STONEY) &&
853             (adev->asic_type != CHIP_TOPAZ))
854                 release_firmware(adev->gfx.mec2_fw);
855         adev->gfx.mec2_fw = NULL;
856
857         kfree(adev->gfx.rlc.register_list_format);
858 }
859
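/*
 * gfx_v8_0_init_microcode - fetch and validate the GFX firmware images
 *
 * Requests the PFP, ME, CE, RLC, MEC and (where present) MEC2 binaries
 * for the detected ASIC, records their version/feature numbers, copies
 * the RLC register list data out of the RLC header, and, when the SMU
 * loads firmware (adev->firmware.smu_load), registers each image in
 * adev->firmware.ucode[].  All firmware references are released on error.
 */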
860 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
861 {
862         const char *chip_name;
863         char fw_name[30];
864         int err;
865         struct amdgpu_firmware_info *info = NULL;
866         const struct common_firmware_header *header = NULL;
867         const struct gfx_firmware_header_v1_0 *cp_hdr;
868         const struct rlc_firmware_header_v2_0 *rlc_hdr;
869         unsigned int *tmp = NULL, i;
870
871         DRM_DEBUG("\n");
872
873         switch (adev->asic_type) {
874         case CHIP_TOPAZ:
875                 chip_name = "topaz";
876                 break;
877         case CHIP_TONGA:
878                 chip_name = "tonga";
879                 break;
880         case CHIP_CARRIZO:
881                 chip_name = "carrizo";
882                 break;
883         case CHIP_FIJI:
884                 chip_name = "fiji";
885                 break;
886         case CHIP_POLARIS11:
887                 chip_name = "polaris11";
888                 break;
889         case CHIP_POLARIS10:
890                 chip_name = "polaris10";
891                 break;
892         case CHIP_STONEY:
893                 chip_name = "stoney";
894                 break;
895         default:
896                 BUG();
897         }
898
899         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
900         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
901         if (err)
902                 goto out;
903         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
904         if (err)
905                 goto out;
906         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
907         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
908         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
909
910         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
911         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
912         if (err)
913                 goto out;
914         err = amdgpu_ucode_validate(adev->gfx.me_fw);
915         if (err)
916                 goto out;
917         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
918         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
919         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
920
921         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
922         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
923         if (err)
924                 goto out;
925         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
926         if (err)
927                 goto out;
928         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
929         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
930         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
931
932         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
933         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
934         if (err)
935                 goto out;
936         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        if (err)
                goto out;
937         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
938         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
939         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
940
941         adev->gfx.rlc.save_and_restore_offset =
942                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
943         adev->gfx.rlc.clear_state_descriptor_offset =
944                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
945         adev->gfx.rlc.avail_scratch_ram_locations =
946                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
947         adev->gfx.rlc.reg_restore_list_size =
948                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
949         adev->gfx.rlc.reg_list_format_start =
950                         le32_to_cpu(rlc_hdr->reg_list_format_start);
951         adev->gfx.rlc.reg_list_format_separate_start =
952                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
953         adev->gfx.rlc.starting_offsets_start =
954                         le32_to_cpu(rlc_hdr->starting_offsets_start);
955         adev->gfx.rlc.reg_list_format_size_bytes =
956                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
957         adev->gfx.rlc.reg_list_size_bytes =
958                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
959
960         adev->gfx.rlc.register_list_format =
961                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
962                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
963
964         if (!adev->gfx.rlc.register_list_format) {
965                 err = -ENOMEM;
966                 goto out;
967         }
968
969         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
970                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
971         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
972                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
973
974         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
975
976         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
977                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
978         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
979                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
980
981         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
982         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
983         if (err)
984                 goto out;
985         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
986         if (err)
987                 goto out;
988         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
989         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
990         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
991
992         if ((adev->asic_type != CHIP_STONEY) &&
993             (adev->asic_type != CHIP_TOPAZ)) {
994                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
995                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
996                 if (!err) {
997                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
998                         if (err)
999                                 goto out;
1000                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1001                                 adev->gfx.mec2_fw->data;
1002                         adev->gfx.mec2_fw_version =
1003                                 le32_to_cpu(cp_hdr->header.ucode_version);
1004                         adev->gfx.mec2_feature_version =
1005                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1006                 } else {
1007                         err = 0;
1008                         adev->gfx.mec2_fw = NULL;
1009                 }
1010         }
1011
1012         if (adev->firmware.smu_load) {
1013                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1014                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1015                 info->fw = adev->gfx.pfp_fw;
1016                 header = (const struct common_firmware_header *)info->fw->data;
1017                 adev->firmware.fw_size +=
1018                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1019
1020                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1021                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1022                 info->fw = adev->gfx.me_fw;
1023                 header = (const struct common_firmware_header *)info->fw->data;
1024                 adev->firmware.fw_size +=
1025                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1026
1027                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1028                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1029                 info->fw = adev->gfx.ce_fw;
1030                 header = (const struct common_firmware_header *)info->fw->data;
1031                 adev->firmware.fw_size +=
1032                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1033
1034                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1035                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1036                 info->fw = adev->gfx.rlc_fw;
1037                 header = (const struct common_firmware_header *)info->fw->data;
1038                 adev->firmware.fw_size +=
1039                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1040
1041                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1042                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1043                 info->fw = adev->gfx.mec_fw;
1044                 header = (const struct common_firmware_header *)info->fw->data;
1045                 adev->firmware.fw_size +=
1046                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1047
1048                 if (adev->gfx.mec2_fw) {
1049                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1050                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1051                         info->fw = adev->gfx.mec2_fw;
1052                         header = (const struct common_firmware_header *)info->fw->data;
1053                         adev->firmware.fw_size +=
1054                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1055                 }
1056
1057         }
1058
1059 out:
1060         if (err) {
1061                 dev_err(adev->dev,
1062                         "gfx8: Failed to load firmware \"%s\"\n",
1063                         fw_name);
1064                 release_firmware(adev->gfx.pfp_fw);
1065                 adev->gfx.pfp_fw = NULL;
1066                 release_firmware(adev->gfx.me_fw);
1067                 adev->gfx.me_fw = NULL;
1068                 release_firmware(adev->gfx.ce_fw);
1069                 adev->gfx.ce_fw = NULL;
1070                 release_firmware(adev->gfx.rlc_fw);
1071                 adev->gfx.rlc_fw = NULL;
1072                 release_firmware(adev->gfx.mec_fw);
1073                 adev->gfx.mec_fw = NULL;
1074                 release_firmware(adev->gfx.mec2_fw);
1075                 adev->gfx.mec2_fw = NULL;
1076         }
1077         return err;
1078 }
1079
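/*
 * gfx_v8_0_get_csb_buffer - build the clear state indirect buffer
 *
 * Emits the PREAMBLE/CONTEXT_CONTROL packets, copies the SECT_CONTEXT
 * extents from adev->gfx.rlc.cs_data, programs the per-ASIC
 * PA_SC_RASTER_CONFIG pair and terminates with a CLEAR_STATE packet.
 */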
1080 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1081                                     volatile u32 *buffer)
1082 {
1083         u32 count = 0, i;
1084         const struct cs_section_def *sect = NULL;
1085         const struct cs_extent_def *ext = NULL;
1086
1087         if (adev->gfx.rlc.cs_data == NULL)
1088                 return;
1089         if (buffer == NULL)
1090                 return;
1091
1092         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1093         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1094
1095         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1096         buffer[count++] = cpu_to_le32(0x80000000);
1097         buffer[count++] = cpu_to_le32(0x80000000);
1098
1099         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1100                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1101                         if (sect->id == SECT_CONTEXT) {
1102                                 buffer[count++] =
1103                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1104                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1105                                                 PACKET3_SET_CONTEXT_REG_START);
1106                                 for (i = 0; i < ext->reg_count; i++)
1107                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1108                         } else {
1109                                 return;
1110                         }
1111                 }
1112         }
1113
1114         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1115         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1116                         PACKET3_SET_CONTEXT_REG_START);
1117         switch (adev->asic_type) {
1118         case CHIP_TONGA:
1119         case CHIP_POLARIS10:
1120                 buffer[count++] = cpu_to_le32(0x16000012);
1121                 buffer[count++] = cpu_to_le32(0x0000002A);
1122                 break;
1123         case CHIP_POLARIS11:
1124                 buffer[count++] = cpu_to_le32(0x16000012);
1125                 buffer[count++] = cpu_to_le32(0x00000000);
1126                 break;
1127         case CHIP_FIJI:
1128                 buffer[count++] = cpu_to_le32(0x3a00161a);
1129                 buffer[count++] = cpu_to_le32(0x0000002e);
1130                 break;
1131         case CHIP_TOPAZ:
1132         case CHIP_CARRIZO:
1133                 buffer[count++] = cpu_to_le32(0x00000002);
1134                 buffer[count++] = cpu_to_le32(0x00000000);
1135                 break;
1136         case CHIP_STONEY:
1137                 buffer[count++] = cpu_to_le32(0x00000000);
1138                 buffer[count++] = cpu_to_le32(0x00000000);
1139                 break;
1140         default:
1141                 buffer[count++] = cpu_to_le32(0x00000000);
1142                 buffer[count++] = cpu_to_le32(0x00000000);
1143                 break;
1144         }
1145
1146         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1147         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1148
1149         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1150         buffer[count++] = cpu_to_le32(0);
1151 }
1152
1153 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1154 {
1155         const __le32 *fw_data;
1156         volatile u32 *dst_ptr;
1157         int me, i, max_me = 4;
1158         u32 bo_offset = 0;
1159         u32 table_offset, table_size;
1160
1161         if (adev->asic_type == CHIP_CARRIZO)
1162                 max_me = 5;
1163
1164         /* write the cp table buffer */
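             /*
              * me indices 0..4 select the CE, PFP, ME, MEC and (Carrizo only)
              * MEC2 firmware images; each contributes its jump table region.
              */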
1165         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1166         for (me = 0; me < max_me; me++) {
1167                 if (me == 0) {
1168                         const struct gfx_firmware_header_v1_0 *hdr =
1169                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1170                         fw_data = (const __le32 *)
1171                                 (adev->gfx.ce_fw->data +
1172                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1173                         table_offset = le32_to_cpu(hdr->jt_offset);
1174                         table_size = le32_to_cpu(hdr->jt_size);
1175                 } else if (me == 1) {
1176                         const struct gfx_firmware_header_v1_0 *hdr =
1177                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1178                         fw_data = (const __le32 *)
1179                                 (adev->gfx.pfp_fw->data +
1180                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1181                         table_offset = le32_to_cpu(hdr->jt_offset);
1182                         table_size = le32_to_cpu(hdr->jt_size);
1183                 } else if (me == 2) {
1184                         const struct gfx_firmware_header_v1_0 *hdr =
1185                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1186                         fw_data = (const __le32 *)
1187                                 (adev->gfx.me_fw->data +
1188                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1189                         table_offset = le32_to_cpu(hdr->jt_offset);
1190                         table_size = le32_to_cpu(hdr->jt_size);
1191                 } else if (me == 3) {
1192                         const struct gfx_firmware_header_v1_0 *hdr =
1193                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1194                         fw_data = (const __le32 *)
1195                                 (adev->gfx.mec_fw->data +
1196                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1197                         table_offset = le32_to_cpu(hdr->jt_offset);
1198                         table_size = le32_to_cpu(hdr->jt_size);
1199                 } else if (me == 4) {
1200                         const struct gfx_firmware_header_v1_0 *hdr =
1201                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1202                         fw_data = (const __le32 *)
1203                                 (adev->gfx.mec2_fw->data +
1204                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1205                         table_offset = le32_to_cpu(hdr->jt_offset);
1206                         table_size = le32_to_cpu(hdr->jt_size);
1207                 }
1208
1209                 for (i = 0; i < table_size; i++) {
1210                         dst_ptr[bo_offset + i] =
1211                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1212                 }
1213
1214                 bo_offset += table_size;
1215         }
1216 }
1217
1218 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1219 {
1220         int r;
1221
1222         /* clear state block */
1223         if (adev->gfx.rlc.clear_state_obj) {
1224                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1225                 if (unlikely(r != 0))
1226                         dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1227                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1228                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1229
1230                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1231                 adev->gfx.rlc.clear_state_obj = NULL;
1232         }
1233
1234         /* jump table block */
1235         if (adev->gfx.rlc.cp_table_obj) {
1236                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1237                 if (unlikely(r != 0))
1238                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1239                 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1240                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1241
1242                 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1243                 adev->gfx.rlc.cp_table_obj = NULL;
1244         }
1245 }
1246
1247 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1248 {
1249         volatile u32 *dst_ptr;
1250         u32 dws;
1251         const struct cs_section_def *cs_data;
1252         int r;
1253
1254         adev->gfx.rlc.cs_data = vi_cs_data;
1255
1256         cs_data = adev->gfx.rlc.cs_data;
1257
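             /*
              * Allocate, pin and CPU-map the clear state BO in VRAM and fill it
              * via gfx_v8_0_get_csb_buffer(); Carrizo and Stoney additionally
              * get a CP jump table BO, filled by cz_init_cp_jump_table().
              */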
1258         if (cs_data) {
1259                 /* clear state block */
1260                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1261
1262                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1263                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1264                                              AMDGPU_GEM_DOMAIN_VRAM,
1265                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1266                                              NULL, NULL,
1267                                              &adev->gfx.rlc.clear_state_obj);
1268                         if (r) {
1269                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1270                                 gfx_v8_0_rlc_fini(adev);
1271                                 return r;
1272                         }
1273                 }
1274                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1275                 if (unlikely(r != 0)) {
1276                         gfx_v8_0_rlc_fini(adev);
1277                         return r;
1278                 }
1279                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1280                                   &adev->gfx.rlc.clear_state_gpu_addr);
1281                 if (r) {
1282                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1283                         dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
1284                         gfx_v8_0_rlc_fini(adev);
1285                         return r;
1286                 }
1287
1288                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1289                 if (r) {
1290                         dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
1291                         gfx_v8_0_rlc_fini(adev);
1292                         return r;
1293                 }
1294                 /* set up the cs buffer */
1295                 dst_ptr = adev->gfx.rlc.cs_ptr;
1296                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1297                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1298                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1299         }
1300
1301         if ((adev->asic_type == CHIP_CARRIZO) ||
1302             (adev->asic_type == CHIP_STONEY)) {
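                     /*
                      * Presumably 96 dwords of jump table per ME (up to 5 MEs),
                      * 2KB aligned, plus 64KB for GDS, per the comment below.
                      */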
1303                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1304                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1305                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1306                                              AMDGPU_GEM_DOMAIN_VRAM,
1307                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1308                                              NULL, NULL,
1309                                              &adev->gfx.rlc.cp_table_obj);
1310                         if (r) {
1311                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1312                                 return r;
1313                         }
1314                 }
1315
1316                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1317                 if (unlikely(r != 0)) {
1318                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1319                         return r;
1320                 }
1321                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1322                                   &adev->gfx.rlc.cp_table_gpu_addr);
1323                 if (r) {
1324                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1325                         dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r);
1326                         return r;
1327                 }
1328                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1329                 if (r) {
1330                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1331                         return r;
1332                 }
1333
1334                 cz_init_cp_jump_table(adev);
1335
1336                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1337                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1338
1339         }
1340
1341         return 0;
1342 }
1343
1344 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1345 {
1346         int r;
1347
1348         if (adev->gfx.mec.hpd_eop_obj) {
1349                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1350                 if (unlikely(r != 0))
1351                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1352                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1353                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1354
1355                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1356                 adev->gfx.mec.hpd_eop_obj = NULL;
1357         }
1358 }
1359
1360 #define MEC_HPD_SIZE 2048
1361
1362 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1363 {
1364         int r;
1365         u32 *hpd;
1366
1367         /*
1368          * we assign only 1 pipe because all other pipes will
1369          * be handled by KFD
1370          */
1371         adev->gfx.mec.num_mec = 1;
1372         adev->gfx.mec.num_pipe = 1;
1373         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1374
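             /*
              * The HPD EOP buffer is sized for num_mec * num_pipe pipes with two
              * MEC_HPD_SIZE regions each, and is zeroed after mapping below.
              */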
1375         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1376                 r = amdgpu_bo_create(adev,
1377                                      adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1378                                      PAGE_SIZE, true,
1379                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1380                                      &adev->gfx.mec.hpd_eop_obj);
1381                 if (r) {
1382                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1383                         return r;
1384                 }
1385         }
1386
1387         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1388         if (unlikely(r != 0)) {
1389                 gfx_v8_0_mec_fini(adev);
1390                 return r;
1391         }
1392         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1393                           &adev->gfx.mec.hpd_eop_gpu_addr);
1394         if (r) {
1395                 dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
1396                 gfx_v8_0_mec_fini(adev);
1397                 return r;
1398         }
1399         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1400         if (r) {
1401                 dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
1402                 gfx_v8_0_mec_fini(adev);
1403                 return r;
1404         }
1405
1406         memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1407
1408         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1409         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1410
1411         return 0;
1412 }
1413
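     /*
      * Hand-encoded GCN compute shaders (raw machine words) used by the EDC
      * GPR workaround below to initialize the VGPR and SGPR register files.
      */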
1414 static const u32 vgpr_init_compute_shader[] =
1415 {
1416         0x7e000209, 0x7e020208,
1417         0x7e040207, 0x7e060206,
1418         0x7e080205, 0x7e0a0204,
1419         0x7e0c0203, 0x7e0e0202,
1420         0x7e100201, 0x7e120200,
1421         0x7e140209, 0x7e160208,
1422         0x7e180207, 0x7e1a0206,
1423         0x7e1c0205, 0x7e1e0204,
1424         0x7e200203, 0x7e220202,
1425         0x7e240201, 0x7e260200,
1426         0x7e280209, 0x7e2a0208,
1427         0x7e2c0207, 0x7e2e0206,
1428         0x7e300205, 0x7e320204,
1429         0x7e340203, 0x7e360202,
1430         0x7e380201, 0x7e3a0200,
1431         0x7e3c0209, 0x7e3e0208,
1432         0x7e400207, 0x7e420206,
1433         0x7e440205, 0x7e460204,
1434         0x7e480203, 0x7e4a0202,
1435         0x7e4c0201, 0x7e4e0200,
1436         0x7e500209, 0x7e520208,
1437         0x7e540207, 0x7e560206,
1438         0x7e580205, 0x7e5a0204,
1439         0x7e5c0203, 0x7e5e0202,
1440         0x7e600201, 0x7e620200,
1441         0x7e640209, 0x7e660208,
1442         0x7e680207, 0x7e6a0206,
1443         0x7e6c0205, 0x7e6e0204,
1444         0x7e700203, 0x7e720202,
1445         0x7e740201, 0x7e760200,
1446         0x7e780209, 0x7e7a0208,
1447         0x7e7c0207, 0x7e7e0206,
1448         0xbf8a0000, 0xbf810000,
1449 };
1450
1451 static const u32 sgpr_init_compute_shader[] =
1452 {
1453         0xbe8a0100, 0xbe8c0102,
1454         0xbe8e0104, 0xbe900106,
1455         0xbe920108, 0xbe940100,
1456         0xbe960102, 0xbe980104,
1457         0xbe9a0106, 0xbe9c0108,
1458         0xbe9e0100, 0xbea00102,
1459         0xbea20104, 0xbea40106,
1460         0xbea60108, 0xbea80100,
1461         0xbeaa0102, 0xbeac0104,
1462         0xbeae0106, 0xbeb00108,
1463         0xbeb20100, 0xbeb40102,
1464         0xbeb60104, 0xbeb80106,
1465         0xbeba0108, 0xbebc0100,
1466         0xbebe0102, 0xbec00104,
1467         0xbec20106, 0xbec40108,
1468         0xbec60100, 0xbec80102,
1469         0xbee60004, 0xbee70005,
1470         0xbeea0006, 0xbeeb0007,
1471         0xbee80008, 0xbee90009,
1472         0xbefc0000, 0xbf8a0000,
1473         0xbf810000, 0x00000000,
1474 };
1475
1476 static const u32 vgpr_init_regs[] =
1477 {
1478         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1479         mmCOMPUTE_RESOURCE_LIMITS, 0,
1480         mmCOMPUTE_NUM_THREAD_X, 256*4,
1481         mmCOMPUTE_NUM_THREAD_Y, 1,
1482         mmCOMPUTE_NUM_THREAD_Z, 1,
1483         mmCOMPUTE_PGM_RSRC2, 20,
1484         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1485         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1486         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1487         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1488         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1489         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1490         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1491         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1492         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1493         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1494 };
1495
1496 static const u32 sgpr1_init_regs[] =
1497 {
1498         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1499         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1500         mmCOMPUTE_NUM_THREAD_X, 256*5,
1501         mmCOMPUTE_NUM_THREAD_Y, 1,
1502         mmCOMPUTE_NUM_THREAD_Z, 1,
1503         mmCOMPUTE_PGM_RSRC2, 20,
1504         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1505         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1506         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1507         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1508         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1509         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1510         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1511         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1512         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1513         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1514 };
1515
1516 static const u32 sgpr2_init_regs[] =
1517 {
1518         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1519         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1520         mmCOMPUTE_NUM_THREAD_X, 256*5,
1521         mmCOMPUTE_NUM_THREAD_Y, 1,
1522         mmCOMPUTE_NUM_THREAD_Z, 1,
1523         mmCOMPUTE_PGM_RSRC2, 20,
1524         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1525         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1526         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1527         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1528         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1529         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1530         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1531         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1532         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1533         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1534 };
1535
1536 static const u32 sec_ded_counter_registers[] =
1537 {
1538         mmCPC_EDC_ATC_CNT,
1539         mmCPC_EDC_SCRATCH_CNT,
1540         mmCPC_EDC_UCODE_CNT,
1541         mmCPF_EDC_ATC_CNT,
1542         mmCPF_EDC_ROQ_CNT,
1543         mmCPF_EDC_TAG_CNT,
1544         mmCPG_EDC_ATC_CNT,
1545         mmCPG_EDC_DMA_CNT,
1546         mmCPG_EDC_TAG_CNT,
1547         mmDC_EDC_CSINVOC_CNT,
1548         mmDC_EDC_RESTORE_CNT,
1549         mmDC_EDC_STATE_CNT,
1550         mmGDS_EDC_CNT,
1551         mmGDS_EDC_GRBM_CNT,
1552         mmGDS_EDC_OA_DED,
1553         mmSPI_EDC_CNT,
1554         mmSQC_ATC_EDC_GATCL1_CNT,
1555         mmSQC_EDC_CNT,
1556         mmSQ_EDC_DED_CNT,
1557         mmSQ_EDC_INFO,
1558         mmSQ_EDC_SEC_CNT,
1559         mmTCC_EDC_CNT,
1560         mmTCP_ATC_EDC_GATCL1_CNT,
1561         mmTCP_EDC_CNT,
1562         mmTD_EDC_CNT
1563 };
1564
1565 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1566 {
1567         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1568         struct amdgpu_ib ib;
1569         struct fence *f = NULL;
1570         int r, i;
1571         u32 tmp;
1572         unsigned total_size, vgpr_offset, sgpr_offset;
1573         u64 gpu_addr;
1574
1575         /* only supported on CZ */
1576         if (adev->asic_type != CHIP_CARRIZO)
1577                 return 0;
1578
1579         /* bail if the compute ring is not ready */
1580         if (!ring->ready)
1581                 return 0;
1582
1583         tmp = RREG32(mmGB_EDC_MODE);
1584         WREG32(mmGB_EDC_MODE, 0);
1585
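             /*
              * IB size: each reg/value pair in an init table becomes a 3-dword
              * SET_SH_REG packet; add 4 dwords for COMPUTE_PGM_LO/HI, 5 for the
              * DISPATCH_DIRECT packet and 2 for the EVENT_WRITE flush, then
              * align the packet area to 256 bytes before appending the shaders.
              */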
1586         total_size =
1587                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1588         total_size +=
1589                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1590         total_size +=
1591                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1592         total_size = ALIGN(total_size, 256);
1593         vgpr_offset = total_size;
1594         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1595         sgpr_offset = total_size;
1596         total_size += sizeof(sgpr_init_compute_shader);
1597
1598         /* allocate an indirect buffer to put the commands in */
1599         memset(&ib, 0, sizeof(ib));
1600         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1601         if (r) {
1602                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1603                 return r;
1604         }
1605
1606         /* load the compute shaders */
1607         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1608                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1609
1610         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1611                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1612
1613         /* init the ib length to 0 */
1614         ib.length_dw = 0;
1615
1616         /* VGPR */
1617         /* write the register state for the compute dispatch */
1618         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1619                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1620                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1621                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1622         }
1623         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1624         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1625         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1626         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1627         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1628         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1629
1630         /* write dispatch packet */
1631         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1632         ib.ptr[ib.length_dw++] = 8; /* x */
1633         ib.ptr[ib.length_dw++] = 1; /* y */
1634         ib.ptr[ib.length_dw++] = 1; /* z */
1635         ib.ptr[ib.length_dw++] =
1636                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1637
1638         /* write CS partial flush packet */
1639         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1640         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1641
1642         /* SGPR1 */
1643         /* write the register state for the compute dispatch */
1644         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1645                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1646                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1647                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1648         }
1649         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1650         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1651         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1652         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1653         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1654         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1655
1656         /* write dispatch packet */
1657         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1658         ib.ptr[ib.length_dw++] = 8; /* x */
1659         ib.ptr[ib.length_dw++] = 1; /* y */
1660         ib.ptr[ib.length_dw++] = 1; /* z */
1661         ib.ptr[ib.length_dw++] =
1662                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1663
1664         /* write CS partial flush packet */
1665         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1666         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1667
1668         /* SGPR2 */
1669         /* write the register state for the compute dispatch */
1670         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1671                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1672                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1673                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1674         }
1675         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1676         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1677         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1678         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1679         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1680         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1681
1682         /* write dispatch packet */
1683         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1684         ib.ptr[ib.length_dw++] = 8; /* x */
1685         ib.ptr[ib.length_dw++] = 1; /* y */
1686         ib.ptr[ib.length_dw++] = 1; /* z */
1687         ib.ptr[ib.length_dw++] =
1688                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1689
1690         /* write CS partial flush packet */
1691         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1692         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1693
1694         /* schedule the ib on the ring */
1695         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1696         if (r) {
1697                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1698                 goto fail;
1699         }
1700
1701         /* wait for the GPU to finish processing the IB */
1702         r = fence_wait(f, false);
1703         if (r) {
1704                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1705                 goto fail;
1706         }
1707
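             /*
              * Turn error detection back on: set DED_MODE and PROP_FED in
              * GB_EDC_MODE and clear DIS_EDC in CC_GC_EDC_CONFIG.
              */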
1708         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1709         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1710         WREG32(mmGB_EDC_MODE, tmp);
1711
1712         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1713         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1714         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1715
1716
1717         /* read back registers to clear the counters */
1718         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1719                 RREG32(sec_ded_counter_registers[i]);
1720
1721 fail:
1722         amdgpu_ib_free(adev, &ib, NULL);
1723         fence_put(f);
1725
1726         return r;
1727 }
1728
1729 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1730 {
1731         u32 gb_addr_config;
1732         u32 mc_shared_chmap, mc_arb_ramcfg;
1733         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1734         u32 tmp;
1735         int ret;
1736
1737         switch (adev->asic_type) {
1738         case CHIP_TOPAZ:
1739                 adev->gfx.config.max_shader_engines = 1;
1740                 adev->gfx.config.max_tile_pipes = 2;
1741                 adev->gfx.config.max_cu_per_sh = 6;
1742                 adev->gfx.config.max_sh_per_se = 1;
1743                 adev->gfx.config.max_backends_per_se = 2;
1744                 adev->gfx.config.max_texture_channel_caches = 2;
1745                 adev->gfx.config.max_gprs = 256;
1746                 adev->gfx.config.max_gs_threads = 32;
1747                 adev->gfx.config.max_hw_contexts = 8;
1748
1749                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1750                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1751                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1752                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1753                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1754                 break;
1755         case CHIP_FIJI:
1756                 adev->gfx.config.max_shader_engines = 4;
1757                 adev->gfx.config.max_tile_pipes = 16;
1758                 adev->gfx.config.max_cu_per_sh = 16;
1759                 adev->gfx.config.max_sh_per_se = 1;
1760                 adev->gfx.config.max_backends_per_se = 4;
1761                 adev->gfx.config.max_texture_channel_caches = 16;
1762                 adev->gfx.config.max_gprs = 256;
1763                 adev->gfx.config.max_gs_threads = 32;
1764                 adev->gfx.config.max_hw_contexts = 8;
1765
1766                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1767                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1768                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1769                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1770                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1771                 break;
1772         case CHIP_POLARIS11:
1773                 ret = amdgpu_atombios_get_gfx_info(adev);
1774                 if (ret)
1775                         return ret;
1776                 adev->gfx.config.max_gprs = 256;
1777                 adev->gfx.config.max_gs_threads = 32;
1778                 adev->gfx.config.max_hw_contexts = 8;
1779
1780                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1781                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1782                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1783                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1784                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1785                 break;
1786         case CHIP_POLARIS10:
1787                 ret = amdgpu_atombios_get_gfx_info(adev);
1788                 if (ret)
1789                         return ret;
1790                 adev->gfx.config.max_gprs = 256;
1791                 adev->gfx.config.max_gs_threads = 32;
1792                 adev->gfx.config.max_hw_contexts = 8;
1793
1794                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1795                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1796                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1797                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1798                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1799                 break;
1800         case CHIP_TONGA:
1801                 adev->gfx.config.max_shader_engines = 4;
1802                 adev->gfx.config.max_tile_pipes = 8;
1803                 adev->gfx.config.max_cu_per_sh = 8;
1804                 adev->gfx.config.max_sh_per_se = 1;
1805                 adev->gfx.config.max_backends_per_se = 2;
1806                 adev->gfx.config.max_texture_channel_caches = 8;
1807                 adev->gfx.config.max_gprs = 256;
1808                 adev->gfx.config.max_gs_threads = 32;
1809                 adev->gfx.config.max_hw_contexts = 8;
1810
1811                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1812                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1813                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1814                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1815                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1816                 break;
1817         case CHIP_CARRIZO:
1818                 adev->gfx.config.max_shader_engines = 1;
1819                 adev->gfx.config.max_tile_pipes = 2;
1820                 adev->gfx.config.max_sh_per_se = 1;
1821                 adev->gfx.config.max_backends_per_se = 2;
1822
1823                 switch (adev->pdev->revision) {
1824                 case 0xc4:
1825                 case 0x84:
1826                 case 0xc8:
1827                 case 0xcc:
1828                 case 0xe1:
1829                 case 0xe3:
1830                         /* B10 */
1831                         adev->gfx.config.max_cu_per_sh = 8;
1832                         break;
1833                 case 0xc5:
1834                 case 0x81:
1835                 case 0x85:
1836                 case 0xc9:
1837                 case 0xcd:
1838                 case 0xe2:
1839                 case 0xe4:
1840                         /* B8 */
1841                         adev->gfx.config.max_cu_per_sh = 6;
1842                         break;
1843                 case 0xc6:
1844                 case 0xca:
1845                 case 0xce:
1846                 case 0x88:
1847                         /* B6 */
1848                         adev->gfx.config.max_cu_per_sh = 6;
1849                         break;
1850                 case 0xc7:
1851                 case 0x87:
1852                 case 0xcb:
1853                 case 0xe5:
1854                 case 0x89:
1855                 default:
1856                         /* B4 */
1857                         adev->gfx.config.max_cu_per_sh = 4;
1858                         break;
1859                 }
1860
1861                 adev->gfx.config.max_texture_channel_caches = 2;
1862                 adev->gfx.config.max_gprs = 256;
1863                 adev->gfx.config.max_gs_threads = 32;
1864                 adev->gfx.config.max_hw_contexts = 8;
1865
1866                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1867                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1868                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1869                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1870                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1871                 break;
1872         case CHIP_STONEY:
1873                 adev->gfx.config.max_shader_engines = 1;
1874                 adev->gfx.config.max_tile_pipes = 2;
1875                 adev->gfx.config.max_sh_per_se = 1;
1876                 adev->gfx.config.max_backends_per_se = 1;
1877
1878                 switch (adev->pdev->revision) {
1879                 case 0xc0:
1880                 case 0xc1:
1881                 case 0xc2:
1882                 case 0xc4:
1883                 case 0xc8:
1884                 case 0xc9:
1885                         adev->gfx.config.max_cu_per_sh = 3;
1886                         break;
1887                 case 0xd0:
1888                 case 0xd1:
1889                 case 0xd2:
1890                 default:
1891                         adev->gfx.config.max_cu_per_sh = 2;
1892                         break;
1893                 }
1894
1895                 adev->gfx.config.max_texture_channel_caches = 2;
1896                 adev->gfx.config.max_gprs = 256;
1897                 adev->gfx.config.max_gs_threads = 16;
1898                 adev->gfx.config.max_hw_contexts = 8;
1899
1900                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1901                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1902                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1903                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1904                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1905                 break;
1906         default:
1907                 adev->gfx.config.max_shader_engines = 2;
1908                 adev->gfx.config.max_tile_pipes = 4;
1909                 adev->gfx.config.max_cu_per_sh = 2;
1910                 adev->gfx.config.max_sh_per_se = 1;
1911                 adev->gfx.config.max_backends_per_se = 2;
1912                 adev->gfx.config.max_texture_channel_caches = 4;
1913                 adev->gfx.config.max_gprs = 256;
1914                 adev->gfx.config.max_gs_threads = 32;
1915                 adev->gfx.config.max_hw_contexts = 8;
1916
1917                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1918                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1919                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1920                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1921                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1922                 break;
1923         }
1924
1925         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1926         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1927         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1928
1929         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1930         adev->gfx.config.mem_max_burst_length_bytes = 256;
1931         if (adev->flags & AMD_IS_APU) {
1932                 /* Get memory bank mapping mode. */
1933                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1934                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1935                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1936
1937                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1938                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1939                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1940
1941                 /* Validate settings in case only one DIMM is installed. */
1942                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1943                         dimm00_addr_map = 0;
1944                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1945                         dimm01_addr_map = 0;
1946                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1947                         dimm10_addr_map = 0;
1948                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1949                         dimm11_addr_map = 0;
1950
1951                 /* If the DIMM addr map is 8GB, the row size should be 2KB, otherwise 1KB. */
1952                 /* If ROW size(DIMM1) != ROW size(DIMM0), use the larger row size. */
1953                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1954                         adev->gfx.config.mem_row_size_in_kb = 2;
1955                 else
1956                         adev->gfx.config.mem_row_size_in_kb = 1;
1957         } else {
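                     /*
                      * Row size in KB is derived from the MC_ARB_RAMCFG NOOFCOLS
                      * field as (4 * (1 << (8 + NOOFCOLS))) / 1024, capped at 4.
                      */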
1958                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1959                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1960                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1961                         adev->gfx.config.mem_row_size_in_kb = 4;
1962         }
1963
1964         adev->gfx.config.shader_engine_tile_size = 32;
1965         adev->gfx.config.num_gpus = 1;
1966         adev->gfx.config.multi_gpu_tile_size = 64;
1967
1968         /* fix up row size */
1969         switch (adev->gfx.config.mem_row_size_in_kb) {
1970         case 1:
1971         default:
1972                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1973                 break;
1974         case 2:
1975                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1976                 break;
1977         case 4:
1978                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1979                 break;
1980         }
1981         adev->gfx.config.gb_addr_config = gb_addr_config;
1982
1983         return 0;
1984 }
1985
1986 static int gfx_v8_0_sw_init(void *handle)
1987 {
1988         int i, r;
1989         struct amdgpu_ring *ring;
1990         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1991
1992         /* EOP Event */
1993         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1994         if (r)
1995                 return r;
1996
1997         /* Privileged reg */
1998         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1999         if (r)
2000                 return r;
2001
2002         /* Privileged inst */
2003         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2004         if (r)
2005                 return r;
2006
2007         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2008
2009         gfx_v8_0_scratch_init(adev);
2010
2011         r = gfx_v8_0_init_microcode(adev);
2012         if (r) {
2013                 DRM_ERROR("Failed to load gfx firmware!\n");
2014                 return r;
2015         }
2016
2017         r = gfx_v8_0_rlc_init(adev);
2018         if (r) {
2019                 DRM_ERROR("Failed to init rlc BOs!\n");
2020                 return r;
2021         }
2022
2023         r = gfx_v8_0_mec_init(adev);
2024         if (r) {
2025                 DRM_ERROR("Failed to init MEC BOs!\n");
2026                 return r;
2027         }
2028
2029         /* set up the gfx ring */
2030         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2031                 ring = &adev->gfx.gfx_ring[i];
2032                 ring->ring_obj = NULL;
2033                 sprintf(ring->name, "gfx");
2034                 /* no gfx doorbells on iceland */
2035                 if (adev->asic_type != CHIP_TOPAZ) {
2036                         ring->use_doorbell = true;
2037                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2038                 }
2039
2040                 r = amdgpu_ring_init(adev, ring, 1024,
2041                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2042                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
2043                                      AMDGPU_RING_TYPE_GFX);
2044                 if (r)
2045                         return r;
2046         }
2047
2048         /* set up the compute queues */
2049         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2050                 unsigned irq_type;
2051
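                     /*
                      * Ring i maps onto MEC1, pipe i / 8, queue i % 8; each pipe
                      * has its own EOP interrupt source.
                      */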
2052                 /* max 32 queues per MEC */
2053                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2054                         DRM_ERROR("Too many (%d) compute rings!\n", i);
2055                         break;
2056                 }
2057                 ring = &adev->gfx.compute_ring[i];
2058                 ring->ring_obj = NULL;
2059                 ring->use_doorbell = true;
2060                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2061                 ring->me = 1; /* first MEC */
2062                 ring->pipe = i / 8;
2063                 ring->queue = i % 8;
2064                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2065                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2066                 /* type-2 packets are deprecated on MEC, use type-3 instead */
2067                 r = amdgpu_ring_init(adev, ring, 1024,
2068                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2069                                      &adev->gfx.eop_irq, irq_type,
2070                                      AMDGPU_RING_TYPE_COMPUTE);
2071                 if (r)
2072                         return r;
2073         }
2074
2075         /* reserve GDS, GWS and OA resources for gfx */
2076         r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
2077                         PAGE_SIZE, true,
2078                         AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
2079                         NULL, &adev->gds.gds_gfx_bo);
2080         if (r)
2081                 return r;
2082
2083         r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
2084                 PAGE_SIZE, true,
2085                 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
2086                 NULL, &adev->gds.gws_gfx_bo);
2087         if (r)
2088                 return r;
2089
2090         r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
2091                         PAGE_SIZE, true,
2092                         AMDGPU_GEM_DOMAIN_OA, 0, NULL,
2093                         NULL, &adev->gds.oa_gfx_bo);
2094         if (r)
2095                 return r;
2096
2097         adev->gfx.ce_ram_size = 0x8000;
2098
2099         r = gfx_v8_0_gpu_early_init(adev);
2100         if (r)
2101                 return r;
2102
2103         return 0;
2104 }
2105
2106 static int gfx_v8_0_sw_fini(void *handle)
2107 {
2108         int i;
2109         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2110
2111         amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
2112         amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
2113         amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
2114
2115         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2116                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2117         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2118                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2119
2120         gfx_v8_0_mec_fini(adev);
2121
2122         gfx_v8_0_rlc_fini(adev);
2123
2124         gfx_v8_0_free_microcode(adev);
2125
2126         return 0;
2127 }
2128
2129 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2130 {
2131         uint32_t *modearray, *mod2array;
2132         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2133         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2134         u32 reg_offset;
2135
2136         modearray = adev->gfx.config.tile_mode_array;
2137         mod2array = adev->gfx.config.macrotile_mode_array;
2138
2139         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2140                 modearray[reg_offset] = 0;
2141
2142         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2143                 mod2array[reg_offset] = 0;
2144
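             /*
              * The per-ASIC blocks below fill tile_mode_array and
              * macrotile_mode_array and program the GB_TILE_MODE* and
              * GB_MACROTILE_MODE* registers, skipping a few indices
              * (e.g. 7, 12, 17 and 23 on Topaz) that are left at zero.
              */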
2145         switch (adev->asic_type) {
2146         case CHIP_TOPAZ:
2147                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2148                                 PIPE_CONFIG(ADDR_SURF_P2) |
2149                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2150                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2151                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2152                                 PIPE_CONFIG(ADDR_SURF_P2) |
2153                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2154                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2155                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2156                                 PIPE_CONFIG(ADDR_SURF_P2) |
2157                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2158                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2159                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2160                                 PIPE_CONFIG(ADDR_SURF_P2) |
2161                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2162                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2163                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2164                                 PIPE_CONFIG(ADDR_SURF_P2) |
2165                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2166                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2167                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2168                                 PIPE_CONFIG(ADDR_SURF_P2) |
2169                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2170                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2171                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2172                                 PIPE_CONFIG(ADDR_SURF_P2) |
2173                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2174                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2175                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2176                                 PIPE_CONFIG(ADDR_SURF_P2));
2177                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2178                                 PIPE_CONFIG(ADDR_SURF_P2) |
2179                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2180                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2181                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2182                                  PIPE_CONFIG(ADDR_SURF_P2) |
2183                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2184                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2185                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2186                                  PIPE_CONFIG(ADDR_SURF_P2) |
2187                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2188                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2189                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2190                                  PIPE_CONFIG(ADDR_SURF_P2) |
2191                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2192                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2193                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2194                                  PIPE_CONFIG(ADDR_SURF_P2) |
2195                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2196                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2197                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2198                                  PIPE_CONFIG(ADDR_SURF_P2) |
2199                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2200                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2201                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2202                                  PIPE_CONFIG(ADDR_SURF_P2) |
2203                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2204                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2205                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2206                                  PIPE_CONFIG(ADDR_SURF_P2) |
2207                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2208                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2209                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2210                                  PIPE_CONFIG(ADDR_SURF_P2) |
2211                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2212                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2213                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2214                                  PIPE_CONFIG(ADDR_SURF_P2) |
2215                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2216                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2217                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2218                                  PIPE_CONFIG(ADDR_SURF_P2) |
2219                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2220                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2221                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2222                                  PIPE_CONFIG(ADDR_SURF_P2) |
2223                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2224                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2225                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2226                                  PIPE_CONFIG(ADDR_SURF_P2) |
2227                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2228                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2229                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2230                                  PIPE_CONFIG(ADDR_SURF_P2) |
2231                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2232                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2233                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2234                                  PIPE_CONFIG(ADDR_SURF_P2) |
2235                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2236                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2237                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2238                                  PIPE_CONFIG(ADDR_SURF_P2) |
2239                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2240                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2241                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2242                                  PIPE_CONFIG(ADDR_SURF_P2) |
2243                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2244                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2245                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2246                                  PIPE_CONFIG(ADDR_SURF_P2) |
2247                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2248                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2249
2250                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2251                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2252                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2253                                 NUM_BANKS(ADDR_SURF_8_BANK));
2254                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2255                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2256                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2257                                 NUM_BANKS(ADDR_SURF_8_BANK));
2258                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2259                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2260                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2261                                 NUM_BANKS(ADDR_SURF_8_BANK));
2262                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2263                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2264                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2265                                 NUM_BANKS(ADDR_SURF_8_BANK));
2266                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2267                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2268                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2269                                 NUM_BANKS(ADDR_SURF_8_BANK));
2270                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2271                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2272                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2273                                 NUM_BANKS(ADDR_SURF_8_BANK));
2274                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2275                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2276                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2277                                 NUM_BANKS(ADDR_SURF_8_BANK));
2278                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2279                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2280                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2281                                 NUM_BANKS(ADDR_SURF_16_BANK));
2282                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2283                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2284                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2285                                 NUM_BANKS(ADDR_SURF_16_BANK));
2286                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2287                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2288                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2289                                  NUM_BANKS(ADDR_SURF_16_BANK));
2290                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2291                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2292                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2293                                  NUM_BANKS(ADDR_SURF_16_BANK));
2294                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2296                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2297                                  NUM_BANKS(ADDR_SURF_16_BANK));
2298                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2299                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2300                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2301                                  NUM_BANKS(ADDR_SURF_16_BANK));
2302                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2303                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2304                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2305                                  NUM_BANKS(ADDR_SURF_8_BANK));
2306
2307                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2308                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2309                             reg_offset != 23)
2310                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2311
2312                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2313                         if (reg_offset != 7)
2314                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2315
2316                 break;
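                /*
                 * Fiji: the tile modes below use the 16-pipe
                 * ADDR_SURF_P16_32x32_16x16 pipe configuration; only the
                 * PRT entries (7, 12, 17, 23 and 30) fall back to
                 * ADDR_SURF_P4_16x16.  Unlike the previous case, the write
                 * loop at the end programs every GB_TILE_MODEn register.
                 */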
2317         case CHIP_FIJI:
2318                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2319                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2321                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2322                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2325                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2326                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2329                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2330                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2333                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2334                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2335                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2337                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2338                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2339                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2340                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2341                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2342                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2343                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2344                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2345                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2346                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2347                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2348                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2349                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2350                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2351                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2352                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2353                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2355                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2356                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2357                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2360                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2361                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2364                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2365                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2368                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2369                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2372                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2373                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2376                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2377                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2379                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2380                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2381                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2383                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2384                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2385                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2386                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2387                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2388                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2389                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2391                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2392                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2393                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2395                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2396                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2397                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2399                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2400                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2401                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2403                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2404                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2405                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2407                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2408                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2409                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2410                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2411                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2412                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2413                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2415                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2416                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2417                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2419                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2420                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2421                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2423                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2424                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2425                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2427                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2428                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2431                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2433                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2435                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2436                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2438                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2439                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2440
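                /*
                 * Fiji macrotile (GB_MACROTILE_MODEn) settings: 8-bank
                 * configurations throughout, with entry 14 dropping to
                 * 4 banks.  Entry 7 is never assigned and the write loop
                 * below skips it.
                 */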
2441                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2443                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444                                 NUM_BANKS(ADDR_SURF_8_BANK));
2445                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448                                 NUM_BANKS(ADDR_SURF_8_BANK));
2449                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452                                 NUM_BANKS(ADDR_SURF_8_BANK));
2453                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2455                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456                                 NUM_BANKS(ADDR_SURF_8_BANK));
2457                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2459                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460                                 NUM_BANKS(ADDR_SURF_8_BANK));
2461                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464                                 NUM_BANKS(ADDR_SURF_8_BANK));
2465                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468                                 NUM_BANKS(ADDR_SURF_8_BANK));
2469                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2471                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2472                                 NUM_BANKS(ADDR_SURF_8_BANK));
2473                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2475                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476                                 NUM_BANKS(ADDR_SURF_8_BANK));
2477                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2479                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2480                                  NUM_BANKS(ADDR_SURF_8_BANK));
2481                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484                                  NUM_BANKS(ADDR_SURF_8_BANK));
2485                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2487                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2488                                  NUM_BANKS(ADDR_SURF_8_BANK));
2489                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2492                                  NUM_BANKS(ADDR_SURF_8_BANK));
2493                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496                                  NUM_BANKS(ADDR_SURF_4_BANK));
2497
2498                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2499                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2500
2501                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2502                         if (reg_offset != 7)
2503                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2504
2505                 break;
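                /*
                 * Tonga: same table layout as Fiji, but with the 8-pipe
                 * ADDR_SURF_P8_32x32_16x16 pipe configuration and mostly
                 * 16-bank macrotile settings (entries 12-14 taper down to
                 * 8 and 4 banks).
                 */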
2506         case CHIP_TONGA:
2507                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2508                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2510                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2511                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2513                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2514                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2515                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2517                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2518                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2519                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2520                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2522                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2524                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2525                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2526                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2527                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2528                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2529                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2530                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2531                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2532                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2533                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2534                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2535                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2536                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2537                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2538                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2539                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2540                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2541                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2542                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2544                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2545                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2548                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2549                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2550                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2552                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2553                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2554                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2555                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2556                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2557                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2560                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2561                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2562                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2564                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2565                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2566                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2568                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2569                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2570                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2572                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2573                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2574                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2575                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2576                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2577                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2578                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2580                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2581                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2582                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2584                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2585                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2586                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2588                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2589                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2590                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2592                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2593                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2594                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2596                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2597                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2598                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2599                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2600                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2601                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2602                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2604                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2605                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2606                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2608                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2609                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2610                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2612                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2613                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2614                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2616                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2617                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2620                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2621                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2622                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2623                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2624                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2625                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2626                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2627                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2628                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2629
2630                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2632                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2633                                 NUM_BANKS(ADDR_SURF_16_BANK));
2634                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2636                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2637                                 NUM_BANKS(ADDR_SURF_16_BANK));
2638                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2640                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2641                                 NUM_BANKS(ADDR_SURF_16_BANK));
2642                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2644                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2645                                 NUM_BANKS(ADDR_SURF_16_BANK));
2646                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2648                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2649                                 NUM_BANKS(ADDR_SURF_16_BANK));
2650                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2652                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2653                                 NUM_BANKS(ADDR_SURF_16_BANK));
2654                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2656                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2657                                 NUM_BANKS(ADDR_SURF_16_BANK));
2658                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2660                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2661                                 NUM_BANKS(ADDR_SURF_16_BANK));
2662                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2663                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2664                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2665                                 NUM_BANKS(ADDR_SURF_16_BANK));
2666                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2668                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2669                                  NUM_BANKS(ADDR_SURF_16_BANK));
2670                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2672                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2673                                  NUM_BANKS(ADDR_SURF_16_BANK));
2674                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2675                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2676                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2677                                  NUM_BANKS(ADDR_SURF_8_BANK));
2678                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2681                                  NUM_BANKS(ADDR_SURF_4_BANK));
2682                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2683                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2684                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2685                                  NUM_BANKS(ADDR_SURF_4_BANK));
2686
2687                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2688                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2689
2690                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2691                         if (reg_offset != 7)
2692                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2693
2694                 break;
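                /*
                 * Polaris11: all tile modes use the 4-pipe
                 * ADDR_SURF_P4_16x16 pipe configuration; the macrotile
                 * entries are mostly 16-bank, with entries 13 and 14
                 * reduced to 8 and 4 banks.
                 */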
2695         case CHIP_POLARIS11:
2696                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2697                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2699                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2700                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2701                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2703                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2704                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2707                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2708                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2710                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2711                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2712                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2713                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2714                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2715                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2716                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2717                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2719                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2720                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2721                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2723                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2724                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2725                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2727                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2728                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2729                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2730                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2731                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2733                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2734                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2735                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2736                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2737                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2738                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2739                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2740                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2741                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2742                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2743                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2744                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2745                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2746                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2747                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2748                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2749                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2750                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2751                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2752                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2753                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2754                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2755                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2756                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2757                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2758                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2759                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2760                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2761                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2762                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2763                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2764                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2765                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2766                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2767                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2768                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2769                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2770                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2771                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2772                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2773                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2774                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2775                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2776                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2777                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2778                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2779                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2780                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2781                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2782                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2783                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2784                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2785                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2786                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2787                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2788                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2789                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2790                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2791                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2792                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2793                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2794                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2795                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2796                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2797                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2798                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2799                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2800                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2801                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2802                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2803                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2804                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2806                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2807                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2808                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2810                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2811                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2812                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2813                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2814                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2815                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2816                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2817                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2818
2819                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2820                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2821                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2822                                 NUM_BANKS(ADDR_SURF_16_BANK));
2823
2824                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2826                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2827                                 NUM_BANKS(ADDR_SURF_16_BANK));
2828
2829                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2831                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2832                                 NUM_BANKS(ADDR_SURF_16_BANK));
2833
2834                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2836                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2837                                 NUM_BANKS(ADDR_SURF_16_BANK));
2838
2839                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2840                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2841                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2842                                 NUM_BANKS(ADDR_SURF_16_BANK));
2843
2844                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847                                 NUM_BANKS(ADDR_SURF_16_BANK));
2848
2849                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2852                                 NUM_BANKS(ADDR_SURF_16_BANK));
2853
2854                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2855                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2856                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2857                                 NUM_BANKS(ADDR_SURF_16_BANK));
2858
2859                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2860                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2861                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2862                                 NUM_BANKS(ADDR_SURF_16_BANK));
2863
2864                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2865                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2866                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2867                                 NUM_BANKS(ADDR_SURF_16_BANK));
2868
2869                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2871                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2872                                 NUM_BANKS(ADDR_SURF_16_BANK));
2873
2874                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2875                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2876                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2877                                 NUM_BANKS(ADDR_SURF_16_BANK));
2878
2879                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2880                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2881                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2882                                 NUM_BANKS(ADDR_SURF_8_BANK));
2883
2884                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2885                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2886                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2887                                 NUM_BANKS(ADDR_SURF_4_BANK));
2888
2889                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2890                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2891
2892                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2893                         if (reg_offset != 7)
2894                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2895
2896                 break;
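                /*
                 * Polaris10: 8-pipe ADDR_SURF_P8_32x32_16x16 configuration
                 * like Tonga, with the PRT entries again falling back to
                 * ADDR_SURF_P4_16x16; macrotile entries 12-14 drop from
                 * 16 banks to 8 and 4.
                 */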
2897         case CHIP_POLARIS10:
2898                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2899                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2900                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2901                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2902                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2903                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2904                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2905                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2906                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2907                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2908                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2909                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2910                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2912                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2913                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2914                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2915                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2916                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2917                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2918                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2919                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2920                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2921                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2922                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2923                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2924                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2925                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2926                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2927                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2928                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2929                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2930                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2931                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2932                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2933                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2934                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2935                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2937                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2938                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2939                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2941                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2942                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2943                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2944                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2945                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2946                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2947                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2948                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2949                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2950                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2951                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2952                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2953                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2954                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2955                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2956                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2957                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2958                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2959                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2960                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2961                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2962                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2963                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2964                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2965                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2966                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2967                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2968                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2969                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2970                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2971                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2972                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2973                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2974                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2975                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2976                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2977                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2978                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2979                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2980                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2981                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2982                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2983                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2984                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2985                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2986                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2987                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2988                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2989                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2990                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2991                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2992                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2993                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2994                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2995                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2996                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2997                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2998                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2999                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3000                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3001                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3002                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3003                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3004                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3005                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3006                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3007                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3008                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3009                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3010                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3011                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3012                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3013                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3014                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3015                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3016                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3017                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3018                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3019                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3020
3021                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3023                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3024                                 NUM_BANKS(ADDR_SURF_16_BANK));
3025
3026                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3027                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3028                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3029                                 NUM_BANKS(ADDR_SURF_16_BANK));
3030
3031                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3032                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3033                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3034                                 NUM_BANKS(ADDR_SURF_16_BANK));
3035
3036                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3038                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3039                                 NUM_BANKS(ADDR_SURF_16_BANK));
3040
3041                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3042                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3043                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3044                                 NUM_BANKS(ADDR_SURF_16_BANK));
3045
3046                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3048                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3049                                 NUM_BANKS(ADDR_SURF_16_BANK));
3050
3051                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3052                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3053                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3054                                 NUM_BANKS(ADDR_SURF_16_BANK));
3055
3056                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3057                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3058                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3059                                 NUM_BANKS(ADDR_SURF_16_BANK));
3060
3061                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3062                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3063                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3064                                 NUM_BANKS(ADDR_SURF_16_BANK));
3065
3066                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3067                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3068                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3069                                 NUM_BANKS(ADDR_SURF_16_BANK));
3070
3071                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3072                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3073                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3074                                 NUM_BANKS(ADDR_SURF_16_BANK));
3075
3076                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3077                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3078                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3079                                 NUM_BANKS(ADDR_SURF_8_BANK));
3080
3081                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3083                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3084                                 NUM_BANKS(ADDR_SURF_4_BANK));
3085
3086                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3087                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3088                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3089                                 NUM_BANKS(ADDR_SURF_4_BANK));
3090
3091                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3092                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3093
3094                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3095                         if (reg_offset != 7)
3096                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3097
3098                 break;
3099         case CHIP_STONEY:
3100                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3101                                 PIPE_CONFIG(ADDR_SURF_P2) |
3102                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3103                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3104                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3105                                 PIPE_CONFIG(ADDR_SURF_P2) |
3106                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3107                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3108                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3109                                 PIPE_CONFIG(ADDR_SURF_P2) |
3110                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3111                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3112                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3113                                 PIPE_CONFIG(ADDR_SURF_P2) |
3114                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3115                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3116                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3117                                 PIPE_CONFIG(ADDR_SURF_P2) |
3118                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3119                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3120                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3121                                 PIPE_CONFIG(ADDR_SURF_P2) |
3122                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3123                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3124                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3125                                 PIPE_CONFIG(ADDR_SURF_P2) |
3126                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3127                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3128                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3129                                 PIPE_CONFIG(ADDR_SURF_P2));
3130                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3131                                 PIPE_CONFIG(ADDR_SURF_P2) |
3132                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3133                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3134                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3135                                  PIPE_CONFIG(ADDR_SURF_P2) |
3136                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3137                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3138                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3139                                  PIPE_CONFIG(ADDR_SURF_P2) |
3140                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3141                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3142                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3143                                  PIPE_CONFIG(ADDR_SURF_P2) |
3144                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3145                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3146                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3147                                  PIPE_CONFIG(ADDR_SURF_P2) |
3148                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3149                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3150                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3151                                  PIPE_CONFIG(ADDR_SURF_P2) |
3152                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3153                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3154                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3155                                  PIPE_CONFIG(ADDR_SURF_P2) |
3156                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3157                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3158                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3159                                  PIPE_CONFIG(ADDR_SURF_P2) |
3160                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3161                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3162                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3163                                  PIPE_CONFIG(ADDR_SURF_P2) |
3164                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3165                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3166                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3167                                  PIPE_CONFIG(ADDR_SURF_P2) |
3168                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3169                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3170                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3171                                  PIPE_CONFIG(ADDR_SURF_P2) |
3172                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3173                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3174                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3175                                  PIPE_CONFIG(ADDR_SURF_P2) |
3176                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3177                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3178                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3179                                  PIPE_CONFIG(ADDR_SURF_P2) |
3180                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3181                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3182                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3183                                  PIPE_CONFIG(ADDR_SURF_P2) |
3184                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3185                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3186                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3187                                  PIPE_CONFIG(ADDR_SURF_P2) |
3188                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3189                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3190                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3191                                  PIPE_CONFIG(ADDR_SURF_P2) |
3192                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3193                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3194                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3195                                  PIPE_CONFIG(ADDR_SURF_P2) |
3196                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3197                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3198                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3199                                  PIPE_CONFIG(ADDR_SURF_P2) |
3200                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3201                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3202
3203                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3204                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3205                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3206                                 NUM_BANKS(ADDR_SURF_8_BANK));
3207                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3208                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3209                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3210                                 NUM_BANKS(ADDR_SURF_8_BANK));
3211                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3212                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3213                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3214                                 NUM_BANKS(ADDR_SURF_8_BANK));
3215                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3216                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3217                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3218                                 NUM_BANKS(ADDR_SURF_8_BANK));
3219                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3220                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3221                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3222                                 NUM_BANKS(ADDR_SURF_8_BANK));
3223                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3224                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3225                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3226                                 NUM_BANKS(ADDR_SURF_8_BANK));
3227                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3228                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3229                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3230                                 NUM_BANKS(ADDR_SURF_8_BANK));
3231                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3232                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3233                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3234                                 NUM_BANKS(ADDR_SURF_16_BANK));
3235                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3236                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3237                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3238                                 NUM_BANKS(ADDR_SURF_16_BANK));
3239                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3240                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3241                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3242                                  NUM_BANKS(ADDR_SURF_16_BANK));
3243                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3244                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3245                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3246                                  NUM_BANKS(ADDR_SURF_16_BANK));
3247                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3248                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3249                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3250                                  NUM_BANKS(ADDR_SURF_16_BANK));
3251                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3252                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3253                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3254                                  NUM_BANKS(ADDR_SURF_16_BANK));
3255                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3256                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3257                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3258                                  NUM_BANKS(ADDR_SURF_8_BANK));
3259
3260                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3261                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3262                             reg_offset != 23)
3263                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3264
3265                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3266                         if (reg_offset != 7)
3267                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3268
3269                 break;
3270         default:
3271                 dev_warn(adev->dev,
3272                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3273                          adev->asic_type);
3274
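                /* fall through to the CHIP_CARRIZO defaults */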
3275         case CHIP_CARRIZO:
3276                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3277                                 PIPE_CONFIG(ADDR_SURF_P2) |
3278                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3279                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3280                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3281                                 PIPE_CONFIG(ADDR_SURF_P2) |
3282                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3283                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3284                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3285                                 PIPE_CONFIG(ADDR_SURF_P2) |
3286                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3287                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3288                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3289                                 PIPE_CONFIG(ADDR_SURF_P2) |
3290                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3291                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3292                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3293                                 PIPE_CONFIG(ADDR_SURF_P2) |
3294                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3295                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3296                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3297                                 PIPE_CONFIG(ADDR_SURF_P2) |
3298                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3299                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3300                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3301                                 PIPE_CONFIG(ADDR_SURF_P2) |
3302                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3303                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3304                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3305                                 PIPE_CONFIG(ADDR_SURF_P2));
3306                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3307                                 PIPE_CONFIG(ADDR_SURF_P2) |
3308                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3309                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3310                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3311                                  PIPE_CONFIG(ADDR_SURF_P2) |
3312                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3313                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3314                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3315                                  PIPE_CONFIG(ADDR_SURF_P2) |
3316                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3317                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3318                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3319                                  PIPE_CONFIG(ADDR_SURF_P2) |
3320                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3321                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3322                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3323                                  PIPE_CONFIG(ADDR_SURF_P2) |
3324                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3325                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3326                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3327                                  PIPE_CONFIG(ADDR_SURF_P2) |
3328                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3329                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3330                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3331                                  PIPE_CONFIG(ADDR_SURF_P2) |
3332                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3333                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3334                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3335                                  PIPE_CONFIG(ADDR_SURF_P2) |
3336                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3337                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3338                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3339                                  PIPE_CONFIG(ADDR_SURF_P2) |
3340                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3341                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3342                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3343                                  PIPE_CONFIG(ADDR_SURF_P2) |
3344                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3345                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3346                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3347                                  PIPE_CONFIG(ADDR_SURF_P2) |
3348                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3349                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3350                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3351                                  PIPE_CONFIG(ADDR_SURF_P2) |
3352                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3353                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3354                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3355                                  PIPE_CONFIG(ADDR_SURF_P2) |
3356                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3357                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3358                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3359                                  PIPE_CONFIG(ADDR_SURF_P2) |
3360                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3361                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3362                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3363                                  PIPE_CONFIG(ADDR_SURF_P2) |
3364                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3365                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3366                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3367                                  PIPE_CONFIG(ADDR_SURF_P2) |
3368                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3369                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3370                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3371                                  PIPE_CONFIG(ADDR_SURF_P2) |
3372                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3373                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3374                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3375                                  PIPE_CONFIG(ADDR_SURF_P2) |
3376                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3377                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3378
3379                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3380                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3381                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3382                                 NUM_BANKS(ADDR_SURF_8_BANK));
3383                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3384                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3385                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3386                                 NUM_BANKS(ADDR_SURF_8_BANK));
3387                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3388                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3389                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3390                                 NUM_BANKS(ADDR_SURF_8_BANK));
3391                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3392                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3393                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3394                                 NUM_BANKS(ADDR_SURF_8_BANK));
3395                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3396                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3397                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3398                                 NUM_BANKS(ADDR_SURF_8_BANK));
3399                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3400                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3401                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3402                                 NUM_BANKS(ADDR_SURF_8_BANK));
3403                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3404                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3405                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3406                                 NUM_BANKS(ADDR_SURF_8_BANK));
3407                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3408                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3409                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3410                                 NUM_BANKS(ADDR_SURF_16_BANK));
3411                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3412                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3413                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3414                                 NUM_BANKS(ADDR_SURF_16_BANK));
3415                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3416                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3417                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3418                                  NUM_BANKS(ADDR_SURF_16_BANK));
3419                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3420                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3421                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3422                                  NUM_BANKS(ADDR_SURF_16_BANK));
3423                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3424                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3425                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3426                                  NUM_BANKS(ADDR_SURF_16_BANK));
3427                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3428                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3429                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3430                                  NUM_BANKS(ADDR_SURF_16_BANK));
3431                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3432                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3433                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3434                                  NUM_BANKS(ADDR_SURF_8_BANK));
3435
3436                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3437                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3438                             reg_offset != 23)
3439                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3440
3441                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3442                         if (reg_offset != 7)
3443                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3444
3445                 break;
3446         }
3447 }
3448
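/*
 * Program GRBM_GFX_INDEX so that subsequent register accesses target the
 * requested shader engine (se_num) and shader array (sh_num); passing
 * 0xffffffff for either argument selects broadcast to all instances.
 */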
3449 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
3450 {
3451         u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3452
3453         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3454                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3455                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3456         } else if (se_num == 0xffffffff) {
3457                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3458                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3459         } else if (sh_num == 0xffffffff) {
3460                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3461                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3462         } else {
3463                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3464                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3465         }
3466         WREG32(mmGRBM_GFX_INDEX, data);
3467 }
3468
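/* Return a mask with the lowest bit_width bits set. */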
3469 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3470 {
3471         return (u32)((1ULL << bit_width) - 1);
3472 }
3473
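/*
 * Return a bitmap of the render backends that are enabled for the currently
 * selected SE/SH, based on the CC and GC_USER backend-disable registers.
 */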
3474 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3475 {
3476         u32 data, mask;
3477
3478         data = RREG32(mmCC_RB_BACKEND_DISABLE);
3479         data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3480
3481         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3482         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3483
3484         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3485                                        adev->gfx.config.max_sh_per_se);
3486
3487         return (~data) & mask;
3488 }
3489
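/*
 * Walk every SE/SH, collect the active render backend bitmap for each, and
 * cache the combined enable mask and RB count in adev->gfx.config.
 */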
3490 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3491 {
3492         int i, j;
3493         u32 data;
3494         u32 active_rbs = 0;
3495         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3496                                         adev->gfx.config.max_sh_per_se;
3497
3498         mutex_lock(&adev->grbm_idx_mutex);
3499         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3500                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3501                         gfx_v8_0_select_se_sh(adev, i, j);
3502                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3503                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3504                                                rb_bitmap_width_per_sh);
3505                 }
3506         }
3507         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3508         mutex_unlock(&adev->grbm_idx_mutex);
3509
3510         adev->gfx.config.backend_enable_mask = active_rbs;
3511         adev->gfx.config.num_rbs = hweight32(active_rbs);
3512 }
3513
3514 /**
3515  * gfx_v8_0_init_compute_vmid - initialize compute VMID SH_MEM registers
3516  *
3517  * @adev: amdgpu_device pointer
3518  *
3519  * Program SH_MEM_CONFIG and SH_MEM_BASES for the compute VMIDs so that
3520  * their LDS, scratch and GPUVM apertures use the ranges listed below.
3521  */
3522 #define DEFAULT_SH_MEM_BASES    (0x6000)
3523 #define FIRST_COMPUTE_VMID      (8)
3524 #define LAST_COMPUTE_VMID       (16)
3525 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3526 {
3527         int i;
3528         uint32_t sh_mem_config;
3529         uint32_t sh_mem_bases;
3530
3531         /*
3532          * Configure apertures:
3533          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3534          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3535          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3536          */
3537         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3538
3539         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3540                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3541                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3542                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3543                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3544                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3545
3546         mutex_lock(&adev->srbm_mutex);
3547         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3548                 vi_srbm_select(adev, 0, 0, 0, i);
3549                 /* CP and shaders */
3550                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3551                 WREG32(mmSH_MEM_APE1_BASE, 1);
3552                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3553                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3554         }
3555         vi_srbm_select(adev, 0, 0, 0, 0);
3556         mutex_unlock(&adev->srbm_mutex);
3557 }
3558
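/*
 * GFX block setup: program the GRBM read timeout and address config, set up
 * the tiling tables and render backends, initialize the SH_MEM registers for
 * all 16 VMIDs, and configure the PA_SC FIFO sizes.
 */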
3559 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3560 {
3561         u32 tmp;
3562         int i;
3563
3564         tmp = RREG32(mmGRBM_CNTL);
3565         tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3566         WREG32(mmGRBM_CNTL, tmp);
3567
3568         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3569         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3570         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3571
3572         gfx_v8_0_tiling_mode_table_init(adev);
3573
3574         gfx_v8_0_setup_rb(adev);
3575         gfx_v8_0_get_cu_info(adev);
3576
3577         /* XXX SH_MEM regs */
3578         /* where to put LDS, scratch, GPUVM in FSA64 space */
3579         mutex_lock(&adev->srbm_mutex);
3580         for (i = 0; i < 16; i++) {
3581                 vi_srbm_select(adev, 0, 0, 0, i);
3582                 /* CP and shaders */
3583                 if (i == 0) {
3584                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3585                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3586                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3587                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3588                         WREG32(mmSH_MEM_CONFIG, tmp);
3589                 } else {
3590                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3591                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3592                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3593                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3594                         WREG32(mmSH_MEM_CONFIG, tmp);
3595                 }
3596
3597                 WREG32(mmSH_MEM_APE1_BASE, 1);
3598                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3599                 WREG32(mmSH_MEM_BASES, 0);
3600         }
3601         vi_srbm_select(adev, 0, 0, 0, 0);
3602         mutex_unlock(&adev->srbm_mutex);
3603
3604         gfx_v8_0_init_compute_vmid(adev);
3605
3606         mutex_lock(&adev->grbm_idx_mutex);
3607         /*
3608          * make sure that the following register writes are broadcast
3609          * to all shader engines and arrays
3610          */
3611         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3612
3613         WREG32(mmPA_SC_FIFO_SIZE,
3614                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3615                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3616                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3617                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3618                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3619                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3620                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3621                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3622         mutex_unlock(&adev->grbm_idx_mutex);
3623
3624 }
3625
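/*
 * Poll the RLC serdes busy registers for every SE/SH, then the non-CU
 * masters, until they report idle or the timeout expires.
 */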
3626 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3627 {
3628         u32 i, j, k;
3629         u32 mask;
3630
3631         mutex_lock(&adev->grbm_idx_mutex);
3632         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3633                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3634                         gfx_v8_0_select_se_sh(adev, i, j);
3635                         for (k = 0; k < adev->usec_timeout; k++) {
3636                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3637                                         break;
3638                                 udelay(1);
3639                         }
3640                 }
3641         }
3642         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3643         mutex_unlock(&adev->grbm_idx_mutex);
3644
3645         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3646                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3647                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3648                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3649         for (k = 0; k < adev->usec_timeout; k++) {
3650                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3651                         break;
3652                 udelay(1);
3653         }
3654 }
3655
3656 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3657                                                bool enable)
3658 {
3659         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3660
3661         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3662         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3663         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3664         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3665
3666         WREG32(mmCP_INT_CNTL_RING0, tmp);
3667 }
3668
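/* Point the RLC at the clear state indirect buffer (address and size). */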
3669 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3670 {
3671         /* csib */
3672         WREG32(mmRLC_CSIB_ADDR_HI,
3673                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3674         WREG32(mmRLC_CSIB_ADDR_LO,
3675                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3676         WREG32(mmRLC_CSIB_LENGTH,
3677                         adev->gfx.rlc.clear_state_size);
3678 }
3679
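/*
 * Scan the RLC register list format image: record the start offset of each
 * indirect block and replace each register index with its position in the
 * table of unique indices built up in unique_indices[].
 */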
3680 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3681                                 int ind_offset,
3682                                 int list_size,
3683                                 int *unique_indices,
3684                                 int *indices_count,
3685                                 int max_indices,
3686                                 int *ind_start_offsets,
3687                                 int *offset_count,
3688                                 int max_offset)
3689 {
3690         int indices;
3691         bool new_entry = true;
3692
3693         for (; ind_offset < list_size; ind_offset++) {
3694
3695                 if (new_entry) {
3696                         new_entry = false;
3697                         ind_start_offsets[*offset_count] = ind_offset;
3698                         *offset_count = *offset_count + 1;
3699                         BUG_ON(*offset_count >= max_offset);
3700                 }
3701
3702                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3703                         new_entry = true;
3704                         continue;
3705                 }
3706
3707                 ind_offset += 2;
3708
3709                 /* look for a matching index */
3710                 for (indices = 0;
3711                         indices < *indices_count;
3712                         indices++) {
3713                         if (unique_indices[indices] ==
3714                                 register_list_format[ind_offset])
3715                                 break;
3716                 }
3717
3718                 if (indices >= *indices_count) {
3719                         unique_indices[*indices_count] =
3720                                 register_list_format[ind_offset];
3721                         indices = *indices_count;
3722                         *indices_count = *indices_count + 1;
3723                         BUG_ON(*indices_count >= max_indices);
3724                 }
3725
3726                 register_list_format[ind_offset] = indices;
3727         }
3728 }
3729
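/*
 * Program the RLC save/restore machine: upload the register restore list to
 * ARAM, the parsed indirect register list and start offsets to GPM scratch,
 * and the unique register indices to the SRM index registers.
 */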
3730 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3731 {
3732         int i, temp, data;
3733         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3734         int indices_count = 0;
3735         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3736         int offset_count = 0;
3737
3738         int list_size;
3739         unsigned int *register_list_format =
3740                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3741         if (register_list_format == NULL)
3742                 return -ENOMEM;
3743         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3744                         adev->gfx.rlc.reg_list_format_size_bytes);
3745
3746         gfx_v8_0_parse_ind_reg_list(register_list_format,
3747                                 RLC_FormatDirectRegListLength,
3748                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3749                                 unique_indices,
3750                                 &indices_count,
3751                                 sizeof(unique_indices) / sizeof(int),
3752                                 indirect_start_offsets,
3753                                 &offset_count,
3754                                 sizeof(indirect_start_offsets)/sizeof(int));
3755
3756         /* save and restore list */
3757         temp = RREG32(mmRLC_SRM_CNTL);
3758         temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
3759         WREG32(mmRLC_SRM_CNTL, temp);
3760
3761         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3762         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3763                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3764
3765         /* indirect list */
3766         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3767         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3768                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3769
3770         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3771         list_size = list_size >> 1;
3772         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3773         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3774
3775         /* starting offsets */
3776         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3777                 adev->gfx.rlc.starting_offsets_start);
3778         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3779                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3780                                 indirect_start_offsets[i]);
3781
3782         /* unique indices */
3783         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3784         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3785         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3786                 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3787                 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3788         }
3789         kfree(register_list_format);
3790
3791         return 0;
3792 }
3793
3794 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3795 {
3796         uint32_t data;
3797
3798         data = RREG32(mmRLC_SRM_CNTL);
3799         data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3800         WREG32(mmRLC_SRM_CNTL, data);
3801 }
3802
3803 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3804 {
3805         uint32_t data;
3806
3807         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3808                               AMD_PG_SUPPORT_GFX_SMG |
3809                               AMD_PG_SUPPORT_GFX_DMG)) {
3810                 data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3811                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3812                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3813                 WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3814
3815                 data = 0;
3816                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
3817                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
3818                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
3819                 data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
3820                 WREG32(mmRLC_PG_DELAY, data);
3821
3822                 data = RREG32(mmRLC_PG_DELAY_2);
3823                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
3824                 data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
3825                 WREG32(mmRLC_PG_DELAY_2, data);
3826
3827                 data = RREG32(mmRLC_AUTO_PG_CTRL);
3828                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3829                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3830                 WREG32(mmRLC_AUTO_PG_CTRL, data);
3831         }
3832 }
3833
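/* Toggle the SMU clock slow-down-on-power-up bit in RLC_PG_CNTL. */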
3834 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3835                                                 bool enable)
3836 {
3837         u32 data, orig;
3838
3839         orig = data = RREG32(mmRLC_PG_CNTL);
3840
3841         if (enable)
3842                 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3843         else
3844                 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3845
3846         if (orig != data)
3847                 WREG32(mmRLC_PG_CNTL, data);
3848 }
3849
3850 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3851                                                   bool enable)
3852 {
3853         u32 data, orig;
3854
3855         orig = data = RREG32(mmRLC_PG_CNTL);
3856
3857         if (enable)
3858                 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3859         else
3860                 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3861
3862         if (orig != data)
3863                 WREG32(mmRLC_PG_CNTL, data);
3864 }
3865
3866 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3867 {
3868         u32 data, orig;
3869
3870         orig = data = RREG32(mmRLC_PG_CNTL);
3871
3872         if (enable)
3873                 data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK;
3874         else
3875                 data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
3876
3877         if (orig != data)
3878                 WREG32(mmRLC_PG_CNTL, data);
3879 }
3880
3881 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3882 {
3883         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3884                               AMD_PG_SUPPORT_GFX_SMG |
3885                               AMD_PG_SUPPORT_GFX_DMG |
3886                               AMD_PG_SUPPORT_CP |
3887                               AMD_PG_SUPPORT_GDS |
3888                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
3889                 gfx_v8_0_init_csb(adev);
3890                 gfx_v8_0_init_save_restore_list(adev);
3891                 gfx_v8_0_enable_save_restore_machine(adev);
3892
3893                 if ((adev->asic_type == CHIP_CARRIZO) ||
3894                     (adev->asic_type == CHIP_STONEY)) {
3895                         WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3896                         gfx_v8_0_init_power_gating(adev);
3897                         WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3898                         if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
3899                                 cz_enable_sck_slow_down_on_power_up(adev, true);
3900                                 cz_enable_sck_slow_down_on_power_down(adev, true);
3901                         } else {
3902                                 cz_enable_sck_slow_down_on_power_up(adev, false);
3903                                 cz_enable_sck_slow_down_on_power_down(adev, false);
3904                         }
3905                         if (adev->pg_flags & AMD_PG_SUPPORT_CP)
3906                                 cz_enable_cp_power_gating(adev, true);
3907                         else
3908                                 cz_enable_cp_power_gating(adev, false);
3909                 } else if (adev->asic_type == CHIP_POLARIS11) {
3910                         gfx_v8_0_init_power_gating(adev);
3911                 }
3912         }
3913 }
3914
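/*
 * Halt the RLC, mask the GUI idle interrupts and wait for the serdes to go
 * idle.
 */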
3915 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3916 {
3917         u32 tmp = RREG32(mmRLC_CNTL);
3918
3919         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3920         WREG32(mmRLC_CNTL, tmp);
3921
3922         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3923
3924         gfx_v8_0_wait_for_rlc_serdes(adev);
3925 }
3926
3927 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3928 {
3929         u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3930
3931         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3932         WREG32(mmGRBM_SOFT_RESET, tmp);
3933         udelay(50);
3934         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3935         WREG32(mmGRBM_SOFT_RESET, tmp);
3936         udelay(50);
3937 }
3938
3939 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3940 {
3941         u32 tmp = RREG32(mmRLC_CNTL);
3942
3943         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3944         WREG32(mmRLC_CNTL, tmp);
3945
3946         /* APUs (e.g. carrizo) enable the CP interrupt after the CP is initialized */
3947         if (!(adev->flags & AMD_IS_APU))
3948                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3949
3950         udelay(50);
3951 }
3952
3953 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3954 {
3955         const struct rlc_firmware_header_v2_0 *hdr;
3956         const __le32 *fw_data;
3957         unsigned i, fw_size;
3958
3959         if (!adev->gfx.rlc_fw)
3960                 return -EINVAL;
3961
3962         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3963         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3964
3965         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3966                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3967         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3968
3969         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3970         for (i = 0; i < fw_size; i++)
3971                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3972         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3973
3974         return 0;
3975 }
3976
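/*
 * Bring the RLC back up: halt it, disable clockgating and powergating, reset
 * it, re-initialize the PG state, load the RLC microcode if it was not loaded
 * by the SMU, and start it again.
 */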
3977 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3978 {
3979         int r;
3980
3981         gfx_v8_0_rlc_stop(adev);
3982
3983         /* disable CG */
3984         WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
3985         if (adev->asic_type == CHIP_POLARIS11 ||
3986                 adev->asic_type == CHIP_POLARIS10)
3987                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
3988
3989         /* disable PG */
3990         WREG32(mmRLC_PG_CNTL, 0);
3991
3992         gfx_v8_0_rlc_reset(adev);
3993
3994         gfx_v8_0_init_pg(adev);
3995
3996         if (!adev->pp_enabled) {
3997                 if (!adev->firmware.smu_load) {
3998                         /* legacy rlc firmware loading */
3999                         r = gfx_v8_0_rlc_load_microcode(adev);
4000                         if (r)
4001                                 return r;
4002                 } else {
4003                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4004                                                         AMDGPU_UCODE_ID_RLC_G);
4005                         if (r)
4006                                 return -EINVAL;
4007                 }
4008         }
4009
4010         gfx_v8_0_rlc_start(adev);
4011
4012         return 0;
4013 }
4014
4015 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4016 {
4017         int i;
4018         u32 tmp = RREG32(mmCP_ME_CNTL);
4019
4020         if (enable) {
4021                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4022                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4023                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4024         } else {
4025                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4026                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4027                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4028                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4029                         adev->gfx.gfx_ring[i].ready = false;
4030         }
4031         WREG32(mmCP_ME_CNTL, tmp);
4032         udelay(50);
4033 }
4034
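/* Load the PFP, CE and ME microcode into the CP via direct register writes. */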
4035 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4036 {
4037         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4038         const struct gfx_firmware_header_v1_0 *ce_hdr;
4039         const struct gfx_firmware_header_v1_0 *me_hdr;
4040         const __le32 *fw_data;
4041         unsigned i, fw_size;
4042
4043         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4044                 return -EINVAL;
4045
4046         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4047                 adev->gfx.pfp_fw->data;
4048         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4049                 adev->gfx.ce_fw->data;
4050         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4051                 adev->gfx.me_fw->data;
4052
4053         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4054         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4055         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4056
4057         gfx_v8_0_cp_gfx_enable(adev, false);
4058
4059         /* PFP */
4060         fw_data = (const __le32 *)
4061                 (adev->gfx.pfp_fw->data +
4062                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4063         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4064         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4065         for (i = 0; i < fw_size; i++)
4066                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4067         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4068
4069         /* CE */
4070         fw_data = (const __le32 *)
4071                 (adev->gfx.ce_fw->data +
4072                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4073         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4074         WREG32(mmCP_CE_UCODE_ADDR, 0);
4075         for (i = 0; i < fw_size; i++)
4076                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4077         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4078
4079         /* ME */
4080         fw_data = (const __le32 *)
4081                 (adev->gfx.me_fw->data +
4082                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4083         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4084         WREG32(mmCP_ME_RAM_WADDR, 0);
4085         for (i = 0; i < fw_size; i++)
4086                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4087         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4088
4089         return 0;
4090 }
4091
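/* Count the number of dwords the clear state buffer packets will occupy. */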
4092 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4093 {
4094         u32 count = 0;
4095         const struct cs_section_def *sect = NULL;
4096         const struct cs_extent_def *ext = NULL;
4097
4098         /* begin clear state */
4099         count += 2;
4100         /* context control state */
4101         count += 3;
4102
4103         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4104                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4105                         if (sect->id == SECT_CONTEXT)
4106                                 count += 2 + ext->reg_count;
4107                         else
4108                                 return 0;
4109                 }
4110         }
4111         /* pa_sc_raster_config/pa_sc_raster_config1 */
4112         count += 4;
4113         /* end clear state */
4114         count += 2;
4115         /* clear state */
4116         count += 2;
4117
4118         return count;
4119 }
4120
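/*
 * Initialize the CP and emit the clear-state / context-control packet
 * sequence (including the per-ASIC PA_SC_RASTER_CONFIG values) on the gfx
 * ring.
 */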
4121 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4122 {
4123         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4124         const struct cs_section_def *sect = NULL;
4125         const struct cs_extent_def *ext = NULL;
4126         int r, i;
4127
4128         /* init the CP */
4129         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4130         WREG32(mmCP_ENDIAN_SWAP, 0);
4131         WREG32(mmCP_DEVICE_ID, 1);
4132
4133         gfx_v8_0_cp_gfx_enable(adev, true);
4134
4135         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4136         if (r) {
4137                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4138                 return r;
4139         }
4140
4141         /* clear state buffer */
4142         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4143         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4144
4145         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4146         amdgpu_ring_write(ring, 0x80000000);
4147         amdgpu_ring_write(ring, 0x80000000);
4148
4149         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4150                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4151                         if (sect->id == SECT_CONTEXT) {
4152                                 amdgpu_ring_write(ring,
4153                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4154                                                ext->reg_count));
4155                                 amdgpu_ring_write(ring,
4156                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4157                                 for (i = 0; i < ext->reg_count; i++)
4158                                         amdgpu_ring_write(ring, ext->extent[i]);
4159                         }
4160                 }
4161         }
4162
4163         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4164         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4165         switch (adev->asic_type) {
4166         case CHIP_TONGA:
4167         case CHIP_POLARIS10:
4168                 amdgpu_ring_write(ring, 0x16000012);
4169                 amdgpu_ring_write(ring, 0x0000002A);
4170                 break;
4171         case CHIP_POLARIS11:
4172                 amdgpu_ring_write(ring, 0x16000012);
4173                 amdgpu_ring_write(ring, 0x00000000);
4174                 break;
4175         case CHIP_FIJI:
4176                 amdgpu_ring_write(ring, 0x3a00161a);
4177                 amdgpu_ring_write(ring, 0x0000002e);
4178                 break;
4179         case CHIP_CARRIZO:
4180                 amdgpu_ring_write(ring, 0x00000002);
4181                 amdgpu_ring_write(ring, 0x00000000);
4182                 break;
4183         case CHIP_TOPAZ:
4184                 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4185                                 0x00000000 : 0x00000002);
4186                 amdgpu_ring_write(ring, 0x00000000);
4187                 break;
4188         case CHIP_STONEY:
4189                 amdgpu_ring_write(ring, 0x00000000);
4190                 amdgpu_ring_write(ring, 0x00000000);
4191                 break;
4192         default:
4193                 BUG();
4194         }
4195
4196         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4197         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4198
4199         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4200         amdgpu_ring_write(ring, 0);
4201
4202         /* init the CE partitions */
4203         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4204         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4205         amdgpu_ring_write(ring, 0x8000);
4206         amdgpu_ring_write(ring, 0x8000);
4207
4208         amdgpu_ring_commit(ring);
4209
4210         return 0;
4211 }
4212
4213 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4214 {
4215         struct amdgpu_ring *ring;
4216         u32 tmp;
4217         u32 rb_bufsz;
4218         u64 rb_addr, rptr_addr;
4219         int r;
4220
4221         /* Set the write pointer delay */
4222         WREG32(mmCP_RB_WPTR_DELAY, 0);
4223
4224         /* set the RB to use vmid 0 */
4225         WREG32(mmCP_RB_VMID, 0);
4226
4227         /* Set ring buffer size */
4228         ring = &adev->gfx.gfx_ring[0];
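             /* RB_BUFSZ = log2(ring_size / 8), i.e. the ring holds 2^(RB_BUFSZ+1) dwords */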
4229         rb_bufsz = order_base_2(ring->ring_size / 8);
4230         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4231         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4232         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4233         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4234 #ifdef __BIG_ENDIAN
4235         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4236 #endif
4237         WREG32(mmCP_RB0_CNTL, tmp);
4238
4239         /* Initialize the ring buffer's read and write pointers */
4240         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4241         ring->wptr = 0;
4242         WREG32(mmCP_RB0_WPTR, ring->wptr);
4243
4244         /* set the wb address whether it's enabled or not */
4245         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4246         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4247         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4248
4249         mdelay(1);
4250         WREG32(mmCP_RB0_CNTL, tmp);
4251
4252         rb_addr = ring->gpu_addr >> 8;
4253         WREG32(mmCP_RB0_BASE, rb_addr);
4254         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4255
4256         /* no gfx doorbells on iceland */
4257         if (adev->asic_type != CHIP_TOPAZ) {
4258                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4259                 if (ring->use_doorbell) {
4260                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4261                                             DOORBELL_OFFSET, ring->doorbell_index);
4262                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4263                                             DOORBELL_HIT, 0);
4264                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4265                                             DOORBELL_EN, 1);
4266                 } else {
4267                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4268                                             DOORBELL_EN, 0);
4269                 }
4270                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4271
4272                 if (adev->asic_type == CHIP_TONGA) {
4273                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4274                                             DOORBELL_RANGE_LOWER,
4275                                             AMDGPU_DOORBELL_GFX_RING0);
4276                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4277
4278                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4279                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4280                 }
4281
4282         }
4283
4284         /* start the ring */
4285         gfx_v8_0_cp_gfx_start(adev);
4286         ring->ready = true;
4287         r = amdgpu_ring_test_ring(ring);
4288         if (r) {
4289                 ring->ready = false;
4290                 return r;
4291         }
4292
4293         return 0;
4294 }
4295
4296 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4297 {
4298         int i;
4299
4300         if (enable) {
4301                 WREG32(mmCP_MEC_CNTL, 0);
4302         } else {
4303                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4304                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4305                         adev->gfx.compute_ring[i].ready = false;
4306         }
4307         udelay(50);
4308 }
4309
4310 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4311 {
4312         const struct gfx_firmware_header_v1_0 *mec_hdr;
4313         const __le32 *fw_data;
4314         unsigned i, fw_size;
4315
4316         if (!adev->gfx.mec_fw)
4317                 return -EINVAL;
4318
4319         gfx_v8_0_cp_compute_enable(adev, false);
4320
4321         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4322         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4323
4324         fw_data = (const __le32 *)
4325                 (adev->gfx.mec_fw->data +
4326                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4327         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4328
4329         /* MEC1 */
4330         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4331         for (i = 0; i < fw_size; i++)
4332                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4333         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4334
4335         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4336         if (adev->gfx.mec2_fw) {
4337                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4338
4339                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4340                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4341
4342                 fw_data = (const __le32 *)
4343                         (adev->gfx.mec2_fw->data +
4344                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4345                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4346
4347                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4348                 for (i = 0; i < fw_size; i++)
4349                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4350                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4351         }
4352
4353         return 0;
4354 }
4355
4356 struct vi_mqd {
4357         uint32_t header;  /* ordinal0 */
4358         uint32_t compute_dispatch_initiator;  /* ordinal1 */
4359         uint32_t compute_dim_x;  /* ordinal2 */
4360         uint32_t compute_dim_y;  /* ordinal3 */
4361         uint32_t compute_dim_z;  /* ordinal4 */
4362         uint32_t compute_start_x;  /* ordinal5 */
4363         uint32_t compute_start_y;  /* ordinal6 */
4364         uint32_t compute_start_z;  /* ordinal7 */
4365         uint32_t compute_num_thread_x;  /* ordinal8 */
4366         uint32_t compute_num_thread_y;  /* ordinal9 */
4367         uint32_t compute_num_thread_z;  /* ordinal10 */
4368         uint32_t compute_pipelinestat_enable;  /* ordinal11 */
4369         uint32_t compute_perfcount_enable;  /* ordinal12 */
4370         uint32_t compute_pgm_lo;  /* ordinal13 */
4371         uint32_t compute_pgm_hi;  /* ordinal14 */
4372         uint32_t compute_tba_lo;  /* ordinal15 */
4373         uint32_t compute_tba_hi;  /* ordinal16 */
4374         uint32_t compute_tma_lo;  /* ordinal17 */
4375         uint32_t compute_tma_hi;  /* ordinal18 */
4376         uint32_t compute_pgm_rsrc1;  /* ordinal19 */
4377         uint32_t compute_pgm_rsrc2;  /* ordinal20 */
4378         uint32_t compute_vmid;  /* ordinal21 */
4379         uint32_t compute_resource_limits;  /* ordinal22 */
4380         uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
4381         uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
4382         uint32_t compute_tmpring_size;  /* ordinal25 */
4383         uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
4384         uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
4385         uint32_t compute_restart_x;  /* ordinal28 */
4386         uint32_t compute_restart_y;  /* ordinal29 */
4387         uint32_t compute_restart_z;  /* ordinal30 */
4388         uint32_t compute_thread_trace_enable;  /* ordinal31 */
4389         uint32_t compute_misc_reserved;  /* ordinal32 */
4390         uint32_t compute_dispatch_id;  /* ordinal33 */
4391         uint32_t compute_threadgroup_id;  /* ordinal34 */
4392         uint32_t compute_relaunch;  /* ordinal35 */
4393         uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
4394         uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
4395         uint32_t compute_wave_restore_control;  /* ordinal38 */
4396         uint32_t reserved9;  /* ordinal39 */
4397         uint32_t reserved10;  /* ordinal40 */
4398         uint32_t reserved11;  /* ordinal41 */
4399         uint32_t reserved12;  /* ordinal42 */
4400         uint32_t reserved13;  /* ordinal43 */
4401         uint32_t reserved14;  /* ordinal44 */
4402         uint32_t reserved15;  /* ordinal45 */
4403         uint32_t reserved16;  /* ordinal46 */
4404         uint32_t reserved17;  /* ordinal47 */
4405         uint32_t reserved18;  /* ordinal48 */
4406         uint32_t reserved19;  /* ordinal49 */
4407         uint32_t reserved20;  /* ordinal50 */
4408         uint32_t reserved21;  /* ordinal51 */
4409         uint32_t reserved22;  /* ordinal52 */
4410         uint32_t reserved23;  /* ordinal53 */
4411         uint32_t reserved24;  /* ordinal54 */
4412         uint32_t reserved25;  /* ordinal55 */
4413         uint32_t reserved26;  /* ordinal56 */
4414         uint32_t reserved27;  /* ordinal57 */
4415         uint32_t reserved28;  /* ordinal58 */
4416         uint32_t reserved29;  /* ordinal59 */
4417         uint32_t reserved30;  /* ordinal60 */
4418         uint32_t reserved31;  /* ordinal61 */
4419         uint32_t reserved32;  /* ordinal62 */
4420         uint32_t reserved33;  /* ordinal63 */
4421         uint32_t reserved34;  /* ordinal64 */
4422         uint32_t compute_user_data_0;  /* ordinal65 */
4423         uint32_t compute_user_data_1;  /* ordinal66 */
4424         uint32_t compute_user_data_2;  /* ordinal67 */
4425         uint32_t compute_user_data_3;  /* ordinal68 */
4426         uint32_t compute_user_data_4;  /* ordinal69 */
4427         uint32_t compute_user_data_5;  /* ordinal70 */
4428         uint32_t compute_user_data_6;  /* ordinal71 */
4429         uint32_t compute_user_data_7;  /* ordinal72 */
4430         uint32_t compute_user_data_8;  /* ordinal73 */
4431         uint32_t compute_user_data_9;  /* ordinal74 */
4432         uint32_t compute_user_data_10;  /* ordinal75 */
4433         uint32_t compute_user_data_11;  /* ordinal76 */
4434         uint32_t compute_user_data_12;  /* ordinal77 */
4435         uint32_t compute_user_data_13;  /* ordinal78 */
4436         uint32_t compute_user_data_14;  /* ordinal79 */
4437         uint32_t compute_user_data_15;  /* ordinal80 */
4438         uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
4439         uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
4440         uint32_t reserved35;  /* ordinal83 */
4441         uint32_t reserved36;  /* ordinal84 */
4442         uint32_t reserved37;  /* ordinal85 */
4443         uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
4444         uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
4445         uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
4446         uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
4447         uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
4448         uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
4449         uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
4450         uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
4451         uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
4452         uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
4453         uint32_t reserved38;  /* ordinal96 */
4454         uint32_t reserved39;  /* ordinal97 */
4455         uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
4456         uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
4457         uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
4458         uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
4459         uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
4460         uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
4461         uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
4462         uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
4463         uint32_t reserved40;  /* ordinal106 */
4464         uint32_t reserved41;  /* ordinal107 */
4465         uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
4466         uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
4467         uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
4468         uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
4469         uint32_t reserved42;  /* ordinal112 */
4470         uint32_t reserved43;  /* ordinal113 */
4471         uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
4472         uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
4473         uint32_t cp_packet_id_lo;  /* ordinal116 */
4474         uint32_t cp_packet_id_hi;  /* ordinal117 */
4475         uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
4476         uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
4477         uint32_t gds_save_base_addr_lo;  /* ordinal120 */
4478         uint32_t gds_save_base_addr_hi;  /* ordinal121 */
4479         uint32_t gds_save_mask_lo;  /* ordinal122 */
4480         uint32_t gds_save_mask_hi;  /* ordinal123 */
4481         uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
4482         uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
4483         uint32_t reserved44;  /* ordinal126 */
4484         uint32_t reserved45;  /* ordinal127 */
4485         uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
4486         uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
4487         uint32_t cp_hqd_active;  /* ordinal130 */
4488         uint32_t cp_hqd_vmid;  /* ordinal131 */
4489         uint32_t cp_hqd_persistent_state;  /* ordinal132 */
4490         uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
4491         uint32_t cp_hqd_queue_priority;  /* ordinal134 */
4492         uint32_t cp_hqd_quantum;  /* ordinal135 */
4493         uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
4494         uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
4495         uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
4496         uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
4497         uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
4498         uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
4499         uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
4500         uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
4501         uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
4502         uint32_t cp_hqd_pq_control;  /* ordinal145 */
4503         uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
4504         uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
4505         uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
4506         uint32_t cp_hqd_ib_control;  /* ordinal149 */
4507         uint32_t cp_hqd_iq_timer;  /* ordinal150 */
4508         uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
4509         uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
4510         uint32_t cp_hqd_dma_offload;  /* ordinal153 */
4511         uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
4512         uint32_t cp_hqd_msg_type;  /* ordinal155 */
4513         uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
4514         uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
4515         uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
4516         uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
4517         uint32_t cp_hqd_hq_status0;  /* ordinal160 */
4518         uint32_t cp_hqd_hq_control0;  /* ordinal161 */
4519         uint32_t cp_mqd_control;  /* ordinal162 */
4520         uint32_t cp_hqd_hq_status1;  /* ordinal163 */
4521         uint32_t cp_hqd_hq_control1;  /* ordinal164 */
4522         uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
4523         uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
4524         uint32_t cp_hqd_eop_control;  /* ordinal167 */
4525         uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
4526         uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
4527         uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
4528         uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
4529         uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
4530         uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
4531         uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
4532         uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
4533         uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
4534         uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
4535         uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
4536         uint32_t cp_hqd_error;  /* ordinal179 */
4537         uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
4538         uint32_t cp_hqd_eop_dones;  /* ordinal181 */
4539         uint32_t reserved46;  /* ordinal182 */
4540         uint32_t reserved47;  /* ordinal183 */
4541         uint32_t reserved48;  /* ordinal184 */
4542         uint32_t reserved49;  /* ordinal185 */
4543         uint32_t reserved50;  /* ordinal186 */
4544         uint32_t reserved51;  /* ordinal187 */
4545         uint32_t reserved52;  /* ordinal188 */
4546         uint32_t reserved53;  /* ordinal189 */
4547         uint32_t reserved54;  /* ordinal190 */
4548         uint32_t reserved55;  /* ordinal191 */
4549         uint32_t iqtimer_pkt_header;  /* ordinal192 */
4550         uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
4551         uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
4552         uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
4553         uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
4554         uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
4555         uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
4556         uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
4557         uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
4558         uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
4559         uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
4560         uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
4561         uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
4562         uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
4563         uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
4564         uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
4565         uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
4566         uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
4567         uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
4568         uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
4569         uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
4570         uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
4571         uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
4572         uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
4573         uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
4574         uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
4575         uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
4576         uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
4577         uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
4578         uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
4579         uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
4580         uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
4581         uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
4582         uint32_t reserved56;  /* ordinal225 */
4583         uint32_t reserved57;  /* ordinal226 */
4584         uint32_t reserved58;  /* ordinal227 */
4585         uint32_t set_resources_header;  /* ordinal228 */
4586         uint32_t set_resources_dw1;  /* ordinal229 */
4587         uint32_t set_resources_dw2;  /* ordinal230 */
4588         uint32_t set_resources_dw3;  /* ordinal231 */
4589         uint32_t set_resources_dw4;  /* ordinal232 */
4590         uint32_t set_resources_dw5;  /* ordinal233 */
4591         uint32_t set_resources_dw6;  /* ordinal234 */
4592         uint32_t set_resources_dw7;  /* ordinal235 */
4593         uint32_t reserved59;  /* ordinal236 */
4594         uint32_t reserved60;  /* ordinal237 */
4595         uint32_t reserved61;  /* ordinal238 */
4596         uint32_t reserved62;  /* ordinal239 */
4597         uint32_t reserved63;  /* ordinal240 */
4598         uint32_t reserved64;  /* ordinal241 */
4599         uint32_t reserved65;  /* ordinal242 */
4600         uint32_t reserved66;  /* ordinal243 */
4601         uint32_t reserved67;  /* ordinal244 */
4602         uint32_t reserved68;  /* ordinal245 */
4603         uint32_t reserved69;  /* ordinal246 */
4604         uint32_t reserved70;  /* ordinal247 */
4605         uint32_t reserved71;  /* ordinal248 */
4606         uint32_t reserved72;  /* ordinal249 */
4607         uint32_t reserved73;  /* ordinal250 */
4608         uint32_t reserved74;  /* ordinal251 */
4609         uint32_t reserved75;  /* ordinal252 */
4610         uint32_t reserved76;  /* ordinal253 */
4611         uint32_t reserved77;  /* ordinal254 */
4612         uint32_t reserved78;  /* ordinal255 */
4613
4614         uint32_t reserved_t[256]; /* reserved 256-dword buffer used by ucode */
4615 };
4616
4617 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4618 {
4619         int i, r;
4620
4621         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4622                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4623
4624                 if (ring->mqd_obj) {
4625                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4626                         if (unlikely(r != 0))
4627                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4628
4629                         amdgpu_bo_unpin(ring->mqd_obj);
4630                         amdgpu_bo_unreserve(ring->mqd_obj);
4631
4632                         amdgpu_bo_unref(&ring->mqd_obj);
4633                         ring->mqd_obj = NULL;
4634                 }
4635         }
4636 }
4637
4638 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4639 {
4640         int r, i, j;
4641         u32 tmp;
4642         bool use_doorbell = true;
4643         u64 hqd_gpu_addr;
4644         u64 mqd_gpu_addr;
4645         u64 eop_gpu_addr;
4646         u64 wb_gpu_addr;
4647         u32 *buf;
4648         struct vi_mqd *mqd;
4649
4650         /* init the pipes */
4651         mutex_lock(&adev->srbm_mutex);
4652         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
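                     /* the first four pipes belong to MEC1 (ME 1), the rest to MEC2 (ME 2) */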
4653                 int me = (i < 4) ? 1 : 2;
4654                 int pipe = (i < 4) ? i : (i - 4);
4655
4656                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4657                 eop_gpu_addr >>= 8;
4658
4659                 vi_srbm_select(adev, me, pipe, 0, 0);
4660
4661                 /* write the EOP addr */
4662                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4663                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4664
4665                 /* set the VMID assigned */
4666                 WREG32(mmCP_HQD_VMID, 0);
4667
4668                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4669                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4670                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4671                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
4672                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4673         }
4674         vi_srbm_select(adev, 0, 0, 0, 0);
4675         mutex_unlock(&adev->srbm_mutex);
4676
4677         /* init the compute queues */
4678         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4679                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4680
4681                 if (ring->mqd_obj == NULL) {
4682                         r = amdgpu_bo_create(adev,
4683                                              sizeof(struct vi_mqd),
4684                                              PAGE_SIZE, true,
4685                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4686                                              NULL, &ring->mqd_obj);
4687                         if (r) {
4688                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4689                                 return r;
4690                         }
4691                 }
4692
4693                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4694                 if (unlikely(r != 0)) {
4695                         gfx_v8_0_cp_compute_fini(adev);
4696                         return r;
4697                 }
4698                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4699                                   &mqd_gpu_addr);
4700                 if (r) {
4701                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4702                         gfx_v8_0_cp_compute_fini(adev);
4703                         return r;
4704                 }
4705                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4706                 if (r) {
4707                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4708                         gfx_v8_0_cp_compute_fini(adev);
4709                         return r;
4710                 }
4711
4712                 /* init the mqd struct */
4713                 memset(buf, 0, sizeof(struct vi_mqd));
4714
4715                 mqd = (struct vi_mqd *)buf;
4716                 mqd->header = 0xC0310800;
4717                 mqd->compute_pipelinestat_enable = 0x00000001;
4718                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4719                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4720                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4721                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4722                 mqd->compute_misc_reserved = 0x00000003;
4723
4724                 mutex_lock(&adev->srbm_mutex);
4725                 vi_srbm_select(adev, ring->me,
4726                                ring->pipe,
4727                                ring->queue, 0);
4728
4729                 /* disable wptr polling */
4730                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4731                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4732                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4733
4734                 mqd->cp_hqd_eop_base_addr_lo =
4735                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
4736                 mqd->cp_hqd_eop_base_addr_hi =
4737                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4738
4739                 /* enable doorbell? */
4740                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4741                 if (use_doorbell) {
4742                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4743                 } else {
4744                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4745                 }
4746                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4747                 mqd->cp_hqd_pq_doorbell_control = tmp;
4748
4749                 /* disable the queue if it's active */
4750                 mqd->cp_hqd_dequeue_request = 0;
4751                 mqd->cp_hqd_pq_rptr = 0;
4752                 mqd->cp_hqd_pq_wptr = 0;
4753                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4754                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4755                         for (j = 0; j < adev->usec_timeout; j++) {
4756                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4757                                         break;
4758                                 udelay(1);
4759                         }
4760                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4761                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4762                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4763                 }
4764
4765                 /* set the pointer to the MQD */
4766                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4767                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4768                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4769                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4770
4771                 /* set MQD vmid to 0 */
4772                 tmp = RREG32(mmCP_MQD_CONTROL);
4773                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4774                 WREG32(mmCP_MQD_CONTROL, tmp);
4775                 mqd->cp_mqd_control = tmp;
4776
4777                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4778                 hqd_gpu_addr = ring->gpu_addr >> 8;
4779                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4780                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4781                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4782                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4783
4784                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4785                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
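                     /* QUEUE_SIZE encodes the ring size as 2^(QUEUE_SIZE+1) dwords,
                      * hence order_base_2() of the size in dwords minus one below */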
4786                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4787                                     (order_base_2(ring->ring_size / 4) - 1));
4788                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4789                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4790 #ifdef __BIG_ENDIAN
4791                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4792 #endif
4793                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4794                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4795                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4796                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4797                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4798                 mqd->cp_hqd_pq_control = tmp;
4799
4800                 /* set the wb address whether it's enabled or not */
4801                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4802                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4803                 mqd->cp_hqd_pq_rptr_report_addr_hi =
4804                         upper_32_bits(wb_gpu_addr) & 0xffff;
4805                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4806                        mqd->cp_hqd_pq_rptr_report_addr_lo);
4807                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4808                        mqd->cp_hqd_pq_rptr_report_addr_hi);
4809
4810                 /* only used if CP_PQ_WPTR_POLL_CNTL__EN_MASK is set */
4811                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4812                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4813                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4814                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4815                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4816                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
4817
4818                 /* enable the doorbell if requested */
4819                 if (use_doorbell) {
4820                         if ((adev->asic_type == CHIP_CARRIZO) ||
4821                             (adev->asic_type == CHIP_FIJI) ||
4822                             (adev->asic_type == CHIP_STONEY) ||
4823                             (adev->asic_type == CHIP_POLARIS11) ||
4824                             (adev->asic_type == CHIP_POLARIS10)) {
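                                     /* MEC doorbell aperture: KIQ through MEC ring 7 */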
4825                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4826                                        AMDGPU_DOORBELL_KIQ << 2);
4827                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4828                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
4829                         }
4830                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4831                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4832                                             DOORBELL_OFFSET, ring->doorbell_index);
4833                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4834                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4835                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4836                         mqd->cp_hqd_pq_doorbell_control = tmp;
4837
4838                 } else {
4839                         mqd->cp_hqd_pq_doorbell_control = 0;
4840                 }
4841                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4842                        mqd->cp_hqd_pq_doorbell_control);
4843
4844                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4845                 ring->wptr = 0;
4846                 mqd->cp_hqd_pq_wptr = ring->wptr;
4847                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4848                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4849
4850                 /* set the vmid for the queue */
4851                 mqd->cp_hqd_vmid = 0;
4852                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4853
4854                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4855                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4856                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4857                 mqd->cp_hqd_persistent_state = tmp;
4858                 if (adev->asic_type == CHIP_STONEY ||
4859                         adev->asic_type == CHIP_POLARIS11 ||
4860                         adev->asic_type == CHIP_POLARIS10) {
4861                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4862                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4863                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4864                 }
4865
4866                 /* activate the queue */
4867                 mqd->cp_hqd_active = 1;
4868                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4869
4870                 vi_srbm_select(adev, 0, 0, 0, 0);
4871                 mutex_unlock(&adev->srbm_mutex);
4872
4873                 amdgpu_bo_kunmap(ring->mqd_obj);
4874                 amdgpu_bo_unreserve(ring->mqd_obj);
4875         }
4876
4877         if (use_doorbell) {
4878                 tmp = RREG32(mmCP_PQ_STATUS);
4879                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4880                 WREG32(mmCP_PQ_STATUS, tmp);
4881         }
4882
4883         gfx_v8_0_cp_compute_enable(adev, true);
4884
4885         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4886                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4887
4888                 ring->ready = true;
4889                 r = amdgpu_ring_test_ring(ring);
4890                 if (r)
4891                         ring->ready = false;
4892         }
4893
4894         return 0;
4895 }
4896
4897 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4898 {
4899         int r;
4900
4901         if (!(adev->flags & AMD_IS_APU))
4902                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4903
4904         if (!adev->pp_enabled) {
4905                 if (!adev->firmware.smu_load) {
4906                         /* legacy firmware loading */
4907                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
4908                         if (r)
4909                                 return r;
4910
4911                         r = gfx_v8_0_cp_compute_load_microcode(adev);
4912                         if (r)
4913                                 return r;
4914                 } else {
4915                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4916                                                         AMDGPU_UCODE_ID_CP_CE);
4917                         if (r)
4918                                 return -EINVAL;
4919
4920                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4921                                                         AMDGPU_UCODE_ID_CP_PFP);
4922                         if (r)
4923                                 return -EINVAL;
4924
4925                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4926                                                         AMDGPU_UCODE_ID_CP_ME);
4927                         if (r)
4928                                 return -EINVAL;
4929
4930                         if (adev->asic_type == CHIP_TOPAZ) {
4931                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4932                                 if (r)
4933                                         return r;
4934                         } else {
4935                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4936                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
4937                                 if (r)
4938                                         return -EINVAL;
4939                         }
4940                 }
4941         }
4942
4943         r = gfx_v8_0_cp_gfx_resume(adev);
4944         if (r)
4945                 return r;
4946
4947         r = gfx_v8_0_cp_compute_resume(adev);
4948         if (r)
4949                 return r;
4950
4951         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4952
4953         return 0;
4954 }
4955
4956 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4957 {
4958         gfx_v8_0_cp_gfx_enable(adev, enable);
4959         gfx_v8_0_cp_compute_enable(adev, enable);
4960 }
4961
4962 static int gfx_v8_0_hw_init(void *handle)
4963 {
4964         int r;
4965         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4966
4967         gfx_v8_0_init_golden_registers(adev);
4968
4969         gfx_v8_0_gpu_init(adev);
4970
4971         r = gfx_v8_0_rlc_resume(adev);
4972         if (r)
4973                 return r;
4974
4975         r = gfx_v8_0_cp_resume(adev);
4976         if (r)
4977                 return r;
4978
4979         return r;
4980 }
4981
4982 static int gfx_v8_0_hw_fini(void *handle)
4983 {
4984         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4985
4986         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4987         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4988         gfx_v8_0_cp_enable(adev, false);
4989         gfx_v8_0_rlc_stop(adev);
4990         gfx_v8_0_cp_compute_fini(adev);
4991
4992         amdgpu_set_powergating_state(adev,
4993                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
4994
4995         return 0;
4996 }
4997
4998 static int gfx_v8_0_suspend(void *handle)
4999 {
5000         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5001
5002         return gfx_v8_0_hw_fini(adev);
5003 }
5004
5005 static int gfx_v8_0_resume(void *handle)
5006 {
5007         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5008
5009         return gfx_v8_0_hw_init(adev);
5010 }
5011
5012 static bool gfx_v8_0_is_idle(void *handle)
5013 {
5014         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5015
5016         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5017                 return false;
5018         else
5019                 return true;
5020 }
5021
5022 static int gfx_v8_0_wait_for_idle(void *handle)
5023 {
5024         unsigned i;
5025         u32 tmp;
5026         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5027
5028         for (i = 0; i < adev->usec_timeout; i++) {
5029                 /* read GRBM_STATUS */
5030                 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
5031
5032                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
5033                         return 0;
5034                 udelay(1);
5035         }
5036         return -ETIMEDOUT;
5037 }
5038
5039 static int gfx_v8_0_soft_reset(void *handle)
5040 {
5041         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5042         u32 tmp;
5043         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5044
5045         /* GRBM_STATUS */
5046         tmp = RREG32(mmGRBM_STATUS);
5047         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5048                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5049                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5050                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5051                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5052                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
5053                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5054                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5055                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5056                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5057         }
5058
5059         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5060                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5061                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5062                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5063                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5064         }
5065
5066         /* GRBM_STATUS2 */
5067         tmp = RREG32(mmGRBM_STATUS2);
5068         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5069                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5070                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5071
5072         /* SRBM_STATUS */
5073         tmp = RREG32(mmSRBM_STATUS);
5074         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5075                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5076                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5077
5078         if (grbm_soft_reset || srbm_soft_reset) {
5079                 /* stop the rlc */
5080                 gfx_v8_0_rlc_stop(adev);
5081
5082                 /* Disable GFX parsing/prefetching */
5083                 gfx_v8_0_cp_gfx_enable(adev, false);
5084
5085                 /* Disable MEC parsing/prefetching */
5086                 gfx_v8_0_cp_compute_enable(adev, false);
5087
5088                 if (grbm_soft_reset || srbm_soft_reset) {
5089                         tmp = RREG32(mmGMCON_DEBUG);
5090                         tmp = REG_SET_FIELD(tmp,
5091                                             GMCON_DEBUG, GFX_STALL, 1);
5092                         tmp = REG_SET_FIELD(tmp,
5093                                             GMCON_DEBUG, GFX_CLEAR, 1);
5094                         WREG32(mmGMCON_DEBUG, tmp);
5095
5096                         udelay(50);
5097                 }
5098
5099                 if (grbm_soft_reset) {
5100                         tmp = RREG32(mmGRBM_SOFT_RESET);
5101                         tmp |= grbm_soft_reset;
5102                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5103                         WREG32(mmGRBM_SOFT_RESET, tmp);
5104                         tmp = RREG32(mmGRBM_SOFT_RESET);
5105
5106                         udelay(50);
5107
5108                         tmp &= ~grbm_soft_reset;
5109                         WREG32(mmGRBM_SOFT_RESET, tmp);
5110                         tmp = RREG32(mmGRBM_SOFT_RESET);
5111                 }
5112
5113                 if (srbm_soft_reset) {
5114                         tmp = RREG32(mmSRBM_SOFT_RESET);
5115                         tmp |= srbm_soft_reset;
5116                         dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5117                         WREG32(mmSRBM_SOFT_RESET, tmp);
5118                         tmp = RREG32(mmSRBM_SOFT_RESET);
5119
5120                         udelay(50);
5121
5122                         tmp &= ~srbm_soft_reset;
5123                         WREG32(mmSRBM_SOFT_RESET, tmp);
5124                         tmp = RREG32(mmSRBM_SOFT_RESET);
5125                 }
5126
5127                 if (grbm_soft_reset || srbm_soft_reset) {
5128                         tmp = RREG32(mmGMCON_DEBUG);
5129                         tmp = REG_SET_FIELD(tmp,
5130                                             GMCON_DEBUG, GFX_STALL, 0);
5131                         tmp = REG_SET_FIELD(tmp,
5132                                             GMCON_DEBUG, GFX_CLEAR, 0);
5133                         WREG32(mmGMCON_DEBUG, tmp);
5134                 }
5135
5136                 /* Wait a little for things to settle down */
5137                 udelay(50);
5138         }
5139         return 0;
5140 }
5141
5142 /**
5143  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5144  *
5145  * @adev: amdgpu_device pointer
5146  *
5147  * Fetches a GPU clock counter snapshot.
5148  * Returns the 64 bit clock counter snapshot.
5149  */
5150 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5151 {
5152         uint64_t clock;
5153
5154         mutex_lock(&adev->gfx.gpu_clock_mutex);
5155         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5156         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5157                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5158         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5159         return clock;
5160 }
5161
5162 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5163                                           uint32_t vmid,
5164                                           uint32_t gds_base, uint32_t gds_size,
5165                                           uint32_t gws_base, uint32_t gws_size,
5166                                           uint32_t oa_base, uint32_t oa_size)
5167 {
5168         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5169         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5170
5171         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5172         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5173
5174         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5175         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5176
5177         /* GDS Base */
5178         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5179         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5180                                 WRITE_DATA_DST_SEL(0)));
5181         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5182         amdgpu_ring_write(ring, 0);
5183         amdgpu_ring_write(ring, gds_base);
5184
5185         /* GDS Size */
5186         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5187         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5188                                 WRITE_DATA_DST_SEL(0)));
5189         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5190         amdgpu_ring_write(ring, 0);
5191         amdgpu_ring_write(ring, gds_size);
5192
5193         /* GWS */
5194         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5195         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5196                                 WRITE_DATA_DST_SEL(0)));
5197         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5198         amdgpu_ring_write(ring, 0);
5199         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5200
5201         /* OA */
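             /* (1 << (oa_size + oa_base)) - (1 << oa_base) is a mask of
              * oa_size consecutive bits starting at bit oa_base */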
5202         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5203         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5204                                 WRITE_DATA_DST_SEL(0)));
5205         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5206         amdgpu_ring_write(ring, 0);
5207         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5208 }
5209
5210 static int gfx_v8_0_early_init(void *handle)
5211 {
5212         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5213
5214         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5215         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5216         gfx_v8_0_set_ring_funcs(adev);
5217         gfx_v8_0_set_irq_funcs(adev);
5218         gfx_v8_0_set_gds_init(adev);
5219         gfx_v8_0_set_rlc_funcs(adev);
5220
5221         return 0;
5222 }
5223
5224 static int gfx_v8_0_late_init(void *handle)
5225 {
5226         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5227         int r;
5228
5229         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5230         if (r)
5231                 return r;
5232
5233         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5234         if (r)
5235                 return r;
5236
5237         /* requires IBs so do in late init after IB pool is initialized */
5238         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5239         if (r)
5240                 return r;
5241
5242         amdgpu_set_powergating_state(adev,
5243                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5244
5245         return 0;
5246 }
5247
5248 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5249                                                        bool enable)
5250 {
5251         uint32_t data, temp;
5252
5253         if (adev->asic_type == CHIP_POLARIS11)
5254                 /* Send msg to SMU via Powerplay */
5255                 amdgpu_set_powergating_state(adev,
5256                                              AMD_IP_BLOCK_TYPE_SMC,
5257                                              enable ?
5258                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5259
5260         temp = data = RREG32(mmRLC_PG_CNTL);
5261         /* Enable static MGPG */
5262         if (enable)
5263                 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5264         else
5265                 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5266
5267         if (temp != data)
5268                 WREG32(mmRLC_PG_CNTL, data);
5269 }
5270
5271 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5272                                                         bool enable)
5273 {
5274         uint32_t data, temp;
5275
5276         temp = data = RREG32(mmRLC_PG_CNTL);
5277         /* Enable dynamic MGPG */
5278         if (enable)
5279                 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5280         else
5281                 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5282
5283         if (temp != data)
5284                 WREG32(mmRLC_PG_CNTL, data);
5285 }
5286
5287 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5288                 bool enable)
5289 {
5290         uint32_t data, temp;
5291
5292         temp = data = RREG32(mmRLC_PG_CNTL);
5293         /* Enable quick PG */
5294         if (enable)
5295                 data |= RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
5296         else
5297                 data &= ~RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
5298
5299         if (temp != data)
5300                 WREG32(mmRLC_PG_CNTL, data);
5301 }
5302
5303 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5304                                           bool enable)
5305 {
5306         u32 data, orig;
5307
5308         orig = data = RREG32(mmRLC_PG_CNTL);
5309
5310         if (enable)
5311                 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5312         else
5313                 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5314
5315         if (orig != data)
5316                 WREG32(mmRLC_PG_CNTL, data);
5317 }
5318
5319 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5320                                                 bool enable)
5321 {
5322         u32 data, orig;
5323
5324         orig = data = RREG32(mmRLC_PG_CNTL);
5325
5326         if (enable)
5327                 data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
5328         else
5329                 data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
5330
5331         if (orig != data)
5332                 WREG32(mmRLC_PG_CNTL, data);
5333
5334         /* Read any GFX register to wake up GFX. */
5335         if (!enable)
5336                 data = RREG32(mmDB_RENDER_CONTROL);
5337 }
5338
5339 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5340                                           bool enable)
5341 {
5342         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5343                 cz_enable_gfx_cg_power_gating(adev, true);
5344                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5345                         cz_enable_gfx_pipeline_power_gating(adev, true);
5346         } else {
5347                 cz_enable_gfx_cg_power_gating(adev, false);
5348                 cz_enable_gfx_pipeline_power_gating(adev, false);
5349         }
5350 }
5351
5352 static int gfx_v8_0_set_powergating_state(void *handle,
5353                                           enum amd_powergating_state state)
5354 {
5355         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5356         bool enable = (state == AMD_PG_STATE_GATE);
5357
5358         if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5359                 return 0;
5360
5361         switch (adev->asic_type) {
5362         case CHIP_CARRIZO:
5363         case CHIP_STONEY:
5364                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5365                         cz_update_gfx_cg_power_gating(adev, enable);
5366
5367                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5368                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5369                 else
5370                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5371
5372                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5373                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5374                 else
5375                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5376                 break;
5377         case CHIP_POLARIS11:
5378                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5379                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5380                 else
5381                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5382
5383                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5384                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5385                 else
5386                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5387
5388                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5389                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5390                 else
5391                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5392                 break;
5393         default:
5394                 break;
5395         }
5396
5397         return 0;
5398 }
5399
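/*
 * Send a BPM serdes command to all CUs: broadcast the SE/SH selection,
 * set the CU and non-CU master masks, then program RLC_SERDES_WR_CTRL
 * with the command, the target register address and BPM address 0xff.
 */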
5400 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5401                                      uint32_t reg_addr, uint32_t cmd)
5402 {
5403         uint32_t data;
5404
5405         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5406
5407         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5408         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5409
5410         data = RREG32(mmRLC_SERDES_WR_CTRL);
5411         if (adev->asic_type == CHIP_STONEY)
5412                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5413                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5414                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5415                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5416                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5417                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5418                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5419                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5420                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5421         else
5422                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5423                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5424                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5425                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5426                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5427                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5428                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5429                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5430                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5431                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5432                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5433         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5434                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5435                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5436                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5437
5438         WREG32(mmRLC_SERDES_WR_CTRL, data);
5439 }
5440
5441 #define MSG_ENTER_RLC_SAFE_MODE     1
5442 #define MSG_EXIT_RLC_SAFE_MODE      0
5443
5444 #define RLC_GPR_REG2__REQ_MASK           0x00000001
5445 #define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
5446 #define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
5447
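/*
 * Request RLC safe mode on Carrizo/Stoney through RLC_GPR_REG2: raise
 * REQ with MSG_ENTER_RLC_SAFE_MODE, wait for the GFX clock and power
 * status bits in RLC_GPM_STAT, then wait for the RLC to acknowledge by
 * clearing REQ.  Only done when CG or PG features are enabled.
 */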
5448 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5449 {
5450         u32 data = 0;
5451         unsigned i;
5452
5453         data = RREG32(mmRLC_CNTL);
5454         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5455                 return;
5456
5457         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5458             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5459                                AMD_PG_SUPPORT_GFX_DMG))) {
5460                 data |= RLC_GPR_REG2__REQ_MASK;
5461                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5462                 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5463                 WREG32(mmRLC_GPR_REG2, data);
5464
5465                 for (i = 0; i < adev->usec_timeout; i++) {
5466                         if ((RREG32(mmRLC_GPM_STAT) &
5467                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5468                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5469                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5470                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5471                                 break;
5472                         udelay(1);
5473                 }
5474
5475                 for (i = 0; i < adev->usec_timeout; i++) {
5476                         if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5477                                 break;
5478                         udelay(1);
5479                 }
5480                 adev->gfx.rlc.in_safe_mode = true;
5481         }
5482 }
5483
5484 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5485 {
5486         u32 data;
5487         unsigned i;
5488
5489         data = RREG32(mmRLC_CNTL);
5490         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5491                 return;
5492
5493         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5494             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5495                                AMD_PG_SUPPORT_GFX_DMG))) {
5496                 data |= RLC_GPR_REG2__REQ_MASK;
5497                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5498                 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5499                 WREG32(mmRLC_GPR_REG2, data);
5500                 adev->gfx.rlc.in_safe_mode = false;
5501         }
5502
5503         for (i = 0; i < adev->usec_timeout; i++) {
5504                 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5505                         break;
5506                 udelay(1);
5507         }
5508 }
5509
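/*
 * Iceland/Topaz variant of the safe-mode handshake: it uses the
 * dedicated RLC_SAFE_MODE register (CMD + MESSAGE) instead of
 * RLC_GPR_REG2, then polls RLC_GPM_STAT and waits for CMD to clear.
 */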
5510 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5511 {
5512         u32 data;
5513         unsigned i;
5514
5515         data = RREG32(mmRLC_CNTL);
5516         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5517                 return;
5518
5519         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5520                 data |= RLC_SAFE_MODE__CMD_MASK;
5521                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5522                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5523                 WREG32(mmRLC_SAFE_MODE, data);
5524
5525                 for (i = 0; i < adev->usec_timeout; i++) {
5526                         if ((RREG32(mmRLC_GPM_STAT) &
5527                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5528                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5529                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5530                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5531                                 break;
5532                         udelay(1);
5533                 }
5534
5535                 for (i = 0; i < adev->usec_timeout; i++) {
5536                         if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5537                                 break;
5538                         udelay(1);
5539                 }
5540                 adev->gfx.rlc.in_safe_mode = true;
5541         }
5542 }
5543
5544 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5545 {
5546         u32 data = 0;
5547         unsigned i;
5548
5549         data = RREG32(mmRLC_CNTL);
5550         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5551                 return;
5552
5553         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5554                 if (adev->gfx.rlc.in_safe_mode) {
5555                         data |= RLC_SAFE_MODE__CMD_MASK;
5556                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5557                         WREG32(mmRLC_SAFE_MODE, data);
5558                         adev->gfx.rlc.in_safe_mode = false;
5559                 }
5560         }
5561
5562         for (i = 0; i < adev->usec_timeout; i++) {
5563                 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5564                         break;
5565                 udelay(1);
5566         }
5567 }
5568
5569 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5570 {
5571         adev->gfx.rlc.in_safe_mode = true;
5572 }
5573
5574 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5575 {
5576         adev->gfx.rlc.in_safe_mode = false;
5577 }
5578
5579 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5580         .enter_safe_mode = cz_enter_rlc_safe_mode,
5581         .exit_safe_mode = cz_exit_rlc_safe_mode
5582 };
5583
5584 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5585         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5586         .exit_safe_mode = iceland_exit_rlc_safe_mode
5587 };
5588
5589 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5590         .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5591         .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5592 };
5593
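/*
 * Enable or disable medium-grain clock gating (MGCG) and the related
 * memory light-sleep features.  The whole sequence runs in RLC safe
 * mode: program RLC/CP memory light sleep, adjust the bits in
 * RLC_CGTT_MGCG_OVERRIDE, and clear or set the BPM MGCG override via
 * serdes commands, waiting for the serdes masters to go idle between
 * steps.
 */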
5594 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5595                                                       bool enable)
5596 {
5597         uint32_t temp, data;
5598
5599         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5600
5601         /* It is disabled by HW by default */
5602         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5603                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5604                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5605                                 /* 1 - RLC memory Light sleep */
5606                                 temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
5607                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5608                                 if (temp != data)
5609                                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5610                         }
5611
5612                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5613                                 /* 2 - CP memory Light sleep */
5614                                 temp = data = RREG32(mmCP_MEM_SLP_CNTL);
5615                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5616                                 if (temp != data)
5617                                         WREG32(mmCP_MEM_SLP_CNTL, data);
5618                         }
5619                 }
5620
5621                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5622                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5623                 if (adev->flags & AMD_IS_APU)
5624                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5625                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5626                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5627                 else
5628                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5629                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5630                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5631                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5632
5633                 if (temp != data)
5634                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5635
5636                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5637                 gfx_v8_0_wait_for_rlc_serdes(adev);
5638
5639                 /* 5 - clear mgcg override */
5640                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5641
5642                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5643                         /* 6 - Enable CGTS (Tree Shade) MGCG/MGLS */
5644                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5645                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5646                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5647                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5648                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5649                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5650                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5651                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5652                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5653                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5654                         if (temp != data)
5655                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5656                 }
5657                 udelay(50);
5658
5659                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5660                 gfx_v8_0_wait_for_rlc_serdes(adev);
5661         } else {
5662                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5663                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5664                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5665                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5666                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5667                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5668                 if (temp != data)
5669                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5670
5671                 /* 2 - disable MGLS in RLC */
5672                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5673                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5674                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5675                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5676                 }
5677
5678                 /* 3 - disable MGLS in CP */
5679                 data = RREG32(mmCP_MEM_SLP_CNTL);
5680                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5681                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5682                         WREG32(mmCP_MEM_SLP_CNTL, data);
5683                 }
5684
5685                 /* 4 - Disable CGTS (Tree Shade) MGCG and MGLS */
5686                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5687                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5688                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5689                 if (temp != data)
5690                         WREG32(mmCGTS_SM_CTRL_REG, data);
5691
5692                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5693                 gfx_v8_0_wait_for_rlc_serdes(adev);
5694
5695                 /* 6 - set mgcg override */
5696                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5697
5698                 udelay(50);
5699
5700                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5701                 gfx_v8_0_wait_for_rlc_serdes(adev);
5702         }
5703
5704         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5705 }
5706
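/*
 * Enable or disable coarse-grain clock gating (CGCG) and CGLS, also
 * under RLC safe mode: adjust the CGCG/CGLS override bits, send the
 * matching BPM serdes commands, and finally update RLC_CGCG_CGLS_CTRL.
 */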
5707 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5708                                                       bool enable)
5709 {
5710         uint32_t temp, temp1, data, data1;
5711
5712         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5713
5714         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5715
5716         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5717                 /* 1 - enable cntx_empty_int_enable/cntx_busy_int_enable/
5718                  * Cmp_busy/GFX_Idle interrupts
5719                  */
5720                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5721
5722                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5723                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5724                 if (temp1 != data1)
5725                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5726
5727                 /* 2 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5728                 gfx_v8_0_wait_for_rlc_serdes(adev);
5729
5730                 /* 3 - clear cgcg override */
5731                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5732
5733                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5734                 gfx_v8_0_wait_for_rlc_serdes(adev);
5735
5736                 /* 4 - write cmd to set CGLS */
5737                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5738
5739                 /* 5 - enable cgcg */
5740                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5741
5742                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5743                         /* enable cgls*/
5744                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5745
5746                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5747                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5748
5749                         if (temp1 != data1)
5750                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5751                 } else {
5752                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5753                 }
5754
5755                 if (temp != data)
5756                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5757         } else {
5758                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5759                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5760
5761                 /* TEST CGCG */
5762                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5763                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5764                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5765                 if (temp1 != data1)
5766                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5767
5768                 /* read gfx register to wake up cgcg */
5769                 RREG32(mmCB_CGTT_SCLK_CTRL);
5770                 RREG32(mmCB_CGTT_SCLK_CTRL);
5771                 RREG32(mmCB_CGTT_SCLK_CTRL);
5772                 RREG32(mmCB_CGTT_SCLK_CTRL);
5773
5774                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5775                 gfx_v8_0_wait_for_rlc_serdes(adev);
5776
5777                 /* write cmd to Set CGCG Override */
5778                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5779
5780                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5781                 gfx_v8_0_wait_for_rlc_serdes(adev);
5782
5783                 /* write cmd to Clear CGLS */
5784                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5785
5786                 /* disable cgcg, cgls should be disabled too. */
5787                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5788                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5789                 if (temp != data)
5790                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5791         }
5792
5793         gfx_v8_0_wait_for_rlc_serdes(adev);
5794
5795         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5796 }

5797 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5798                                             bool enable)
5799 {
5800         if (enable) {
5801                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5802                  * ===  MGCG + MGLS + TS(CG/LS) ===
5803                  */
5804                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5805                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5806         } else {
5807                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5808                  * ===  CGCG + CGLS ===
5809                  */
5810                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5811                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5812         }
5813         return 0;
5814 }
5815
5816 static int gfx_v8_0_set_clockgating_state(void *handle,
5817                                           enum amd_clockgating_state state)
5818 {
5819         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5820
5821         switch (adev->asic_type) {
5822         case CHIP_FIJI:
5823         case CHIP_CARRIZO:
5824         case CHIP_STONEY:
5825                 gfx_v8_0_update_gfx_clock_gating(adev,
5826                                                  state == AMD_CG_STATE_GATE);
5827                 break;
5828         default:
5829                 break;
5830         }
5831         return 0;
5832 }
5833
5834 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5835 {
5836         u32 rptr;
5837
5838         rptr = ring->adev->wb.wb[ring->rptr_offs];
5839
5840         return rptr;
5841 }
5842
5843 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5844 {
5845         struct amdgpu_device *adev = ring->adev;
5846         u32 wptr;
5847
5848         if (ring->use_doorbell)
5849                 /* XXX check if swapping is necessary on BE */
5850                 wptr = ring->adev->wb.wb[ring->wptr_offs];
5851         else
5852                 wptr = RREG32(mmCP_RB0_WPTR);
5853
5854         return wptr;
5855 }
5856
5857 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5858 {
5859         struct amdgpu_device *adev = ring->adev;
5860
5861         if (ring->use_doorbell) {
5862                 /* XXX check if swapping is necessary on BE */
5863                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5864                 WDOORBELL32(ring->doorbell_index, ring->wptr);
5865         } else {
5866                 WREG32(mmCP_RB0_WPTR, ring->wptr);
5867                 (void)RREG32(mmCP_RB0_WPTR);
5868         }
5869 }
5870
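/*
 * Emit an HDP flush: a WAIT_REG_MEM packet that writes
 * GPU_HDP_FLUSH_REQ and polls GPU_HDP_FLUSH_DONE until the bit
 * selected by ref_and_mask is set.  Compute rings derive the bit from
 * their ME/pipe, the GFX ring uses CP0 on the PFP engine.
 */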
5871 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5872 {
5873         u32 ref_and_mask, reg_mem_engine;
5874
5875         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
5876                 switch (ring->me) {
5877                 case 1:
5878                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5879                         break;
5880                 case 2:
5881                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5882                         break;
5883                 default:
5884                         return;
5885                 }
5886                 reg_mem_engine = 0;
5887         } else {
5888                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
5889                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
5890         }
5891
5892         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5893         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
5894                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
5895                                  reg_mem_engine));
5896         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
5897         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
5898         amdgpu_ring_write(ring, ref_and_mask);
5899         amdgpu_ring_write(ring, ref_and_mask);
5900         amdgpu_ring_write(ring, 0x20); /* poll interval */
5901 }
5902
5903 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
5904 {
5905         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5906         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5907                                  WRITE_DATA_DST_SEL(0) |
5908                                  WR_CONFIRM));
5909         amdgpu_ring_write(ring, mmHDP_DEBUG0);
5910         amdgpu_ring_write(ring, 0);
5911         amdgpu_ring_write(ring, 1);
5913 }
5914
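/*
 * Emit an indirect buffer on the GFX ring: publish the expected read
 * pointer with a WRITE_DATA packet, emit SWITCH_BUFFER on a context
 * switch, then the INDIRECT_BUFFER(_CONST) packet carrying the IB
 * address, size and VMID.
 */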
5915 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5916                                       struct amdgpu_ib *ib,
5917                                       unsigned vm_id, bool ctx_switch)
5918 {
5919         u32 header, control = 0;
5920         u32 next_rptr = ring->wptr + 5;
5921
5922         if (ctx_switch)
5923                 next_rptr += 2;
5924
5925         next_rptr += 4;
5926         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5927         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5928         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5929         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5930         amdgpu_ring_write(ring, next_rptr);
5931
5932         /* insert SWITCH_BUFFER packet before first IB in the ring frame */
5933         if (ctx_switch) {
5934                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5935                 amdgpu_ring_write(ring, 0);
5936         }
5937
5938         if (ib->flags & AMDGPU_IB_FLAG_CE)
5939                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5940         else
5941                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5942
5943         control |= ib->length_dw | (vm_id << 24);
5944
5945         amdgpu_ring_write(ring, header);
5946         amdgpu_ring_write(ring,
5947 #ifdef __BIG_ENDIAN
5948                           (2 << 0) |
5949 #endif
5950                           (ib->gpu_addr & 0xFFFFFFFC));
5951         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5952         amdgpu_ring_write(ring, control);
5953 }
5954
5955 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5956                                           struct amdgpu_ib *ib,
5957                                           unsigned vm_id, bool ctx_switch)
5958 {
5959         u32 header, control = 0;
5960         u32 next_rptr = ring->wptr + 5;
5961
5962         control |= INDIRECT_BUFFER_VALID;
5963
5964         next_rptr += 4;
5965         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5966         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5967         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5968         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5969         amdgpu_ring_write(ring, next_rptr);
5970
5971         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5972
5973         control |= ib->length_dw | (vm_id << 24);
5974
5975         amdgpu_ring_write(ring, header);
5976         amdgpu_ring_write(ring,
5977 #ifdef __BIG_ENDIAN
5978                                           (2 << 0) |
5979 #endif
5980                                           (ib->gpu_addr & 0xFFFFFFFC));
5981         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5982         amdgpu_ring_write(ring, control);
5983 }
5984
5985 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
5986                                          u64 seq, unsigned flags)
5987 {
5988         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5989         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5990
5991         /* EVENT_WRITE_EOP - flush caches, send int */
5992         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
5993         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5994                                  EOP_TC_ACTION_EN |
5995                                  EOP_TC_WB_ACTION_EN |
5996                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5997                                  EVENT_INDEX(5)));
5998         amdgpu_ring_write(ring, addr & 0xfffffffc);
5999         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6000                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6001         amdgpu_ring_write(ring, lower_32_bits(seq));
6002         amdgpu_ring_write(ring, upper_32_bits(seq));
6004 }
6005
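/*
 * Wait for the ring's last emitted fence value to appear in memory
 * (WAIT_REG_MEM on the fence address).  On the GFX ring the wait runs
 * on the PFP and is followed by SWITCH_BUFFER packets so the CE does
 * not run ahead of the ME.
 */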
6006 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6007 {
6008         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6009         uint32_t seq = ring->fence_drv.sync_seq;
6010         uint64_t addr = ring->fence_drv.gpu_addr;
6011
6012         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6013         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6014                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6015                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6016         amdgpu_ring_write(ring, addr & 0xfffffffc);
6017         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6018         amdgpu_ring_write(ring, seq);
6019         amdgpu_ring_write(ring, 0xffffffff);
6020         amdgpu_ring_write(ring, 4); /* poll interval */
6021
6022         if (usepfp) {
6023                 /* sync CE with ME to prevent CE from fetching the CEIB before the context switch is done */
6024                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6025                 amdgpu_ring_write(ring, 0);
6026                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6027                 amdgpu_ring_write(ring, 0);
6028         }
6029 }
6030
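/*
 * Flush the VM for a given VMID: write the page-directory address into
 * the per-VMID PAGE_TABLE_BASE_ADDR register, trigger a TLB invalidate
 * through VM_INVALIDATE_REQUEST, wait for it with a WAIT_REG_MEM
 * packet, and on the GFX ring resync the PFP with the ME (compute has
 * no PFP).
 */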
6031 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6032                                         unsigned vm_id, uint64_t pd_addr)
6033 {
6034         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6035
6036         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6037         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6038                                  WRITE_DATA_DST_SEL(0) |
6039                                  WR_CONFIRM));
6040         if (vm_id < 8) {
6041                 amdgpu_ring_write(ring,
6042                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6043         } else {
6044                 amdgpu_ring_write(ring,
6045                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6046         }
6047         amdgpu_ring_write(ring, 0);
6048         amdgpu_ring_write(ring, pd_addr >> 12);
6049
6050         /* bits 0-15 are the VM contexts 0-15 */
6051         /* invalidate the cache */
6052         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6053         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6054                                  WRITE_DATA_DST_SEL(0)));
6055         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6056         amdgpu_ring_write(ring, 0);
6057         amdgpu_ring_write(ring, 1 << vm_id);
6058
6059         /* wait for the invalidate to complete */
6060         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6061         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6062                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6063                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6064         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6065         amdgpu_ring_write(ring, 0);
6066         amdgpu_ring_write(ring, 0); /* ref */
6067         amdgpu_ring_write(ring, 0); /* mask */
6068         amdgpu_ring_write(ring, 0x20); /* poll interval */
6069
6070         /* compute doesn't have PFP */
6071         if (usepfp) {
6072                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6073                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6074                 amdgpu_ring_write(ring, 0x0);
6075                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6076                 amdgpu_ring_write(ring, 0);
6077                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6078                 amdgpu_ring_write(ring, 0);
6079         }
6080 }
6081
6082 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
6083 {
6084         return ring->adev->wb.wb[ring->rptr_offs];
6085 }
6086
6087 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6088 {
6089         return ring->adev->wb.wb[ring->wptr_offs];
6090 }
6091
6092 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6093 {
6094         struct amdgpu_device *adev = ring->adev;
6095
6096         /* XXX check if swapping is necessary on BE */
6097         adev->wb.wb[ring->wptr_offs] = ring->wptr;
6098         WDOORBELL32(ring->doorbell_index, ring->wptr);
6099 }
6100
6101 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6102                                              u64 addr, u64 seq,
6103                                              unsigned flags)
6104 {
6105         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6106         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6107
6108         /* RELEASE_MEM - flush caches, send int */
6109         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6110         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6111                                  EOP_TC_ACTION_EN |
6112                                  EOP_TC_WB_ACTION_EN |
6113                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6114                                  EVENT_INDEX(5)));
6115         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6116         amdgpu_ring_write(ring, addr & 0xfffffffc);
6117         amdgpu_ring_write(ring, upper_32_bits(addr));
6118         amdgpu_ring_write(ring, lower_32_bits(seq));
6119         amdgpu_ring_write(ring, upper_32_bits(seq));
6120 }
6121
6122 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6123                                                  enum amdgpu_interrupt_state state)
6124 {
6125         u32 cp_int_cntl;
6126
6127         switch (state) {
6128         case AMDGPU_IRQ_STATE_DISABLE:
6129                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6130                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6131                                             TIME_STAMP_INT_ENABLE, 0);
6132                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6133                 break;
6134         case AMDGPU_IRQ_STATE_ENABLE:
6135                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6136                 cp_int_cntl =
6137                         REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6138                                       TIME_STAMP_INT_ENABLE, 1);
6139                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6140                 break;
6141         default:
6142                 break;
6143         }
6144 }
6145
6146 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6147                                                      int me, int pipe,
6148                                                      enum amdgpu_interrupt_state state)
6149 {
6150         u32 mec_int_cntl, mec_int_cntl_reg;
6151
6152         /*
6153          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6154          * handles the setting of interrupts for this specific pipe. All other
6155          * pipes' interrupts are set by amdkfd.
6156          */
6157
6158         if (me == 1) {
6159                 switch (pipe) {
6160                 case 0:
6161                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6162                         break;
6163                 default:
6164                         DRM_DEBUG("invalid pipe %d\n", pipe);
6165                         return;
6166                 }
6167         } else {
6168                 DRM_DEBUG("invalid me %d\n", me);
6169                 return;
6170         }
6171
6172         switch (state) {
6173         case AMDGPU_IRQ_STATE_DISABLE:
6174                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6175                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6176                                              TIME_STAMP_INT_ENABLE, 0);
6177                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6178                 break;
6179         case AMDGPU_IRQ_STATE_ENABLE:
6180                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6181                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6182                                              TIME_STAMP_INT_ENABLE, 1);
6183                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6184                 break;
6185         default:
6186                 break;
6187         }
6188 }
6189
6190 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6191                                              struct amdgpu_irq_src *source,
6192                                              unsigned type,
6193                                              enum amdgpu_interrupt_state state)
6194 {
6195         u32 cp_int_cntl;
6196
6197         switch (state) {
6198         case AMDGPU_IRQ_STATE_DISABLE:
6199                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6200                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6201                                             PRIV_REG_INT_ENABLE, 0);
6202                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6203                 break;
6204         case AMDGPU_IRQ_STATE_ENABLE:
6205                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6206                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6207                                             PRIV_REG_INT_ENABLE, 1);
6208                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6209                 break;
6210         default:
6211                 break;
6212         }
6213
6214         return 0;
6215 }
6216
6217 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6218                                               struct amdgpu_irq_src *source,
6219                                               unsigned type,
6220                                               enum amdgpu_interrupt_state state)
6221 {
6222         u32 cp_int_cntl;
6223
6224         switch (state) {
6225         case AMDGPU_IRQ_STATE_DISABLE:
6226                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6227                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6228                                             PRIV_INSTR_INT_ENABLE, 0);
6229                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6230                 break;
6231         case AMDGPU_IRQ_STATE_ENABLE:
6232                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6233                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6234                                             PRIV_INSTR_INT_ENABLE, 1);
6235                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6236                 break;
6237         default:
6238                 break;
6239         }
6240
6241         return 0;
6242 }
6243
6244 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6245                                             struct amdgpu_irq_src *src,
6246                                             unsigned type,
6247                                             enum amdgpu_interrupt_state state)
6248 {
6249         switch (type) {
6250         case AMDGPU_CP_IRQ_GFX_EOP:
6251                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6252                 break;
6253         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6254                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6255                 break;
6256         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6257                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6258                 break;
6259         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6260                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6261                 break;
6262         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6263                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6264                 break;
6265         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6266                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6267                 break;
6268         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6269                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6270                 break;
6271         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6272                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6273                 break;
6274         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6275                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6276                 break;
6277         default:
6278                 break;
6279         }
6280         return 0;
6281 }
6282
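/*
 * EOP interrupt handler: decode ME/pipe/queue from the IV ring_id and
 * run fence processing on the matching GFX or compute ring.
 */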
6283 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6284                             struct amdgpu_irq_src *source,
6285                             struct amdgpu_iv_entry *entry)
6286 {
6287         int i;
6288         u8 me_id, pipe_id, queue_id;
6289         struct amdgpu_ring *ring;
6290
6291         DRM_DEBUG("IH: CP EOP\n");
6292         me_id = (entry->ring_id & 0x0c) >> 2;
6293         pipe_id = (entry->ring_id & 0x03) >> 0;
6294         queue_id = (entry->ring_id & 0x70) >> 4;
6295
6296         switch (me_id) {
6297         case 0:
6298                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6299                 break;
6300         case 1:
6301         case 2:
6302                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6303                         ring = &adev->gfx.compute_ring[i];
6304                         /* Per-queue interrupt is supported for MEC starting from VI.
6305                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
6306                          */
6307                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6308                                 amdgpu_fence_process(ring);
6309                 }
6310                 break;
6311         }
6312         return 0;
6313 }
6314
6315 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6316                                  struct amdgpu_irq_src *source,
6317                                  struct amdgpu_iv_entry *entry)
6318 {
6319         DRM_ERROR("Illegal register access in command stream\n");
6320         schedule_work(&adev->reset_work);
6321         return 0;
6322 }
6323
6324 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6325                                   struct amdgpu_irq_src *source,
6326                                   struct amdgpu_iv_entry *entry)
6327 {
6328         DRM_ERROR("Illegal instruction in command stream\n");
6329         schedule_work(&adev->reset_work);
6330         return 0;
6331 }
6332
6333 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6334         .name = "gfx_v8_0",
6335         .early_init = gfx_v8_0_early_init,
6336         .late_init = gfx_v8_0_late_init,
6337         .sw_init = gfx_v8_0_sw_init,
6338         .sw_fini = gfx_v8_0_sw_fini,
6339         .hw_init = gfx_v8_0_hw_init,
6340         .hw_fini = gfx_v8_0_hw_fini,
6341         .suspend = gfx_v8_0_suspend,
6342         .resume = gfx_v8_0_resume,
6343         .is_idle = gfx_v8_0_is_idle,
6344         .wait_for_idle = gfx_v8_0_wait_for_idle,
6345         .soft_reset = gfx_v8_0_soft_reset,
6346         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6347         .set_powergating_state = gfx_v8_0_set_powergating_state,
6348 };
6349
6350 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6351         .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
6352         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6353         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6354         .parse_cs = NULL,
6355         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6356         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6357         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6358         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6359         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6360         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6361         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6362         .test_ring = gfx_v8_0_ring_test_ring,
6363         .test_ib = gfx_v8_0_ring_test_ib,
6364         .insert_nop = amdgpu_ring_insert_nop,
6365         .pad_ib = amdgpu_ring_generic_pad_ib,
6366 };
6367
6368 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6369         .get_rptr = gfx_v8_0_ring_get_rptr_compute,
6370         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6371         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6372         .parse_cs = NULL,
6373         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6374         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6375         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6376         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6377         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6378         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6379         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6380         .test_ring = gfx_v8_0_ring_test_ring,
6381         .test_ib = gfx_v8_0_ring_test_ib,
6382         .insert_nop = amdgpu_ring_insert_nop,
6383         .pad_ib = amdgpu_ring_generic_pad_ib,
6384 };
6385
6386 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6387 {
6388         int i;
6389
6390         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6391                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6392
6393         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6394                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6395 }
6396
6397 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6398         .set = gfx_v8_0_set_eop_interrupt_state,
6399         .process = gfx_v8_0_eop_irq,
6400 };
6401
6402 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6403         .set = gfx_v8_0_set_priv_reg_fault_state,
6404         .process = gfx_v8_0_priv_reg_irq,
6405 };
6406
6407 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6408         .set = gfx_v8_0_set_priv_inst_fault_state,
6409         .process = gfx_v8_0_priv_inst_irq,
6410 };
6411
6412 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6413 {
6414         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6415         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6416
6417         adev->gfx.priv_reg_irq.num_types = 1;
6418         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6419
6420         adev->gfx.priv_inst_irq.num_types = 1;
6421         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6422 }
6423
6424 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6425 {
6426         switch (adev->asic_type) {
6427         case CHIP_TOPAZ:
6428                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6429                 break;
6430         case CHIP_STONEY:
6431         case CHIP_CARRIZO:
6432                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6433                 break;
6434         default:
6435                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6436                 break;
6437         }
6438 }
6439
6440 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6441 {
6442         /* init asic gds info */
6443         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6444         adev->gds.gws.total_size = 64;
6445         adev->gds.oa.total_size = 16;
6446
6447         if (adev->gds.mem.total_size == 64 * 1024) {
6448                 adev->gds.mem.gfx_partition_size = 4096;
6449                 adev->gds.mem.cs_partition_size = 4096;
6450
6451                 adev->gds.gws.gfx_partition_size = 4;
6452                 adev->gds.gws.cs_partition_size = 4;
6453
6454                 adev->gds.oa.gfx_partition_size = 4;
6455                 adev->gds.oa.cs_partition_size = 1;
6456         } else {
6457                 adev->gds.mem.gfx_partition_size = 1024;
6458                 adev->gds.mem.cs_partition_size = 1024;
6459
6460                 adev->gds.gws.gfx_partition_size = 16;
6461                 adev->gds.gws.cs_partition_size = 16;
6462
6463                 adev->gds.oa.gfx_partition_size = 4;
6464                 adev->gds.oa.cs_partition_size = 4;
6465         }
6466 }
6467
6468 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6469                                                  u32 bitmap)
6470 {
6471         u32 data;
6472
6473         if (!bitmap)
6474                 return;
6475
6476         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6477         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6478
6479         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6480 }
6481
6482 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6483 {
6484         u32 data, mask;
6485
6486         data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
6487         data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6488
6489         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6490         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6491
6492         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6493
6494         return (~data) & mask;
6495 }
6496
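/*
 * Build the CU info table: walk every SE/SH, apply the user CU disable
 * masks, record each SH's active-CU bitmap, count the active CUs and
 * mark up to two CUs per SH as always-on in ao_cu_mask.
 */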
6497 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6498 {
6499         int i, j, k, counter, active_cu_number = 0;
6500         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6501         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6502         unsigned disable_masks[4 * 2];
6503
6504         memset(cu_info, 0, sizeof(*cu_info));
6505
6506         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6507
6508         mutex_lock(&adev->grbm_idx_mutex);
6509         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6510                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6511                         mask = 1;
6512                         ao_bitmap = 0;
6513                         counter = 0;
6514                         gfx_v8_0_select_se_sh(adev, i, j);
6515                         if (i < 4 && j < 2)
6516                                 gfx_v8_0_set_user_cu_inactive_bitmap(
6517                                         adev, disable_masks[i * 2 + j]);
6518                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6519                         cu_info->bitmap[i][j] = bitmap;
6520
6521                         for (k = 0; k < 16; k ++) {
6522                                 if (bitmap & mask) {
6523                                         if (counter < 2)
6524                                                 ao_bitmap |= mask;
6525                                         counter ++;
6526                                 }
6527                                 mask <<= 1;
6528                         }
6529                         active_cu_number += counter;
6530                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6531                 }
6532         }
6533         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
6534         mutex_unlock(&adev->grbm_idx_mutex);
6535
6536         cu_info->number = active_cu_number;
6537         cu_info->ao_cu_mask = ao_cu_mask;
6538 }