/* drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c */
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

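/*
 * The "golden" tables below are flat u32 arrays consumed three dwords at a
 * time (register offset, mask, value) by amdgpu_program_register_sequence();
 * gfx_v8_0_init_golden_registers() picks the tables for the current ASIC.
 */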
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
        mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);

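/*
 * Apply the per-ASIC golden register sequences defined above. Polaris10
 * additionally tweaks CG_ACLK_CNTL through the SMC and, on a few specific
 * board SKUs, issues a pair of AtomBIOS I2C transactions.
 */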
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_program_register_sequence(adev,
                                                 iceland_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_iceland_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
                amdgpu_program_register_sequence(adev,
                                                 iceland_golden_common_all,
                                                 (const u32)ARRAY_SIZE(iceland_golden_common_all));
                break;
        case CHIP_FIJI:
                amdgpu_program_register_sequence(adev,
                                                 fiji_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_fiji_a10,
                                                 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
                amdgpu_program_register_sequence(adev,
                                                 fiji_golden_common_all,
                                                 (const u32)ARRAY_SIZE(fiji_golden_common_all));
                break;
        case CHIP_TONGA:
                amdgpu_program_register_sequence(adev,
                                                 tonga_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_tonga_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
                amdgpu_program_register_sequence(adev,
                                                 tonga_golden_common_all,
                                                 (const u32)ARRAY_SIZE(tonga_golden_common_all));
                break;
        case CHIP_POLARIS11:
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_polaris11_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
                amdgpu_program_register_sequence(adev,
                                                 polaris11_golden_common_all,
                                                 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
                break;
        case CHIP_POLARIS10:
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_polaris10_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
                amdgpu_program_register_sequence(adev,
                                                 polaris10_golden_common_all,
                                                 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
                if (adev->pdev->revision == 0xc7 &&
                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
                }
                break;
        case CHIP_CARRIZO:
                amdgpu_program_register_sequence(adev,
                                                 cz_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 cz_golden_settings_a11,
                                                 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
                amdgpu_program_register_sequence(adev,
                                                 cz_golden_common_all,
                                                 (const u32)ARRAY_SIZE(cz_golden_common_all));
                break;
        case CHIP_STONEY:
                amdgpu_program_register_sequence(adev,
                                                 stoney_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 stoney_golden_settings_a11,
                                                 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
                amdgpu_program_register_sequence(adev,
                                                 stoney_golden_common_all,
                                                 (const u32)ARRAY_SIZE(stoney_golden_common_all));
                break;
        default:
                break;
        }
}

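/* Mark the seven SCRATCH_REG* registers as available for ring/IB tests. */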
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
        int i;

        adev->gfx.scratch.num_reg = 7;
        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
        for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
                adev->gfx.scratch.free[i] = true;
                adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
        }
}

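/*
 * Basic ring sanity test: write 0xCAFEDEAD to a scratch register, emit a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, and poll until the
 * new value lands or the timeout expires.
 */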
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
                          ring->idx, r);
                amdgpu_gfx_scratch_free(adev, scratch);
                return r;
        }
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i < adev->usec_timeout) {
                DRM_INFO("ring test on %d succeeded in %d usecs\n",
                         ring->idx, i);
        } else {
                DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
                          ring->idx, scratch, tmp);
                r = -EINVAL;
        }
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}

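/*
 * Indirect buffer test: same scratch-register handshake as the ring test,
 * but the write is submitted through a 3-dword IB and completion is
 * detected by waiting on the returned fence.
 */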
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct fence *f = NULL;
        uint32_t scratch;
        uint32_t tmp = 0;
        long r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, 256, &ib);
        if (r) {
                DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
                goto err1;
        }
        ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
        ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
        ib.ptr[2] = 0xDEADBEEF;
        ib.length_dw = 3;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
        if (r)
                goto err2;

        r = fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                DRM_ERROR("amdgpu: IB test timed out.\n");
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
                goto err2;
        }
        tmp = RREG32(scratch);
        if (tmp == 0xDEADBEEF) {
                DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
                r = 0;
        } else {
                DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
                          scratch, tmp);
                r = -EINVAL;
        }
err2:
        amdgpu_ib_free(adev, &ib, NULL);
        fence_put(f);
err1:
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}

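/* Drop all CP/RLC firmware references requested by gfx_v8_0_init_microcode(). */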
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.ce_fw);
        adev->gfx.ce_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;
        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ))
                release_firmware(adev->gfx.mec2_fw);
        adev->gfx.mec2_fw = NULL;

        kfree(adev->gfx.rlc.register_list_format);
}

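/*
 * Fetch and validate the PFP, ME, CE, RLC and MEC microcode images for the
 * current ASIC, cache their version/feature numbers and the RLC register
 * lists, and (when the SMU loads firmware) register each image in
 * adev->firmware so it is uploaded by the SMU path.
 */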
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
        const char *chip_name;
        char fw_name[30];
        int err;
        struct amdgpu_firmware_info *info = NULL;
        const struct common_firmware_header *header = NULL;
        const struct gfx_firmware_header_v1_0 *cp_hdr;
        const struct rlc_firmware_header_v2_0 *rlc_hdr;
        unsigned int *tmp = NULL, i;

        DRM_DEBUG("\n");

        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                chip_name = "topaz";
                break;
        case CHIP_TONGA:
                chip_name = "tonga";
                break;
        case CHIP_CARRIZO:
                chip_name = "carrizo";
                break;
        case CHIP_FIJI:
                chip_name = "fiji";
                break;
        case CHIP_POLARIS11:
                chip_name = "polaris11";
                break;
        case CHIP_POLARIS10:
                chip_name = "polaris10";
                break;
        case CHIP_STONEY:
                chip_name = "stoney";
                break;
        default:
                BUG();
        }

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.me_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
        err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        if (err)
                goto out;
        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

        adev->gfx.rlc.save_and_restore_offset =
                        le32_to_cpu(rlc_hdr->save_and_restore_offset);
        adev->gfx.rlc.clear_state_descriptor_offset =
                        le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
        adev->gfx.rlc.avail_scratch_ram_locations =
                        le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
        adev->gfx.rlc.reg_restore_list_size =
                        le32_to_cpu(rlc_hdr->reg_restore_list_size);
        adev->gfx.rlc.reg_list_format_start =
                        le32_to_cpu(rlc_hdr->reg_list_format_start);
        adev->gfx.rlc.reg_list_format_separate_start =
                        le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
        adev->gfx.rlc.starting_offsets_start =
                        le32_to_cpu(rlc_hdr->starting_offsets_start);
        adev->gfx.rlc.reg_list_format_size_bytes =
                        le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
        adev->gfx.rlc.reg_list_size_bytes =
                        le32_to_cpu(rlc_hdr->reg_list_size_bytes);

        adev->gfx.rlc.register_list_format =
                        kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
                                        adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

        if (!adev->gfx.rlc.register_list_format) {
                err = -ENOMEM;
                goto out;
        }

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                        le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
        for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
                adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

        adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                        le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
        for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ)) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
                err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
                if (!err) {
                        err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
                        if (err)
                                goto out;
                        cp_hdr = (const struct gfx_firmware_header_v1_0 *)
                                adev->gfx.mec2_fw->data;
                        adev->gfx.mec2_fw_version =
                                le32_to_cpu(cp_hdr->header.ucode_version);
                        adev->gfx.mec2_feature_version =
                                le32_to_cpu(cp_hdr->ucode_feature_version);
                } else {
                        err = 0;
                        adev->gfx.mec2_fw = NULL;
                }
        }

        if (adev->firmware.smu_load) {
                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
                info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
                info->fw = adev->gfx.pfp_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
                info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
                info->fw = adev->gfx.me_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
                info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
                info->fw = adev->gfx.ce_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
                info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
                info->fw = adev->gfx.rlc_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
                info->fw = adev->gfx.mec_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                if (adev->gfx.mec2_fw) {
                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
                        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
                        info->fw = adev->gfx.mec2_fw;
                        header = (const struct common_firmware_header *)info->fw->data;
                        adev->firmware.fw_size +=
                                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
                }

        }

out:
        if (err) {
                dev_err(adev->dev,
                        "gfx8: Failed to load firmware \"%s\"\n",
                        fw_name);
                release_firmware(adev->gfx.pfp_fw);
                adev->gfx.pfp_fw = NULL;
                release_firmware(adev->gfx.me_fw);
                adev->gfx.me_fw = NULL;
                release_firmware(adev->gfx.ce_fw);
                adev->gfx.ce_fw = NULL;
                release_firmware(adev->gfx.rlc_fw);
                adev->gfx.rlc_fw = NULL;
                release_firmware(adev->gfx.mec_fw);
                adev->gfx.mec_fw = NULL;
                release_firmware(adev->gfx.mec2_fw);
                adev->gfx.mec2_fw = NULL;
        }
        return err;
}

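/*
 * Build the clear-state buffer: a PM4 preamble that loads the SECT_CONTEXT
 * register extents from the VI clear-state data, programs the per-ASIC
 * PA_SC_RASTER_CONFIG values, and ends with a CLEAR_STATE packet.
 */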
1093 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1094                                     volatile u32 *buffer)
1095 {
1096         u32 count = 0, i;
1097         const struct cs_section_def *sect = NULL;
1098         const struct cs_extent_def *ext = NULL;
1099
1100         if (adev->gfx.rlc.cs_data == NULL)
1101                 return;
1102         if (buffer == NULL)
1103                 return;
1104
1105         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1106         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1107
1108         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1109         buffer[count++] = cpu_to_le32(0x80000000);
1110         buffer[count++] = cpu_to_le32(0x80000000);
1111
1112         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1113                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1114                         if (sect->id == SECT_CONTEXT) {
1115                                 buffer[count++] =
1116                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1117                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1118                                                 PACKET3_SET_CONTEXT_REG_START);
1119                                 for (i = 0; i < ext->reg_count; i++)
1120                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1121                         } else {
1122                                 return;
1123                         }
1124                 }
1125         }
1126
1127         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1128         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1129                         PACKET3_SET_CONTEXT_REG_START);
1130         switch (adev->asic_type) {
1131         case CHIP_TONGA:
1132         case CHIP_POLARIS10:
1133                 buffer[count++] = cpu_to_le32(0x16000012);
1134                 buffer[count++] = cpu_to_le32(0x0000002A);
1135                 break;
1136         case CHIP_POLARIS11:
1137                 buffer[count++] = cpu_to_le32(0x16000012);
1138                 buffer[count++] = cpu_to_le32(0x00000000);
1139                 break;
1140         case CHIP_FIJI:
1141                 buffer[count++] = cpu_to_le32(0x3a00161a);
1142                 buffer[count++] = cpu_to_le32(0x0000002e);
1143                 break;
1144         case CHIP_TOPAZ:
1145         case CHIP_CARRIZO:
1146                 buffer[count++] = cpu_to_le32(0x00000002);
1147                 buffer[count++] = cpu_to_le32(0x00000000);
1148                 break;
1149         case CHIP_STONEY:
1150                 buffer[count++] = cpu_to_le32(0x00000000);
1151                 buffer[count++] = cpu_to_le32(0x00000000);
1152                 break;
1153         default:
1154                 buffer[count++] = cpu_to_le32(0x00000000);
1155                 buffer[count++] = cpu_to_le32(0x00000000);
1156                 break;
1157         }
1158
1159         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1160         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1161
1162         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1163         buffer[count++] = cpu_to_le32(0);
1164 }
1165
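/*
 * Copy the jump table (JT) section of each CP firmware image (CE, PFP, ME,
 * MEC, and additionally MEC2 on Carrizo) back to back into the RLC CP
 * table BO. The caller must already have cp_table_ptr mapped.
 */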
1166 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1167 {
1168         const __le32 *fw_data;
1169         volatile u32 *dst_ptr;
1170         int me, i, max_me = 4;
1171         u32 bo_offset = 0;
1172         u32 table_offset, table_size;
1173
1174         if (adev->asic_type == CHIP_CARRIZO)
1175                 max_me = 5;
1176
1177         /* write the cp table buffer */
1178         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1179         for (me = 0; me < max_me; me++) {
1180                 if (me == 0) {
1181                         const struct gfx_firmware_header_v1_0 *hdr =
1182                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1183                         fw_data = (const __le32 *)
1184                                 (adev->gfx.ce_fw->data +
1185                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1186                         table_offset = le32_to_cpu(hdr->jt_offset);
1187                         table_size = le32_to_cpu(hdr->jt_size);
1188                 } else if (me == 1) {
1189                         const struct gfx_firmware_header_v1_0 *hdr =
1190                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1191                         fw_data = (const __le32 *)
1192                                 (adev->gfx.pfp_fw->data +
1193                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1194                         table_offset = le32_to_cpu(hdr->jt_offset);
1195                         table_size = le32_to_cpu(hdr->jt_size);
1196                 } else if (me == 2) {
1197                         const struct gfx_firmware_header_v1_0 *hdr =
1198                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1199                         fw_data = (const __le32 *)
1200                                 (adev->gfx.me_fw->data +
1201                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1202                         table_offset = le32_to_cpu(hdr->jt_offset);
1203                         table_size = le32_to_cpu(hdr->jt_size);
1204                 } else if (me == 3) {
1205                         const struct gfx_firmware_header_v1_0 *hdr =
1206                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1207                         fw_data = (const __le32 *)
1208                                 (adev->gfx.mec_fw->data +
1209                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1210                         table_offset = le32_to_cpu(hdr->jt_offset);
1211                         table_size = le32_to_cpu(hdr->jt_size);
1212                 } else if (me == 4) {
1213                         const struct gfx_firmware_header_v1_0 *hdr =
1214                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1215                         fw_data = (const __le32 *)
1216                                 (adev->gfx.mec2_fw->data +
1217                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1218                         table_offset = le32_to_cpu(hdr->jt_offset);
1219                         table_size = le32_to_cpu(hdr->jt_size);
1220                 }
1221
1222                 for (i = 0; i < table_size; i++) {
1223                         dst_ptr[bo_offset + i] =
1224                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1225                 }
1226
1227                 bo_offset += table_size;
1228         }
1229 }
1230
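/* Unpin and free the RLC clear-state and CP jump-table BOs. */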
1231 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1232 {
1233         int r;
1234
1235         /* clear state block */
1236         if (adev->gfx.rlc.clear_state_obj) {
1237                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1238                 if (unlikely(r != 0))
1239                         dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1240                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1241                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1242                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1243                 adev->gfx.rlc.clear_state_obj = NULL;
1244         }
1245
1246         /* jump table block */
1247         if (adev->gfx.rlc.cp_table_obj) {
1248                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1249                 if (unlikely(r != 0))
1250                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1251                 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1252                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1253                 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1254                 adev->gfx.rlc.cp_table_obj = NULL;
1255         }
1256 }
1257
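/*
 * Allocate, pin and CPU-map the clear-state BO in VRAM and fill it via
 * gfx_v8_0_get_csb_buffer(). On Carrizo/Stoney, also allocate the CP table
 * BO (jump tables plus GDS backup) and populate it with
 * cz_init_cp_jump_table(). Failures in the clear-state path unwind through
 * gfx_v8_0_rlc_fini().
 */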
1258 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1259 {
1260         volatile u32 *dst_ptr;
1261         u32 dws;
1262         const struct cs_section_def *cs_data;
1263         int r;
1264
1265         adev->gfx.rlc.cs_data = vi_cs_data;
1266
1267         cs_data = adev->gfx.rlc.cs_data;
1268
1269         if (cs_data) {
1270                 /* clear state block */
1271                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1272
1273                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1274                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1275                                              AMDGPU_GEM_DOMAIN_VRAM,
1276                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1277                                              NULL, NULL,
1278                                              &adev->gfx.rlc.clear_state_obj);
1279                         if (r) {
1280                                 dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
1281                                 gfx_v8_0_rlc_fini(adev);
1282                                 return r;
1283                         }
1284                 }
1285                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1286                 if (unlikely(r != 0)) {
1287                         gfx_v8_0_rlc_fini(adev);
1288                         return r;
1289                 }
1290                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1291                                   &adev->gfx.rlc.clear_state_gpu_addr);
1292                 if (r) {
1293                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1294                         dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1295                         gfx_v8_0_rlc_fini(adev);
1296                         return r;
1297                 }
1298
1299                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1300                 if (r) {
1301                         dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1302                         gfx_v8_0_rlc_fini(adev);
1303                         return r;
1304                 }
1305                 /* set up the cs buffer */
1306                 dst_ptr = adev->gfx.rlc.cs_ptr;
1307                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1308                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1309                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1310         }
1311
1312         if ((adev->asic_type == CHIP_CARRIZO) ||
1313             (adev->asic_type == CHIP_STONEY)) {
1314                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1315                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1316                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1317                                              AMDGPU_GEM_DOMAIN_VRAM,
1318                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1319                                              NULL, NULL,
1320                                              &adev->gfx.rlc.cp_table_obj);
1321                         if (r) {
1322                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1323                                 return r;
1324                         }
1325                 }
1326
1327                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1328                 if (unlikely(r != 0)) {
1329                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1330                         return r;
1331                 }
1332                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1333                                   &adev->gfx.rlc.cp_table_gpu_addr);
1334                 if (r) {
1335                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1336                         dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1337                         return r;
1338                 }
1339                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1340                 if (r) {
1341                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1342                         return r;
1343                 }
1344
1345                 cz_init_cp_jump_table(adev);
1346
1347                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1348                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1349         }
1350
1351         return 0;
1352 }
1353
1354 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1355 {
1356         int r;
1357
1358         if (adev->gfx.mec.hpd_eop_obj) {
1359                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1360                 if (unlikely(r != 0))
1361                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1362                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1363                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1364                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1365                 adev->gfx.mec.hpd_eop_obj = NULL;
1366         }
1367 }
1368
1369 #define MEC_HPD_SIZE 2048
1370
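/*
 * Allocate and zero the HPD EOP buffer in GTT for the single MEC pipe
 * owned by the gfx driver (the remaining pipes are left to the KFD),
 * sized as num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes.
 */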
1371 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1372 {
1373         int r;
1374         u32 *hpd;
1375
1376         /*
1377          * we assign only 1 pipe because all other pipes will
1378          * be handled by KFD
1379          */
1380         adev->gfx.mec.num_mec = 1;
1381         adev->gfx.mec.num_pipe = 1;
1382         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1383
1384         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1385                 r = amdgpu_bo_create(adev,
1386                                      adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1387                                      PAGE_SIZE, true,
1388                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1389                                      &adev->gfx.mec.hpd_eop_obj);
1390                 if (r) {
1391                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1392                         return r;
1393                 }
1394         }
1395
1396         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1397         if (unlikely(r != 0)) {
1398                 gfx_v8_0_mec_fini(adev);
1399                 return r;
1400         }
1401         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1402                           &adev->gfx.mec.hpd_eop_gpu_addr);
1403         if (r) {
1404                 dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
1405                 gfx_v8_0_mec_fini(adev);
1406                 return r;
1407         }
1408         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1409         if (r) {
1410                 dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
1411                 gfx_v8_0_mec_fini(adev);
1412                 return r;
1413         }
1414
1415         memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1416
1417         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1418         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1419
1420         return 0;
1421 }
1422
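/*
 * Raw GCN ISA dwords for the two helper compute shaders used by the EDC
 * GPR workaround below. The first block appears to initialize a range of
 * VGPRs and the second a range of SGPRs, each terminated by a wait and an
 * end-of-program instruction (approximate description; the dwords are not
 * decoded here).
 */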
1423 static const u32 vgpr_init_compute_shader[] =
1424 {
1425         0x7e000209, 0x7e020208,
1426         0x7e040207, 0x7e060206,
1427         0x7e080205, 0x7e0a0204,
1428         0x7e0c0203, 0x7e0e0202,
1429         0x7e100201, 0x7e120200,
1430         0x7e140209, 0x7e160208,
1431         0x7e180207, 0x7e1a0206,
1432         0x7e1c0205, 0x7e1e0204,
1433         0x7e200203, 0x7e220202,
1434         0x7e240201, 0x7e260200,
1435         0x7e280209, 0x7e2a0208,
1436         0x7e2c0207, 0x7e2e0206,
1437         0x7e300205, 0x7e320204,
1438         0x7e340203, 0x7e360202,
1439         0x7e380201, 0x7e3a0200,
1440         0x7e3c0209, 0x7e3e0208,
1441         0x7e400207, 0x7e420206,
1442         0x7e440205, 0x7e460204,
1443         0x7e480203, 0x7e4a0202,
1444         0x7e4c0201, 0x7e4e0200,
1445         0x7e500209, 0x7e520208,
1446         0x7e540207, 0x7e560206,
1447         0x7e580205, 0x7e5a0204,
1448         0x7e5c0203, 0x7e5e0202,
1449         0x7e600201, 0x7e620200,
1450         0x7e640209, 0x7e660208,
1451         0x7e680207, 0x7e6a0206,
1452         0x7e6c0205, 0x7e6e0204,
1453         0x7e700203, 0x7e720202,
1454         0x7e740201, 0x7e760200,
1455         0x7e780209, 0x7e7a0208,
1456         0x7e7c0207, 0x7e7e0206,
1457         0xbf8a0000, 0xbf810000,
1458 };
1459
1460 static const u32 sgpr_init_compute_shader[] =
1461 {
1462         0xbe8a0100, 0xbe8c0102,
1463         0xbe8e0104, 0xbe900106,
1464         0xbe920108, 0xbe940100,
1465         0xbe960102, 0xbe980104,
1466         0xbe9a0106, 0xbe9c0108,
1467         0xbe9e0100, 0xbea00102,
1468         0xbea20104, 0xbea40106,
1469         0xbea60108, 0xbea80100,
1470         0xbeaa0102, 0xbeac0104,
1471         0xbeae0106, 0xbeb00108,
1472         0xbeb20100, 0xbeb40102,
1473         0xbeb60104, 0xbeb80106,
1474         0xbeba0108, 0xbebc0100,
1475         0xbebe0102, 0xbec00104,
1476         0xbec20106, 0xbec40108,
1477         0xbec60100, 0xbec80102,
1478         0xbee60004, 0xbee70005,
1479         0xbeea0006, 0xbeeb0007,
1480         0xbee80008, 0xbee90009,
1481         0xbefc0000, 0xbf8a0000,
1482         0xbf810000, 0x00000000,
1483 };
1484
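/*
 * Register/value pairs consumed by the dispatch setup loops in
 * gfx_v8_0_do_edc_gpr_workarounds(); each pair is emitted as a
 * PACKET3_SET_SH_REG write. The three tables configure the thread counts
 * and COMPUTE_USER_DATA_* registers for the VGPR pass and the two SGPR
 * passes.
 */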
1485 static const u32 vgpr_init_regs[] =
1486 {
1487         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1488         mmCOMPUTE_RESOURCE_LIMITS, 0,
1489         mmCOMPUTE_NUM_THREAD_X, 256*4,
1490         mmCOMPUTE_NUM_THREAD_Y, 1,
1491         mmCOMPUTE_NUM_THREAD_Z, 1,
1492         mmCOMPUTE_PGM_RSRC2, 20,
1493         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1494         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1495         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1496         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1497         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1498         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1499         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1500         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1501         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1502         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1503 };
1504
1505 static const u32 sgpr1_init_regs[] =
1506 {
1507         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1508         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1509         mmCOMPUTE_NUM_THREAD_X, 256*5,
1510         mmCOMPUTE_NUM_THREAD_Y, 1,
1511         mmCOMPUTE_NUM_THREAD_Z, 1,
1512         mmCOMPUTE_PGM_RSRC2, 20,
1513         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1514         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1515         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1516         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1517         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1518         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1519         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1520         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1521         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1522         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1523 };
1524
1525 static const u32 sgpr2_init_regs[] =
1526 {
1527         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1528         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1529         mmCOMPUTE_NUM_THREAD_X, 256*5,
1530         mmCOMPUTE_NUM_THREAD_Y, 1,
1531         mmCOMPUTE_NUM_THREAD_Z, 1,
1532         mmCOMPUTE_PGM_RSRC2, 20,
1533         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1534         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1535         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1536         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1537         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1538         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1539         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1540         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1541         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1542         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1543 };
1544
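/*
 * EDC SEC/DED counter registers; they are read back at the end of the
 * workaround to clear the counters.
 */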
1545 static const u32 sec_ded_counter_registers[] =
1546 {
1547         mmCPC_EDC_ATC_CNT,
1548         mmCPC_EDC_SCRATCH_CNT,
1549         mmCPC_EDC_UCODE_CNT,
1550         mmCPF_EDC_ATC_CNT,
1551         mmCPF_EDC_ROQ_CNT,
1552         mmCPF_EDC_TAG_CNT,
1553         mmCPG_EDC_ATC_CNT,
1554         mmCPG_EDC_DMA_CNT,
1555         mmCPG_EDC_TAG_CNT,
1556         mmDC_EDC_CSINVOC_CNT,
1557         mmDC_EDC_RESTORE_CNT,
1558         mmDC_EDC_STATE_CNT,
1559         mmGDS_EDC_CNT,
1560         mmGDS_EDC_GRBM_CNT,
1561         mmGDS_EDC_OA_DED,
1562         mmSPI_EDC_CNT,
1563         mmSQC_ATC_EDC_GATCL1_CNT,
1564         mmSQC_EDC_CNT,
1565         mmSQ_EDC_DED_CNT,
1566         mmSQ_EDC_INFO,
1567         mmSQ_EDC_SEC_CNT,
1568         mmTCC_EDC_CNT,
1569         mmTCP_ATC_EDC_GATCL1_CNT,
1570         mmTCP_EDC_CNT,
1571         mmTD_EDC_CNT
1572 };
1573
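/*
 * Carrizo-only EDC GPR workaround: build one IB that performs three
 * compute dispatches (a VGPR-init pass and two SGPR-init passes) to touch
 * the GPRs, wait for the resulting fence, then program GB_EDC_MODE and
 * CC_GC_EDC_CONFIG and read back the SEC/DED counters to clear them.
 * Silently returns 0 on other ASICs or when the compute ring is not ready.
 */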
1574 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1575 {
1576         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1577         struct amdgpu_ib ib;
1578         struct fence *f = NULL;
1579         int r, i;
1580         u32 tmp;
1581         unsigned total_size, vgpr_offset, sgpr_offset;
1582         u64 gpu_addr;
1583
1584         /* only supported on CZ */
1585         if (adev->asic_type != CHIP_CARRIZO)
1586                 return 0;
1587
1588         /* bail if the compute ring is not ready */
1589         if (!ring->ready)
1590                 return 0;
1591
1592         tmp = RREG32(mmGB_EDC_MODE);
1593         WREG32(mmGB_EDC_MODE, 0);
1594
1595         total_size =
1596                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1597         total_size +=
1598                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1599         total_size +=
1600                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1601         total_size = ALIGN(total_size, 256);
1602         vgpr_offset = total_size;
1603         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1604         sgpr_offset = total_size;
1605         total_size += sizeof(sgpr_init_compute_shader);
1606
1607         /* allocate an indirect buffer to put the commands in */
1608         memset(&ib, 0, sizeof(ib));
1609         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1610         if (r) {
1611                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1612                 return r;
1613         }
1614
1615         /* load the compute shaders */
1616         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1617                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1618
1619         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1620                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1621
1622         /* init the ib length to 0 */
1623         ib.length_dw = 0;
1624
1625         /* VGPR */
1626         /* write the register state for the compute dispatch */
1627         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1628                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1629                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1630                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1631         }
1632         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1633         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1634         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1635         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1636         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1637         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1638
1639         /* write dispatch packet */
1640         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1641         ib.ptr[ib.length_dw++] = 8; /* x */
1642         ib.ptr[ib.length_dw++] = 1; /* y */
1643         ib.ptr[ib.length_dw++] = 1; /* z */
1644         ib.ptr[ib.length_dw++] =
1645                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1646
1647         /* write CS partial flush packet */
1648         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1649         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1650
1651         /* SGPR1 */
1652         /* write the register state for the compute dispatch */
1653         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1654                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1655                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1656                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1657         }
1658         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1659         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1660         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1661         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1662         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1663         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1664
1665         /* write dispatch packet */
1666         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1667         ib.ptr[ib.length_dw++] = 8; /* x */
1668         ib.ptr[ib.length_dw++] = 1; /* y */
1669         ib.ptr[ib.length_dw++] = 1; /* z */
1670         ib.ptr[ib.length_dw++] =
1671                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1672
1673         /* write CS partial flush packet */
1674         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1675         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1676
1677         /* SGPR2 */
1678         /* write the register state for the compute dispatch */
1679         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1680                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1681                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1682                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1683         }
1684         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1685         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1686         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1687         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1688         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1689         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1690
1691         /* write dispatch packet */
1692         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1693         ib.ptr[ib.length_dw++] = 8; /* x */
1694         ib.ptr[ib.length_dw++] = 1; /* y */
1695         ib.ptr[ib.length_dw++] = 1; /* z */
1696         ib.ptr[ib.length_dw++] =
1697                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1698
1699         /* write CS partial flush packet */
1700         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1701         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1702
1703         /* schedule the ib on the ring */
1704         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1705         if (r) {
1706                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1707                 goto fail;
1708         }
1709
1710         /* wait for the GPU to finish processing the IB */
1711         r = fence_wait(f, false);
1712         if (r) {
1713                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1714                 goto fail;
1715         }
1716
1717         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1718         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1719         WREG32(mmGB_EDC_MODE, tmp);
1720
1721         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1722         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1723         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1724
1725
1726         /* read back registers to clear the counters */
1727         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1728                 RREG32(sec_ded_counter_registers[i]);
1729
1730 fail:
1731         amdgpu_ib_free(adev, &ib, NULL);
1732         fence_put(f);
1733
1734         return r;
1735 }
1736
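/*
 * Fill adev->gfx.config for the detected ASIC (shader engines, tile pipes,
 * CUs per SH, FIFO sizes) and pick the golden GB_ADDR_CONFIG; Polaris
 * parts query their gfx parameters from atombios instead. The memory row
 * size comes from the fused DIMM address maps on APUs and from
 * MC_ARB_RAMCFG.NOOFCOLS on dGPUs, and is folded back into
 * GB_ADDR_CONFIG.ROW_SIZE.
 */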
1737 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1738 {
1739         u32 gb_addr_config;
1740         u32 mc_shared_chmap, mc_arb_ramcfg;
1741         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1742         u32 tmp;
1743         int ret;
1744
1745         switch (adev->asic_type) {
1746         case CHIP_TOPAZ:
1747                 adev->gfx.config.max_shader_engines = 1;
1748                 adev->gfx.config.max_tile_pipes = 2;
1749                 adev->gfx.config.max_cu_per_sh = 6;
1750                 adev->gfx.config.max_sh_per_se = 1;
1751                 adev->gfx.config.max_backends_per_se = 2;
1752                 adev->gfx.config.max_texture_channel_caches = 2;
1753                 adev->gfx.config.max_gprs = 256;
1754                 adev->gfx.config.max_gs_threads = 32;
1755                 adev->gfx.config.max_hw_contexts = 8;
1756
1757                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1758                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1759                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1760                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1761                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1762                 break;
1763         case CHIP_FIJI:
1764                 adev->gfx.config.max_shader_engines = 4;
1765                 adev->gfx.config.max_tile_pipes = 16;
1766                 adev->gfx.config.max_cu_per_sh = 16;
1767                 adev->gfx.config.max_sh_per_se = 1;
1768                 adev->gfx.config.max_backends_per_se = 4;
1769                 adev->gfx.config.max_texture_channel_caches = 16;
1770                 adev->gfx.config.max_gprs = 256;
1771                 adev->gfx.config.max_gs_threads = 32;
1772                 adev->gfx.config.max_hw_contexts = 8;
1773
1774                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1775                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1776                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1777                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1778                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1779                 break;
1780         case CHIP_POLARIS11:
1781                 ret = amdgpu_atombios_get_gfx_info(adev);
1782                 if (ret)
1783                         return ret;
1784                 adev->gfx.config.max_gprs = 256;
1785                 adev->gfx.config.max_gs_threads = 32;
1786                 adev->gfx.config.max_hw_contexts = 8;
1787
1788                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1789                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1790                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1791                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1792                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1793                 break;
1794         case CHIP_POLARIS10:
1795                 ret = amdgpu_atombios_get_gfx_info(adev);
1796                 if (ret)
1797                         return ret;
1798                 adev->gfx.config.max_gprs = 256;
1799                 adev->gfx.config.max_gs_threads = 32;
1800                 adev->gfx.config.max_hw_contexts = 8;
1801
1802                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1803                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1804                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1805                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1806                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1807                 break;
1808         case CHIP_TONGA:
1809                 adev->gfx.config.max_shader_engines = 4;
1810                 adev->gfx.config.max_tile_pipes = 8;
1811                 adev->gfx.config.max_cu_per_sh = 8;
1812                 adev->gfx.config.max_sh_per_se = 1;
1813                 adev->gfx.config.max_backends_per_se = 2;
1814                 adev->gfx.config.max_texture_channel_caches = 8;
1815                 adev->gfx.config.max_gprs = 256;
1816                 adev->gfx.config.max_gs_threads = 32;
1817                 adev->gfx.config.max_hw_contexts = 8;
1818
1819                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1820                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1821                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1822                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1823                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1824                 break;
1825         case CHIP_CARRIZO:
1826                 adev->gfx.config.max_shader_engines = 1;
1827                 adev->gfx.config.max_tile_pipes = 2;
1828                 adev->gfx.config.max_sh_per_se = 1;
1829                 adev->gfx.config.max_backends_per_se = 2;
1830
1831                 switch (adev->pdev->revision) {
1832                 case 0xc4:
1833                 case 0x84:
1834                 case 0xc8:
1835                 case 0xcc:
1836                 case 0xe1:
1837                 case 0xe3:
1838                         /* B10 */
1839                         adev->gfx.config.max_cu_per_sh = 8;
1840                         break;
1841                 case 0xc5:
1842                 case 0x81:
1843                 case 0x85:
1844                 case 0xc9:
1845                 case 0xcd:
1846                 case 0xe2:
1847                 case 0xe4:
1848                         /* B8 */
1849                         adev->gfx.config.max_cu_per_sh = 6;
1850                         break;
1851                 case 0xc6:
1852                 case 0xca:
1853                 case 0xce:
1854                 case 0x88:
1855                         /* B6 */
1856                         adev->gfx.config.max_cu_per_sh = 6;
1857                         break;
1858                 case 0xc7:
1859                 case 0x87:
1860                 case 0xcb:
1861                 case 0xe5:
1862                 case 0x89:
1863                 default:
1864                         /* B4 */
1865                         adev->gfx.config.max_cu_per_sh = 4;
1866                         break;
1867                 }
1868
1869                 adev->gfx.config.max_texture_channel_caches = 2;
1870                 adev->gfx.config.max_gprs = 256;
1871                 adev->gfx.config.max_gs_threads = 32;
1872                 adev->gfx.config.max_hw_contexts = 8;
1873
1874                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1875                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1876                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1877                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1878                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1879                 break;
1880         case CHIP_STONEY:
1881                 adev->gfx.config.max_shader_engines = 1;
1882                 adev->gfx.config.max_tile_pipes = 2;
1883                 adev->gfx.config.max_sh_per_se = 1;
1884                 adev->gfx.config.max_backends_per_se = 1;
1885
1886                 switch (adev->pdev->revision) {
1887                 case 0xc0:
1888                 case 0xc1:
1889                 case 0xc2:
1890                 case 0xc4:
1891                 case 0xc8:
1892                 case 0xc9:
1893                         adev->gfx.config.max_cu_per_sh = 3;
1894                         break;
1895                 case 0xd0:
1896                 case 0xd1:
1897                 case 0xd2:
1898                 default:
1899                         adev->gfx.config.max_cu_per_sh = 2;
1900                         break;
1901                 }
1902
1903                 adev->gfx.config.max_texture_channel_caches = 2;
1904                 adev->gfx.config.max_gprs = 256;
1905                 adev->gfx.config.max_gs_threads = 16;
1906                 adev->gfx.config.max_hw_contexts = 8;
1907
1908                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1909                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1910                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1911                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1912                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1913                 break;
1914         default:
1915                 adev->gfx.config.max_shader_engines = 2;
1916                 adev->gfx.config.max_tile_pipes = 4;
1917                 adev->gfx.config.max_cu_per_sh = 2;
1918                 adev->gfx.config.max_sh_per_se = 1;
1919                 adev->gfx.config.max_backends_per_se = 2;
1920                 adev->gfx.config.max_texture_channel_caches = 4;
1921                 adev->gfx.config.max_gprs = 256;
1922                 adev->gfx.config.max_gs_threads = 32;
1923                 adev->gfx.config.max_hw_contexts = 8;
1924
1925                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1926                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1927                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1928                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1929                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1930                 break;
1931         }
1932
1933         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1934         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1935         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1936
1937         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1938         adev->gfx.config.mem_max_burst_length_bytes = 256;
1939         if (adev->flags & AMD_IS_APU) {
1940                 /* Get memory bank mapping mode. */
1941                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1942                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1943                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1944
1945                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1946                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1947                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1948
1949                 /* Validate settings in case only one DIMM is installed. */
1950                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1951                         dimm00_addr_map = 0;
1952                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1953                         dimm01_addr_map = 0;
1954                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1955                         dimm10_addr_map = 0;
1956                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1957                         dimm11_addr_map = 0;
1958
1959                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1960                 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1961                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1962                         adev->gfx.config.mem_row_size_in_kb = 2;
1963                 else
1964                         adev->gfx.config.mem_row_size_in_kb = 1;
1965         } else {
1966                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1967                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1968                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1969                         adev->gfx.config.mem_row_size_in_kb = 4;
1970         }
1971
1972         adev->gfx.config.shader_engine_tile_size = 32;
1973         adev->gfx.config.num_gpus = 1;
1974         adev->gfx.config.multi_gpu_tile_size = 64;
1975
1976         /* fix up row size */
1977         switch (adev->gfx.config.mem_row_size_in_kb) {
1978         case 1:
1979         default:
1980                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1981                 break;
1982         case 2:
1983                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1984                 break;
1985         case 4:
1986                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1987                 break;
1988         }
1989         adev->gfx.config.gb_addr_config = gb_addr_config;
1990
1991         return 0;
1992 }
1993
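/*
 * sw_init: register the EOP (181), privileged-register (184) and
 * privileged-instruction (185) interrupt sources, load the microcode, set
 * up the RLC and MEC BOs, create the gfx ring(s) and the compute rings
 * (8 queues per pipe on MEC1, capped at 32), and reserve the GDS, GWS and
 * OA partitions as kernel BOs.
 */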
1994 static int gfx_v8_0_sw_init(void *handle)
1995 {
1996         int i, r;
1997         struct amdgpu_ring *ring;
1998         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1999
2000         /* EOP Event */
2001         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
2002         if (r)
2003                 return r;
2004
2005         /* Privileged reg */
2006         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
2007         if (r)
2008                 return r;
2009
2010         /* Privileged inst */
2011         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2012         if (r)
2013                 return r;
2014
2015         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2016
2017         gfx_v8_0_scratch_init(adev);
2018
2019         r = gfx_v8_0_init_microcode(adev);
2020         if (r) {
2021                 DRM_ERROR("Failed to load gfx firmware!\n");
2022                 return r;
2023         }
2024
2025         r = gfx_v8_0_rlc_init(adev);
2026         if (r) {
2027                 DRM_ERROR("Failed to init rlc BOs!\n");
2028                 return r;
2029         }
2030
2031         r = gfx_v8_0_mec_init(adev);
2032         if (r) {
2033                 DRM_ERROR("Failed to init MEC BOs!\n");
2034                 return r;
2035         }
2036
2037         /* set up the gfx ring */
2038         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2039                 ring = &adev->gfx.gfx_ring[i];
2040                 ring->ring_obj = NULL;
2041                 sprintf(ring->name, "gfx");
2042                 /* no gfx doorbells on iceland */
2043                 if (adev->asic_type != CHIP_TOPAZ) {
2044                         ring->use_doorbell = true;
2045                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2046                 }
2047
2048                 r = amdgpu_ring_init(adev, ring, 1024,
2049                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2050                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
2051                                      AMDGPU_RING_TYPE_GFX);
2052                 if (r)
2053                         return r;
2054         }
2055
2056         /* set up the compute queues */
2057         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2058                 unsigned irq_type;
2059
2060                 /* max 32 queues per MEC */
2061                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2062                         DRM_ERROR("Too many (%d) compute rings!\n", i);
2063                         break;
2064                 }
2065                 ring = &adev->gfx.compute_ring[i];
2066                 ring->ring_obj = NULL;
2067                 ring->use_doorbell = true;
2068                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2069                 ring->me = 1; /* first MEC */
2070                 ring->pipe = i / 8;
2071                 ring->queue = i % 8;
2072                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2073                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2074                 /* type-2 packets are deprecated on MEC, use type-3 instead */
2075                 r = amdgpu_ring_init(adev, ring, 1024,
2076                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2077                                      &adev->gfx.eop_irq, irq_type,
2078                                      AMDGPU_RING_TYPE_COMPUTE);
2079                 if (r)
2080                         return r;
2081         }
2082
2083         /* reserve GDS, GWS and OA resources for gfx */
2084         r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2085                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2086                                     &adev->gds.gds_gfx_bo, NULL, NULL);
2087         if (r)
2088                 return r;
2089
2090         r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2091                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2092                                     &adev->gds.gws_gfx_bo, NULL, NULL);
2093         if (r)
2094                 return r;
2095
2096         r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2097                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2098                                     &adev->gds.oa_gfx_bo, NULL, NULL);
2099         if (r)
2100                 return r;
2101
2102         adev->gfx.ce_ram_size = 0x8000;
2103
2104         r = gfx_v8_0_gpu_early_init(adev);
2105         if (r)
2106                 return r;
2107
2108         return 0;
2109 }
2110
2111 static int gfx_v8_0_sw_fini(void *handle)
2112 {
2113         int i;
2114         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2115
2116         amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
2117         amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
2118         amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
2119
2120         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2121                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2122         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2123                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2124
2125         gfx_v8_0_mec_fini(adev);
2126         gfx_v8_0_rlc_fini(adev);
2127         gfx_v8_0_free_microcode(adev);
2128
2129         return 0;
2130 }
2131
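/*
 * Program the GB_TILE_MODE* and GB_MACROTILE_MODE* registers with the
 * per-ASIC tiling tables and keep the values in
 * adev->gfx.config.tile_mode_array / macrotile_mode_array; reserved
 * entries are left at zero and skipped when writing the registers.
 */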
2132 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2133 {
2134         uint32_t *modearray, *mod2array;
2135         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2136         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2137         u32 reg_offset;
2138
2139         modearray = adev->gfx.config.tile_mode_array;
2140         mod2array = adev->gfx.config.macrotile_mode_array;
2141
2142         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2143                 modearray[reg_offset] = 0;
2144
2145         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2146                 mod2array[reg_offset] = 0;
2147
2148         switch (adev->asic_type) {
2149         case CHIP_TOPAZ:
2150                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2151                                 PIPE_CONFIG(ADDR_SURF_P2) |
2152                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2153                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2154                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2155                                 PIPE_CONFIG(ADDR_SURF_P2) |
2156                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2157                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2158                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2159                                 PIPE_CONFIG(ADDR_SURF_P2) |
2160                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2161                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2162                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2163                                 PIPE_CONFIG(ADDR_SURF_P2) |
2164                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2165                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2166                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2167                                 PIPE_CONFIG(ADDR_SURF_P2) |
2168                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2169                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2170                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2171                                 PIPE_CONFIG(ADDR_SURF_P2) |
2172                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2173                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2174                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2175                                 PIPE_CONFIG(ADDR_SURF_P2) |
2176                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2177                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2178                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2179                                 PIPE_CONFIG(ADDR_SURF_P2));
2180                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2181                                 PIPE_CONFIG(ADDR_SURF_P2) |
2182                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2183                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2185                                  PIPE_CONFIG(ADDR_SURF_P2) |
2186                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2187                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2188                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2189                                  PIPE_CONFIG(ADDR_SURF_P2) |
2190                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2191                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2192                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2193                                  PIPE_CONFIG(ADDR_SURF_P2) |
2194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2196                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2197                                  PIPE_CONFIG(ADDR_SURF_P2) |
2198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2200                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2201                                  PIPE_CONFIG(ADDR_SURF_P2) |
2202                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2203                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2204                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2205                                  PIPE_CONFIG(ADDR_SURF_P2) |
2206                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2207                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2208                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2209                                  PIPE_CONFIG(ADDR_SURF_P2) |
2210                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2211                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2212                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2213                                  PIPE_CONFIG(ADDR_SURF_P2) |
2214                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2215                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2216                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2217                                  PIPE_CONFIG(ADDR_SURF_P2) |
2218                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2219                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2220                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2221                                  PIPE_CONFIG(ADDR_SURF_P2) |
2222                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2223                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2224                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2225                                  PIPE_CONFIG(ADDR_SURF_P2) |
2226                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2227                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2228                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2229                                  PIPE_CONFIG(ADDR_SURF_P2) |
2230                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2231                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2232                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2233                                  PIPE_CONFIG(ADDR_SURF_P2) |
2234                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2235                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2236                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2237                                  PIPE_CONFIG(ADDR_SURF_P2) |
2238                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2239                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2240                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2241                                  PIPE_CONFIG(ADDR_SURF_P2) |
2242                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2243                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2244                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2245                                  PIPE_CONFIG(ADDR_SURF_P2) |
2246                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2247                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2248                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2249                                  PIPE_CONFIG(ADDR_SURF_P2) |
2250                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2251                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2252
2253                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2254                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2255                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2256                                 NUM_BANKS(ADDR_SURF_8_BANK));
2257                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2258                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2259                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2260                                 NUM_BANKS(ADDR_SURF_8_BANK));
2261                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2262                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2263                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264                                 NUM_BANKS(ADDR_SURF_8_BANK));
2265                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2266                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2267                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2268                                 NUM_BANKS(ADDR_SURF_8_BANK));
2269                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2270                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2271                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2272                                 NUM_BANKS(ADDR_SURF_8_BANK));
2273                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2276                                 NUM_BANKS(ADDR_SURF_8_BANK));
2277                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2278                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2279                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2280                                 NUM_BANKS(ADDR_SURF_8_BANK));
2281                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2282                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2283                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2284                                 NUM_BANKS(ADDR_SURF_16_BANK));
2285                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2286                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2287                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288                                 NUM_BANKS(ADDR_SURF_16_BANK));
2289                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2290                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2291                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2292                                  NUM_BANKS(ADDR_SURF_16_BANK));
2293                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2294                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2295                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2296                                  NUM_BANKS(ADDR_SURF_16_BANK));
2297                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2299                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2300                                  NUM_BANKS(ADDR_SURF_16_BANK));
2301                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2302                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2303                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2304                                  NUM_BANKS(ADDR_SURF_16_BANK));
2305                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2306                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2307                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2308                                  NUM_BANKS(ADDR_SURF_8_BANK));
2309
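                /*
                 * Program the tile and macrotile tables.  Tile-mode
                 * indices 7, 12, 17 and 23 and macrotile index 7 are
                 * skipped and keep their existing values.
                 */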
2310                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2311                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2312                             reg_offset != 23)
2313                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2314
2315                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2316                         if (reg_offset != 7)
2317                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2318
2319                 break;
2320         case CHIP_FIJI:
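                /*
                 * Fiji: 16-pipe P16_32x32_16x16 pipe configuration for
                 * the tile modes; the PRT variants at indices 7, 12, 17,
                 * 23 and 30 use P4_16x16 instead.
                 */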
2321                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2324                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2325                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2326                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2328                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2329                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2330                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2331                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2332                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2333                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2335                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2336                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2337                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2338                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2339                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2340                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2341                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2342                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2344                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2345                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2346                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2348                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2349                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2350                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2351                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2352                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2353                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2354                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2355                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2356                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2357                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2358                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2360                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2362                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2364                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2365                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2366                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2367                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2368                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2369                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2370                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2371                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2372                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2374                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2375                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2376                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2378                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2379                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2380                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2382                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2383                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2384                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2386                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2387                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2388                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2389                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2390                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2391                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2392                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2393                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2394                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2395                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2396                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2398                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2399                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2400                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2402                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2403                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2404                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2406                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2407                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2408                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2410                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2411                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2412                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2413                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2414                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2415                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2416                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2418                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2419                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2420                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2421                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2422                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2423                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2424                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2426                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2427                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2428                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2429                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2430                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2432                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2434                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2436                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2437                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2439                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2440                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2443
2444                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2446                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2447                                 NUM_BANKS(ADDR_SURF_8_BANK));
2448                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2451                                 NUM_BANKS(ADDR_SURF_8_BANK));
2452                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2454                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2455                                 NUM_BANKS(ADDR_SURF_8_BANK));
2456                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2458                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2459                                 NUM_BANKS(ADDR_SURF_8_BANK));
2460                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2462                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463                                 NUM_BANKS(ADDR_SURF_8_BANK));
2464                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2466                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2467                                 NUM_BANKS(ADDR_SURF_8_BANK));
2468                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471                                 NUM_BANKS(ADDR_SURF_8_BANK));
2472                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2474                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2475                                 NUM_BANKS(ADDR_SURF_8_BANK));
2476                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2478                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2479                                 NUM_BANKS(ADDR_SURF_8_BANK));
2480                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2482                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483                                  NUM_BANKS(ADDR_SURF_8_BANK));
2484                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487                                  NUM_BANKS(ADDR_SURF_8_BANK));
2488                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2490                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2491                                  NUM_BANKS(ADDR_SURF_8_BANK));
2492                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2494                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495                                  NUM_BANKS(ADDR_SURF_8_BANK));
2496                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499                                  NUM_BANKS(ADDR_SURF_4_BANK));
2500
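                /*
                 * Every tile-mode index is written for Fiji; only
                 * macrotile index 7 is skipped below.
                 */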
2501                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2502                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2503
2504                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2505                         if (reg_offset != 7)
2506                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2507
2508                 break;
2509         case CHIP_TONGA:
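                /*
                 * Tonga: same table layout as Fiji but with the 8-pipe
                 * P8_32x32_16x16 pipe configuration and different
                 * macrotile bank settings.
                 */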
2510                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2511                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2513                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2514                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2517                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2518                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2519                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2521                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2522                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2523                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2525                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2526                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2527                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2529                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2530                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2531                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2533                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2534                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2535                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2536                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2537                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2538                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2539                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2540                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2541                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2542                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2543                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2544                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2545                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2547                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2548                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2553                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2556                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2557                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2558                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2559                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2560                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2561                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2563                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2564                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2567                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2569                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2570                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2571                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2573                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2575                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2576                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2577                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2578                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2579                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2580                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2581                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2583                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2584                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2585                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2586                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2587                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2588                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2589                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2591                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2592                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2593                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2594                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2595                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2596                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2597                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2598                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2599                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2600                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2601                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2602                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2603                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2604                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2605                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2607                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2608                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2609                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2611                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2612                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2613                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2615                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2616                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2617                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2619                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2621                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2622                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2623                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2624                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2626                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2627                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2628                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2629                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2630                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2631                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2632
2633                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2634                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2635                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2636                                 NUM_BANKS(ADDR_SURF_16_BANK));
2637                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2639                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2640                                 NUM_BANKS(ADDR_SURF_16_BANK));
2641                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2642                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2643                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2644                                 NUM_BANKS(ADDR_SURF_16_BANK));
2645                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2646                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2647                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2648                                 NUM_BANKS(ADDR_SURF_16_BANK));
2649                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2650                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2651                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2652                                 NUM_BANKS(ADDR_SURF_16_BANK));
2653                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2654                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2655                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2656                                 NUM_BANKS(ADDR_SURF_16_BANK));
2657                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2658                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2659                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2660                                 NUM_BANKS(ADDR_SURF_16_BANK));
2661                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2662                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2663                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2664                                 NUM_BANKS(ADDR_SURF_16_BANK));
2665                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2666                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2667                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2668                                 NUM_BANKS(ADDR_SURF_16_BANK));
2669                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2670                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2671                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2672                                  NUM_BANKS(ADDR_SURF_16_BANK));
2673                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2675                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2676                                  NUM_BANKS(ADDR_SURF_16_BANK));
2677                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2678                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2679                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2680                                  NUM_BANKS(ADDR_SURF_8_BANK));
2681                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2682                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2683                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2684                                  NUM_BANKS(ADDR_SURF_4_BANK));
2685                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2686                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2687                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2688                                  NUM_BANKS(ADDR_SURF_4_BANK));
2689
2690                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2691                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2692
2693                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2694                         if (reg_offset != 7)
2695                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2696
2697                 break;
2698         case CHIP_POLARIS11:
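                /* Polaris11: 4-pipe part, every tile mode uses P4_16x16. */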
2699                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2701                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2702                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2703                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2704                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2706                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2707                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2710                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2711                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2712                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2714                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2715                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2716                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2718                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2719                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2722                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2723                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2726                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2727                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2730                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2731                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2732                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2733                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2734                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2735                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2736                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2737                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2739                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2740                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2741                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2742                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2743                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2744                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2745                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2746                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2747                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2748                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2749                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2750                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2751                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2752                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2753                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2755                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2756                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2757                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2758                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2759                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2760                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2761                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2762                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2764                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2765                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2766                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2767                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2768                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2769                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2770                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2772                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2773                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2774                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2775                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2776                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2777                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2778                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2779                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2780                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2781                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2782                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2784                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2785                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2786                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2787                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2788                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2789                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2790                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2791                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2792                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2793                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2794                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2796                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2797                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2798                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2800                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2801                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2802                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2803                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2804                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2805                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2806                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2807                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2808                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2809                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2810                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2812                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2813                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2814                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2816                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2817                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2818                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2819                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2820                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2821
2822                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2824                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2825                                 NUM_BANKS(ADDR_SURF_16_BANK));
2826
2827                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2828                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2829                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2830                                 NUM_BANKS(ADDR_SURF_16_BANK));
2831
2832                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835                                 NUM_BANKS(ADDR_SURF_16_BANK));
2836
2837                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2839                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2840                                 NUM_BANKS(ADDR_SURF_16_BANK));
2841
2842                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2844                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2845                                 NUM_BANKS(ADDR_SURF_16_BANK));
2846
2847                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2848                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2849                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2850                                 NUM_BANKS(ADDR_SURF_16_BANK));
2851
2852                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855                                 NUM_BANKS(ADDR_SURF_16_BANK));
2856
2857                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2858                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2859                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860                                 NUM_BANKS(ADDR_SURF_16_BANK));
2861
2862                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2863                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2864                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2865                                 NUM_BANKS(ADDR_SURF_16_BANK));
2866
2867                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2868                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2869                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2870                                 NUM_BANKS(ADDR_SURF_16_BANK));
2871
2872                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2874                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2875                                 NUM_BANKS(ADDR_SURF_16_BANK));
2876
2877                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2878                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2879                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2880                                 NUM_BANKS(ADDR_SURF_16_BANK));
2881
2882                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2883                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2884                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2885                                 NUM_BANKS(ADDR_SURF_8_BANK));
2886
2887                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2888                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2889                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2890                                 NUM_BANKS(ADDR_SURF_4_BANK));
2891
2892                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2893                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2894
2895                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2896                         if (reg_offset != 7)
2897                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2898
2899                 break;
2900         case CHIP_POLARIS10:
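                /*
                 * Polaris10: 8-pipe P8_32x32_16x16 configuration like
                 * Tonga; the PRT variants at 7, 12, 17, 23 and 30 use
                 * P4_16x16.
                 */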
2901                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2903                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2904                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2905                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2908                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2909                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2912                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2913                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2916                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2917                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2919                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2920                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2921                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2924                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2925                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2926                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2928                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2929                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2931                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2932                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2933                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2934                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2935                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2936                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2937                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2938                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2940                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2941                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2943                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2944                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2945                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2946                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2947                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2949                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2950                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2951                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2952                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2953                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2954                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2956                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2957                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2958                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2960                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2961                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2962                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2963                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2964                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2965                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2966                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2967                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2968                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2969                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2970                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2971                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2972                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2973                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2974                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2975                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2976                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2977                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2978                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2979                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2980                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2982                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2983                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2984                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2985                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2986                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2987                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2988                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2989                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2990                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2991                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2992                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2993                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2994                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2995                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2996                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2997                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2998                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2999                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3000                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3001                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3002                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3003                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3004                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3005                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3006                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3007                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3008                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3009                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3010                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3012                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3013                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3014                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3015                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3016                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3017                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3018                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3019                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3020                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3021                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3022                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3023
3024                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3026                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027                                 NUM_BANKS(ADDR_SURF_16_BANK));
3028
3029                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3031                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032                                 NUM_BANKS(ADDR_SURF_16_BANK));
3033
3034                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3036                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3037                                 NUM_BANKS(ADDR_SURF_16_BANK));
3038
3039                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3041                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3042                                 NUM_BANKS(ADDR_SURF_16_BANK));
3043
3044                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3046                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3047                                 NUM_BANKS(ADDR_SURF_16_BANK));
3048
3049                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3051                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3052                                 NUM_BANKS(ADDR_SURF_16_BANK));
3053
3054                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3056                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3057                                 NUM_BANKS(ADDR_SURF_16_BANK));
3058
3059                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3061                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3062                                 NUM_BANKS(ADDR_SURF_16_BANK));
3063
3064                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3065                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3066                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3067                                 NUM_BANKS(ADDR_SURF_16_BANK));
3068
3069                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3071                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3072                                 NUM_BANKS(ADDR_SURF_16_BANK));
3073
3074                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3077                                 NUM_BANKS(ADDR_SURF_16_BANK));
3078
3079                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3080                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3081                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3082                                 NUM_BANKS(ADDR_SURF_8_BANK));
3083
3084                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3085                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3086                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3087                                 NUM_BANKS(ADDR_SURF_4_BANK));
3088
3089                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3090                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3091                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3092                                 NUM_BANKS(ADDR_SURF_4_BANK));
3093
3094                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3095                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3096
3097                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3098                         if (reg_offset != 7)
3099                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3100
3101                 break;
3102         case CHIP_STONEY:
3103                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3104                                 PIPE_CONFIG(ADDR_SURF_P2) |
3105                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3106                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3107                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108                                 PIPE_CONFIG(ADDR_SURF_P2) |
3109                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3110                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3111                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3112                                 PIPE_CONFIG(ADDR_SURF_P2) |
3113                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3114                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3115                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3116                                 PIPE_CONFIG(ADDR_SURF_P2) |
3117                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3118                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3119                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3120                                 PIPE_CONFIG(ADDR_SURF_P2) |
3121                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3122                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3123                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3124                                 PIPE_CONFIG(ADDR_SURF_P2) |
3125                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3126                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3127                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128                                 PIPE_CONFIG(ADDR_SURF_P2) |
3129                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3130                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3131                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3132                                 PIPE_CONFIG(ADDR_SURF_P2));
3133                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3134                                 PIPE_CONFIG(ADDR_SURF_P2) |
3135                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3136                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3137                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3138                                  PIPE_CONFIG(ADDR_SURF_P2) |
3139                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3140                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3141                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3142                                  PIPE_CONFIG(ADDR_SURF_P2) |
3143                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3144                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3145                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3146                                  PIPE_CONFIG(ADDR_SURF_P2) |
3147                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3148                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3149                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3150                                  PIPE_CONFIG(ADDR_SURF_P2) |
3151                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3152                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3153                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3154                                  PIPE_CONFIG(ADDR_SURF_P2) |
3155                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3156                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3157                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3158                                  PIPE_CONFIG(ADDR_SURF_P2) |
3159                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3160                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3161                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3162                                  PIPE_CONFIG(ADDR_SURF_P2) |
3163                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3164                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3165                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3166                                  PIPE_CONFIG(ADDR_SURF_P2) |
3167                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3168                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3169                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3170                                  PIPE_CONFIG(ADDR_SURF_P2) |
3171                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3172                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3173                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3174                                  PIPE_CONFIG(ADDR_SURF_P2) |
3175                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3176                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3177                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3178                                  PIPE_CONFIG(ADDR_SURF_P2) |
3179                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3180                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3181                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3182                                  PIPE_CONFIG(ADDR_SURF_P2) |
3183                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3184                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3185                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3186                                  PIPE_CONFIG(ADDR_SURF_P2) |
3187                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3188                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3189                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3190                                  PIPE_CONFIG(ADDR_SURF_P2) |
3191                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3192                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3193                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3194                                  PIPE_CONFIG(ADDR_SURF_P2) |
3195                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3196                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3197                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3198                                  PIPE_CONFIG(ADDR_SURF_P2) |
3199                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3200                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3201                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3202                                  PIPE_CONFIG(ADDR_SURF_P2) |
3203                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3204                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3205
3206                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3207                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3208                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3209                                 NUM_BANKS(ADDR_SURF_8_BANK));
3210                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3211                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3212                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3213                                 NUM_BANKS(ADDR_SURF_8_BANK));
3214                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3215                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3216                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3217                                 NUM_BANKS(ADDR_SURF_8_BANK));
3218                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3219                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3220                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3221                                 NUM_BANKS(ADDR_SURF_8_BANK));
3222                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3223                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3224                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3225                                 NUM_BANKS(ADDR_SURF_8_BANK));
3226                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3227                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3228                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3229                                 NUM_BANKS(ADDR_SURF_8_BANK));
3230                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3231                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3232                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3233                                 NUM_BANKS(ADDR_SURF_8_BANK));
3234                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3235                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3236                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3237                                 NUM_BANKS(ADDR_SURF_16_BANK));
3238                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3239                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3240                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3241                                 NUM_BANKS(ADDR_SURF_16_BANK));
3242                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3243                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3244                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3245                                  NUM_BANKS(ADDR_SURF_16_BANK));
3246                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3247                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3248                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3249                                  NUM_BANKS(ADDR_SURF_16_BANK));
3250                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3251                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3252                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3253                                  NUM_BANKS(ADDR_SURF_16_BANK));
3254                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3255                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3256                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3257                                  NUM_BANKS(ADDR_SURF_16_BANK));
3258                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3259                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3260                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3261                                  NUM_BANKS(ADDR_SURF_8_BANK));
3262
3263                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3264                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3265                             reg_offset != 23)
3266                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3267
3268                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3269                         if (reg_offset != 7)
3270                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3271
3272                 break;
3273         default:
3274                 dev_warn(adev->dev,
3275                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3276                          adev->asic_type);
3277
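                /* no break: deliberately fall through to the CHIP_CARRIZO programming below */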
3278         case CHIP_CARRIZO:
3279                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3280                                 PIPE_CONFIG(ADDR_SURF_P2) |
3281                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3282                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3283                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3284                                 PIPE_CONFIG(ADDR_SURF_P2) |
3285                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3286                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3287                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3288                                 PIPE_CONFIG(ADDR_SURF_P2) |
3289                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3290                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3291                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3292                                 PIPE_CONFIG(ADDR_SURF_P2) |
3293                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3294                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3295                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3296                                 PIPE_CONFIG(ADDR_SURF_P2) |
3297                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3298                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3299                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3300                                 PIPE_CONFIG(ADDR_SURF_P2) |
3301                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3302                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3303                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3304                                 PIPE_CONFIG(ADDR_SURF_P2) |
3305                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3306                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3307                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3308                                 PIPE_CONFIG(ADDR_SURF_P2));
3309                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3310                                 PIPE_CONFIG(ADDR_SURF_P2) |
3311                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3312                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3313                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3314                                  PIPE_CONFIG(ADDR_SURF_P2) |
3315                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3316                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3317                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3318                                  PIPE_CONFIG(ADDR_SURF_P2) |
3319                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3320                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3321                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3322                                  PIPE_CONFIG(ADDR_SURF_P2) |
3323                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3324                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3325                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3326                                  PIPE_CONFIG(ADDR_SURF_P2) |
3327                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3328                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3329                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3330                                  PIPE_CONFIG(ADDR_SURF_P2) |
3331                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3332                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3333                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3334                                  PIPE_CONFIG(ADDR_SURF_P2) |
3335                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3336                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3337                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3338                                  PIPE_CONFIG(ADDR_SURF_P2) |
3339                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3340                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3341                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3342                                  PIPE_CONFIG(ADDR_SURF_P2) |
3343                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3344                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3345                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3346                                  PIPE_CONFIG(ADDR_SURF_P2) |
3347                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3348                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3349                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3350                                  PIPE_CONFIG(ADDR_SURF_P2) |
3351                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3352                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3353                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3354                                  PIPE_CONFIG(ADDR_SURF_P2) |
3355                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3356                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3357                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3358                                  PIPE_CONFIG(ADDR_SURF_P2) |
3359                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3360                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3361                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3362                                  PIPE_CONFIG(ADDR_SURF_P2) |
3363                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3364                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3365                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3366                                  PIPE_CONFIG(ADDR_SURF_P2) |
3367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3369                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3370                                  PIPE_CONFIG(ADDR_SURF_P2) |
3371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3373                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3374                                  PIPE_CONFIG(ADDR_SURF_P2) |
3375                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3376                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3377                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3378                                  PIPE_CONFIG(ADDR_SURF_P2) |
3379                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3380                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3381
3382                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3384                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3385                                 NUM_BANKS(ADDR_SURF_8_BANK));
3386                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3387                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3388                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3389                                 NUM_BANKS(ADDR_SURF_8_BANK));
3390                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3391                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3392                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3393                                 NUM_BANKS(ADDR_SURF_8_BANK));
3394                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3395                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3396                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3397                                 NUM_BANKS(ADDR_SURF_8_BANK));
3398                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3399                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3400                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3401                                 NUM_BANKS(ADDR_SURF_8_BANK));
3402                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3403                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3404                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3405                                 NUM_BANKS(ADDR_SURF_8_BANK));
3406                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3407                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3408                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3409                                 NUM_BANKS(ADDR_SURF_8_BANK));
3410                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3411                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3412                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3413                                 NUM_BANKS(ADDR_SURF_16_BANK));
3414                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3415                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3416                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3417                                 NUM_BANKS(ADDR_SURF_16_BANK));
3418                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3419                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3420                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3421                                  NUM_BANKS(ADDR_SURF_16_BANK));
3422                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3423                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3424                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3425                                  NUM_BANKS(ADDR_SURF_16_BANK));
3426                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3427                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3428                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3429                                  NUM_BANKS(ADDR_SURF_16_BANK));
3430                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3431                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3432                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3433                                  NUM_BANKS(ADDR_SURF_16_BANK));
3434                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3435                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3436                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3437                                  NUM_BANKS(ADDR_SURF_8_BANK));
3438
3439                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3440                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3441                             reg_offset != 23)
3442                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3443
3444                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3445                         if (reg_offset != 7)
3446                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3447
3448                 break;
3449         }
3450 }
3451
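/*
 * Program GRBM_GFX_INDEX so that subsequent register accesses target the
 * given shader engine (se_num), shader array (sh_num) and instance.  Passing
 * 0xffffffff for any argument selects broadcast writes for that field, e.g.
 * gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff) restores
 * full broadcast after a per-SE/SH read-modify-write sequence.
 */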
3452 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3453                                   u32 se_num, u32 sh_num, u32 instance)
3454 {
3455         u32 data;
3456
3457         if (instance == 0xffffffff)
3458                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3459         else
3460                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3461
3462         if (se_num == 0xffffffff)
3463                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3464         else
3465                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3466
3467         if (sh_num == 0xffffffff)
3468                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3469         else
3470                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3471
3472         WREG32(mmGRBM_GFX_INDEX, data);
3473 }
3474
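/*
 * Return a mask with the bit_width least significant bits set, e.g.
 * gfx_v8_0_create_bitmask(4) == 0x0000000f.  The 1ULL shift keeps
 * bit_width == 32 well defined before the truncating cast.
 */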
3475 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3476 {
3477         return (u32)((1ULL << bit_width) - 1);
3478 }
3479
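/*
 * Read the fused (CC) and user (GC_USER) render backend disable bits for the
 * currently selected SE/SH and return a bitmap of the RBs that are still
 * active, limited to the number of backends per shader array.
 */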
3480 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3481 {
3482         u32 data, mask;
3483
3484         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3485                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3486
3487         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3488
3489         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3490                                        adev->gfx.config.max_sh_per_se);
3491
3492         return (~data) & mask;
3493 }
3494
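/*
 * Walk every shader engine / shader array, collect the per-SH active RB
 * bitmaps into one global mask and cache the result in
 * adev->gfx.config.backend_enable_mask and num_rbs.
 */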
3495 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3496 {
3497         int i, j;
3498         u32 data;
3499         u32 active_rbs = 0;
3500         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3501                                         adev->gfx.config.max_sh_per_se;
3502
3503         mutex_lock(&adev->grbm_idx_mutex);
3504         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3505                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3506                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3507                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3508                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3509                                                rb_bitmap_width_per_sh);
3510                 }
3511         }
3512         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3513         mutex_unlock(&adev->grbm_idx_mutex);
3514
3515         adev->gfx.config.backend_enable_mask = active_rbs;
3516         adev->gfx.config.num_rbs = hweight32(active_rbs);
3517 }
3518
3519 /**
3520  * gfx_v8_0_init_compute_vmid - init compute vmid sh_mem registers
3521  *
3522  * @adev: amdgpu_device pointer
3523  *
3524  * Initialize compute vmid sh_mem registers
3525  *
3526  */
3527 #define DEFAULT_SH_MEM_BASES    (0x6000)
3528 #define FIRST_COMPUTE_VMID      (8)
3529 #define LAST_COMPUTE_VMID       (16)
3530 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3531 {
3532         int i;
3533         uint32_t sh_mem_config;
3534         uint32_t sh_mem_bases;
3535
3536         /*
3537          * Configure apertures:
3538          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3539          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3540          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3541          */
3542         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3543
3544         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3545                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3546                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3547                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3548                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3549                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3550
3551         mutex_lock(&adev->srbm_mutex);
3552         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3553                 vi_srbm_select(adev, 0, 0, 0, i);
3554                 /* CP and shaders */
3555                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3556                 WREG32(mmSH_MEM_APE1_BASE, 1);
3557                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3558                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3559         }
3560         vi_srbm_select(adev, 0, 0, 0, 0);
3561         mutex_unlock(&adev->srbm_mutex);
3562 }
3563
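/*
 * One-time setup of the gfx block: GRBM read timeout, address configuration,
 * tiling tables, render backends, CU info, per-VMID SH_MEM apertures (VMID 0
 * uses MTYPE_UC, the others MTYPE_NC), the compute VMIDs and the PA_SC
 * primitive FIFO sizes.
 */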
3564 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3565 {
3566         u32 tmp;
3567         int i;
3568
3569         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3570         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3571         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3572         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3573
3574         gfx_v8_0_tiling_mode_table_init(adev);
3575         gfx_v8_0_setup_rb(adev);
3576         gfx_v8_0_get_cu_info(adev);
3577
3578         /* XXX SH_MEM regs */
3579         /* where to put LDS, scratch, GPUVM in FSA64 space */
3580         mutex_lock(&adev->srbm_mutex);
3581         for (i = 0; i < 16; i++) {
3582                 vi_srbm_select(adev, 0, 0, 0, i);
3583                 /* CP and shaders */
3584                 if (i == 0) {
3585                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3586                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3587                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3588                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3589                         WREG32(mmSH_MEM_CONFIG, tmp);
3590                 } else {
3591                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3592                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3593                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3594                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3595                         WREG32(mmSH_MEM_CONFIG, tmp);
3596                 }
3597
3598                 WREG32(mmSH_MEM_APE1_BASE, 1);
3599                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3600                 WREG32(mmSH_MEM_BASES, 0);
3601         }
3602         vi_srbm_select(adev, 0, 0, 0, 0);
3603         mutex_unlock(&adev->srbm_mutex);
3604
3605         gfx_v8_0_init_compute_vmid(adev);
3606
3607         mutex_lock(&adev->grbm_idx_mutex);
3608         /*
3609          * making sure that the following register writes will be broadcast
3610          * to all the shaders
3611          */
3612         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3613
3614         WREG32(mmPA_SC_FIFO_SIZE,
3615                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3616                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3617                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3618                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3619                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3620                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3621                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3622                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3623         mutex_unlock(&adev->grbm_idx_mutex);
3624
3625 }
3626
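/*
 * Poll the RLC SERDES busy bits for every SE/SH (CU masters) and then the
 * non-CU masters, giving each up to adev->usec_timeout microseconds to go
 * idle.
 */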
3627 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3628 {
3629         u32 i, j, k;
3630         u32 mask;
3631
3632         mutex_lock(&adev->grbm_idx_mutex);
3633         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3634                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3635                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3636                         for (k = 0; k < adev->usec_timeout; k++) {
3637                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3638                                         break;
3639                                 udelay(1);
3640                         }
3641                 }
3642         }
3643         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3644         mutex_unlock(&adev->grbm_idx_mutex);
3645
3646         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3647                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3648                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3649                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3650         for (k = 0; k < adev->usec_timeout; k++) {
3651                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3652                         break;
3653                 udelay(1);
3654         }
3655 }
3656
3657 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3658                                                bool enable)
3659 {
3660         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3661
3662         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3663         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3664         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3665         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3666
3667         WREG32(mmCP_INT_CNTL_RING0, tmp);
3668 }
3669
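/* Point the RLC at the clear state indirect buffer (address and length). */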
3670 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3671 {
3672         /* csib */
3673         WREG32(mmRLC_CSIB_ADDR_HI,
3674                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3675         WREG32(mmRLC_CSIB_ADDR_LO,
3676                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3677         WREG32(mmRLC_CSIB_LENGTH,
3678                         adev->gfx.rlc.clear_state_size);
3679 }
3680
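/*
 * Walk the indirect part of the RLC register list format blob: record the
 * start offset of each 0xFFFFFFFF-terminated block, build a table of unique
 * index values and rewrite each entry to reference its slot in that table.
 */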
3681 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3682                                 int ind_offset,
3683                                 int list_size,
3684                                 int *unique_indices,
3685                                 int *indices_count,
3686                                 int max_indices,
3687                                 int *ind_start_offsets,
3688                                 int *offset_count,
3689                                 int max_offset)
3690 {
3691         int indices;
3692         bool new_entry = true;
3693
3694         for (; ind_offset < list_size; ind_offset++) {
3695
3696                 if (new_entry) {
3697                         new_entry = false;
3698                         ind_start_offsets[*offset_count] = ind_offset;
3699                         *offset_count = *offset_count + 1;
3700                         BUG_ON(*offset_count >= max_offset);
3701                 }
3702
3703                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3704                         new_entry = true;
3705                         continue;
3706                 }
3707
3708                 ind_offset += 2;
3709
3710                 /* look for the matching index */
3711                 for (indices = 0;
3712                         indices < *indices_count;
3713                         indices++) {
3714                         if (unique_indices[indices] ==
3715                                 register_list_format[ind_offset])
3716                                 break;
3717                 }
3718
3719                 if (indices >= *indices_count) {
3720                         unique_indices[*indices_count] =
3721                                 register_list_format[ind_offset];
3722                         indices = *indices_count;
3723                         *indices_count = *indices_count + 1;
3724                         BUG_ON(*indices_count >= max_indices);
3725                 }
3726
3727                 register_list_format[ind_offset] = indices;
3728         }
3729 }
3730
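/*
 * Load the RLC save/restore machine: the direct register list goes into SRM
 * ARAM, the re-indexed indirect list and its starting offsets go into GPM
 * scratch, and the unique index values are programmed into the
 * RLC_SRM_INDEX_CNTL address/data register pairs.
 */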
3731 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3732 {
3733         int i, temp, data;
3734         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3735         int indices_count = 0;
3736         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3737         int offset_count = 0;
3738
3739         int list_size;
3740         unsigned int *register_list_format =
3741                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3742         if (register_list_format == NULL)
3743                 return -ENOMEM;
3744         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3745                         adev->gfx.rlc.reg_list_format_size_bytes);
3746
3747         gfx_v8_0_parse_ind_reg_list(register_list_format,
3748                                 RLC_FormatDirectRegListLength,
3749                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3750                                 unique_indices,
3751                                 &indices_count,
3752                                 sizeof(unique_indices) / sizeof(int),
3753                                 indirect_start_offsets,
3754                                 &offset_count,
3755                                 sizeof(indirect_start_offsets)/sizeof(int));
3756
3757         /* save and restore list */
3758         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3759
3760         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3761         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3762                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3763
3764         /* indirect list */
3765         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3766         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3767                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3768
3769         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3770         list_size = list_size >> 1;
3771         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3772         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3773
3774         /* starting offsets of the indirect register blocks */
3775         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3776                 adev->gfx.rlc.starting_offsets_start);
3777         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3778                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3779                                 indirect_start_offsets[i]);
3780
3781         /* unique indices */
3782         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3783         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3784         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3785                 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3786                 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3787         }
3788         kfree(register_list_format);
3789
3790         return 0;
3791 }
3792
3793 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3794 {
3795         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3796 }
3797
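/*
 * When gfx power gating is supported, program the CP idle poll count and the
 * RLC power up/down, command propagation, memory sleep and serdes command
 * delays, plus the auto power gating idle threshold.
 */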
3798 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3799 {
3800         uint32_t data;
3801
3802         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3803                               AMD_PG_SUPPORT_GFX_SMG |
3804                               AMD_PG_SUPPORT_GFX_DMG)) {
3805                 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3806
3807                 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3808                 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3809                 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3810                 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3811                 WREG32(mmRLC_PG_DELAY, data);
3812
3813                 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3814                 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
3815         }
3816 }
3817
3818 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3819                                                 bool enable)
3820 {
3821         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
3822 }
3823
3824 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3825                                                   bool enable)
3826 {
3827         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
3828 }
3829
3830 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3831 {
3832         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 1 : 0);
3833 }
3834
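/*
 * Top level power gating init: set up the clear state buffer and the RLC
 * save/restore machine, then apply the APU (Carrizo/Stoney) or Polaris11
 * specific power gating programming according to adev->pg_flags.
 */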
3835 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3836 {
3837         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3838                               AMD_PG_SUPPORT_GFX_SMG |
3839                               AMD_PG_SUPPORT_GFX_DMG |
3840                               AMD_PG_SUPPORT_CP |
3841                               AMD_PG_SUPPORT_GDS |
3842                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
3843                 gfx_v8_0_init_csb(adev);
3844                 gfx_v8_0_init_save_restore_list(adev);
3845                 gfx_v8_0_enable_save_restore_machine(adev);
3846
3847                 if ((adev->asic_type == CHIP_CARRIZO) ||
3848                     (adev->asic_type == CHIP_STONEY)) {
3849                         WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3850                         gfx_v8_0_init_power_gating(adev);
3851                         WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3852                         if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
3853                                 cz_enable_sck_slow_down_on_power_up(adev, true);
3854                                 cz_enable_sck_slow_down_on_power_down(adev, true);
3855                         } else {
3856                                 cz_enable_sck_slow_down_on_power_up(adev, false);
3857                                 cz_enable_sck_slow_down_on_power_down(adev, false);
3858                         }
3859                         if (adev->pg_flags & AMD_PG_SUPPORT_CP)
3860                                 cz_enable_cp_power_gating(adev, true);
3861                         else
3862                                 cz_enable_cp_power_gating(adev, false);
3863                 } else if (adev->asic_type == CHIP_POLARIS11) {
3864                         gfx_v8_0_init_power_gating(adev);
3865                 }
3866         }
3867 }
3868
3869 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3870 {
3871         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
3872
3873         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3874         gfx_v8_0_wait_for_rlc_serdes(adev);
3875 }
3876
3877 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3878 {
3879         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3880         udelay(50);
3881
3882         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3883         udelay(50);
3884 }
3885
3886 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3887 {
3888         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
3889
3890         /* carrizo enables the cp interrupt only after the cp is initialized */
3891         if (!(adev->flags & AMD_IS_APU))
3892                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3893
3894         udelay(50);
3895 }
3896
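/*
 * Legacy (non-SMU) RLC firmware load: stream the GPM ucode dwords through
 * RLC_GPM_UCODE_ADDR/DATA and finish by writing the firmware version to the
 * address register.
 */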
3897 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3898 {
3899         const struct rlc_firmware_header_v2_0 *hdr;
3900         const __le32 *fw_data;
3901         unsigned i, fw_size;
3902
3903         if (!adev->gfx.rlc_fw)
3904                 return -EINVAL;
3905
3906         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3907         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3908
3909         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3910                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3911         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3912
3913         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3914         for (i = 0; i < fw_size; i++)
3915                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3916         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3917
3918         return 0;
3919 }
3920
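/*
 * Bring the RLC back up: halt it, disable clock and power gating, soft
 * reset, re-init the power gating state, load the RLC microcode (directly or
 * by checking the SMU load status) and finally restart it.
 */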
3921 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3922 {
3923         int r;
3924
3925         gfx_v8_0_rlc_stop(adev);
3926
3927         /* disable CG */
3928         WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
3929         if (adev->asic_type == CHIP_POLARIS11 ||
3930             adev->asic_type == CHIP_POLARIS10)
3931                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
3932
3933         /* disable PG */
3934         WREG32(mmRLC_PG_CNTL, 0);
3935
3936         gfx_v8_0_rlc_reset(adev);
3937         gfx_v8_0_init_pg(adev);
3938
3939         if (!adev->pp_enabled) {
3940                 if (!adev->firmware.smu_load) {
3941                         /* legacy rlc firmware loading */
3942                         r = gfx_v8_0_rlc_load_microcode(adev);
3943                         if (r)
3944                                 return r;
3945                 } else {
3946                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3947                                                         AMDGPU_UCODE_ID_RLC_G);
3948                         if (r)
3949                                 return -EINVAL;
3950                 }
3951         }
3952
3953         gfx_v8_0_rlc_start(adev);
3954
3955         return 0;
3956 }
3957
3958 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3959 {
3960         int i;
3961         u32 tmp = RREG32(mmCP_ME_CNTL);
3962
3963         if (enable) {
3964                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
3965                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
3966                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
3967         } else {
3968                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
3969                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
3970                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
3971                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3972                         adev->gfx.gfx_ring[i].ready = false;
3973         }
3974         WREG32(mmCP_ME_CNTL, tmp);
3975         udelay(50);
3976 }
3977
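/*
 * Legacy CP gfx firmware load: halt the CP, then stream the PFP, CE and ME
 * ucode images into their respective UCODE/RAM data ports and record each
 * firmware version.
 */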
3978 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3979 {
3980         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3981         const struct gfx_firmware_header_v1_0 *ce_hdr;
3982         const struct gfx_firmware_header_v1_0 *me_hdr;
3983         const __le32 *fw_data;
3984         unsigned i, fw_size;
3985
3986         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3987                 return -EINVAL;
3988
3989         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3990                 adev->gfx.pfp_fw->data;
3991         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3992                 adev->gfx.ce_fw->data;
3993         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3994                 adev->gfx.me_fw->data;
3995
3996         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3997         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3998         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3999
4000         gfx_v8_0_cp_gfx_enable(adev, false);
4001
4002         /* PFP */
4003         fw_data = (const __le32 *)
4004                 (adev->gfx.pfp_fw->data +
4005                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4006         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4007         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4008         for (i = 0; i < fw_size; i++)
4009                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4010         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4011
4012         /* CE */
4013         fw_data = (const __le32 *)
4014                 (adev->gfx.ce_fw->data +
4015                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4016         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4017         WREG32(mmCP_CE_UCODE_ADDR, 0);
4018         for (i = 0; i < fw_size; i++)
4019                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4020         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4021
4022         /* ME */
4023         fw_data = (const __le32 *)
4024                 (adev->gfx.me_fw->data +
4025                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4026         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4027         WREG32(mmCP_ME_RAM_WADDR, 0);
4028         for (i = 0; i < fw_size; i++)
4029                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4030         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4031
4032         return 0;
4033 }
4034
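/*
 * Count the number of dwords the clear state preamble emitted by
 * gfx_v8_0_cp_gfx_start() will occupy: preamble begin/end, context control,
 * the SECT_CONTEXT extents, the raster config pair and the clear state
 * packet.
 */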
4035 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4036 {
4037         u32 count = 0;
4038         const struct cs_section_def *sect = NULL;
4039         const struct cs_extent_def *ext = NULL;
4040
4041         /* begin clear state */
4042         count += 2;
4043         /* context control state */
4044         count += 3;
4045
4046         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4047                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4048                         if (sect->id == SECT_CONTEXT)
4049                                 count += 2 + ext->reg_count;
4050                         else
4051                                 return 0;
4052                 }
4053         }
4054         /* pa_sc_raster_config/pa_sc_raster_config1 */
4055         count += 4;
4056         /* end clear state */
4057         count += 2;
4058         /* clear state */
4059         count += 2;
4060
4061         return count;
4062 }
4063
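/*
 * Prime gfx ring 0: program the basic CP registers, then emit the clear
 * state preamble (context control, SECT_CONTEXT register extents, the per
 * ASIC PA_SC_RASTER_CONFIG values) and the CE partition setup.
 */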
4064 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4065 {
4066         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4067         const struct cs_section_def *sect = NULL;
4068         const struct cs_extent_def *ext = NULL;
4069         int r, i;
4070
4071         /* init the CP */
4072         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4073         WREG32(mmCP_ENDIAN_SWAP, 0);
4074         WREG32(mmCP_DEVICE_ID, 1);
4075
4076         gfx_v8_0_cp_gfx_enable(adev, true);
4077
4078         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4079         if (r) {
4080                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4081                 return r;
4082         }
4083
4084         /* clear state buffer */
4085         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4086         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4087
4088         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4089         amdgpu_ring_write(ring, 0x80000000);
4090         amdgpu_ring_write(ring, 0x80000000);
4091
4092         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4093                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4094                         if (sect->id == SECT_CONTEXT) {
4095                                 amdgpu_ring_write(ring,
4096                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4097                                                ext->reg_count));
4098                                 amdgpu_ring_write(ring,
4099                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4100                                 for (i = 0; i < ext->reg_count; i++)
4101                                         amdgpu_ring_write(ring, ext->extent[i]);
4102                         }
4103                 }
4104         }
4105
4106         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4107         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4108         switch (adev->asic_type) {
4109         case CHIP_TONGA:
4110         case CHIP_POLARIS10:
4111                 amdgpu_ring_write(ring, 0x16000012);
4112                 amdgpu_ring_write(ring, 0x0000002A);
4113                 break;
4114         case CHIP_POLARIS11:
4115                 amdgpu_ring_write(ring, 0x16000012);
4116                 amdgpu_ring_write(ring, 0x00000000);
4117                 break;
4118         case CHIP_FIJI:
4119                 amdgpu_ring_write(ring, 0x3a00161a);
4120                 amdgpu_ring_write(ring, 0x0000002e);
4121                 break;
4122         case CHIP_CARRIZO:
4123                 amdgpu_ring_write(ring, 0x00000002);
4124                 amdgpu_ring_write(ring, 0x00000000);
4125                 break;
4126         case CHIP_TOPAZ:
4127                 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4128                                 0x00000000 : 0x00000002);
4129                 amdgpu_ring_write(ring, 0x00000000);
4130                 break;
4131         case CHIP_STONEY:
4132                 amdgpu_ring_write(ring, 0x00000000);
4133                 amdgpu_ring_write(ring, 0x00000000);
4134                 break;
4135         default:
4136                 BUG();
4137         }
4138
4139         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4140         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4141
4142         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4143         amdgpu_ring_write(ring, 0);
4144
4145         /* init the CE partitions */
4146         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4147         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4148         amdgpu_ring_write(ring, 0x8000);
4149         amdgpu_ring_write(ring, 0x8000);
4150
4151         amdgpu_ring_commit(ring);
4152
4153         return 0;
4154 }
4155
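     /*
      * Bring the gfx ring buffer back up: program the CP_RB0 size,
      * read/write pointers, write-back address and base, set up the ring
      * doorbell on parts that have one (not Topaz), then start the ring
      * via gfx_v8_0_cp_gfx_start() and verify it with a ring test.
      */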
4156 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4157 {
4158         struct amdgpu_ring *ring;
4159         u32 tmp;
4160         u32 rb_bufsz;
4161         u64 rb_addr, rptr_addr;
4162         int r;
4163
4164         /* Set the write pointer delay */
4165         WREG32(mmCP_RB_WPTR_DELAY, 0);
4166
4167         /* set the RB to use vmid 0 */
4168         WREG32(mmCP_RB_VMID, 0);
4169
4170         /* Set ring buffer size */
4171         ring = &adev->gfx.gfx_ring[0];
4172         rb_bufsz = order_base_2(ring->ring_size / 8);
4173         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4174         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4175         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4176         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4177 #ifdef __BIG_ENDIAN
4178         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4179 #endif
4180         WREG32(mmCP_RB0_CNTL, tmp);
4181
4182         /* Initialize the ring buffer's read and write pointers */
4183         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4184         ring->wptr = 0;
4185         WREG32(mmCP_RB0_WPTR, ring->wptr);
4186
4187         /* set the wb address whether it's enabled or not */
4188         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4189         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4190         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4191
4192         mdelay(1);
4193         WREG32(mmCP_RB0_CNTL, tmp);
4194
4195         rb_addr = ring->gpu_addr >> 8;
4196         WREG32(mmCP_RB0_BASE, rb_addr);
4197         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4198
4199         /* no gfx doorbells on iceland */
4200         if (adev->asic_type != CHIP_TOPAZ) {
4201                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4202                 if (ring->use_doorbell) {
4203                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4204                                             DOORBELL_OFFSET, ring->doorbell_index);
4205                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4206                                             DOORBELL_HIT, 0);
4207                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4208                                             DOORBELL_EN, 1);
4209                 } else {
4210                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4211                                             DOORBELL_EN, 0);
4212                 }
4213                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4214
4215                 if (adev->asic_type == CHIP_TONGA) {
4216                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4217                                             DOORBELL_RANGE_LOWER,
4218                                             AMDGPU_DOORBELL_GFX_RING0);
4219                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4220
4221                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4222                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4223                 }
4224
4225         }
4226
4227         /* start the ring */
4228         gfx_v8_0_cp_gfx_start(adev);
4229         ring->ready = true;
4230         r = amdgpu_ring_test_ring(ring);
4231         if (r)
4232                 ring->ready = false;
4233
4234         return r;
4235 }
4236
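     /*
      * Halt or release the compute micro engines.  Disabling sets the
      * MEC_ME1/MEC_ME2 halt bits in CP_MEC_CNTL and marks every compute
      * ring as not ready.
      */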
4237 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4238 {
4239         int i;
4240
4241         if (enable) {
4242                 WREG32(mmCP_MEC_CNTL, 0);
4243         } else {
4244                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4245                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4246                         adev->gfx.compute_ring[i].ready = false;
4247         }
4248         udelay(50);
4249 }
4250
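     /*
      * Direct (non-SMU) load of the MEC microcode: halt the compute CP,
      * then stream the MEC1 image, and the optional MEC2 image when one
      * is provided, dword by dword through the UCODE_ADDR/UCODE_DATA pair.
      */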
4251 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4252 {
4253         const struct gfx_firmware_header_v1_0 *mec_hdr;
4254         const __le32 *fw_data;
4255         unsigned i, fw_size;
4256
4257         if (!adev->gfx.mec_fw)
4258                 return -EINVAL;
4259
4260         gfx_v8_0_cp_compute_enable(adev, false);
4261
4262         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4263         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4264
4265         fw_data = (const __le32 *)
4266                 (adev->gfx.mec_fw->data +
4267                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4268         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4269
4270         /* MEC1 */
4271         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4272         for (i = 0; i < fw_size; i++)
4273                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4274         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4275
4276         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4277         if (adev->gfx.mec2_fw) {
4278                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4279
4280                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4281                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4282
4283                 fw_data = (const __le32 *)
4284                         (adev->gfx.mec2_fw->data +
4285                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4286                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4287
4288                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4289                 for (i = 0; i < fw_size; i++)
4290                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4291                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4292         }
4293
4294         return 0;
4295 }
4296
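     /*
      * VI memory queue descriptor (MQD): the per-queue state block backing
      * a compute ring.  The 256 ordinals below cover the compute dispatch
      * and CP_HQD_* register state; a further 256 dwords at the end are
      * left for the CP microcode (reserved_t).
      */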
4297 struct vi_mqd {
4298         uint32_t header;  /* ordinal0 */
4299         uint32_t compute_dispatch_initiator;  /* ordinal1 */
4300         uint32_t compute_dim_x;  /* ordinal2 */
4301         uint32_t compute_dim_y;  /* ordinal3 */
4302         uint32_t compute_dim_z;  /* ordinal4 */
4303         uint32_t compute_start_x;  /* ordinal5 */
4304         uint32_t compute_start_y;  /* ordinal6 */
4305         uint32_t compute_start_z;  /* ordinal7 */
4306         uint32_t compute_num_thread_x;  /* ordinal8 */
4307         uint32_t compute_num_thread_y;  /* ordinal9 */
4308         uint32_t compute_num_thread_z;  /* ordinal10 */
4309         uint32_t compute_pipelinestat_enable;  /* ordinal11 */
4310         uint32_t compute_perfcount_enable;  /* ordinal12 */
4311         uint32_t compute_pgm_lo;  /* ordinal13 */
4312         uint32_t compute_pgm_hi;  /* ordinal14 */
4313         uint32_t compute_tba_lo;  /* ordinal15 */
4314         uint32_t compute_tba_hi;  /* ordinal16 */
4315         uint32_t compute_tma_lo;  /* ordinal17 */
4316         uint32_t compute_tma_hi;  /* ordinal18 */
4317         uint32_t compute_pgm_rsrc1;  /* ordinal19 */
4318         uint32_t compute_pgm_rsrc2;  /* ordinal20 */
4319         uint32_t compute_vmid;  /* ordinal21 */
4320         uint32_t compute_resource_limits;  /* ordinal22 */
4321         uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
4322         uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
4323         uint32_t compute_tmpring_size;  /* ordinal25 */
4324         uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
4325         uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
4326         uint32_t compute_restart_x;  /* ordinal28 */
4327         uint32_t compute_restart_y;  /* ordinal29 */
4328         uint32_t compute_restart_z;  /* ordinal30 */
4329         uint32_t compute_thread_trace_enable;  /* ordinal31 */
4330         uint32_t compute_misc_reserved;  /* ordinal32 */
4331         uint32_t compute_dispatch_id;  /* ordinal33 */
4332         uint32_t compute_threadgroup_id;  /* ordinal34 */
4333         uint32_t compute_relaunch;  /* ordinal35 */
4334         uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
4335         uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
4336         uint32_t compute_wave_restore_control;  /* ordinal38 */
4337         uint32_t reserved9;  /* ordinal39 */
4338         uint32_t reserved10;  /* ordinal40 */
4339         uint32_t reserved11;  /* ordinal41 */
4340         uint32_t reserved12;  /* ordinal42 */
4341         uint32_t reserved13;  /* ordinal43 */
4342         uint32_t reserved14;  /* ordinal44 */
4343         uint32_t reserved15;  /* ordinal45 */
4344         uint32_t reserved16;  /* ordinal46 */
4345         uint32_t reserved17;  /* ordinal47 */
4346         uint32_t reserved18;  /* ordinal48 */
4347         uint32_t reserved19;  /* ordinal49 */
4348         uint32_t reserved20;  /* ordinal50 */
4349         uint32_t reserved21;  /* ordinal51 */
4350         uint32_t reserved22;  /* ordinal52 */
4351         uint32_t reserved23;  /* ordinal53 */
4352         uint32_t reserved24;  /* ordinal54 */
4353         uint32_t reserved25;  /* ordinal55 */
4354         uint32_t reserved26;  /* ordinal56 */
4355         uint32_t reserved27;  /* ordinal57 */
4356         uint32_t reserved28;  /* ordinal58 */
4357         uint32_t reserved29;  /* ordinal59 */
4358         uint32_t reserved30;  /* ordinal60 */
4359         uint32_t reserved31;  /* ordinal61 */
4360         uint32_t reserved32;  /* ordinal62 */
4361         uint32_t reserved33;  /* ordinal63 */
4362         uint32_t reserved34;  /* ordinal64 */
4363         uint32_t compute_user_data_0;  /* ordinal65 */
4364         uint32_t compute_user_data_1;  /* ordinal66 */
4365         uint32_t compute_user_data_2;  /* ordinal67 */
4366         uint32_t compute_user_data_3;  /* ordinal68 */
4367         uint32_t compute_user_data_4;  /* ordinal69 */
4368         uint32_t compute_user_data_5;  /* ordinal70 */
4369         uint32_t compute_user_data_6;  /* ordinal71 */
4370         uint32_t compute_user_data_7;  /* ordinal72 */
4371         uint32_t compute_user_data_8;  /* ordinal73 */
4372         uint32_t compute_user_data_9;  /* ordinal74 */
4373         uint32_t compute_user_data_10;  /* ordinal75 */
4374         uint32_t compute_user_data_11;  /* ordinal76 */
4375         uint32_t compute_user_data_12;  /* ordinal77 */
4376         uint32_t compute_user_data_13;  /* ordinal78 */
4377         uint32_t compute_user_data_14;  /* ordinal79 */
4378         uint32_t compute_user_data_15;  /* ordinal80 */
4379         uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
4380         uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
4381         uint32_t reserved35;  /* ordinal83 */
4382         uint32_t reserved36;  /* ordinal84 */
4383         uint32_t reserved37;  /* ordinal85 */
4384         uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
4385         uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
4386         uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
4387         uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
4388         uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
4389         uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
4390         uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
4391         uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
4392         uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
4393         uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
4394         uint32_t reserved38;  /* ordinal96 */
4395         uint32_t reserved39;  /* ordinal97 */
4396         uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
4397         uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
4398         uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
4399         uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
4400         uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
4401         uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
4402         uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
4403         uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
4404         uint32_t reserved40;  /* ordinal106 */
4405         uint32_t reserved41;  /* ordinal107 */
4406         uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
4407         uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
4408         uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
4409         uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
4410         uint32_t reserved42;  /* ordinal112 */
4411         uint32_t reserved43;  /* ordinal113 */
4412         uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
4413         uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
4414         uint32_t cp_packet_id_lo;  /* ordinal116 */
4415         uint32_t cp_packet_id_hi;  /* ordinal117 */
4416         uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
4417         uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
4418         uint32_t gds_save_base_addr_lo;  /* ordinal120 */
4419         uint32_t gds_save_base_addr_hi;  /* ordinal121 */
4420         uint32_t gds_save_mask_lo;  /* ordinal122 */
4421         uint32_t gds_save_mask_hi;  /* ordinal123 */
4422         uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
4423         uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
4424         uint32_t reserved44;  /* ordinal126 */
4425         uint32_t reserved45;  /* ordinal127 */
4426         uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
4427         uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
4428         uint32_t cp_hqd_active;  /* ordinal130 */
4429         uint32_t cp_hqd_vmid;  /* ordinal131 */
4430         uint32_t cp_hqd_persistent_state;  /* ordinal132 */
4431         uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
4432         uint32_t cp_hqd_queue_priority;  /* ordinal134 */
4433         uint32_t cp_hqd_quantum;  /* ordinal135 */
4434         uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
4435         uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
4436         uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
4437         uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
4438         uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
4439         uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
4440         uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
4441         uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
4442         uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
4443         uint32_t cp_hqd_pq_control;  /* ordinal145 */
4444         uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
4445         uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
4446         uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
4447         uint32_t cp_hqd_ib_control;  /* ordinal149 */
4448         uint32_t cp_hqd_iq_timer;  /* ordinal150 */
4449         uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
4450         uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
4451         uint32_t cp_hqd_dma_offload;  /* ordinal153 */
4452         uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
4453         uint32_t cp_hqd_msg_type;  /* ordinal155 */
4454         uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
4455         uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
4456         uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
4457         uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
4458         uint32_t cp_hqd_hq_status0;  /* ordinal160 */
4459         uint32_t cp_hqd_hq_control0;  /* ordinal161 */
4460         uint32_t cp_mqd_control;  /* ordinal162 */
4461         uint32_t cp_hqd_hq_status1;  /* ordinal163 */
4462         uint32_t cp_hqd_hq_control1;  /* ordinal164 */
4463         uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
4464         uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
4465         uint32_t cp_hqd_eop_control;  /* ordinal167 */
4466         uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
4467         uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
4468         uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
4469         uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
4470         uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
4471         uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
4472         uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
4473         uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
4474         uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
4475         uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
4476         uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
4477         uint32_t cp_hqd_error;  /* ordinal179 */
4478         uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
4479         uint32_t cp_hqd_eop_dones;  /* ordinal181 */
4480         uint32_t reserved46;  /* ordinal182 */
4481         uint32_t reserved47;  /* ordinal183 */
4482         uint32_t reserved48;  /* ordinal184 */
4483         uint32_t reserved49;  /* ordinal185 */
4484         uint32_t reserved50;  /* ordinal186 */
4485         uint32_t reserved51;  /* ordinal187 */
4486         uint32_t reserved52;  /* ordinal188 */
4487         uint32_t reserved53;  /* ordinal189 */
4488         uint32_t reserved54;  /* ordinal190 */
4489         uint32_t reserved55;  /* ordinal191 */
4490         uint32_t iqtimer_pkt_header;  /* ordinal192 */
4491         uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
4492         uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
4493         uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
4494         uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
4495         uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
4496         uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
4497         uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
4498         uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
4499         uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
4500         uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
4501         uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
4502         uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
4503         uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
4504         uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
4505         uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
4506         uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
4507         uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
4508         uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
4509         uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
4510         uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
4511         uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
4512         uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
4513         uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
4514         uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
4515         uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
4516         uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
4517         uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
4518         uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
4519         uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
4520         uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
4521         uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
4522         uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
4523         uint32_t reserved56;  /* ordinal225 */
4524         uint32_t reserved57;  /* ordinal226 */
4525         uint32_t reserved58;  /* ordinal227 */
4526         uint32_t set_resources_header;  /* ordinal228 */
4527         uint32_t set_resources_dw1;  /* ordinal229 */
4528         uint32_t set_resources_dw2;  /* ordinal230 */
4529         uint32_t set_resources_dw3;  /* ordinal231 */
4530         uint32_t set_resources_dw4;  /* ordinal232 */
4531         uint32_t set_resources_dw5;  /* ordinal233 */
4532         uint32_t set_resources_dw6;  /* ordinal234 */
4533         uint32_t set_resources_dw7;  /* ordinal235 */
4534         uint32_t reserved59;  /* ordinal236 */
4535         uint32_t reserved60;  /* ordinal237 */
4536         uint32_t reserved61;  /* ordinal238 */
4537         uint32_t reserved62;  /* ordinal239 */
4538         uint32_t reserved63;  /* ordinal240 */
4539         uint32_t reserved64;  /* ordinal241 */
4540         uint32_t reserved65;  /* ordinal242 */
4541         uint32_t reserved66;  /* ordinal243 */
4542         uint32_t reserved67;  /* ordinal244 */
4543         uint32_t reserved68;  /* ordinal245 */
4544         uint32_t reserved69;  /* ordinal246 */
4545         uint32_t reserved70;  /* ordinal247 */
4546         uint32_t reserved71;  /* ordinal248 */
4547         uint32_t reserved72;  /* ordinal249 */
4548         uint32_t reserved73;  /* ordinal250 */
4549         uint32_t reserved74;  /* ordinal251 */
4550         uint32_t reserved75;  /* ordinal252 */
4551         uint32_t reserved76;  /* ordinal253 */
4552         uint32_t reserved77;  /* ordinal254 */
4553         uint32_t reserved78;  /* ordinal255 */
4554
4555         uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4556 };
4557
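     /*
      * Tear down the per-ring MQD buffer objects: reserve, unpin,
      * unreserve and drop the reference on each compute ring's mqd_obj.
      */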
4558 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4559 {
4560         int i, r;
4561
4562         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4563                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4564
4565                 if (ring->mqd_obj) {
4566                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4567                         if (unlikely(r != 0))
4568                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4569
4570                         amdgpu_bo_unpin(ring->mqd_obj);
4571                         amdgpu_bo_unreserve(ring->mqd_obj);
4572
4573                         amdgpu_bo_unref(&ring->mqd_obj);
4574                         ring->mqd_obj = NULL;
4575                 }
4576         }
4577 }
4578
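     /*
      * Bring the compute queues up: program the per-pipe EOP buffers, then
      * for each compute ring allocate/pin/map an MQD, fill it in, mirror
      * it into the CP_HQD_* registers under srbm_mutex, activate the queue
      * and finish with a ring test on every ring.
      */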
4579 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4580 {
4581         int r, i, j;
4582         u32 tmp;
4583         bool use_doorbell = true;
4584         u64 hqd_gpu_addr;
4585         u64 mqd_gpu_addr;
4586         u64 eop_gpu_addr;
4587         u64 wb_gpu_addr;
4588         u32 *buf;
4589         struct vi_mqd *mqd;
4590
4591         /* init the pipes */
4592         mutex_lock(&adev->srbm_mutex);
4593         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4594                 int me = (i < 4) ? 1 : 2;
4595                 int pipe = (i < 4) ? i : (i - 4);
4596
4597                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4598                 eop_gpu_addr >>= 8;
4599
4600                 vi_srbm_select(adev, me, pipe, 0, 0);
4601
4602                 /* write the EOP addr */
4603                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4604                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4605
4606                 /* set the VMID assigned */
4607                 WREG32(mmCP_HQD_VMID, 0);
4608
4609                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4610                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4611                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4612                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
4613                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4614         }
4615         vi_srbm_select(adev, 0, 0, 0, 0);
4616         mutex_unlock(&adev->srbm_mutex);
4617
4618         /* init the compute queues */
4619         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4620                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4621
4622                 if (ring->mqd_obj == NULL) {
4623                         r = amdgpu_bo_create(adev,
4624                                              sizeof(struct vi_mqd),
4625                                              PAGE_SIZE, true,
4626                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4627                                              NULL, &ring->mqd_obj);
4628                         if (r) {
4629                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4630                                 return r;
4631                         }
4632                 }
4633
4634                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4635                 if (unlikely(r != 0)) {
4636                         gfx_v8_0_cp_compute_fini(adev);
4637                         return r;
4638                 }
4639                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4640                                   &mqd_gpu_addr);
4641                 if (r) {
4642                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4643                         gfx_v8_0_cp_compute_fini(adev);
4644                         return r;
4645                 }
4646                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4647                 if (r) {
4648                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4649                         gfx_v8_0_cp_compute_fini(adev);
4650                         return r;
4651                 }
4652
4653                 /* init the mqd struct */
4654                 memset(buf, 0, sizeof(struct vi_mqd));
4655
4656                 mqd = (struct vi_mqd *)buf;
4657                 mqd->header = 0xC0310800;
4658                 mqd->compute_pipelinestat_enable = 0x00000001;
4659                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4660                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4661                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4662                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4663                 mqd->compute_misc_reserved = 0x00000003;
4664
4665                 mutex_lock(&adev->srbm_mutex);
4666                 vi_srbm_select(adev, ring->me,
4667                                ring->pipe,
4668                                ring->queue, 0);
4669
4670                 /* disable wptr polling */
4671                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4672                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4673                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4674
4675                 mqd->cp_hqd_eop_base_addr_lo =
4676                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
4677                 mqd->cp_hqd_eop_base_addr_hi =
4678                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4679
4680                 /* enable doorbell? */
4681                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4682                 if (use_doorbell) {
4683                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4684                 } else {
4685                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4686                 }
4687                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4688                 mqd->cp_hqd_pq_doorbell_control = tmp;
4689
4690                 /* disable the queue if it's active */
4691                 mqd->cp_hqd_dequeue_request = 0;
4692                 mqd->cp_hqd_pq_rptr = 0;
4693                 mqd->cp_hqd_pq_wptr = 0;
4694                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4695                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4696                         for (j = 0; j < adev->usec_timeout; j++) {
4697                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4698                                         break;
4699                                 udelay(1);
4700                         }
4701                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4702                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4703                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4704                 }
4705
4706                 /* set the pointer to the MQD */
4707                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4708                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4709                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4710                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4711
4712                 /* set MQD vmid to 0 */
4713                 tmp = RREG32(mmCP_MQD_CONTROL);
4714                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4715                 WREG32(mmCP_MQD_CONTROL, tmp);
4716                 mqd->cp_mqd_control = tmp;
4717
4718                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4719                 hqd_gpu_addr = ring->gpu_addr >> 8;
4720                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4721                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4722                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4723                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4724
4725                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4726                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4727                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4728                                     (order_base_2(ring->ring_size / 4) - 1));
4729                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4730                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4731 #ifdef __BIG_ENDIAN
4732                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4733 #endif
4734                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4735                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4736                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4737                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4738                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4739                 mqd->cp_hqd_pq_control = tmp;
4740
4741                 /* set the wb address whether it's enabled or not */
4742                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4743                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4744                 mqd->cp_hqd_pq_rptr_report_addr_hi =
4745                         upper_32_bits(wb_gpu_addr) & 0xffff;
4746                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4747                        mqd->cp_hqd_pq_rptr_report_addr_lo);
4748                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4749                        mqd->cp_hqd_pq_rptr_report_addr_hi);
4750
4751                 /* only used if CP_PQ_WPTR_POLL_CNTL.EN = 1 */
4752                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4753                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4754                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4755                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4756                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4757                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
4758
4759                 /* enable the doorbell if requested */
4760                 if (use_doorbell) {
4761                         if ((adev->asic_type == CHIP_CARRIZO) ||
4762                             (adev->asic_type == CHIP_FIJI) ||
4763                             (adev->asic_type == CHIP_STONEY) ||
4764                             (adev->asic_type == CHIP_POLARIS11) ||
4765                             (adev->asic_type == CHIP_POLARIS10)) {
4766                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4767                                        AMDGPU_DOORBELL_KIQ << 2);
4768                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4769                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
4770                         }
4771                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4772                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4773                                             DOORBELL_OFFSET, ring->doorbell_index);
4774                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4775                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4776                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4777                         mqd->cp_hqd_pq_doorbell_control = tmp;
4778
4779                 } else {
4780                         mqd->cp_hqd_pq_doorbell_control = 0;
4781                 }
4782                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4783                        mqd->cp_hqd_pq_doorbell_control);
4784
4785                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4786                 ring->wptr = 0;
4787                 mqd->cp_hqd_pq_wptr = ring->wptr;
4788                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4789                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4790
4791                 /* set the vmid for the queue */
4792                 mqd->cp_hqd_vmid = 0;
4793                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4794
4795                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4796                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4797                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4798                 mqd->cp_hqd_persistent_state = tmp;
4799                 if (adev->asic_type == CHIP_STONEY ||
4800                         adev->asic_type == CHIP_POLARIS11 ||
4801                         adev->asic_type == CHIP_POLARIS10) {
4802                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4803                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4804                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4805                 }
4806
4807                 /* activate the queue */
4808                 mqd->cp_hqd_active = 1;
4809                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4810
4811                 vi_srbm_select(adev, 0, 0, 0, 0);
4812                 mutex_unlock(&adev->srbm_mutex);
4813
4814                 amdgpu_bo_kunmap(ring->mqd_obj);
4815                 amdgpu_bo_unreserve(ring->mqd_obj);
4816         }
4817
4818         if (use_doorbell) {
4819                 tmp = RREG32(mmCP_PQ_STATUS);
4820                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4821                 WREG32(mmCP_PQ_STATUS, tmp);
4822         }
4823
4824         gfx_v8_0_cp_compute_enable(adev, true);
4825
4826         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4827                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4828
4829                 ring->ready = true;
4830                 r = amdgpu_ring_test_ring(ring);
4831                 if (r)
4832                         ring->ready = false;
4833         }
4834
4835         return 0;
4836 }
4837
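     /*
      * Top-level CP resume: load the CP/MEC microcode (directly, or verify
      * the SMU-managed load), then resume the gfx ring and the compute
      * queues and re-enable the GUI idle interrupt.
      */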
4838 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4839 {
4840         int r;
4841
4842         if (!(adev->flags & AMD_IS_APU))
4843                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4844
4845         if (!adev->pp_enabled) {
4846                 if (!adev->firmware.smu_load) {
4847                         /* legacy firmware loading */
4848                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
4849                         if (r)
4850                                 return r;
4851
4852                         r = gfx_v8_0_cp_compute_load_microcode(adev);
4853                         if (r)
4854                                 return r;
4855                 } else {
4856                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4857                                                         AMDGPU_UCODE_ID_CP_CE);
4858                         if (r)
4859                                 return -EINVAL;
4860
4861                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4862                                                         AMDGPU_UCODE_ID_CP_PFP);
4863                         if (r)
4864                                 return -EINVAL;
4865
4866                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4867                                                         AMDGPU_UCODE_ID_CP_ME);
4868                         if (r)
4869                                 return -EINVAL;
4870
4871                         if (adev->asic_type == CHIP_TOPAZ) {
4872                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4873                                 if (r)
4874                                         return r;
4875                         } else {
4876                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4877                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
4878                                 if (r)
4879                                         return -EINVAL;
4880                         }
4881                 }
4882         }
4883
4884         r = gfx_v8_0_cp_gfx_resume(adev);
4885         if (r)
4886                 return r;
4887
4888         r = gfx_v8_0_cp_compute_resume(adev);
4889         if (r)
4890                 return r;
4891
4892         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4893
4894         return 0;
4895 }
4896
4897 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4898 {
4899         gfx_v8_0_cp_gfx_enable(adev, enable);
4900         gfx_v8_0_cp_compute_enable(adev, enable);
4901 }
4902
4903 static int gfx_v8_0_hw_init(void *handle)
4904 {
4905         int r;
4906         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4907
4908         gfx_v8_0_init_golden_registers(adev);
4909         gfx_v8_0_gpu_init(adev);
4910
4911         r = gfx_v8_0_rlc_resume(adev);
4912         if (r)
4913                 return r;
4914
4915         r = gfx_v8_0_cp_resume(adev);
4916
4917         return r;
4918 }
4919
4920 static int gfx_v8_0_hw_fini(void *handle)
4921 {
4922         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4923
4924         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4925         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4926         gfx_v8_0_cp_enable(adev, false);
4927         gfx_v8_0_rlc_stop(adev);
4928         gfx_v8_0_cp_compute_fini(adev);
4929
4930         amdgpu_set_powergating_state(adev,
4931                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
4932
4933         return 0;
4934 }
4935
4936 static int gfx_v8_0_suspend(void *handle)
4937 {
4938         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4939
4940         return gfx_v8_0_hw_fini(adev);
4941 }
4942
4943 static int gfx_v8_0_resume(void *handle)
4944 {
4945         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4946
4947         return gfx_v8_0_hw_init(adev);
4948 }
4949
4950 static bool gfx_v8_0_is_idle(void *handle)
4951 {
4952         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4953
4954         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4955                 return false;
4956         else
4957                 return true;
4958 }
4959
4960 static int gfx_v8_0_wait_for_idle(void *handle)
4961 {
4962         unsigned i;
4963         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4964
4965         for (i = 0; i < adev->usec_timeout; i++) {
4966                 if (gfx_v8_0_is_idle(handle))
4967                         return 0;
4968
4969                 udelay(1);
4970         }
4971         return -ETIMEDOUT;
4972 }
4973
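     /*
      * Inspect GRBM_STATUS/GRBM_STATUS2/SRBM_STATUS and record which
      * GRBM/SRBM soft-reset bits would be needed in adev->gfx, marking
      * the GFX IP block as hung when any busy condition is found.
      */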
4974 static int gfx_v8_0_check_soft_reset(void *handle)
4975 {
4976         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4977         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4978         u32 tmp;
4979
4980         /* GRBM_STATUS */
4981         tmp = RREG32(mmGRBM_STATUS);
4982         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4983                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4984                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4985                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4986                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4987                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4988                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4989                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4990                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4991                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4992                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4993                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4994                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4995         }
4996
4997         /* GRBM_STATUS2 */
4998         tmp = RREG32(mmGRBM_STATUS2);
4999         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5000                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5001                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5002
5003         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5004             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5005             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5006                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5007                                                 SOFT_RESET_CPF, 1);
5008                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5009                                                 SOFT_RESET_CPC, 1);
5010                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5011                                                 SOFT_RESET_CPG, 1);
5012                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5013                                                 SOFT_RESET_GRBM, 1);
5014         }
5015
5016         /* SRBM_STATUS */
5017         tmp = RREG32(mmSRBM_STATUS);
5018         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5019                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5020                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5021         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5022                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5023                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5024
5025         if (grbm_soft_reset || srbm_soft_reset) {
5026                 adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang = true;
5027                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5028                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5029         } else {
5030                 adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang = false;
5031                 adev->gfx.grbm_soft_reset = 0;
5032                 adev->gfx.srbm_soft_reset = 0;
5033         }
5034
5035         return 0;
5036 }
5037
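     /*
      * Drain one compute queue: select its HQD, request a dequeue and poll
      * CP_HQD_ACTIVE until the queue goes idle or the usec timeout expires.
      */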
5038 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
5039                                   struct amdgpu_ring *ring)
5040 {
5041         int i;
5042
5043         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5044         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
5045                 u32 tmp;
5046                 tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
5047                 tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
5048                                     DEQUEUE_REQ, 2);
5049                 WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
5050                 for (i = 0; i < adev->usec_timeout; i++) {
5051                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
5052                                 break;
5053                         udelay(1);
5054                 }
5055         }
5056 }
5057
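     /*
      * Quiesce the block before a soft reset: stop the RLC, halt the gfx
      * CP if it is being reset, and drain every compute HQD before halting
      * the MEC when the compute CP is being reset.
      */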
5058 static int gfx_v8_0_pre_soft_reset(void *handle)
5059 {
5060         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5061         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5062
5063         if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang)
5064                 return 0;
5065
5066         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5067         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5068
5069         /* stop the rlc */
5070         gfx_v8_0_rlc_stop(adev);
5071
5072         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5073             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5074                 /* Disable GFX parsing/prefetching */
5075                 gfx_v8_0_cp_gfx_enable(adev, false);
5076
5077         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5078             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5079             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5080             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5081                 int i;
5082
5083                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5084                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5085
5086                         gfx_v8_0_inactive_hqd(adev, ring);
5087                 }
5088                 /* Disable MEC parsing/prefetching */
5089                 gfx_v8_0_cp_compute_enable(adev, false);
5090         }
5091
5092         return 0;
5093 }
5094
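     /*
      * Apply the soft resets recorded by gfx_v8_0_check_soft_reset():
      * stall the GMCON, pulse the requested GRBM/SRBM soft-reset bits with
      * a short delay in between, then release the stall and let things
      * settle.
      */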
5095 static int gfx_v8_0_soft_reset(void *handle)
5096 {
5097         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5098         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5099         u32 tmp;
5100
5101         if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang)
5102                 return 0;
5103
5104         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5105         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5106
5107         if (grbm_soft_reset || srbm_soft_reset) {
5108                 tmp = RREG32(mmGMCON_DEBUG);
5109                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5110                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5111                 WREG32(mmGMCON_DEBUG, tmp);
5112                 udelay(50);
5113         }
5114
5115         if (grbm_soft_reset) {
5116                 tmp = RREG32(mmGRBM_SOFT_RESET);
5117                 tmp |= grbm_soft_reset;
5118                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5119                 WREG32(mmGRBM_SOFT_RESET, tmp);
5120                 tmp = RREG32(mmGRBM_SOFT_RESET);
5121
5122                 udelay(50);
5123
5124                 tmp &= ~grbm_soft_reset;
5125                 WREG32(mmGRBM_SOFT_RESET, tmp);
5126                 tmp = RREG32(mmGRBM_SOFT_RESET);
5127         }
5128
5129         if (srbm_soft_reset) {
5130                 tmp = RREG32(mmSRBM_SOFT_RESET);
5131                 tmp |= srbm_soft_reset;
5132                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5133                 WREG32(mmSRBM_SOFT_RESET, tmp);
5134                 tmp = RREG32(mmSRBM_SOFT_RESET);
5135
5136                 udelay(50);
5137
5138                 tmp &= ~srbm_soft_reset;
5139                 WREG32(mmSRBM_SOFT_RESET, tmp);
5140                 tmp = RREG32(mmSRBM_SOFT_RESET);
5141         }
5142
5143         if (grbm_soft_reset || srbm_soft_reset) {
5144                 tmp = RREG32(mmGMCON_DEBUG);
5145                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5146                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5147                 WREG32(mmGMCON_DEBUG, tmp);
5148         }
5149
5150         /* Wait a little for things to settle down */
5151         udelay(50);
5152
5153         return 0;
5154 }
5155
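     /*
      * Clear a queue's dequeue request and PQ read/write pointers so the
      * HQD restarts cleanly after a soft reset.
      */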
5156 static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
5157                               struct amdgpu_ring *ring)
5158 {
5159         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5160         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
5161         WREG32(mmCP_HQD_PQ_RPTR, 0);
5162         WREG32(mmCP_HQD_PQ_WPTR, 0);
5163         vi_srbm_select(adev, 0, 0, 0, 0);
5164 }
5165
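     /*
      * Restart whatever was reset: resume the gfx ring and/or the compute
      * queues depending on the recorded soft-reset bits, then start the
      * RLC again.
      */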
5166 static int gfx_v8_0_post_soft_reset(void *handle)
5167 {
5168         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5169         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5170
5171         if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang)
5172                 return 0;
5173
5174         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5175         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5176
5177         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5178             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5179                 gfx_v8_0_cp_gfx_resume(adev);
5180
5181         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5182             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5183             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5184             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5185                 int i;
5186
5187                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5188                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5189
5190                         gfx_v8_0_init_hqd(adev, ring);
5191                 }
5192                 gfx_v8_0_cp_compute_resume(adev);
5193         }
5194         gfx_v8_0_rlc_start(adev);
5195
5196         return 0;
5197 }
5198
5199 /**
5200  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5201  *
5202  * @adev: amdgpu_device pointer
5203  *
5204  * Fetches a GPU clock counter snapshot.
5205  * Returns the 64 bit clock counter snapshot.
5206  */
5207 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5208 {
5209         uint64_t clock;
5210
5211         mutex_lock(&adev->gfx.gpu_clock_mutex);
5212         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5213         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5214                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5215         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5216         return clock;
5217 }
5218
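     /*
      * Emit WRITE_DATA packets that program the per-VMID GDS base/size,
      * GWS and OA allocation registers for the given vmid.
      */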
5219 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5220                                           uint32_t vmid,
5221                                           uint32_t gds_base, uint32_t gds_size,
5222                                           uint32_t gws_base, uint32_t gws_size,
5223                                           uint32_t oa_base, uint32_t oa_size)
5224 {
5225         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5226         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5227
5228         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5229         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5230
5231         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5232         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5233
5234         /* GDS Base */
5235         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5236         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5237                                 WRITE_DATA_DST_SEL(0)));
5238         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5239         amdgpu_ring_write(ring, 0);
5240         amdgpu_ring_write(ring, gds_base);
5241
5242         /* GDS Size */
5243         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5244         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5245                                 WRITE_DATA_DST_SEL(0)));
5246         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5247         amdgpu_ring_write(ring, 0);
5248         amdgpu_ring_write(ring, gds_size);
5249
5250         /* GWS */
5251         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5252         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5253                                 WRITE_DATA_DST_SEL(0)));
5254         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5255         amdgpu_ring_write(ring, 0);
5256         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5257
5258         /* OA */
5259         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5260         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5261                                 WRITE_DATA_DST_SEL(0)));
5262         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5263         amdgpu_ring_write(ring, 0);
5264         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5265 }
5266
5267 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5268         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5269         .select_se_sh = &gfx_v8_0_select_se_sh,
5270 };
5271
5272 static int gfx_v8_0_early_init(void *handle)
5273 {
5274         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5275
5276         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5277         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5278         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5279         gfx_v8_0_set_ring_funcs(adev);
5280         gfx_v8_0_set_irq_funcs(adev);
5281         gfx_v8_0_set_gds_init(adev);
5282         gfx_v8_0_set_rlc_funcs(adev);
5283
5284         return 0;
5285 }
5286
5287 static int gfx_v8_0_late_init(void *handle)
5288 {
5289         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5290         int r;
5291
5292         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5293         if (r)
5294                 return r;
5295
5296         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5297         if (r)
5298                 return r;
5299
5300         /* requires IBs so do in late init after IB pool is initialized */
5301         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5302         if (r)
5303                 return r;
5304
5305         amdgpu_set_powergating_state(adev,
5306                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5307
5308         return 0;
5309 }
5310
5311 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5312                                                        bool enable)
5313 {
5314         if (adev->asic_type == CHIP_POLARIS11)
5315                 /* Send msg to SMU via Powerplay */
5316                 amdgpu_set_powergating_state(adev,
5317                                              AMD_IP_BLOCK_TYPE_SMC,
5318                                              enable ?
5319                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5320
5321         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5322 }
5323
5324 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5325                                                         bool enable)
5326 {
5327         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5328 }
5329
5330 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5331                 bool enable)
5332 {
5333         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5334 }
5335
5336 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5337                                           bool enable)
5338 {
5339         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5340 }
5341
5342 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5343                                                 bool enable)
5344 {
5345         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5346
5347         /* Read any GFX register to wake up GFX. */
5348         if (!enable)
5349                 RREG32(mmDB_RENDER_CONTROL);
5350 }
5351
5352 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5353                                           bool enable)
5354 {
5355         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5356                 cz_enable_gfx_cg_power_gating(adev, true);
5357                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5358                         cz_enable_gfx_pipeline_power_gating(adev, true);
5359         } else {
5360                 cz_enable_gfx_cg_power_gating(adev, false);
5361                 cz_enable_gfx_pipeline_power_gating(adev, false);
5362         }
5363 }
5364
5365 static int gfx_v8_0_set_powergating_state(void *handle,
5366                                           enum amd_powergating_state state)
5367 {
5368         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5369         bool enable = (state == AMD_PG_STATE_GATE);
5370
5371         if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5372                 return 0;
5373
5374         switch (adev->asic_type) {
5375         case CHIP_CARRIZO:
5376         case CHIP_STONEY:
5377                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5378                         cz_update_gfx_cg_power_gating(adev, enable);
5379
5380                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5381                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5382                 else
5383                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5384
5385                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5386                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5387                 else
5388                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5389                 break;
5390         case CHIP_POLARIS11:
5391                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5392                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5393                 else
5394                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5395
5396                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5397                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5398                 else
5399                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5400
5401                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5402                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5403                 else
5404                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5405                 break;
5406         default:
5407                 break;
5408         }
5409
5410         return 0;
5411 }
5412
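/*
 * Broadcast a BPM command over the RLC serdes bus: select all SEs/SHs,
 * target every CU and non-CU serdes master, then program
 * RLC_SERDES_WR_CTRL with the BPM register address and command data.
 */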
5413 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5414                                      uint32_t reg_addr, uint32_t cmd)
5415 {
5416         uint32_t data;
5417
5418         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5419
5420         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5421         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5422
5423         data = RREG32(mmRLC_SERDES_WR_CTRL);
5424         if (adev->asic_type == CHIP_STONEY)
5425                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5426                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5427                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5428                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5429                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5430                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5431                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5432                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5433                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5434         else
5435                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5436                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5437                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5438                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5439                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5440                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5441                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5442                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5443                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5444                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5445                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5446         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5447                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5448                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5449                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5450
5451         WREG32(mmRLC_SERDES_WR_CTRL, data);
5452 }
5453
5454 #define MSG_ENTER_RLC_SAFE_MODE     1
5455 #define MSG_EXIT_RLC_SAFE_MODE      0
5456 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5457 #define RLC_GPR_REG2__REQ__SHIFT 0
5458 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5459 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5460
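/*
 * CZ/ST request RLC safe mode through the RLC_GPR_REG2 mailbox: set REQ
 * plus the ENTER message, wait until RLC_GPM_STAT reports GFX clocks and
 * power on, then wait for the RLC to acknowledge by clearing REQ.  Only
 * needed when CGCG/MGCG or GFX power gating is in use and the RLC F32
 * core is running.
 */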
5461 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5462 {
5463         u32 data = 0;
5464         unsigned i;
5465
5466         data = RREG32(mmRLC_CNTL);
5467         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5468                 return;
5469
5470         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5471             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5472                                AMD_PG_SUPPORT_GFX_DMG))) {
5473                 data |= RLC_GPR_REG2__REQ_MASK;
5474                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5475                 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5476                 WREG32(mmRLC_GPR_REG2, data);
5477
5478                 for (i = 0; i < adev->usec_timeout; i++) {
5479                         if ((RREG32(mmRLC_GPM_STAT) &
5480                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5481                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5482                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5483                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5484                                 break;
5485                         udelay(1);
5486                 }
5487
5488                 for (i = 0; i < adev->usec_timeout; i++) {
5489                         if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5490                                 break;
5491                         udelay(1);
5492                 }
5493                 adev->gfx.rlc.in_safe_mode = true;
5494         }
5495 }
5496
5497 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5498 {
5499         u32 data;
5500         unsigned i;
5501
5502         data = RREG32(mmRLC_CNTL);
5503         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5504                 return;
5505
5506         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5507             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5508                                AMD_PG_SUPPORT_GFX_DMG))) {
5509                 data |= RLC_GPR_REG2__REQ_MASK;
5510                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5511                 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5512                 WREG32(mmRLC_GPR_REG2, data);
5513                 adev->gfx.rlc.in_safe_mode = false;
5514         }
5515
5516         for (i = 0; i < adev->usec_timeout; i++) {
5517                 if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5518                         break;
5519                 udelay(1);
5520         }
5521 }
5522
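/*
 * Topaz/Iceland uses the RLC_SAFE_MODE register (CMD + MESSAGE) instead
 * of the GPR_REG2 mailbox; the handshake is otherwise the same as the
 * CZ variant above.
 */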
5523 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5524 {
5525         u32 data;
5526         unsigned i;
5527
5528         data = RREG32(mmRLC_CNTL);
5529         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5530                 return;
5531
5532         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5533                 data |= RLC_SAFE_MODE__CMD_MASK;
5534                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5535                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5536                 WREG32(mmRLC_SAFE_MODE, data);
5537
5538                 for (i = 0; i < adev->usec_timeout; i++) {
5539                         if ((RREG32(mmRLC_GPM_STAT) &
5540                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5541                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5542                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5543                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5544                                 break;
5545                         udelay(1);
5546                 }
5547
5548                 for (i = 0; i < adev->usec_timeout; i++) {
5549                         if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5550                                 break;
5551                         udelay(1);
5552                 }
5553                 adev->gfx.rlc.in_safe_mode = true;
5554         }
5555 }
5556
5557 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5558 {
5559         u32 data = 0;
5560         unsigned i;
5561
5562         data = RREG32(mmRLC_CNTL);
5563         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5564                 return;
5565
5566         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5567                 if (adev->gfx.rlc.in_safe_mode) {
5568                         data |= RLC_SAFE_MODE__CMD_MASK;
5569                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5570                         WREG32(mmRLC_SAFE_MODE, data);
5571                         adev->gfx.rlc.in_safe_mode = false;
5572                 }
5573         }
5574
5575         for (i = 0; i < adev->usec_timeout; i++) {
5576                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5577                         break;
5578                 udelay(1);
5579         }
5580 }
5581
5582 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5583 {
5584         adev->gfx.rlc.in_safe_mode = true;
5585 }
5586
5587 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5588 {
5589         adev->gfx.rlc.in_safe_mode = false;
5590 }
5591
5592 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5593         .enter_safe_mode = cz_enter_rlc_safe_mode,
5594         .exit_safe_mode = cz_exit_rlc_safe_mode
5595 };
5596
5597 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5598         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5599         .exit_safe_mode = iceland_exit_rlc_safe_mode
5600 };
5601
5602 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5603         .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5604         .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5605 };
5606
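/*
 * Medium grain clock gating (MGCG/MGLS).  On enable: optionally turn on
 * RLC and CP memory light sleep, clear the MGCG overrides in
 * RLC_CGTT_MGCG_OVERRIDE (GRBM stays overridden on APUs), wait for the
 * serdes masters to go idle, clear the per-CU BPM MGCG override and, if
 * supported, program CGTS tree-shade MGCG/MGLS.  On disable the sequence
 * is reversed.  The whole update runs inside RLC safe mode.
 */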
5607 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5608                                                       bool enable)
5609 {
5610         uint32_t temp, data;
5611
5612         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5613
5614         /* It is disabled by HW by default */
5615         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5616                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5617                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5618                                 /* 1 - RLC memory Light sleep */
5619                                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5620
5621                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5622                                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5623                 }
5624
5625                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5626                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5627                 if (adev->flags & AMD_IS_APU)
5628                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5629                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5630                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5631                 else
5632                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5633                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5634                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5635                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5636
5637                 if (temp != data)
5638                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5639
5640                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5641                 gfx_v8_0_wait_for_rlc_serdes(adev);
5642
5643                 /* 5 - clear mgcg override */
5644                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5645
5646                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5647                         /* 6 - Enable CGTS(Tree Shade) MGCG/MGLS */
5648                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5649                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5650                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5651                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5652                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5653                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5654                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5655                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5656                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5657                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5658                         if (temp != data)
5659                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5660                 }
5661                 udelay(50);
5662
5663                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5664                 gfx_v8_0_wait_for_rlc_serdes(adev);
5665         } else {
5666                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5667                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5668                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5669                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5670                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5671                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5672                 if (temp != data)
5673                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5674
5675                 /* 2 - disable MGLS in RLC */
5676                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5677                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5678                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5679                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5680                 }
5681
5682                 /* 3 - disable MGLS in CP */
5683                 data = RREG32(mmCP_MEM_SLP_CNTL);
5684                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5685                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5686                         WREG32(mmCP_MEM_SLP_CNTL, data);
5687                 }
5688
5689                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5690                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5691                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5692                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5693                 if (temp != data)
5694                         WREG32(mmCGTS_SM_CTRL_REG, data);
5695
5696                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5697                 gfx_v8_0_wait_for_rlc_serdes(adev);
5698
5699                 /* 6 - set mgcg override */
5700                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5701
5702                 udelay(50);
5703
5704                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5705                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5706         }
5707
5708         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5709 }
5710
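/*
 * Coarse grain clock gating (CGCG/CGLS).  On enable: re-enable the GUI
 * idle interrupts, drop the CGCG (and optionally CGLS) overrides, clear
 * the BPM CGCG override, set BPM CGLS, then set CGCG_EN/CGLS_EN in
 * RLC_CGCG_CGLS_CTRL.  On disable the overrides are set again, GFX is
 * woken up with a few dummy CB register reads and both enable bits are
 * cleared.  The whole update runs inside RLC safe mode.
 */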
5711 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5712                                                       bool enable)
5713 {
5714         uint32_t temp, temp1, data, data1;
5715
5716         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5717
5718         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5719
5720         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5721                 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
5722                  * Cmp_busy/GFX_Idle interrupts
5723                  */
5724                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5725
5726                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5727                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5728                 if (temp1 != data1)
5729                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5730
5731                 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5732                 gfx_v8_0_wait_for_rlc_serdes(adev);
5733
5734                 /* 3 - clear cgcg override */
5735                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5736
5737                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5738                 gfx_v8_0_wait_for_rlc_serdes(adev);
5739
5740                 /* 4 - write cmd to set CGLS */
5741                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5742
5743                 /* 5 - enable cgcg */
5744                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5745
5746                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5747                         /* enable cgls */
5748                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5749
5750                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5751                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5752
5753                         if (temp1 != data1)
5754                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5755                 } else {
5756                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5757                 }
5758
5759                 if (temp != data)
5760                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5761         } else {
5762                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5763                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5764
5765                 /* TEST CGCG */
5766                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5767                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5768                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5769                 if (temp1 != data1)
5770                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5771
5772                 /* read gfx register to wake up cgcg */
5773                 RREG32(mmCB_CGTT_SCLK_CTRL);
5774                 RREG32(mmCB_CGTT_SCLK_CTRL);
5775                 RREG32(mmCB_CGTT_SCLK_CTRL);
5776                 RREG32(mmCB_CGTT_SCLK_CTRL);
5777
5778                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5779                 gfx_v8_0_wait_for_rlc_serdes(adev);
5780
5781                 /* write cmd to Set CGCG Override */
5782                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5783
5784                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5785                 gfx_v8_0_wait_for_rlc_serdes(adev);
5786
5787                 /* write cmd to Clear CGLS */
5788                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5789
5790                 /* disable cgcg, cgls should be disabled too. */
5791                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5792                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5793                 if (temp != data)
5794                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5795         }
5796
5797         gfx_v8_0_wait_for_rlc_serdes(adev);
5798
5799         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5800 }
5801 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5802                                             bool enable)
5803 {
5804         if (enable) {
5805                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5806                  * ===  MGCG + MGLS + TS(CG/LS) ===
5807                  */
5808                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5809                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5810         } else {
5811                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5812                  * ===  CGCG + CGLS ===
5813                  */
5814                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5815                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5816         }
5817         return 0;
5818 }
5819
5820 static int gfx_v8_0_set_clockgating_state(void *handle,
5821                                           enum amd_clockgating_state state)
5822 {
5823         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5824
5825         switch (adev->asic_type) {
5826         case CHIP_FIJI:
5827         case CHIP_CARRIZO:
5828         case CHIP_STONEY:
5829                 gfx_v8_0_update_gfx_clock_gating(adev,
5830                                                  state == AMD_CG_STATE_GATE);
5831                 break;
5832         default:
5833                 break;
5834         }
5835         return 0;
5836 }
5837
5838 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5839 {
5840         return ring->adev->wb.wb[ring->rptr_offs];
5841 }
5842
5843 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5844 {
5845         struct amdgpu_device *adev = ring->adev;
5846
5847         if (ring->use_doorbell)
5848                 /* XXX check if swapping is necessary on BE */
5849                 return ring->adev->wb.wb[ring->wptr_offs];
5850         else
5851                 return RREG32(mmCP_RB0_WPTR);
5852 }
5853
5854 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5855 {
5856         struct amdgpu_device *adev = ring->adev;
5857
5858         if (ring->use_doorbell) {
5859                 /* XXX check if swapping is necessary on BE */
5860                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5861                 WDOORBELL32(ring->doorbell_index, ring->wptr);
5862         } else {
5863                 WREG32(mmCP_RB0_WPTR, ring->wptr);
5864                 (void)RREG32(mmCP_RB0_WPTR);
5865         }
5866 }
5867
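/*
 * HDP flush on the CP: a WAIT_REG_MEM packet in "write, wait, write"
 * mode writes the per-client bit to GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE until that bit is reported back.  The gfx ring uses
 * the CP0 bit on the PFP; compute rings use the CP2/CP6 bits shifted by
 * their pipe on the ME.
 */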
5868 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5869 {
5870         u32 ref_and_mask, reg_mem_engine;
5871
5872         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
5873                 switch (ring->me) {
5874                 case 1:
5875                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5876                         break;
5877                 case 2:
5878                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5879                         break;
5880                 default:
5881                         return;
5882                 }
5883                 reg_mem_engine = 0;
5884         } else {
5885                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
5886                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
5887         }
5888
5889         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5890         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
5891                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
5892                                  reg_mem_engine));
5893         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
5894         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
5895         amdgpu_ring_write(ring, ref_and_mask);
5896         amdgpu_ring_write(ring, ref_and_mask);
5897         amdgpu_ring_write(ring, 0x20); /* poll interval */
5898 }
5899
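/*
 * HDP read-cache invalidate: a confirmed WRITE_DATA of 1 to HDP_DEBUG0
 * serves as the invalidate trigger here.
 */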
5900 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
5901 {
5902         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5903         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5904                                  WRITE_DATA_DST_SEL(0) |
5905                                  WR_CONFIRM));
5906         amdgpu_ring_write(ring, mmHDP_DEBUG0);
5907         amdgpu_ring_write(ring, 0);
5908         amdgpu_ring_write(ring, 1);
5909
5910 }
5911
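/*
 * Gfx IB emit: CE IBs (AMDGPU_IB_FLAG_CE) are submitted with
 * INDIRECT_BUFFER_CONST so the constant engine executes them, everything
 * else goes to the DE via INDIRECT_BUFFER.  The VMID is carried in the
 * upper byte of the control dword (vm_id << 24), and a SWITCH_BUFFER
 * precedes the first IB when a context switch is flagged.
 */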
5912 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5913                                       struct amdgpu_ib *ib,
5914                                       unsigned vm_id, bool ctx_switch)
5915 {
5916         u32 header, control = 0;
5917
5918         /* insert SWITCH_BUFFER packet before first IB in the ring frame */
5919         if (ctx_switch) {
5920                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5921                 amdgpu_ring_write(ring, 0);
5922         }
5923
5924         if (ib->flags & AMDGPU_IB_FLAG_CE)
5925                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5926         else
5927                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5928
5929         control |= ib->length_dw | (vm_id << 24);
5930
5931         amdgpu_ring_write(ring, header);
5932         amdgpu_ring_write(ring,
5933 #ifdef __BIG_ENDIAN
5934                           (2 << 0) |
5935 #endif
5936                           (ib->gpu_addr & 0xFFFFFFFC));
5937         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5938         amdgpu_ring_write(ring, control);
5939 }
5940
5941 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5942                                           struct amdgpu_ib *ib,
5943                                           unsigned vm_id, bool ctx_switch)
5944 {
5945         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
5946
5947         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5948         amdgpu_ring_write(ring,
5949 #ifdef __BIG_ENDIAN
5950                                 (2 << 0) |
5951 #endif
5952                                 (ib->gpu_addr & 0xFFFFFFFC));
5953         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5954         amdgpu_ring_write(ring, control);
5955 }
5956
5957 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
5958                                          u64 seq, unsigned flags)
5959 {
5960         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5961         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5962
5963         /* EVENT_WRITE_EOP - flush caches, send int */
5964         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
5965         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5966                                  EOP_TC_ACTION_EN |
5967                                  EOP_TC_WB_ACTION_EN |
5968                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5969                                  EVENT_INDEX(5)));
5970         amdgpu_ring_write(ring, addr & 0xfffffffc);
5971         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
5972                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5973         amdgpu_ring_write(ring, lower_32_bits(seq));
5974         amdgpu_ring_write(ring, upper_32_bits(seq));
5975
5976 }
5977
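/*
 * Pipeline sync: WAIT_REG_MEM on memory (function "equal") until the
 * value at the ring's fence address equals sync_seq, i.e. all previously
 * scheduled work on this ring has signalled.  On the gfx ring the wait
 * runs on the PFP and is followed by two SWITCH_BUFFERs so the CE cannot
 * run ahead of the ME.
 */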
5978 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5979 {
5980         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5981         uint32_t seq = ring->fence_drv.sync_seq;
5982         uint64_t addr = ring->fence_drv.gpu_addr;
5983
5984         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5985         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
5986                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
5987                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
5988         amdgpu_ring_write(ring, addr & 0xfffffffc);
5989         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
5990         amdgpu_ring_write(ring, seq);
5991         amdgpu_ring_write(ring, 0xffffffff);
5992         amdgpu_ring_write(ring, 4); /* poll interval */
5993
5994         if (usepfp) {
5995                 /* sync CE with ME to prevent CE from fetching CEIB before the context switch is done */
5996                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5997                 amdgpu_ring_write(ring, 0);
5998                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5999                 amdgpu_ring_write(ring, 0);
6000         }
6001 }
6002
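/*
 * VM flush: write the page directory base (pd_addr >> 12) into the
 * per-VMID page table base register (VMIDs 0-7 and 8-15 live in separate
 * register banks), request a TLB invalidate for this VMID through
 * VM_INVALIDATE_REQUEST, then issue a WAIT_REG_MEM on that register.  On
 * the gfx ring a PFP_SYNC_ME and two SWITCH_BUFFERs follow to keep the
 * PFP and CE coherent with the ME.
 */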
6003 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6004                                         unsigned vm_id, uint64_t pd_addr)
6005 {
6006         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6007
6008         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6009         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6010                                  WRITE_DATA_DST_SEL(0) |
6011                                  WR_CONFIRM));
6012         if (vm_id < 8) {
6013                 amdgpu_ring_write(ring,
6014                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6015         } else {
6016                 amdgpu_ring_write(ring,
6017                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6018         }
6019         amdgpu_ring_write(ring, 0);
6020         amdgpu_ring_write(ring, pd_addr >> 12);
6021
6022         /* bits 0-15 are the VM contexts 0-15 */
6023         /* invalidate the cache */
6024         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6025         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6026                                  WRITE_DATA_DST_SEL(0)));
6027         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6028         amdgpu_ring_write(ring, 0);
6029         amdgpu_ring_write(ring, 1 << vm_id);
6030
6031         /* wait for the invalidate to complete */
6032         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6033         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6034                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6035                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6036         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6037         amdgpu_ring_write(ring, 0);
6038         amdgpu_ring_write(ring, 0); /* ref */
6039         amdgpu_ring_write(ring, 0); /* mask */
6040         amdgpu_ring_write(ring, 0x20); /* poll interval */
6041
6042         /* compute doesn't have PFP */
6043         if (usepfp) {
6044                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6045                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6046                 amdgpu_ring_write(ring, 0x0);
6047                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6048                 amdgpu_ring_write(ring, 0);
6049                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6050                 amdgpu_ring_write(ring, 0);
6051         }
6052 }
6053
6054 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
6055 {
6056         return ring->adev->wb.wb[ring->rptr_offs];
6057 }
6058
6059 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6060 {
6061         return ring->adev->wb.wb[ring->wptr_offs];
6062 }
6063
6064 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6065 {
6066         struct amdgpu_device *adev = ring->adev;
6067
6068         /* XXX check if swapping is necessary on BE */
6069         adev->wb.wb[ring->wptr_offs] = ring->wptr;
6070         WDOORBELL32(ring->doorbell_index, ring->wptr);
6071 }
6072
6073 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6074                                              u64 addr, u64 seq,
6075                                              unsigned flags)
6076 {
6077         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6078         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6079
6080         /* RELEASE_MEM - flush caches, send int */
6081         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6082         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6083                                  EOP_TC_ACTION_EN |
6084                                  EOP_TC_WB_ACTION_EN |
6085                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6086                                  EVENT_INDEX(5)));
6087         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6088         amdgpu_ring_write(ring, addr & 0xfffffffc);
6089         amdgpu_ring_write(ring, upper_32_bits(addr));
6090         amdgpu_ring_write(ring, lower_32_bits(seq));
6091         amdgpu_ring_write(ring, upper_32_bits(seq));
6092 }
6093
6094 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6095                                                  enum amdgpu_interrupt_state state)
6096 {
6097         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6098                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6099 }
6100
6101 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6102                                                      int me, int pipe,
6103                                                      enum amdgpu_interrupt_state state)
6104 {
6105         /*
6106          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6107          * handles the setting of interrupts for this specific pipe. All other
6108          * pipes' interrupts are set by amdkfd.
6109          */
6110
6111         if (me == 1) {
6112                 switch (pipe) {
6113                 case 0:
6114                         break;
6115                 default:
6116                         DRM_DEBUG("invalid pipe %d\n", pipe);
6117                         return;
6118                 }
6119         } else {
6120                 DRM_DEBUG("invalid me %d\n", me);
6121                 return;
6122         }
6123
6124         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6125                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6126 }
6127
6128 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6129                                              struct amdgpu_irq_src *source,
6130                                              unsigned type,
6131                                              enum amdgpu_interrupt_state state)
6132 {
6133         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6134                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6135
6136         return 0;
6137 }
6138
6139 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6140                                               struct amdgpu_irq_src *source,
6141                                               unsigned type,
6142                                               enum amdgpu_interrupt_state state)
6143 {
6144         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6145                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6146
6147         return 0;
6148 }
6149
6150 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6151                                             struct amdgpu_irq_src *src,
6152                                             unsigned type,
6153                                             enum amdgpu_interrupt_state state)
6154 {
6155         switch (type) {
6156         case AMDGPU_CP_IRQ_GFX_EOP:
6157                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6158                 break;
6159         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6160                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6161                 break;
6162         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6163                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6164                 break;
6165         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6166                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6167                 break;
6168         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6169                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6170                 break;
6171         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6172                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6173                 break;
6174         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6175                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6176                 break;
6177         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6178                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6179                 break;
6180         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6181                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6182                 break;
6183         default:
6184                 break;
6185         }
6186         return 0;
6187 }
6188
6189 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6190                             struct amdgpu_irq_src *source,
6191                             struct amdgpu_iv_entry *entry)
6192 {
6193         int i;
6194         u8 me_id, pipe_id, queue_id;
6195         struct amdgpu_ring *ring;
6196
6197         DRM_DEBUG("IH: CP EOP\n");
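        /* ring_id encodes the source queue: bits [1:0] pipe, [3:2] ME,
         * [6:4] queue, e.g. 0x16 -> ME 1, pipe 2, queue 1.
         */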
6198         me_id = (entry->ring_id & 0x0c) >> 2;
6199         pipe_id = (entry->ring_id & 0x03) >> 0;
6200         queue_id = (entry->ring_id & 0x70) >> 4;
6201
6202         switch (me_id) {
6203         case 0:
6204                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6205                 break;
6206         case 1:
6207         case 2:
6208                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6209                         ring = &adev->gfx.compute_ring[i];
6210                         /* Per-queue interrupt is supported for MEC starting from VI.
6211                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6212                           */
6213                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6214                                 amdgpu_fence_process(ring);
6215                 }
6216                 break;
6217         }
6218         return 0;
6219 }
6220
6221 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6222                                  struct amdgpu_irq_src *source,
6223                                  struct amdgpu_iv_entry *entry)
6224 {
6225         DRM_ERROR("Illegal register access in command stream\n");
6226         schedule_work(&adev->reset_work);
6227         return 0;
6228 }
6229
6230 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6231                                   struct amdgpu_irq_src *source,
6232                                   struct amdgpu_iv_entry *entry)
6233 {
6234         DRM_ERROR("Illegal instruction in command stream\n");
6235         schedule_work(&adev->reset_work);
6236         return 0;
6237 }
6238
6239 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6240         .name = "gfx_v8_0",
6241         .early_init = gfx_v8_0_early_init,
6242         .late_init = gfx_v8_0_late_init,
6243         .sw_init = gfx_v8_0_sw_init,
6244         .sw_fini = gfx_v8_0_sw_fini,
6245         .hw_init = gfx_v8_0_hw_init,
6246         .hw_fini = gfx_v8_0_hw_fini,
6247         .suspend = gfx_v8_0_suspend,
6248         .resume = gfx_v8_0_resume,
6249         .is_idle = gfx_v8_0_is_idle,
6250         .wait_for_idle = gfx_v8_0_wait_for_idle,
6251         .check_soft_reset = gfx_v8_0_check_soft_reset,
6252         .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6253         .soft_reset = gfx_v8_0_soft_reset,
6254         .post_soft_reset = gfx_v8_0_post_soft_reset,
6255         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6256         .set_powergating_state = gfx_v8_0_set_powergating_state,
6257 };
6258
6259 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6260         .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
6261         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6262         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6263         .parse_cs = NULL,
6264         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6265         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6266         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6267         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6268         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6269         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6270         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6271         .test_ring = gfx_v8_0_ring_test_ring,
6272         .test_ib = gfx_v8_0_ring_test_ib,
6273         .insert_nop = amdgpu_ring_insert_nop,
6274         .pad_ib = amdgpu_ring_generic_pad_ib,
6275 };
6276
6277 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6278         .get_rptr = gfx_v8_0_ring_get_rptr_compute,
6279         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6280         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6281         .parse_cs = NULL,
6282         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6283         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6284         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6285         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6286         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6287         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6288         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6289         .test_ring = gfx_v8_0_ring_test_ring,
6290         .test_ib = gfx_v8_0_ring_test_ib,
6291         .insert_nop = amdgpu_ring_insert_nop,
6292         .pad_ib = amdgpu_ring_generic_pad_ib,
6293 };
6294
6295 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6296 {
6297         int i;
6298
6299         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6300                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6301
6302         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6303                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6304 }
6305
6306 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6307         .set = gfx_v8_0_set_eop_interrupt_state,
6308         .process = gfx_v8_0_eop_irq,
6309 };
6310
6311 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6312         .set = gfx_v8_0_set_priv_reg_fault_state,
6313         .process = gfx_v8_0_priv_reg_irq,
6314 };
6315
6316 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6317         .set = gfx_v8_0_set_priv_inst_fault_state,
6318         .process = gfx_v8_0_priv_inst_irq,
6319 };
6320
6321 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6322 {
6323         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6324         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6325
6326         adev->gfx.priv_reg_irq.num_types = 1;
6327         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6328
6329         adev->gfx.priv_inst_irq.num_types = 1;
6330         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6331 }
6332
6333 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6334 {
6335         switch (adev->asic_type) {
6336         case CHIP_TOPAZ:
6337                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6338                 break;
6339         case CHIP_STONEY:
6340         case CHIP_CARRIZO:
6341                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6342                 break;
6343         default:
6344                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6345                 break;
6346         }
6347 }
6348
6349 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6350 {
6351         /* init ASIC GDS info */
6352         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6353         adev->gds.gws.total_size = 64;
6354         adev->gds.oa.total_size = 16;
6355
6356         if (adev->gds.mem.total_size == 64 * 1024) {
6357                 adev->gds.mem.gfx_partition_size = 4096;
6358                 adev->gds.mem.cs_partition_size = 4096;
6359
6360                 adev->gds.gws.gfx_partition_size = 4;
6361                 adev->gds.gws.cs_partition_size = 4;
6362
6363                 adev->gds.oa.gfx_partition_size = 4;
6364                 adev->gds.oa.cs_partition_size = 1;
6365         } else {
6366                 adev->gds.mem.gfx_partition_size = 1024;
6367                 adev->gds.mem.cs_partition_size = 1024;
6368
6369                 adev->gds.gws.gfx_partition_size = 16;
6370                 adev->gds.gws.cs_partition_size = 16;
6371
6372                 adev->gds.oa.gfx_partition_size = 4;
6373                 adev->gds.oa.cs_partition_size = 4;
6374         }
6375 }
6376
6377 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6378                                                  u32 bitmap)
6379 {
6380         u32 data;
6381
6382         if (!bitmap)
6383                 return;
6384
6385         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6386         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6387
6388         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6389 }
6390
6391 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6392 {
6393         u32 data, mask;
6394
6395         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6396                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6397
6398         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6399
6400         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6401 }
6402
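/*
 * Walk every SE/SH, apply any user-requested CU disable mask, record the
 * active-CU bitmap and count the active CUs; the first two active CUs in
 * each SH are additionally marked always-on in ao_cu_mask.
 */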
6403 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6404 {
6405         int i, j, k, counter, active_cu_number = 0;
6406         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6407         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6408         unsigned disable_masks[4 * 2];
6409
6410         memset(cu_info, 0, sizeof(*cu_info));
6411
6412         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6413
6414         mutex_lock(&adev->grbm_idx_mutex);
6415         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6416                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6417                         mask = 1;
6418                         ao_bitmap = 0;
6419                         counter = 0;
6420                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6421                         if (i < 4 && j < 2)
6422                                 gfx_v8_0_set_user_cu_inactive_bitmap(
6423                                         adev, disable_masks[i * 2 + j]);
6424                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6425                         cu_info->bitmap[i][j] = bitmap;
6426
6427                         for (k = 0; k < 16; k++) {
6428                                 if (bitmap & mask) {
6429                                         if (counter < 2)
6430                                                 ao_bitmap |= mask;
6431                                         counter++;
6432                                 }
6433                                 mask <<= 1;
6434                         }
6435                         active_cu_number += counter;
6436                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6437                 }
6438         }
6439         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6440         mutex_unlock(&adev->grbm_idx_mutex);
6441
6442         cu_info->number = active_cu_number;
6443         cu_info->ao_cu_mask = ao_cu_mask;
6444 }