wext: Fix 32 bit iwpriv compatibility issue with 64 bit Kernel
[cascardo/linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "clearstate_vi.h"
31
32 #include "gmc/gmc_8_2_d.h"
33 #include "gmc/gmc_8_2_sh_mask.h"
34
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37
38 #include "bif/bif_5_0_d.h"
39 #include "bif/bif_5_0_sh_mask.h"
40
41 #include "gca/gfx_8_0_d.h"
42 #include "gca/gfx_8_0_enum.h"
43 #include "gca/gfx_8_0_sh_mask.h"
44 #include "gca/gfx_8_0_enum.h"
45
46 #include "dce/dce_10_0_d.h"
47 #include "dce/dce_10_0_sh_mask.h"
48
/* GFX8 ring counts: one graphics ring and eight compute rings. */
49 #define GFX8_NUM_GFX_RINGS     1
50 #define GFX8_NUM_COMPUTE_RINGS 8
51
/*
 * Per-ASIC GB_ADDR_CONFIG constants.  The values equal the mmGB_ADDR_CONFIG
 * rows of iceland_golden_common_all (used for CHIP_TOPAZ),
 * cz_golden_common_all and tonga_golden_common_all further down in this file.
 */
52 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
53 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
54 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
55
/*
 * Field-placement helpers for the GB_TILE_MODE0 / GB_MACROTILE_MODE0 register
 * layouts.  Each macro only shifts x left by the matching *__SHIFT constant;
 * no masking is applied, so x must already fit within the target field.
 */
56 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
57 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
58 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
59 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
60 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
61 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
62 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
63 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
64 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
65
/*
 * Single-bit masks for RLC_CGTT_MGCG_OVERRIDE, occupying bits 0 through 5
 * (CPF, RLC, MGCG, CGCG, CGLS, GRBM in ascending bit order).
 */
66 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
67 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
68 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
69 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
70 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
71 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
72
73 /* BPM SERDES command codes: 1 = set, 0 = the "CLE" command (presumably "clear" - confirm) */
74 #define SET_BPM_SERDES_CMD    1
75 #define CLE_BPM_SERDES_CMD    0
76
77 /* BPM register addresses; enumerators are consecutive starting at 0 */
78 enum {
79         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
80         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
81         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
82         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
83         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
84         BPM_REG_FGCG_MAX            /* one past the last register (value 5), not an address itself */
85 };
86
/*
 * Firmware images declared for this module, grouped per ASIC
 * (carrizo, stoney, tonga, topaz, fiji).  Every group lists ce, pfp, me, mec
 * and rlc images; carrizo, tonga and fiji additionally list a mec2 image,
 * while stoney and topaz do not.
 */
87 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
88 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
90 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
91 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
93
94 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
95 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
97 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
98 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
99
100 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
101 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
102 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
103 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
104 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
105 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
108 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
110 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
111 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
112
113 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
114 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
116 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
117 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
118 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
119
/*
 * GDS register names per VMID.  One row for each of VMID 0..15, and each row
 * lists that VMID's BASE, SIZE, GWS and OA registers in that order.
 * The member names of struct amdgpu_gds_reg_offset are declared outside this
 * file section; the column order here has to match that declaration.
 */
120 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
121 {
122         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
123         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
124         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
125         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
126         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
127         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
128         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
129         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
130         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
131         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
132         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
133         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
134         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
135         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
136         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
137         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
138 };
139
/*
 * Tonga golden settings.  Passed to amdgpu_program_register_sequence() for
 * CHIP_TONGA by gfx_v8_0_init_golden_registers(), after tonga_mgcg_cgcg_init
 * and before tonga_golden_common_all.
 *
 * Flat u32 list laid out three values per row, the first being an mm*
 * register name.  The second and third columns are presumably an AND-mask
 * and the value to program - confirm against
 * amdgpu_program_register_sequence(), which is defined outside this file.
 */
140 static const u32 golden_settings_tonga_a11[] =
141 {
142         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
143         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
144         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
145         mmGB_GPU_ID, 0x0000000f, 0x00000000,
146         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
147         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
148         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
149         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
150         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        /* NOTE(review): third column has bits outside the second column - confirm intended */
151         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
152         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
153         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
154         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
155         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
156         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
157 };
158
/*
 * Tonga common golden values.  Last of the three tables programmed for
 * CHIP_TONGA by gfx_v8_0_init_golden_registers().
 *
 * Three u32 values per row, starting with an mm* register name; every row
 * here carries 0xffffffff in the second column.  The mmGB_ADDR_CONFIG value
 * equals TONGA_GB_ADDR_CONFIG_GOLDEN.
 */
159 static const u32 tonga_golden_common_all[] =
160 {
161         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
162         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
163         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
164         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
165         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
166         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
167         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
168         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
169 };
170
/*
 * Tonga clock-gating init table (name suggests MGCG/CGCG setup).  First of
 * the three tables programmed for CHIP_TONGA by
 * gfx_v8_0_init_golden_registers().
 *
 * Three u32 values per row, starting with an mm* register name.  The rows
 * are grouped as: the RLC override register, an mmGRBM_GFX_INDEX row, the
 * CGTT clock-control registers, a second mmGRBM_GFX_INDEX row, the per-CU
 * CGTS registers for CU0..CU7, and finally four trailing control registers.
 * Row order is part of the data; do not reorder.
 */
171 static const u32 tonga_mgcg_cgcg_init[] =
172 {
173         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
174         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
175         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
176         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
177         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
178         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
179         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
180         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
181         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
182         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
183         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
184         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
185         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
186         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
187         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
188         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
189         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
190         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
191         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
192         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
193         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
194         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
195         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
196         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
197         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
198         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
199         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
200         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
201         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
202         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
203         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        /* Per-CU rows: CU0 and CU4 use the *_TA_SQC_CTRL_REG name, the others *_TA_CTRL_REG. */
204         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
205         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
206         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
207         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
208         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
209         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
210         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
211         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
212         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
213         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
214         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
215         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
216         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
217         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
218         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
219         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
220         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
221         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
222         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
223         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
224         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
225         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
226         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
227         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
228         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
229         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
230         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
231         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
232         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
233         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
234         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
235         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
236         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
237         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
238         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
239         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
240         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
241         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
242         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
243         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
244         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
245         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
246         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
247         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
248 };
249
/*
 * Fiji common golden values.  Last of the three tables programmed for
 * CHIP_FIJI by gfx_v8_0_init_golden_registers().
 *
 * Three u32 values per row, starting with an mm* register name.
 * mmGRBM_GFX_INDEX is listed twice with the same values: once at the top and
 * once more directly before the mmSPI_CONFIG_CNTL_1 row.
 */
250 static const u32 fiji_golden_common_all[] =
251 {
252         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
253         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
254         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
255         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
256         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
257         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
258         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
259         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
260         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
261         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
262 };
263
/*
 * Fiji golden settings.  Second of the three tables programmed for CHIP_FIJI
 * by gfx_v8_0_init_golden_registers().
 *
 * Three u32 values per row, starting with an mm* register name; the second
 * and third columns are presumably mask and value - confirm against
 * amdgpu_program_register_sequence().
 */
264 static const u32 golden_settings_fiji_a10[] =
265 {
266         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
267         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
268         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
269         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        /* NOTE(review): third column has bits outside the second column - confirm intended */
270         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
271         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
272         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
273         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
274         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
275         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
276         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
277 };
278
/*
 * Fiji clock-gating init table.  First of the three tables programmed for
 * CHIP_FIJI by gfx_v8_0_init_golden_registers().
 *
 * Three u32 values per row, starting with an mm* register name.  Unlike the
 * tonga, iceland and cz variants in this file, this table contains no per-CU
 * mmCGTS_CU* rows: the second mmGRBM_GFX_INDEX row is followed directly by
 * mmCGTS_SM_CTRL_REG.
 */
279 static const u32 fiji_mgcg_cgcg_init[] =
280 {
281         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
282         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
283         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
284         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
285         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
286         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
287         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
288         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
289         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
290         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
291         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
292         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
293         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
294         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
295         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
296         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
297         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
298         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
299         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
300         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
301         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
302         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
303         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
304         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
305         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
306         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
307         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
308         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
309         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
310         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
311         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
312         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
313         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
314         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
315         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
316 };
317
/*
 * Iceland golden settings.  Second of the three tables programmed for
 * CHIP_TOPAZ by gfx_v8_0_init_golden_registers() (the TOPAZ case uses the
 * iceland_* tables).
 *
 * Three u32 values per row, starting with an mm* register name; the second
 * and third columns are presumably mask and value - confirm against
 * amdgpu_program_register_sequence().
 */
318 static const u32 golden_settings_iceland_a11[] =
319 {
320         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
321         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
322         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
323         mmGB_GPU_ID, 0x0000000f, 0x00000000,
324         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
325         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
326         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
327         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
328         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
329         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
330         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
331         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
332         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
333         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
334         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
335 };
336
/*
 * Iceland common golden values.  Last of the three tables programmed for
 * CHIP_TOPAZ by gfx_v8_0_init_golden_registers().
 *
 * Three u32 values per row, starting with an mm* register name; every row
 * carries 0xffffffff in the second column.  The mmGB_ADDR_CONFIG value equals
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN.
 */
337 static const u32 iceland_golden_common_all[] =
338 {
339         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
340         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
341         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
342         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
343         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
344         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
345         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
346         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
347 };
348
/*
 * Iceland clock-gating init table.  First of the three tables programmed for
 * CHIP_TOPAZ by gfx_v8_0_init_golden_registers().
 *
 * Three u32 values per row, starting with an mm* register name.  Differences
 * from the tonga/cz variants visible in this file: the per-CU rows cover
 * CU0..CU5 only, several CGTT values differ (CP/CPC/CPF use 0xc0000100, TCI
 * uses 0xff000100), the CU0/CU4 TA_SQC rows use 0x0f840f87, and there is no
 * trailing mmCP_MEM_SLP_CNTL row.
 */
349 static const u32 iceland_mgcg_cgcg_init[] =
350 {
351         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
352         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
353         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
354         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
355         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
356         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
357         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
358         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
359         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
360         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
361         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
362         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
363         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
364         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
365         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
366         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
367         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
368         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
369         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
370         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
371         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
372         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
373         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
374         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
375         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
376         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
377         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
378         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
379         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
380         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
381         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
382         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
383         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
384         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
385         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
386         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
387         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
388         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
389         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
390         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
391         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
392         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
393         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
394         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
395         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
396         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
397         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
398         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
399         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
400         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
401         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
402         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
403         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
404         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
405         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
406         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
407         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
408         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
409         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
410         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
411         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
412         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
413         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
414         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
415 };
416
/*
 * Carrizo golden settings.  Second of the three tables programmed for
 * CHIP_CARRIZO by gfx_v8_0_init_golden_registers().
 *
 * Three u32 values per row, starting with an mm* register name; the second
 * and third columns are presumably mask and value - confirm against
 * amdgpu_program_register_sequence().
 */
417 static const u32 cz_golden_settings_a11[] =
418 {
419         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
420         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
421         mmGB_GPU_ID, 0x0000000f, 0x00000000,
422         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
423         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
424         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
425         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
426         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        /* NOTE(review): third column (0xf3) has bits outside the second column (0xf) - confirm intended */
427         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
428         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
429 };
430
/*
 * Carrizo common golden values.  Last of the three tables programmed for
 * CHIP_CARRIZO by gfx_v8_0_init_golden_registers().
 *
 * Three u32 values per row, starting with an mm* register name; every row
 * carries 0xffffffff in the second column.  The mmGB_ADDR_CONFIG value equals
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN.
 */
431 static const u32 cz_golden_common_all[] =
432 {
433         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
434         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
435         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
436         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
437         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
438         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
439         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
440         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
441 };
442
/*
 * Carrizo clock-gating init table.  First of the three tables programmed for
 * CHIP_CARRIZO by gfx_v8_0_init_golden_registers().
 *
 * Three u32 values per row, starting with an mm* register name.  Same row
 * layout as tonga_mgcg_cgcg_init (CGTT block, per-CU rows for CU0..CU7, four
 * trailing rows); the visible value differences are mmCGTT_CPF_CLK_CTRL
 * (0x00000100 here) and mmRLC_CGCG_CGLS_CTRL (0x0020003f here).
 */
443 static const u32 cz_mgcg_cgcg_init[] =
444 {
445         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
446         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
447         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
451         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
452         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
453         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
454         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
455         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
456         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
457         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
458         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
459         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
460         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
461         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
462         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
463         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
464         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
465         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
466         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
467         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
468         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
469         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
470         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
471         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
472         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
473         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
474         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
475         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
476         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
477         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
478         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
479         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
480         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
481         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
482         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
483         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
484         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
485         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
486         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
487         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
488         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
489         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
490         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
491         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
492         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
493         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
494         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
495         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
496         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
497         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
498         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
499         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
500         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
501         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
502         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
503         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
504         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
505         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
506         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
507         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
508         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
509         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
510         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
511         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
512         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
513         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
514         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
515         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
516         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
517         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
518         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
519         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
520 };
521
/*
 * Stoney golden settings.  Second of the three tables programmed for
 * CHIP_STONEY by gfx_v8_0_init_golden_registers().
 *
 * Three u32 values per row, starting with an mm* register name; the second
 * and third columns are presumably mask and value - confirm against
 * amdgpu_program_register_sequence().
 */
522 static const u32 stoney_golden_settings_a11[] =
523 {
524         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
525         mmGB_GPU_ID, 0x0000000f, 0x00000000,
526         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
527         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
528         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
529         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
530         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
531         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        /* NOTE(review): third column (0xf1) has bits outside the second column (0xf) - confirm intended */
532         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
533         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
534 };
535
/*
 * Stoney common golden values.  Last of the three tables programmed for
 * CHIP_STONEY by gfx_v8_0_init_golden_registers().
 *
 * Three u32 values per row, starting with an mm* register name; every row
 * carries 0xffffffff in the second column.  The mmGB_ADDR_CONFIG value here
 * (0x12010001) differs from the other ASIC tables in this file.
 */
536 static const u32 stoney_golden_common_all[] =
537 {
538         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
539         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
540         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
541         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
542         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
543         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
544         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
545         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
546 };
547
/*
 * Stoney clock-gating init table.  First of the three tables programmed for
 * CHIP_STONEY by gfx_v8_0_init_golden_registers().
 *
 * Three u32 values per row, starting with an mm* register name.  Much
 * shorter than the other *_mgcg_cgcg_init tables in this file: it has no
 * mmRLC_CGTT_MGCG_OVERRIDE row, no CGTT block and no per-CU rows, and it is
 * the only one listing mmRLC_MEM_SLP_CNTL and mmATC_MISC_CG.
 */
548 static const u32 stoney_mgcg_cgcg_init[] =
549 {
550         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
551         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
552         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
553         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
554         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
555         mmATC_MISC_CG, 0xffffffff, 0x000c0200,
556 };
557
/*
 * Forward declarations of file-local helpers; their definitions are not in
 * this part of the file.  Each takes the amdgpu device and returns nothing.
 */
558 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
559 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
560 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
561
562 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
563 {
564         switch (adev->asic_type) {
565         case CHIP_TOPAZ:
566                 amdgpu_program_register_sequence(adev,
567                                                  iceland_mgcg_cgcg_init,
568                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
569                 amdgpu_program_register_sequence(adev,
570                                                  golden_settings_iceland_a11,
571                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
572                 amdgpu_program_register_sequence(adev,
573                                                  iceland_golden_common_all,
574                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
575                 break;
576         case CHIP_FIJI:
577                 amdgpu_program_register_sequence(adev,
578                                                  fiji_mgcg_cgcg_init,
579                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
580                 amdgpu_program_register_sequence(adev,
581                                                  golden_settings_fiji_a10,
582                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
583                 amdgpu_program_register_sequence(adev,
584                                                  fiji_golden_common_all,
585                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
586                 break;
587
588         case CHIP_TONGA:
589                 amdgpu_program_register_sequence(adev,
590                                                  tonga_mgcg_cgcg_init,
591                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
592                 amdgpu_program_register_sequence(adev,
593                                                  golden_settings_tonga_a11,
594                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
595                 amdgpu_program_register_sequence(adev,
596                                                  tonga_golden_common_all,
597                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
598                 break;
599         case CHIP_CARRIZO:
600                 amdgpu_program_register_sequence(adev,
601                                                  cz_mgcg_cgcg_init,
602                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
603                 amdgpu_program_register_sequence(adev,
604                                                  cz_golden_settings_a11,
605                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
606                 amdgpu_program_register_sequence(adev,
607                                                  cz_golden_common_all,
608                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
609                 break;
610         case CHIP_STONEY:
611                 amdgpu_program_register_sequence(adev,
612                                                  stoney_mgcg_cgcg_init,
613                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
614                 amdgpu_program_register_sequence(adev,
615                                                  stoney_golden_settings_a11,
616                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
617                 amdgpu_program_register_sequence(adev,
618                                                  stoney_golden_common_all,
619                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
620                 break;
621         default:
622                 break;
623         }
624 }
625
626 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
627 {
628         int i;
629
630         adev->gfx.scratch.num_reg = 7;
631         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
632         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
633                 adev->gfx.scratch.free[i] = true;
634                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
635         }
636 }
637
638 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
639 {
640         struct amdgpu_device *adev = ring->adev;
641         uint32_t scratch;
642         uint32_t tmp = 0;
643         unsigned i;
644         int r;
645
646         r = amdgpu_gfx_scratch_get(adev, &scratch);
647         if (r) {
648                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
649                 return r;
650         }
651         WREG32(scratch, 0xCAFEDEAD);
652         r = amdgpu_ring_alloc(ring, 3);
653         if (r) {
654                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
655                           ring->idx, r);
656                 amdgpu_gfx_scratch_free(adev, scratch);
657                 return r;
658         }
659         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
660         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
661         amdgpu_ring_write(ring, 0xDEADBEEF);
662         amdgpu_ring_commit(ring);
663
664         for (i = 0; i < adev->usec_timeout; i++) {
665                 tmp = RREG32(scratch);
666                 if (tmp == 0xDEADBEEF)
667                         break;
668                 DRM_UDELAY(1);
669         }
670         if (i < adev->usec_timeout) {
671                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
672                          ring->idx, i);
673         } else {
674                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
675                           ring->idx, scratch, tmp);
676                 r = -EINVAL;
677         }
678         amdgpu_gfx_scratch_free(adev, scratch);
679         return r;
680 }
681
682 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
683 {
684         struct amdgpu_device *adev = ring->adev;
685         struct amdgpu_ib ib;
686         struct fence *f = NULL;
687         uint32_t scratch;
688         uint32_t tmp = 0;
689         unsigned i;
690         int r;
691
692         r = amdgpu_gfx_scratch_get(adev, &scratch);
693         if (r) {
694                 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
695                 return r;
696         }
697         WREG32(scratch, 0xCAFEDEAD);
698         memset(&ib, 0, sizeof(ib));
699         r = amdgpu_ib_get(adev, NULL, 256, &ib);
700         if (r) {
701                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
702                 goto err1;
703         }
704         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
705         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
706         ib.ptr[2] = 0xDEADBEEF;
707         ib.length_dw = 3;
708
709         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
710         if (r)
711                 goto err2;
712
713         r = fence_wait(f, false);
714         if (r) {
715                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
716                 goto err2;
717         }
718         for (i = 0; i < adev->usec_timeout; i++) {
719                 tmp = RREG32(scratch);
720                 if (tmp == 0xDEADBEEF)
721                         break;
722                 DRM_UDELAY(1);
723         }
724         if (i < adev->usec_timeout) {
725                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
726                          ring->idx, i);
727                 goto err2;
728         } else {
729                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
730                           scratch, tmp);
731                 r = -EINVAL;
732         }
733 err2:
734         fence_put(f);
735         amdgpu_ib_free(adev, &ib, NULL);
736         fence_put(f);
737 err1:
738         amdgpu_gfx_scratch_free(adev, scratch);
739         return r;
740 }
741
742 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
743 {
744         const char *chip_name;
745         char fw_name[30];
746         int err;
747         struct amdgpu_firmware_info *info = NULL;
748         const struct common_firmware_header *header = NULL;
749         const struct gfx_firmware_header_v1_0 *cp_hdr;
750
751         DRM_DEBUG("\n");
752
753         switch (adev->asic_type) {
754         case CHIP_TOPAZ:
755                 chip_name = "topaz";
756                 break;
757         case CHIP_TONGA:
758                 chip_name = "tonga";
759                 break;
760         case CHIP_CARRIZO:
761                 chip_name = "carrizo";
762                 break;
763         case CHIP_FIJI:
764                 chip_name = "fiji";
765                 break;
766         case CHIP_STONEY:
767                 chip_name = "stoney";
768                 break;
769         default:
770                 BUG();
771         }
772
773         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
774         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
775         if (err)
776                 goto out;
777         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
778         if (err)
779                 goto out;
780         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
781         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
782         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
783
784         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
785         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
786         if (err)
787                 goto out;
788         err = amdgpu_ucode_validate(adev->gfx.me_fw);
789         if (err)
790                 goto out;
791         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
792         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
793         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
794
795         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
796         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
797         if (err)
798                 goto out;
799         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
800         if (err)
801                 goto out;
802         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
803         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
804         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
805
806         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
807         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
808         if (err)
809                 goto out;
810         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
811         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
812         adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
813         adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
814
815         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
816         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
817         if (err)
818                 goto out;
819         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
820         if (err)
821                 goto out;
822         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
823         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
824         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
825
826         if ((adev->asic_type != CHIP_STONEY) &&
827             (adev->asic_type != CHIP_TOPAZ)) {
828                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
829                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
830                 if (!err) {
831                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
832                         if (err)
833                                 goto out;
834                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
835                                 adev->gfx.mec2_fw->data;
836                         adev->gfx.mec2_fw_version =
837                                 le32_to_cpu(cp_hdr->header.ucode_version);
838                         adev->gfx.mec2_feature_version =
839                                 le32_to_cpu(cp_hdr->ucode_feature_version);
840                 } else {
841                         err = 0;
842                         adev->gfx.mec2_fw = NULL;
843                 }
844         }
845
846         if (adev->firmware.smu_load) {
847                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
848                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
849                 info->fw = adev->gfx.pfp_fw;
850                 header = (const struct common_firmware_header *)info->fw->data;
851                 adev->firmware.fw_size +=
852                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
853
854                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
855                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
856                 info->fw = adev->gfx.me_fw;
857                 header = (const struct common_firmware_header *)info->fw->data;
858                 adev->firmware.fw_size +=
859                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
860
861                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
862                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
863                 info->fw = adev->gfx.ce_fw;
864                 header = (const struct common_firmware_header *)info->fw->data;
865                 adev->firmware.fw_size +=
866                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
867
868                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
869                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
870                 info->fw = adev->gfx.rlc_fw;
871                 header = (const struct common_firmware_header *)info->fw->data;
872                 adev->firmware.fw_size +=
873                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
874
875                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
876                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
877                 info->fw = adev->gfx.mec_fw;
878                 header = (const struct common_firmware_header *)info->fw->data;
879                 adev->firmware.fw_size +=
880                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
881
882                 if (adev->gfx.mec2_fw) {
883                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
884                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
885                         info->fw = adev->gfx.mec2_fw;
886                         header = (const struct common_firmware_header *)info->fw->data;
887                         adev->firmware.fw_size +=
888                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
889                 }
890
891         }
892
893 out:
894         if (err) {
895                 dev_err(adev->dev,
896                         "gfx8: Failed to load firmware \"%s\"\n",
897                         fw_name);
898                 release_firmware(adev->gfx.pfp_fw);
899                 adev->gfx.pfp_fw = NULL;
900                 release_firmware(adev->gfx.me_fw);
901                 adev->gfx.me_fw = NULL;
902                 release_firmware(adev->gfx.ce_fw);
903                 adev->gfx.ce_fw = NULL;
904                 release_firmware(adev->gfx.rlc_fw);
905                 adev->gfx.rlc_fw = NULL;
906                 release_firmware(adev->gfx.mec_fw);
907                 adev->gfx.mec_fw = NULL;
908                 release_firmware(adev->gfx.mec2_fw);
909                 adev->gfx.mec2_fw = NULL;
910         }
911         return err;
912 }
913
914 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
915 {
916         int r;
917
918         if (adev->gfx.mec.hpd_eop_obj) {
919                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
920                 if (unlikely(r != 0))
921                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
922                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
923                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
924
925                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
926                 adev->gfx.mec.hpd_eop_obj = NULL;
927         }
928 }
929
930 #define MEC_HPD_SIZE 2048
931
932 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
933 {
934         int r;
935         u32 *hpd;
936
937         /*
938          * we assign only 1 pipe because all other pipes will
939          * be handled by KFD
940          */
941         adev->gfx.mec.num_mec = 1;
942         adev->gfx.mec.num_pipe = 1;
943         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
944
945         if (adev->gfx.mec.hpd_eop_obj == NULL) {
946                 r = amdgpu_bo_create(adev,
947                                      adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
948                                      PAGE_SIZE, true,
949                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
950                                      &adev->gfx.mec.hpd_eop_obj);
951                 if (r) {
952                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
953                         return r;
954                 }
955         }
956
957         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
958         if (unlikely(r != 0)) {
959                 gfx_v8_0_mec_fini(adev);
960                 return r;
961         }
962         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
963                           &adev->gfx.mec.hpd_eop_gpu_addr);
964         if (r) {
965                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
966                 gfx_v8_0_mec_fini(adev);
967                 return r;
968         }
969         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
970         if (r) {
971                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
972                 gfx_v8_0_mec_fini(adev);
973                 return r;
974         }
975
976         memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
977
978         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
979         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
980
981         return 0;
982 }
983
984 static const u32 vgpr_init_compute_shader[] =
985 {
986         0x7e000209, 0x7e020208,
987         0x7e040207, 0x7e060206,
988         0x7e080205, 0x7e0a0204,
989         0x7e0c0203, 0x7e0e0202,
990         0x7e100201, 0x7e120200,
991         0x7e140209, 0x7e160208,
992         0x7e180207, 0x7e1a0206,
993         0x7e1c0205, 0x7e1e0204,
994         0x7e200203, 0x7e220202,
995         0x7e240201, 0x7e260200,
996         0x7e280209, 0x7e2a0208,
997         0x7e2c0207, 0x7e2e0206,
998         0x7e300205, 0x7e320204,
999         0x7e340203, 0x7e360202,
1000         0x7e380201, 0x7e3a0200,
1001         0x7e3c0209, 0x7e3e0208,
1002         0x7e400207, 0x7e420206,
1003         0x7e440205, 0x7e460204,
1004         0x7e480203, 0x7e4a0202,
1005         0x7e4c0201, 0x7e4e0200,
1006         0x7e500209, 0x7e520208,
1007         0x7e540207, 0x7e560206,
1008         0x7e580205, 0x7e5a0204,
1009         0x7e5c0203, 0x7e5e0202,
1010         0x7e600201, 0x7e620200,
1011         0x7e640209, 0x7e660208,
1012         0x7e680207, 0x7e6a0206,
1013         0x7e6c0205, 0x7e6e0204,
1014         0x7e700203, 0x7e720202,
1015         0x7e740201, 0x7e760200,
1016         0x7e780209, 0x7e7a0208,
1017         0x7e7c0207, 0x7e7e0206,
1018         0xbf8a0000, 0xbf810000,
1019 };
1020
1021 static const u32 sgpr_init_compute_shader[] =
1022 {
1023         0xbe8a0100, 0xbe8c0102,
1024         0xbe8e0104, 0xbe900106,
1025         0xbe920108, 0xbe940100,
1026         0xbe960102, 0xbe980104,
1027         0xbe9a0106, 0xbe9c0108,
1028         0xbe9e0100, 0xbea00102,
1029         0xbea20104, 0xbea40106,
1030         0xbea60108, 0xbea80100,
1031         0xbeaa0102, 0xbeac0104,
1032         0xbeae0106, 0xbeb00108,
1033         0xbeb20100, 0xbeb40102,
1034         0xbeb60104, 0xbeb80106,
1035         0xbeba0108, 0xbebc0100,
1036         0xbebe0102, 0xbec00104,
1037         0xbec20106, 0xbec40108,
1038         0xbec60100, 0xbec80102,
1039         0xbee60004, 0xbee70005,
1040         0xbeea0006, 0xbeeb0007,
1041         0xbee80008, 0xbee90009,
1042         0xbefc0000, 0xbf8a0000,
1043         0xbf810000, 0x00000000,
1044 };
1045
1046 static const u32 vgpr_init_regs[] =
1047 {
1048         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1049         mmCOMPUTE_RESOURCE_LIMITS, 0,
1050         mmCOMPUTE_NUM_THREAD_X, 256*4,
1051         mmCOMPUTE_NUM_THREAD_Y, 1,
1052         mmCOMPUTE_NUM_THREAD_Z, 1,
1053         mmCOMPUTE_PGM_RSRC2, 20,
1054         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1055         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1056         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1057         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1058         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1059         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1060         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1061         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1062         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1063         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1064 };
1065
1066 static const u32 sgpr1_init_regs[] =
1067 {
1068         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1069         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1070         mmCOMPUTE_NUM_THREAD_X, 256*5,
1071         mmCOMPUTE_NUM_THREAD_Y, 1,
1072         mmCOMPUTE_NUM_THREAD_Z, 1,
1073         mmCOMPUTE_PGM_RSRC2, 20,
1074         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1075         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1076         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1077         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1078         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1079         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1080         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1081         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1082         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1083         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1084 };
1085
1086 static const u32 sgpr2_init_regs[] =
1087 {
1088         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1089         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1090         mmCOMPUTE_NUM_THREAD_X, 256*5,
1091         mmCOMPUTE_NUM_THREAD_Y, 1,
1092         mmCOMPUTE_NUM_THREAD_Z, 1,
1093         mmCOMPUTE_PGM_RSRC2, 20,
1094         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1095         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1096         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1097         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1098         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1099         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1100         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1101         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1102         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1103         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1104 };
1105
1106 static const u32 sec_ded_counter_registers[] =
1107 {
1108         mmCPC_EDC_ATC_CNT,
1109         mmCPC_EDC_SCRATCH_CNT,
1110         mmCPC_EDC_UCODE_CNT,
1111         mmCPF_EDC_ATC_CNT,
1112         mmCPF_EDC_ROQ_CNT,
1113         mmCPF_EDC_TAG_CNT,
1114         mmCPG_EDC_ATC_CNT,
1115         mmCPG_EDC_DMA_CNT,
1116         mmCPG_EDC_TAG_CNT,
1117         mmDC_EDC_CSINVOC_CNT,
1118         mmDC_EDC_RESTORE_CNT,
1119         mmDC_EDC_STATE_CNT,
1120         mmGDS_EDC_CNT,
1121         mmGDS_EDC_GRBM_CNT,
1122         mmGDS_EDC_OA_DED,
1123         mmSPI_EDC_CNT,
1124         mmSQC_ATC_EDC_GATCL1_CNT,
1125         mmSQC_EDC_CNT,
1126         mmSQ_EDC_DED_CNT,
1127         mmSQ_EDC_INFO,
1128         mmSQ_EDC_SEC_CNT,
1129         mmTCC_EDC_CNT,
1130         mmTCP_ATC_EDC_GATCL1_CNT,
1131         mmTCP_EDC_CNT,
1132         mmTD_EDC_CNT
1133 };
1134
1135 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1136 {
1137         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1138         struct amdgpu_ib ib;
1139         struct fence *f = NULL;
1140         int r, i;
1141         u32 tmp;
1142         unsigned total_size, vgpr_offset, sgpr_offset;
1143         u64 gpu_addr;
1144
1145         /* only supported on CZ */
1146         if (adev->asic_type != CHIP_CARRIZO)
1147                 return 0;
1148
1149         /* bail if the compute ring is not ready */
1150         if (!ring->ready)
1151                 return 0;
1152
1153         tmp = RREG32(mmGB_EDC_MODE);
1154         WREG32(mmGB_EDC_MODE, 0);
1155
1156         total_size =
1157                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1158         total_size +=
1159                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1160         total_size +=
1161                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1162         total_size = ALIGN(total_size, 256);
1163         vgpr_offset = total_size;
1164         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1165         sgpr_offset = total_size;
1166         total_size += sizeof(sgpr_init_compute_shader);
1167
1168         /* allocate an indirect buffer to put the commands in */
1169         memset(&ib, 0, sizeof(ib));
1170         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1171         if (r) {
1172                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1173                 return r;
1174         }
1175
1176         /* load the compute shaders */
1177         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1178                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1179
1180         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1181                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1182
1183         /* init the ib length to 0 */
1184         ib.length_dw = 0;
1185
1186         /* VGPR */
1187         /* write the register state for the compute dispatch */
1188         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1189                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1190                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1191                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1192         }
1193         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1194         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1195         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1196         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1197         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1198         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1199
1200         /* write dispatch packet */
1201         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1202         ib.ptr[ib.length_dw++] = 8; /* x */
1203         ib.ptr[ib.length_dw++] = 1; /* y */
1204         ib.ptr[ib.length_dw++] = 1; /* z */
1205         ib.ptr[ib.length_dw++] =
1206                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1207
1208         /* write CS partial flush packet */
1209         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1210         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1211
1212         /* SGPR1 */
1213         /* write the register state for the compute dispatch */
1214         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1215                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1216                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1217                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1218         }
1219         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1220         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1221         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1222         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1223         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1224         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1225
1226         /* write dispatch packet */
1227         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1228         ib.ptr[ib.length_dw++] = 8; /* x */
1229         ib.ptr[ib.length_dw++] = 1; /* y */
1230         ib.ptr[ib.length_dw++] = 1; /* z */
1231         ib.ptr[ib.length_dw++] =
1232                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1233
1234         /* write CS partial flush packet */
1235         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1236         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1237
1238         /* SGPR2 */
1239         /* write the register state for the compute dispatch */
1240         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1241                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1242                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1243                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1244         }
1245         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1246         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1247         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1248         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1249         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1250         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1251
1252         /* write dispatch packet */
1253         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1254         ib.ptr[ib.length_dw++] = 8; /* x */
1255         ib.ptr[ib.length_dw++] = 1; /* y */
1256         ib.ptr[ib.length_dw++] = 1; /* z */
1257         ib.ptr[ib.length_dw++] =
1258                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1259
1260         /* write CS partial flush packet */
1261         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1262         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1263
1264         /* shedule the ib on the ring */
1265         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1266         if (r) {
1267                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1268                 goto fail;
1269         }
1270
1271         /* wait for the GPU to finish processing the IB */
1272         r = fence_wait(f, false);
1273         if (r) {
1274                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1275                 goto fail;
1276         }
1277
1278         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1279         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1280         WREG32(mmGB_EDC_MODE, tmp);
1281
1282         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1283         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1284         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1285
1286
1287         /* read back registers to clear the counters */
1288         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1289                 RREG32(sec_ded_counter_registers[i]);
1290
1291 fail:
1292         fence_put(f);
1293         amdgpu_ib_free(adev, &ib, NULL);
1294         fence_put(f);
1295
1296         return r;
1297 }
1298
1299 static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1300 {
1301         u32 gb_addr_config;
1302         u32 mc_shared_chmap, mc_arb_ramcfg;
1303         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1304         u32 tmp;
1305
1306         switch (adev->asic_type) {
1307         case CHIP_TOPAZ:
1308                 adev->gfx.config.max_shader_engines = 1;
1309                 adev->gfx.config.max_tile_pipes = 2;
1310                 adev->gfx.config.max_cu_per_sh = 6;
1311                 adev->gfx.config.max_sh_per_se = 1;
1312                 adev->gfx.config.max_backends_per_se = 2;
1313                 adev->gfx.config.max_texture_channel_caches = 2;
1314                 adev->gfx.config.max_gprs = 256;
1315                 adev->gfx.config.max_gs_threads = 32;
1316                 adev->gfx.config.max_hw_contexts = 8;
1317
1318                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1319                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1320                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1321                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1322                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1323                 break;
1324         case CHIP_FIJI:
1325                 adev->gfx.config.max_shader_engines = 4;
1326                 adev->gfx.config.max_tile_pipes = 16;
1327                 adev->gfx.config.max_cu_per_sh = 16;
1328                 adev->gfx.config.max_sh_per_se = 1;
1329                 adev->gfx.config.max_backends_per_se = 4;
1330                 adev->gfx.config.max_texture_channel_caches = 16;
1331                 adev->gfx.config.max_gprs = 256;
1332                 adev->gfx.config.max_gs_threads = 32;
1333                 adev->gfx.config.max_hw_contexts = 8;
1334
1335                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1336                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1337                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1338                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1339                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1340                 break;
1341         case CHIP_TONGA:
1342                 adev->gfx.config.max_shader_engines = 4;
1343                 adev->gfx.config.max_tile_pipes = 8;
1344                 adev->gfx.config.max_cu_per_sh = 8;
1345                 adev->gfx.config.max_sh_per_se = 1;
1346                 adev->gfx.config.max_backends_per_se = 2;
1347                 adev->gfx.config.max_texture_channel_caches = 8;
1348                 adev->gfx.config.max_gprs = 256;
1349                 adev->gfx.config.max_gs_threads = 32;
1350                 adev->gfx.config.max_hw_contexts = 8;
1351
1352                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1353                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1354                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1355                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1356                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1357                 break;
1358         case CHIP_CARRIZO:
1359                 adev->gfx.config.max_shader_engines = 1;
1360                 adev->gfx.config.max_tile_pipes = 2;
1361                 adev->gfx.config.max_sh_per_se = 1;
1362                 adev->gfx.config.max_backends_per_se = 2;
1363
1364                 switch (adev->pdev->revision) {
1365                 case 0xc4:
1366                 case 0x84:
1367                 case 0xc8:
1368                 case 0xcc:
1369                 case 0xe1:
1370                 case 0xe3:
1371                         /* B10 */
1372                         adev->gfx.config.max_cu_per_sh = 8;
1373                         break;
1374                 case 0xc5:
1375                 case 0x81:
1376                 case 0x85:
1377                 case 0xc9:
1378                 case 0xcd:
1379                 case 0xe2:
1380                 case 0xe4:
1381                         /* B8 */
1382                         adev->gfx.config.max_cu_per_sh = 6;
1383                         break;
1384                 case 0xc6:
1385                 case 0xca:
1386                 case 0xce:
1387                 case 0x88:
1388                         /* B6 */
1389                         adev->gfx.config.max_cu_per_sh = 6;
1390                         break;
1391                 case 0xc7:
1392                 case 0x87:
1393                 case 0xcb:
1394                 case 0xe5:
1395                 case 0x89:
1396                 default:
1397                         /* B4 */
1398                         adev->gfx.config.max_cu_per_sh = 4;
1399                         break;
1400                 }
1401
1402                 adev->gfx.config.max_texture_channel_caches = 2;
1403                 adev->gfx.config.max_gprs = 256;
1404                 adev->gfx.config.max_gs_threads = 32;
1405                 adev->gfx.config.max_hw_contexts = 8;
1406
1407                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1408                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1409                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1410                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1411                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1412                 break;
1413         case CHIP_STONEY:
1414                 adev->gfx.config.max_shader_engines = 1;
1415                 adev->gfx.config.max_tile_pipes = 2;
1416                 adev->gfx.config.max_sh_per_se = 1;
1417                 adev->gfx.config.max_backends_per_se = 1;
1418
1419                 switch (adev->pdev->revision) {
1420                 case 0xc0:
1421                 case 0xc1:
1422                 case 0xc2:
1423                 case 0xc4:
1424                 case 0xc8:
1425                 case 0xc9:
1426                         adev->gfx.config.max_cu_per_sh = 3;
1427                         break;
1428                 case 0xd0:
1429                 case 0xd1:
1430                 case 0xd2:
1431                 default:
1432                         adev->gfx.config.max_cu_per_sh = 2;
1433                         break;
1434                 }
1435
1436                 adev->gfx.config.max_texture_channel_caches = 2;
1437                 adev->gfx.config.max_gprs = 256;
1438                 adev->gfx.config.max_gs_threads = 16;
1439                 adev->gfx.config.max_hw_contexts = 8;
1440
1441                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1442                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1443                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1444                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1445                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1446                 break;
1447         default:
1448                 adev->gfx.config.max_shader_engines = 2;
1449                 adev->gfx.config.max_tile_pipes = 4;
1450                 adev->gfx.config.max_cu_per_sh = 2;
1451                 adev->gfx.config.max_sh_per_se = 1;
1452                 adev->gfx.config.max_backends_per_se = 2;
1453                 adev->gfx.config.max_texture_channel_caches = 4;
1454                 adev->gfx.config.max_gprs = 256;
1455                 adev->gfx.config.max_gs_threads = 32;
1456                 adev->gfx.config.max_hw_contexts = 8;
1457
1458                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1459                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1460                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1461                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1462                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1463                 break;
1464         }
1465
1466         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1467         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1468         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1469
1470         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1471         adev->gfx.config.mem_max_burst_length_bytes = 256;
1472         if (adev->flags & AMD_IS_APU) {
1473                 /* Get memory bank mapping mode. */
1474                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1475                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1476                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1477
1478                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1479                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1480                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1481
1482                 /* Validate settings in case only one DIMM installed. */
1483                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1484                         dimm00_addr_map = 0;
1485                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1486                         dimm01_addr_map = 0;
1487                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1488                         dimm10_addr_map = 0;
1489                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1490                         dimm11_addr_map = 0;
1491
1492                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1493                 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be larger one. */
1494                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1495                         adev->gfx.config.mem_row_size_in_kb = 2;
1496                 else
1497                         adev->gfx.config.mem_row_size_in_kb = 1;
1498         } else {
1499                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1500                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1501                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1502                         adev->gfx.config.mem_row_size_in_kb = 4;
1503         }
1504
1505         adev->gfx.config.shader_engine_tile_size = 32;
1506         adev->gfx.config.num_gpus = 1;
1507         adev->gfx.config.multi_gpu_tile_size = 64;
1508
1509         /* fix up row size */
1510         switch (adev->gfx.config.mem_row_size_in_kb) {
1511         case 1:
1512         default:
1513                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1514                 break;
1515         case 2:
1516                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1517                 break;
1518         case 4:
1519                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1520                 break;
1521         }
1522         adev->gfx.config.gb_addr_config = gb_addr_config;
1523 }
1524
1525 static int gfx_v8_0_sw_init(void *handle)
1526 {
1527         int i, r;
1528         struct amdgpu_ring *ring;
1529         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1530
1531         /* EOP Event */
1532         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1533         if (r)
1534                 return r;
1535
1536         /* Privileged reg */
1537         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1538         if (r)
1539                 return r;
1540
1541         /* Privileged inst */
1542         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1543         if (r)
1544                 return r;
1545
1546         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1547
1548         gfx_v8_0_scratch_init(adev);
1549
1550         r = gfx_v8_0_init_microcode(adev);
1551         if (r) {
1552                 DRM_ERROR("Failed to load gfx firmware!\n");
1553                 return r;
1554         }
1555
1556         r = gfx_v8_0_mec_init(adev);
1557         if (r) {
1558                 DRM_ERROR("Failed to init MEC BOs!\n");
1559                 return r;
1560         }
1561
1562         /* set up the gfx ring */
1563         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1564                 ring = &adev->gfx.gfx_ring[i];
1565                 ring->ring_obj = NULL;
1566                 sprintf(ring->name, "gfx");
1567                 /* no gfx doorbells on iceland */
1568                 if (adev->asic_type != CHIP_TOPAZ) {
1569                         ring->use_doorbell = true;
1570                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1571                 }
1572
1573                 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1574                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1575                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1576                                      AMDGPU_RING_TYPE_GFX);
1577                 if (r)
1578                         return r;
1579         }
1580
1581         /* set up the compute queues */
1582         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1583                 unsigned irq_type;
1584
1585                 /* max 32 queues per MEC */
1586                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1587                         DRM_ERROR("Too many (%d) compute rings!\n", i);
1588                         break;
1589                 }
1590                 ring = &adev->gfx.compute_ring[i];
1591                 ring->ring_obj = NULL;
1592                 ring->use_doorbell = true;
1593                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1594                 ring->me = 1; /* first MEC */
1595                 ring->pipe = i / 8;
1596                 ring->queue = i % 8;
1597                 sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
1598                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1599                 /* type-2 packets are deprecated on MEC, use type-3 instead */
1600                 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1601                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1602                                      &adev->gfx.eop_irq, irq_type,
1603                                      AMDGPU_RING_TYPE_COMPUTE);
1604                 if (r)
1605                         return r;
1606         }
1607
1608         /* reserve GDS, GWS and OA resource for gfx */
1609         r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1610                         PAGE_SIZE, true,
1611                         AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1612                         NULL, &adev->gds.gds_gfx_bo);
1613         if (r)
1614                 return r;
1615
1616         r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1617                 PAGE_SIZE, true,
1618                 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1619                 NULL, &adev->gds.gws_gfx_bo);
1620         if (r)
1621                 return r;
1622
1623         r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1624                         PAGE_SIZE, true,
1625                         AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1626                         NULL, &adev->gds.oa_gfx_bo);
1627         if (r)
1628                 return r;
1629
1630         adev->gfx.ce_ram_size = 0x8000;
1631
1632         gfx_v8_0_gpu_early_init(adev);
1633
1634         return 0;
1635 }
1636
1637 static int gfx_v8_0_sw_fini(void *handle)
1638 {
1639         int i;
1640         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1641
1642         amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1643         amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1644         amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1645
1646         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1647                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1648         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1649                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1650
1651         gfx_v8_0_mec_fini(adev);
1652
1653         return 0;
1654 }
1655
1656 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1657 {
1658         uint32_t *modearray, *mod2array;
1659         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1660         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1661         u32 reg_offset;
1662
1663         modearray = adev->gfx.config.tile_mode_array;
1664         mod2array = adev->gfx.config.macrotile_mode_array;
1665
1666         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1667                 modearray[reg_offset] = 0;
1668
1669         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
1670                 mod2array[reg_offset] = 0;
1671
1672         switch (adev->asic_type) {
1673         case CHIP_TOPAZ:
1674                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1675                                 PIPE_CONFIG(ADDR_SURF_P2) |
1676                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1677                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1678                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1679                                 PIPE_CONFIG(ADDR_SURF_P2) |
1680                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1681                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1682                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1683                                 PIPE_CONFIG(ADDR_SURF_P2) |
1684                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1685                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1686                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1687                                 PIPE_CONFIG(ADDR_SURF_P2) |
1688                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1689                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1690                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1691                                 PIPE_CONFIG(ADDR_SURF_P2) |
1692                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1693                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1694                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1695                                 PIPE_CONFIG(ADDR_SURF_P2) |
1696                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1697                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1698                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1699                                 PIPE_CONFIG(ADDR_SURF_P2) |
1700                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1701                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1702                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1703                                 PIPE_CONFIG(ADDR_SURF_P2));
1704                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1705                                 PIPE_CONFIG(ADDR_SURF_P2) |
1706                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1707                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1708                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1709                                  PIPE_CONFIG(ADDR_SURF_P2) |
1710                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1711                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1712                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1713                                  PIPE_CONFIG(ADDR_SURF_P2) |
1714                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1715                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1716                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1717                                  PIPE_CONFIG(ADDR_SURF_P2) |
1718                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1719                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1720                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1721                                  PIPE_CONFIG(ADDR_SURF_P2) |
1722                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1723                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1724                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1725                                  PIPE_CONFIG(ADDR_SURF_P2) |
1726                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1727                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1728                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1729                                  PIPE_CONFIG(ADDR_SURF_P2) |
1730                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1731                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1732                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1733                                  PIPE_CONFIG(ADDR_SURF_P2) |
1734                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1735                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1736                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1737                                  PIPE_CONFIG(ADDR_SURF_P2) |
1738                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1739                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1740                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1741                                  PIPE_CONFIG(ADDR_SURF_P2) |
1742                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1743                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1744                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1745                                  PIPE_CONFIG(ADDR_SURF_P2) |
1746                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1747                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1748                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1749                                  PIPE_CONFIG(ADDR_SURF_P2) |
1750                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1751                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1752                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1753                                  PIPE_CONFIG(ADDR_SURF_P2) |
1754                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1755                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1756                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1757                                  PIPE_CONFIG(ADDR_SURF_P2) |
1758                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1759                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1760                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1761                                  PIPE_CONFIG(ADDR_SURF_P2) |
1762                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1763                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1764                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1765                                  PIPE_CONFIG(ADDR_SURF_P2) |
1766                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1767                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1768                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1769                                  PIPE_CONFIG(ADDR_SURF_P2) |
1770                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1771                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1772                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1773                                  PIPE_CONFIG(ADDR_SURF_P2) |
1774                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1775                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1776
1777                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1778                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1779                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1780                                 NUM_BANKS(ADDR_SURF_8_BANK));
1781                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1782                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1783                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1784                                 NUM_BANKS(ADDR_SURF_8_BANK));
1785                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1786                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1787                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1788                                 NUM_BANKS(ADDR_SURF_8_BANK));
1789                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1790                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1791                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1792                                 NUM_BANKS(ADDR_SURF_8_BANK));
1793                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1794                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1795                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1796                                 NUM_BANKS(ADDR_SURF_8_BANK));
1797                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1798                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1799                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1800                                 NUM_BANKS(ADDR_SURF_8_BANK));
1801                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1802                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1803                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1804                                 NUM_BANKS(ADDR_SURF_8_BANK));
1805                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1806                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1807                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1808                                 NUM_BANKS(ADDR_SURF_16_BANK));
1809                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1810                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1811                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1812                                 NUM_BANKS(ADDR_SURF_16_BANK));
1813                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1814                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1815                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1816                                  NUM_BANKS(ADDR_SURF_16_BANK));
1817                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1818                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1819                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1820                                  NUM_BANKS(ADDR_SURF_16_BANK));
1821                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1822                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1823                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1824                                  NUM_BANKS(ADDR_SURF_16_BANK));
1825                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1826                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1827                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1828                                  NUM_BANKS(ADDR_SURF_16_BANK));
1829                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1830                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1831                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1832                                  NUM_BANKS(ADDR_SURF_8_BANK));
1833
1834                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1835                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1836                             reg_offset != 23)
1837                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1838
1839                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1840                         if (reg_offset != 7)
1841                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1842
1843                 break;
1844         case CHIP_FIJI:
1845                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1846                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1847                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1848                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1849                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1850                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1851                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1852                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1853                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1854                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1855                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1856                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1857                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1858                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1859                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1860                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1861                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1862                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1863                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1864                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1865                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1866                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1867                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1868                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1869                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1870                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1871                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1872                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1873                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1874                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1875                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1876                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1877                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1878                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1879                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1880                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1881                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1882                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1883                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1884                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1885                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1886                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1887                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1888                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1889                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1890                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1891                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1892                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1893                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1894                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1895                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1896                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1897                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1898                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1899                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1900                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1901                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1902                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1903                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1904                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1905                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1906                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1907                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1908                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1909                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1910                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1911                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1912                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1913                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1914                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1915                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1916                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1917                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1918                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1919                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1920                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1921                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1922                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1923                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1924                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1925                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1926                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1927                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1928                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1929                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1930                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1931                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1932                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1933                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1934                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1935                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1936                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1937                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1938                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1939                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1940                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1941                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1942                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1943                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1944                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1945                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1946                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1947                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1948                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1949                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1950                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1951                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1952                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1953                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1954                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1955                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1956                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1957                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1958                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1959                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1960                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1961                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1962                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1963                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1964                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1965                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1966                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1967
1968                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1969                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1970                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1971                                 NUM_BANKS(ADDR_SURF_8_BANK));
1972                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1973                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1974                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1975                                 NUM_BANKS(ADDR_SURF_8_BANK));
1976                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1977                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1978                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1979                                 NUM_BANKS(ADDR_SURF_8_BANK));
1980                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1981                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1982                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1983                                 NUM_BANKS(ADDR_SURF_8_BANK));
1984                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1985                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1986                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1987                                 NUM_BANKS(ADDR_SURF_8_BANK));
1988                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1989                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1990                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1991                                 NUM_BANKS(ADDR_SURF_8_BANK));
1992                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1993                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1994                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1995                                 NUM_BANKS(ADDR_SURF_8_BANK));
1996                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1997                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1998                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1999                                 NUM_BANKS(ADDR_SURF_8_BANK));
2000                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2001                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2002                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2003                                 NUM_BANKS(ADDR_SURF_8_BANK));
2004                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2005                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2006                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2007                                  NUM_BANKS(ADDR_SURF_8_BANK));
2008                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2009                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2010                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2011                                  NUM_BANKS(ADDR_SURF_8_BANK));
2012                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2013                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2014                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2015                                  NUM_BANKS(ADDR_SURF_8_BANK));
2016                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2017                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2018                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2019                                  NUM_BANKS(ADDR_SURF_8_BANK));
2020                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2021                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2022                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2023                                  NUM_BANKS(ADDR_SURF_4_BANK));
2024
2025                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2026                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2027
2028                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2029                         if (reg_offset != 7)
2030                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2031
2032                 break;
2033         case CHIP_TONGA:
2034                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2035                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2036                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2037                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2038                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2039                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2040                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2041                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2042                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2043                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2044                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2045                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2046                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2047                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2048                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2049                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2050                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2051                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2052                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2053                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2054                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2055                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2056                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2057                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2058                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2059                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2060                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2061                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2062                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2063                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2064                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2065                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2066                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2067                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2068                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2069                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2070                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2071                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2072                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2073                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2074                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2075                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2076                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2077                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2078                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2079                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2080                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2081                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2082                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2083                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2084                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2085                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2086                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2087                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2088                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2089                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2090                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2091                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2092                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2093                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2094                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2095                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2096                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2097                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2098                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2099                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2100                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2101                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2102                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2103                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2104                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2105                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2106                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2107                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2108                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2109                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2110                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2111                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2112                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2113                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2114                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2115                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2116                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2117                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2118                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2119                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2120                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2121                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2122                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2123                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2124                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2125                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2126                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2127                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2128                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2129                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2130                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2131                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2132                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2133                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2134                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2135                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2136                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2137                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2138                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2139                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2140                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2141                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2142                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2143                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2144                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2145                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2146                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2147                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2148                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2149                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2150                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2151                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2152                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2153                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2154                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2155                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2156
2157                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2158                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2159                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2160                                 NUM_BANKS(ADDR_SURF_16_BANK));
2161                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2163                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2164                                 NUM_BANKS(ADDR_SURF_16_BANK));
2165                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2166                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2167                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2168                                 NUM_BANKS(ADDR_SURF_16_BANK));
2169                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2170                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2171                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2172                                 NUM_BANKS(ADDR_SURF_16_BANK));
2173                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2176                                 NUM_BANKS(ADDR_SURF_16_BANK));
2177                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2178                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2179                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2180                                 NUM_BANKS(ADDR_SURF_16_BANK));
2181                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2182                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2183                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2184                                 NUM_BANKS(ADDR_SURF_16_BANK));
2185                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2187                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2188                                 NUM_BANKS(ADDR_SURF_16_BANK));
2189                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2190                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2191                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2192                                 NUM_BANKS(ADDR_SURF_16_BANK));
2193                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2194                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2195                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2196                                  NUM_BANKS(ADDR_SURF_16_BANK));
2197                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2199                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2200                                  NUM_BANKS(ADDR_SURF_16_BANK));
2201                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2202                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2203                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2204                                  NUM_BANKS(ADDR_SURF_8_BANK));
2205                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2206                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2207                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2208                                  NUM_BANKS(ADDR_SURF_4_BANK));
2209                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2210                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2211                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2212                                  NUM_BANKS(ADDR_SURF_4_BANK));
2213
2214                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2215                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2216
2217                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2218                         if (reg_offset != 7)
2219                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2220
2221                 break;
2222         case CHIP_STONEY:
2223                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2224                                 PIPE_CONFIG(ADDR_SURF_P2) |
2225                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2226                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2227                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2228                                 PIPE_CONFIG(ADDR_SURF_P2) |
2229                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2230                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2231                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2232                                 PIPE_CONFIG(ADDR_SURF_P2) |
2233                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2234                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2235                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2236                                 PIPE_CONFIG(ADDR_SURF_P2) |
2237                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2238                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2239                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2240                                 PIPE_CONFIG(ADDR_SURF_P2) |
2241                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2242                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2243                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2244                                 PIPE_CONFIG(ADDR_SURF_P2) |
2245                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2246                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2247                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2248                                 PIPE_CONFIG(ADDR_SURF_P2) |
2249                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2250                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2251                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2252                                 PIPE_CONFIG(ADDR_SURF_P2));
2253                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2254                                 PIPE_CONFIG(ADDR_SURF_P2) |
2255                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2256                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2257                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2258                                  PIPE_CONFIG(ADDR_SURF_P2) |
2259                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2260                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2261                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2262                                  PIPE_CONFIG(ADDR_SURF_P2) |
2263                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2264                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2265                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2266                                  PIPE_CONFIG(ADDR_SURF_P2) |
2267                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2268                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2269                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2270                                  PIPE_CONFIG(ADDR_SURF_P2) |
2271                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2272                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2273                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2274                                  PIPE_CONFIG(ADDR_SURF_P2) |
2275                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2276                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2277                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2278                                  PIPE_CONFIG(ADDR_SURF_P2) |
2279                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2280                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2281                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2282                                  PIPE_CONFIG(ADDR_SURF_P2) |
2283                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2284                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2285                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2286                                  PIPE_CONFIG(ADDR_SURF_P2) |
2287                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2288                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2289                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2290                                  PIPE_CONFIG(ADDR_SURF_P2) |
2291                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2292                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2293                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2294                                  PIPE_CONFIG(ADDR_SURF_P2) |
2295                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2296                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2297                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2298                                  PIPE_CONFIG(ADDR_SURF_P2) |
2299                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2300                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2301                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2302                                  PIPE_CONFIG(ADDR_SURF_P2) |
2303                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2304                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2305                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2306                                  PIPE_CONFIG(ADDR_SURF_P2) |
2307                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2308                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2309                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2310                                  PIPE_CONFIG(ADDR_SURF_P2) |
2311                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2312                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2313                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2314                                  PIPE_CONFIG(ADDR_SURF_P2) |
2315                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2316                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2317                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2318                                  PIPE_CONFIG(ADDR_SURF_P2) |
2319                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2320                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2322                                  PIPE_CONFIG(ADDR_SURF_P2) |
2323                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2325
2326                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2327                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2328                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2329                                 NUM_BANKS(ADDR_SURF_8_BANK));
2330                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2331                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2332                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2333                                 NUM_BANKS(ADDR_SURF_8_BANK));
2334                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2336                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2337                                 NUM_BANKS(ADDR_SURF_8_BANK));
2338                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2339                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2340                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2341                                 NUM_BANKS(ADDR_SURF_8_BANK));
2342                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2343                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2344                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2345                                 NUM_BANKS(ADDR_SURF_8_BANK));
2346                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2347                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2348                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2349                                 NUM_BANKS(ADDR_SURF_8_BANK));
2350                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2351                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2352                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2353                                 NUM_BANKS(ADDR_SURF_8_BANK));
2354                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2355                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2356                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2357                                 NUM_BANKS(ADDR_SURF_16_BANK));
2358                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2359                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2360                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2361                                 NUM_BANKS(ADDR_SURF_16_BANK));
2362                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2363                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2364                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2365                                  NUM_BANKS(ADDR_SURF_16_BANK));
2366                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2367                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2368                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2369                                  NUM_BANKS(ADDR_SURF_16_BANK));
2370                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2371                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2372                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2373                                  NUM_BANKS(ADDR_SURF_16_BANK));
2374                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2376                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2377                                  NUM_BANKS(ADDR_SURF_16_BANK));
2378                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2380                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2381                                  NUM_BANKS(ADDR_SURF_8_BANK));
2382
2383                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2384                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2385                             reg_offset != 23)
2386                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2387
2388                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2389                         if (reg_offset != 7)
2390                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2391
2392                 break;
2393         default:
2394                 dev_warn(adev->dev,
2395                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2396                          adev->asic_type);
2397
2398         case CHIP_CARRIZO:
2399                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400                                 PIPE_CONFIG(ADDR_SURF_P2) |
2401                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2402                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2403                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2404                                 PIPE_CONFIG(ADDR_SURF_P2) |
2405                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2406                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2408                                 PIPE_CONFIG(ADDR_SURF_P2) |
2409                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2410                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2411                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2412                                 PIPE_CONFIG(ADDR_SURF_P2) |
2413                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2414                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2415                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2416                                 PIPE_CONFIG(ADDR_SURF_P2) |
2417                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2418                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2419                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2420                                 PIPE_CONFIG(ADDR_SURF_P2) |
2421                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2422                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2423                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424                                 PIPE_CONFIG(ADDR_SURF_P2) |
2425                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2426                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2427                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2428                                 PIPE_CONFIG(ADDR_SURF_P2));
2429                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430                                 PIPE_CONFIG(ADDR_SURF_P2) |
2431                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434                                  PIPE_CONFIG(ADDR_SURF_P2) |
2435                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2436                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2438                                  PIPE_CONFIG(ADDR_SURF_P2) |
2439                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2440                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2441                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2442                                  PIPE_CONFIG(ADDR_SURF_P2) |
2443                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2444                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2445                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2446                                  PIPE_CONFIG(ADDR_SURF_P2) |
2447                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2448                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2449                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2450                                  PIPE_CONFIG(ADDR_SURF_P2) |
2451                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2452                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2453                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2454                                  PIPE_CONFIG(ADDR_SURF_P2) |
2455                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2456                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2457                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2458                                  PIPE_CONFIG(ADDR_SURF_P2) |
2459                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2460                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2461                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2462                                  PIPE_CONFIG(ADDR_SURF_P2) |
2463                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2464                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2465                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2466                                  PIPE_CONFIG(ADDR_SURF_P2) |
2467                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2468                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2469                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2470                                  PIPE_CONFIG(ADDR_SURF_P2) |
2471                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2472                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2473                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2474                                  PIPE_CONFIG(ADDR_SURF_P2) |
2475                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2476                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2477                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2478                                  PIPE_CONFIG(ADDR_SURF_P2) |
2479                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2480                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2481                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2482                                  PIPE_CONFIG(ADDR_SURF_P2) |
2483                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2484                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2485                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2486                                  PIPE_CONFIG(ADDR_SURF_P2) |
2487                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2488                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2489                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2490                                  PIPE_CONFIG(ADDR_SURF_P2) |
2491                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2492                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2493                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2494                                  PIPE_CONFIG(ADDR_SURF_P2) |
2495                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2496                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2497                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2498                                  PIPE_CONFIG(ADDR_SURF_P2) |
2499                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2500                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2501
2502                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2504                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2505                                 NUM_BANKS(ADDR_SURF_8_BANK));
2506                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2508                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2509                                 NUM_BANKS(ADDR_SURF_8_BANK));
2510                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2513                                 NUM_BANKS(ADDR_SURF_8_BANK));
2514                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2516                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2517                                 NUM_BANKS(ADDR_SURF_8_BANK));
2518                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2519                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2520                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2521                                 NUM_BANKS(ADDR_SURF_8_BANK));
2522                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2524                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2525                                 NUM_BANKS(ADDR_SURF_8_BANK));
2526                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2528                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529                                 NUM_BANKS(ADDR_SURF_8_BANK));
2530                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2531                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2532                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2533                                 NUM_BANKS(ADDR_SURF_16_BANK));
2534                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2535                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2536                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2537                                 NUM_BANKS(ADDR_SURF_16_BANK));
2538                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2539                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2541                                  NUM_BANKS(ADDR_SURF_16_BANK));
2542                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2543                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2544                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2545                                  NUM_BANKS(ADDR_SURF_16_BANK));
2546                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2548                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2549                                  NUM_BANKS(ADDR_SURF_16_BANK));
2550                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2553                                  NUM_BANKS(ADDR_SURF_16_BANK));
2554                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2556                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2557                                  NUM_BANKS(ADDR_SURF_8_BANK));
2558
2559                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2560                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2561                             reg_offset != 23)
2562                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2563
2564                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2565                         if (reg_offset != 7)
2566                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2567
2568                 break;
2569         }
2570 }
2571
2572 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2573 {
2574         u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2575
2576         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2577                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2578                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2579         } else if (se_num == 0xffffffff) {
2580                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2581                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2582         } else if (sh_num == 0xffffffff) {
2583                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2584                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2585         } else {
2586                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2587                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2588         }
2589         WREG32(mmGRBM_GFX_INDEX, data);
2590 }
2591
2592 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
2593 {
2594         return (u32)((1ULL << bit_width) - 1);
2595 }
2596
2597 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2598 {
2599         u32 data, mask;
2600
2601         data = RREG32(mmCC_RB_BACKEND_DISABLE);
2602         data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
2603
2604         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2605         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2606
2607         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
2608                                        adev->gfx.config.max_sh_per_se);
2609
2610         return (~data) & mask;
2611 }
2612
2613 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
2614 {
2615         int i, j;
2616         u32 data;
2617         u32 active_rbs = 0;
2618         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2619                                         adev->gfx.config.max_sh_per_se;
2620
2621         mutex_lock(&adev->grbm_idx_mutex);
2622         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2623                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2624                         gfx_v8_0_select_se_sh(adev, i, j);
2625                         data = gfx_v8_0_get_rb_active_bitmap(adev);
2626                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2627                                                rb_bitmap_width_per_sh);
2628                 }
2629         }
2630         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2631         mutex_unlock(&adev->grbm_idx_mutex);
2632
2633         adev->gfx.config.backend_enable_mask = active_rbs;
2634         adev->gfx.config.num_rbs = hweight32(active_rbs);
2635 }
2636
2637 /**
2638  * gfx_v8_0_init_compute_vmid - gart enable
2639  *
2640  * @rdev: amdgpu_device pointer
2641  *
2642  * Initialize compute vmid sh_mem registers
2643  *
2644  */
2645 #define DEFAULT_SH_MEM_BASES    (0x6000)
2646 #define FIRST_COMPUTE_VMID      (8)
2647 #define LAST_COMPUTE_VMID       (16)
2648 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
2649 {
2650         int i;
2651         uint32_t sh_mem_config;
2652         uint32_t sh_mem_bases;
2653
2654         /*
2655          * Configure apertures:
2656          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2657          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2658          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2659          */
2660         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2661
2662         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
2663                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
2664                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2665                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
2666                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
2667                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
2668
2669         mutex_lock(&adev->srbm_mutex);
2670         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2671                 vi_srbm_select(adev, 0, 0, 0, i);
2672                 /* CP and shaders */
2673                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
2674                 WREG32(mmSH_MEM_APE1_BASE, 1);
2675                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2676                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
2677         }
2678         vi_srbm_select(adev, 0, 0, 0, 0);
2679         mutex_unlock(&adev->srbm_mutex);
2680 }
2681
2682 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
2683 {
2684         u32 tmp;
2685         int i;
2686
2687         tmp = RREG32(mmGRBM_CNTL);
2688         tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
2689         WREG32(mmGRBM_CNTL, tmp);
2690
2691         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2692         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2693         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
2694
2695         gfx_v8_0_tiling_mode_table_init(adev);
2696
2697         gfx_v8_0_setup_rb(adev);
2698
2699         /* XXX SH_MEM regs */
2700         /* where to put LDS, scratch, GPUVM in FSA64 space */
2701         mutex_lock(&adev->srbm_mutex);
2702         for (i = 0; i < 16; i++) {
2703                 vi_srbm_select(adev, 0, 0, 0, i);
2704                 /* CP and shaders */
2705                 if (i == 0) {
2706                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
2707                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
2708                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2709                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2710                         WREG32(mmSH_MEM_CONFIG, tmp);
2711                 } else {
2712                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
2713                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
2714                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2715                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2716                         WREG32(mmSH_MEM_CONFIG, tmp);
2717                 }
2718
2719                 WREG32(mmSH_MEM_APE1_BASE, 1);
2720                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2721                 WREG32(mmSH_MEM_BASES, 0);
2722         }
2723         vi_srbm_select(adev, 0, 0, 0, 0);
2724         mutex_unlock(&adev->srbm_mutex);
2725
2726         gfx_v8_0_init_compute_vmid(adev);
2727
2728         mutex_lock(&adev->grbm_idx_mutex);
2729         /*
2730          * making sure that the following register writes will be broadcasted
2731          * to all the shaders
2732          */
2733         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2734
2735         WREG32(mmPA_SC_FIFO_SIZE,
2736                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
2737                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
2738                    (adev->gfx.config.sc_prim_fifo_size_backend <<
2739                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
2740                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
2741                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
2742                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
2743                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
2744         mutex_unlock(&adev->grbm_idx_mutex);
2745
2746 }
2747
2748 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2749 {
2750         u32 i, j, k;
2751         u32 mask;
2752
2753         mutex_lock(&adev->grbm_idx_mutex);
2754         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2755                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2756                         gfx_v8_0_select_se_sh(adev, i, j);
2757                         for (k = 0; k < adev->usec_timeout; k++) {
2758                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2759                                         break;
2760                                 udelay(1);
2761                         }
2762                 }
2763         }
2764         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2765         mutex_unlock(&adev->grbm_idx_mutex);
2766
2767         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2768                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2769                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2770                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2771         for (k = 0; k < adev->usec_timeout; k++) {
2772                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2773                         break;
2774                 udelay(1);
2775         }
2776 }
2777
2778 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2779                                                bool enable)
2780 {
2781         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2782
2783         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2784         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2785         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2786         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2787
2788         WREG32(mmCP_INT_CNTL_RING0, tmp);
2789 }
2790
2791 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
2792 {
2793         u32 tmp = RREG32(mmRLC_CNTL);
2794
2795         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2796         WREG32(mmRLC_CNTL, tmp);
2797
2798         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
2799
2800         gfx_v8_0_wait_for_rlc_serdes(adev);
2801 }
2802
2803 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
2804 {
2805         u32 tmp = RREG32(mmGRBM_SOFT_RESET);
2806
2807         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2808         WREG32(mmGRBM_SOFT_RESET, tmp);
2809         udelay(50);
2810         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2811         WREG32(mmGRBM_SOFT_RESET, tmp);
2812         udelay(50);
2813 }
2814
2815 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
2816 {
2817         u32 tmp = RREG32(mmRLC_CNTL);
2818
2819         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
2820         WREG32(mmRLC_CNTL, tmp);
2821
2822         /* carrizo do enable cp interrupt after cp inited */
2823         if (!(adev->flags & AMD_IS_APU))
2824                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
2825
2826         udelay(50);
2827 }
2828
2829 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
2830 {
2831         const struct rlc_firmware_header_v2_0 *hdr;
2832         const __le32 *fw_data;
2833         unsigned i, fw_size;
2834
2835         if (!adev->gfx.rlc_fw)
2836                 return -EINVAL;
2837
2838         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2839         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2840
2841         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2842                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2843         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2844
2845         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
2846         for (i = 0; i < fw_size; i++)
2847                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2848         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2849
2850         return 0;
2851 }
2852
2853 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
2854 {
2855         int r;
2856
2857         gfx_v8_0_rlc_stop(adev);
2858
2859         /* disable CG */
2860         WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
2861
2862         /* disable PG */
2863         WREG32(mmRLC_PG_CNTL, 0);
2864
2865         gfx_v8_0_rlc_reset(adev);
2866
2867         if (!adev->pp_enabled) {
2868                 if (!adev->firmware.smu_load) {
2869                         /* legacy rlc firmware loading */
2870                         r = gfx_v8_0_rlc_load_microcode(adev);
2871                         if (r)
2872                                 return r;
2873                 } else {
2874                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
2875                                                         AMDGPU_UCODE_ID_RLC_G);
2876                         if (r)
2877                                 return -EINVAL;
2878                 }
2879         }
2880
2881         gfx_v8_0_rlc_start(adev);
2882
2883         return 0;
2884 }
2885
2886 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2887 {
2888         int i;
2889         u32 tmp = RREG32(mmCP_ME_CNTL);
2890
2891         if (enable) {
2892                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2893                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2894                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2895         } else {
2896                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2897                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2898                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2899                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2900                         adev->gfx.gfx_ring[i].ready = false;
2901         }
2902         WREG32(mmCP_ME_CNTL, tmp);
2903         udelay(50);
2904 }
2905
2906 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2907 {
2908         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2909         const struct gfx_firmware_header_v1_0 *ce_hdr;
2910         const struct gfx_firmware_header_v1_0 *me_hdr;
2911         const __le32 *fw_data;
2912         unsigned i, fw_size;
2913
2914         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2915                 return -EINVAL;
2916
2917         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2918                 adev->gfx.pfp_fw->data;
2919         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2920                 adev->gfx.ce_fw->data;
2921         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2922                 adev->gfx.me_fw->data;
2923
2924         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2925         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2926         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2927
2928         gfx_v8_0_cp_gfx_enable(adev, false);
2929
2930         /* PFP */
2931         fw_data = (const __le32 *)
2932                 (adev->gfx.pfp_fw->data +
2933                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2934         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2935         WREG32(mmCP_PFP_UCODE_ADDR, 0);
2936         for (i = 0; i < fw_size; i++)
2937                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2938         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2939
2940         /* CE */
2941         fw_data = (const __le32 *)
2942                 (adev->gfx.ce_fw->data +
2943                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2944         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2945         WREG32(mmCP_CE_UCODE_ADDR, 0);
2946         for (i = 0; i < fw_size; i++)
2947                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2948         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2949
2950         /* ME */
2951         fw_data = (const __le32 *)
2952                 (adev->gfx.me_fw->data +
2953                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2954         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2955         WREG32(mmCP_ME_RAM_WADDR, 0);
2956         for (i = 0; i < fw_size; i++)
2957                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2958         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2959
2960         return 0;
2961 }
2962
2963 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
2964 {
2965         u32 count = 0;
2966         const struct cs_section_def *sect = NULL;
2967         const struct cs_extent_def *ext = NULL;
2968
2969         /* begin clear state */
2970         count += 2;
2971         /* context control state */
2972         count += 3;
2973
2974         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
2975                 for (ext = sect->section; ext->extent != NULL; ++ext) {
2976                         if (sect->id == SECT_CONTEXT)
2977                                 count += 2 + ext->reg_count;
2978                         else
2979                                 return 0;
2980                 }
2981         }
2982         /* pa_sc_raster_config/pa_sc_raster_config1 */
2983         count += 4;
2984         /* end clear state */
2985         count += 2;
2986         /* clear state */
2987         count += 2;
2988
2989         return count;
2990 }
2991
2992 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
2993 {
2994         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2995         const struct cs_section_def *sect = NULL;
2996         const struct cs_extent_def *ext = NULL;
2997         int r, i;
2998
2999         /* init the CP */
3000         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3001         WREG32(mmCP_ENDIAN_SWAP, 0);
3002         WREG32(mmCP_DEVICE_ID, 1);
3003
3004         gfx_v8_0_cp_gfx_enable(adev, true);
3005
3006         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
3007         if (r) {
3008                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3009                 return r;
3010         }
3011
3012         /* clear state buffer */
3013         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3014         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3015
3016         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3017         amdgpu_ring_write(ring, 0x80000000);
3018         amdgpu_ring_write(ring, 0x80000000);
3019
3020         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3021                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3022                         if (sect->id == SECT_CONTEXT) {
3023                                 amdgpu_ring_write(ring,
3024                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3025                                                ext->reg_count));
3026                                 amdgpu_ring_write(ring,
3027                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3028                                 for (i = 0; i < ext->reg_count; i++)
3029                                         amdgpu_ring_write(ring, ext->extent[i]);
3030                         }
3031                 }
3032         }
3033
3034         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3035         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3036         switch (adev->asic_type) {
3037         case CHIP_TONGA:
3038                 amdgpu_ring_write(ring, 0x16000012);
3039                 amdgpu_ring_write(ring, 0x0000002A);
3040                 break;
3041         case CHIP_FIJI:
3042                 amdgpu_ring_write(ring, 0x3a00161a);
3043                 amdgpu_ring_write(ring, 0x0000002e);
3044                 break;
3045         case CHIP_TOPAZ:
3046         case CHIP_CARRIZO:
3047                 amdgpu_ring_write(ring, 0x00000002);
3048                 amdgpu_ring_write(ring, 0x00000000);
3049                 break;
3050         case CHIP_STONEY:
3051                 amdgpu_ring_write(ring, 0x00000000);
3052                 amdgpu_ring_write(ring, 0x00000000);
3053                 break;
3054         default:
3055                 BUG();
3056         }
3057
3058         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3059         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3060
3061         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3062         amdgpu_ring_write(ring, 0);
3063
3064         /* init the CE partitions */
3065         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3066         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3067         amdgpu_ring_write(ring, 0x8000);
3068         amdgpu_ring_write(ring, 0x8000);
3069
3070         amdgpu_ring_commit(ring);
3071
3072         return 0;
3073 }
3074
3075 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
3076 {
3077         struct amdgpu_ring *ring;
3078         u32 tmp;
3079         u32 rb_bufsz;
3080         u64 rb_addr, rptr_addr;
3081         int r;
3082
3083         /* Set the write pointer delay */
3084         WREG32(mmCP_RB_WPTR_DELAY, 0);
3085
3086         /* set the RB to use vmid 0 */
3087         WREG32(mmCP_RB_VMID, 0);
3088
3089         /* Set ring buffer size */
3090         ring = &adev->gfx.gfx_ring[0];
3091         rb_bufsz = order_base_2(ring->ring_size / 8);
3092         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3093         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3094         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
3095         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
3096 #ifdef __BIG_ENDIAN
3097         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3098 #endif
3099         WREG32(mmCP_RB0_CNTL, tmp);
3100
3101         /* Initialize the ring buffer's read and write pointers */
3102         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
3103         ring->wptr = 0;
3104         WREG32(mmCP_RB0_WPTR, ring->wptr);
3105
3106         /* set the wb address wether it's enabled or not */
3107         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3108         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3109         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
3110
3111         mdelay(1);
3112         WREG32(mmCP_RB0_CNTL, tmp);
3113
3114         rb_addr = ring->gpu_addr >> 8;
3115         WREG32(mmCP_RB0_BASE, rb_addr);
3116         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3117
3118         /* no gfx doorbells on iceland */
3119         if (adev->asic_type != CHIP_TOPAZ) {
3120                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
3121                 if (ring->use_doorbell) {
3122                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3123                                             DOORBELL_OFFSET, ring->doorbell_index);
3124                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3125                                             DOORBELL_EN, 1);
3126                 } else {
3127                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3128                                             DOORBELL_EN, 0);
3129                 }
3130                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
3131
3132                 if (adev->asic_type == CHIP_TONGA) {
3133                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3134                                             DOORBELL_RANGE_LOWER,
3135                                             AMDGPU_DOORBELL_GFX_RING0);
3136                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3137
3138                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
3139                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3140                 }
3141
3142         }
3143
3144         /* start the ring */
3145         gfx_v8_0_cp_gfx_start(adev);
3146         ring->ready = true;
3147         r = amdgpu_ring_test_ring(ring);
3148         if (r) {
3149                 ring->ready = false;
3150                 return r;
3151         }
3152
3153         return 0;
3154 }
3155
3156 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3157 {
3158         int i;
3159
3160         if (enable) {
3161                 WREG32(mmCP_MEC_CNTL, 0);
3162         } else {
3163                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3164                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3165                         adev->gfx.compute_ring[i].ready = false;
3166         }
3167         udelay(50);
3168 }
3169
3170 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3171 {
3172         const struct gfx_firmware_header_v1_0 *mec_hdr;
3173         const __le32 *fw_data;
3174         unsigned i, fw_size;
3175
3176         if (!adev->gfx.mec_fw)
3177                 return -EINVAL;
3178
3179         gfx_v8_0_cp_compute_enable(adev, false);
3180
3181         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3182         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3183
3184         fw_data = (const __le32 *)
3185                 (adev->gfx.mec_fw->data +
3186                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3187         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
3188
3189         /* MEC1 */
3190         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3191         for (i = 0; i < fw_size; i++)
3192                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
3193         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3194
3195         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3196         if (adev->gfx.mec2_fw) {
3197                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
3198
3199                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3200                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
3201
3202                 fw_data = (const __le32 *)
3203                         (adev->gfx.mec2_fw->data +
3204                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3205                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
3206
3207                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3208                 for (i = 0; i < fw_size; i++)
3209                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
3210                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
3211         }
3212
3213         return 0;
3214 }
3215
/*
 * struct vi_mqd - MQD image used for the compute queues
 *
 * One buffer object of sizeof(struct vi_mqd) is created per compute ring
 * and filled in by gfx_v8_0_cp_compute_resume().  Every member is a single
 * dword; the "ordinal" in the trailing comment is the member's dword index
 * from the start of the structure.  Runs of reserved dwords are declared
 * as one declarator list per run, annotated with the ordinal range they
 * cover.  The member order must not change.
 */
struct vi_mqd {
	uint32_t header;  /* ordinal0 */

	/* compute dispatch state */
	uint32_t compute_dispatch_initiator;  /* ordinal1 */
	uint32_t compute_dim_x;  /* ordinal2 */
	uint32_t compute_dim_y;  /* ordinal3 */
	uint32_t compute_dim_z;  /* ordinal4 */
	uint32_t compute_start_x;  /* ordinal5 */
	uint32_t compute_start_y;  /* ordinal6 */
	uint32_t compute_start_z;  /* ordinal7 */
	uint32_t compute_num_thread_x;  /* ordinal8 */
	uint32_t compute_num_thread_y;  /* ordinal9 */
	uint32_t compute_num_thread_z;  /* ordinal10 */
	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
	uint32_t compute_perfcount_enable;  /* ordinal12 */
	uint32_t compute_pgm_lo;  /* ordinal13 */
	uint32_t compute_pgm_hi;  /* ordinal14 */
	uint32_t compute_tba_lo;  /* ordinal15 */
	uint32_t compute_tba_hi;  /* ordinal16 */
	uint32_t compute_tma_lo;  /* ordinal17 */
	uint32_t compute_tma_hi;  /* ordinal18 */
	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
	uint32_t compute_vmid;  /* ordinal21 */
	uint32_t compute_resource_limits;  /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
	uint32_t compute_tmpring_size;  /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
	uint32_t compute_restart_x;  /* ordinal28 */
	uint32_t compute_restart_y;  /* ordinal29 */
	uint32_t compute_restart_z;  /* ordinal30 */
	uint32_t compute_thread_trace_enable;  /* ordinal31 */
	uint32_t compute_misc_reserved;  /* ordinal32 */
	uint32_t compute_dispatch_id;  /* ordinal33 */
	uint32_t compute_threadgroup_id;  /* ordinal34 */
	uint32_t compute_relaunch;  /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
	uint32_t compute_wave_restore_control;  /* ordinal38 */

	/* ordinal39 - ordinal64 */
	uint32_t reserved9, reserved10, reserved11, reserved12, reserved13,
		 reserved14, reserved15, reserved16, reserved17, reserved18,
		 reserved19, reserved20, reserved21, reserved22, reserved23,
		 reserved24, reserved25, reserved26, reserved27, reserved28,
		 reserved29, reserved30, reserved31, reserved32, reserved33,
		 reserved34;

	/* compute user data */
	uint32_t compute_user_data_0;  /* ordinal65 */
	uint32_t compute_user_data_1;  /* ordinal66 */
	uint32_t compute_user_data_2;  /* ordinal67 */
	uint32_t compute_user_data_3;  /* ordinal68 */
	uint32_t compute_user_data_4;  /* ordinal69 */
	uint32_t compute_user_data_5;  /* ordinal70 */
	uint32_t compute_user_data_6;  /* ordinal71 */
	uint32_t compute_user_data_7;  /* ordinal72 */
	uint32_t compute_user_data_8;  /* ordinal73 */
	uint32_t compute_user_data_9;  /* ordinal74 */
	uint32_t compute_user_data_10;  /* ordinal75 */
	uint32_t compute_user_data_11;  /* ordinal76 */
	uint32_t compute_user_data_12;  /* ordinal77 */
	uint32_t compute_user_data_13;  /* ordinal78 */
	uint32_t compute_user_data_14;  /* ordinal79 */
	uint32_t compute_user_data_15;  /* ordinal80 */

	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */

	/* ordinal83 - ordinal85 */
	uint32_t reserved35, reserved36, reserved37;

	/* cp_mqd_* time stamps and connect state */
	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */

	/* ordinal96 - ordinal97 */
	uint32_t reserved38, reserved39;

	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */

	/* ordinal106 - ordinal107 */
	uint32_t reserved40, reserved41;

	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */

	/* ordinal112 - ordinal113 */
	uint32_t reserved42, reserved43;

	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
	uint32_t cp_packet_id_lo;  /* ordinal116 */
	uint32_t cp_packet_id_hi;  /* ordinal117 */
	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
	uint32_t gds_save_mask_lo;  /* ordinal122 */
	uint32_t gds_save_mask_hi;  /* ordinal123 */
	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */

	/* ordinal126 - ordinal127 */
	uint32_t reserved44, reserved45;

	/* cp_mqd_* / cp_hqd_* members */
	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
	uint32_t cp_hqd_active;  /* ordinal130 */
	uint32_t cp_hqd_vmid;  /* ordinal131 */
	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
	uint32_t cp_hqd_quantum;  /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
	uint32_t cp_hqd_pq_control;  /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
	uint32_t cp_hqd_ib_control;  /* ordinal149 */
	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
	uint32_t cp_hqd_msg_type;  /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
	uint32_t cp_mqd_control;  /* ordinal162 */
	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
	uint32_t cp_hqd_eop_control;  /* ordinal167 */
	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
	uint32_t cp_hqd_error;  /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
	uint32_t cp_hqd_eop_dones;  /* ordinal181 */

	/* ordinal182 - ordinal191 */
	uint32_t reserved46, reserved47, reserved48, reserved49, reserved50,
		 reserved51, reserved52, reserved53, reserved54, reserved55;

	/* iqtimer packet */
	uint32_t iqtimer_pkt_header;  /* ordinal192 */
	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */

	/* ordinal225 - ordinal227 */
	uint32_t reserved56, reserved57, reserved58;

	/* set_resources packet */
	uint32_t set_resources_header;  /* ordinal228 */
	uint32_t set_resources_dw1;  /* ordinal229 */
	uint32_t set_resources_dw2;  /* ordinal230 */
	uint32_t set_resources_dw3;  /* ordinal231 */
	uint32_t set_resources_dw4;  /* ordinal232 */
	uint32_t set_resources_dw5;  /* ordinal233 */
	uint32_t set_resources_dw6;  /* ordinal234 */
	uint32_t set_resources_dw7;  /* ordinal235 */

	/* ordinal236 - ordinal255 */
	uint32_t reserved59, reserved60, reserved61, reserved62, reserved63,
		 reserved64, reserved65, reserved66, reserved67, reserved68,
		 reserved69, reserved70, reserved71, reserved72, reserved73,
		 reserved74, reserved75, reserved76, reserved77, reserved78;

	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};
3476
3477 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3478 {
3479         int i, r;
3480
3481         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3482                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3483
3484                 if (ring->mqd_obj) {
3485                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3486                         if (unlikely(r != 0))
3487                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3488
3489                         amdgpu_bo_unpin(ring->mqd_obj);
3490                         amdgpu_bo_unreserve(ring->mqd_obj);
3491
3492                         amdgpu_bo_unref(&ring->mqd_obj);
3493                         ring->mqd_obj = NULL;
3494                 }
3495         }
3496 }
3497
3498 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
3499 {
3500         int r, i, j;
3501         u32 tmp;
3502         bool use_doorbell = true;
3503         u64 hqd_gpu_addr;
3504         u64 mqd_gpu_addr;
3505         u64 eop_gpu_addr;
3506         u64 wb_gpu_addr;
3507         u32 *buf;
3508         struct vi_mqd *mqd;
3509
3510         /* init the pipes */
3511         mutex_lock(&adev->srbm_mutex);
3512         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
3513                 int me = (i < 4) ? 1 : 2;
3514                 int pipe = (i < 4) ? i : (i - 4);
3515
3516                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
3517                 eop_gpu_addr >>= 8;
3518
3519                 vi_srbm_select(adev, me, pipe, 0, 0);
3520
3521                 /* write the EOP addr */
3522                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
3523                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
3524
3525                 /* set the VMID assigned */
3526                 WREG32(mmCP_HQD_VMID, 0);
3527
3528                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3529                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
3530                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3531                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
3532                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
3533         }
3534         vi_srbm_select(adev, 0, 0, 0, 0);
3535         mutex_unlock(&adev->srbm_mutex);
3536
3537         /* init the queues.  Just two for now. */
3538         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3539                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3540
3541                 if (ring->mqd_obj == NULL) {
3542                         r = amdgpu_bo_create(adev,
3543                                              sizeof(struct vi_mqd),
3544                                              PAGE_SIZE, true,
3545                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
3546                                              NULL, &ring->mqd_obj);
3547                         if (r) {
3548                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3549                                 return r;
3550                         }
3551                 }
3552
3553                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3554                 if (unlikely(r != 0)) {
3555                         gfx_v8_0_cp_compute_fini(adev);
3556                         return r;
3557                 }
3558                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
3559                                   &mqd_gpu_addr);
3560                 if (r) {
3561                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
3562                         gfx_v8_0_cp_compute_fini(adev);
3563                         return r;
3564                 }
3565                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
3566                 if (r) {
3567                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
3568                         gfx_v8_0_cp_compute_fini(adev);
3569                         return r;
3570                 }
3571
3572                 /* init the mqd struct */
3573                 memset(buf, 0, sizeof(struct vi_mqd));
3574
3575                 mqd = (struct vi_mqd *)buf;
3576                 mqd->header = 0xC0310800;
3577                 mqd->compute_pipelinestat_enable = 0x00000001;
3578                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3579                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3580                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3581                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3582                 mqd->compute_misc_reserved = 0x00000003;
3583
3584                 mutex_lock(&adev->srbm_mutex);
3585                 vi_srbm_select(adev, ring->me,
3586                                ring->pipe,
3587                                ring->queue, 0);
3588
3589                 /* disable wptr polling */
3590                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
3591                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3592                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
3593
3594                 mqd->cp_hqd_eop_base_addr_lo =
3595                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
3596                 mqd->cp_hqd_eop_base_addr_hi =
3597                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
3598
3599                 /* enable doorbell? */
3600                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3601                 if (use_doorbell) {
3602                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3603                 } else {
3604                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
3605                 }
3606                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
3607                 mqd->cp_hqd_pq_doorbell_control = tmp;
3608
3609                 /* disable the queue if it's active */
3610                 mqd->cp_hqd_dequeue_request = 0;
3611                 mqd->cp_hqd_pq_rptr = 0;
3612                 mqd->cp_hqd_pq_wptr= 0;
3613                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
3614                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
3615                         for (j = 0; j < adev->usec_timeout; j++) {
3616                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
3617                                         break;
3618                                 udelay(1);
3619                         }
3620                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
3621                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
3622                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3623                 }
3624
3625                 /* set the pointer to the MQD */
3626                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
3627                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3628                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
3629                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3630
3631                 /* set MQD vmid to 0 */
3632                 tmp = RREG32(mmCP_MQD_CONTROL);
3633                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3634                 WREG32(mmCP_MQD_CONTROL, tmp);
3635                 mqd->cp_mqd_control = tmp;
3636
3637                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3638                 hqd_gpu_addr = ring->gpu_addr >> 8;
3639                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3640                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3641                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
3642                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
3643
3644                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3645                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
3646                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3647                                     (order_base_2(ring->ring_size / 4) - 1));
3648                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3649                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3650 #ifdef __BIG_ENDIAN
3651                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3652 #endif
3653                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3654                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3655                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3656                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3657                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
3658                 mqd->cp_hqd_pq_control = tmp;
3659
3660                 /* set the wb address whether it's enabled or not */
3661                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3662                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3663                 mqd->cp_hqd_pq_rptr_report_addr_hi =
3664                         upper_32_bits(wb_gpu_addr) & 0xffff;
3665                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3666                        mqd->cp_hqd_pq_rptr_report_addr_lo);
3667                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3668                        mqd->cp_hqd_pq_rptr_report_addr_hi);
3669
3670                 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3671                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3672                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3673                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3674                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
3675                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3676                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
3677
3678                 /* enable the doorbell if requested */
3679                 if (use_doorbell) {
3680                         if ((adev->asic_type == CHIP_CARRIZO) ||
3681                             (adev->asic_type == CHIP_FIJI) ||
3682                             (adev->asic_type == CHIP_STONEY)) {
3683                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
3684                                        AMDGPU_DOORBELL_KIQ << 2);
3685                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
3686                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
3687                         }
3688                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3689                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3690                                             DOORBELL_OFFSET, ring->doorbell_index);
3691                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3692                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
3693                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
3694                         mqd->cp_hqd_pq_doorbell_control = tmp;
3695
3696                 } else {
3697                         mqd->cp_hqd_pq_doorbell_control = 0;
3698                 }
3699                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
3700                        mqd->cp_hqd_pq_doorbell_control);
3701
3702                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3703                 ring->wptr = 0;
3704                 mqd->cp_hqd_pq_wptr = ring->wptr;
3705                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3706                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
3707
3708                 /* set the vmid for the queue */
3709                 mqd->cp_hqd_vmid = 0;
3710                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3711
3712                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
3713                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3714                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
3715                 mqd->cp_hqd_persistent_state = tmp;
3716                 if (adev->asic_type == CHIP_STONEY) {
3717                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
3718                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
3719                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
3720                 }
3721
3722                 /* activate the queue */
3723                 mqd->cp_hqd_active = 1;
3724                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
3725
3726                 vi_srbm_select(adev, 0, 0, 0, 0);
3727                 mutex_unlock(&adev->srbm_mutex);
3728
3729                 amdgpu_bo_kunmap(ring->mqd_obj);
3730                 amdgpu_bo_unreserve(ring->mqd_obj);
3731         }
3732
3733         if (use_doorbell) {
3734                 tmp = RREG32(mmCP_PQ_STATUS);
3735                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3736                 WREG32(mmCP_PQ_STATUS, tmp);
3737         }
3738
3739         gfx_v8_0_cp_compute_enable(adev, true);
3740
3741         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3742                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3743
3744                 ring->ready = true;
3745                 r = amdgpu_ring_test_ring(ring);
3746                 if (r)
3747                         ring->ready = false;
3748         }
3749
3750         return 0;
3751 }
3752
3753 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3754 {
3755         int r;
3756
3757         if (!(adev->flags & AMD_IS_APU))
3758                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3759
3760         if (!adev->pp_enabled) {
3761                 if (!adev->firmware.smu_load) {
3762                         /* legacy firmware loading */
3763                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
3764                         if (r)
3765                                 return r;
3766
3767                         r = gfx_v8_0_cp_compute_load_microcode(adev);
3768                         if (r)
3769                                 return r;
3770                 } else {
3771                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3772                                                         AMDGPU_UCODE_ID_CP_CE);
3773                         if (r)
3774                                 return -EINVAL;
3775
3776                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3777                                                         AMDGPU_UCODE_ID_CP_PFP);
3778                         if (r)
3779                                 return -EINVAL;
3780
3781                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3782                                                         AMDGPU_UCODE_ID_CP_ME);
3783                         if (r)
3784                                 return -EINVAL;
3785
3786                         if (adev->asic_type == CHIP_TOPAZ) {
3787                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
3788                                 if (r)
3789                                         return r;
3790                         } else {
3791                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3792                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
3793                                 if (r)
3794                                         return -EINVAL;
3795                         }
3796                 }
3797         }
3798
3799         r = gfx_v8_0_cp_gfx_resume(adev);
3800         if (r)
3801                 return r;
3802
3803         r = gfx_v8_0_cp_compute_resume(adev);
3804         if (r)
3805                 return r;
3806
3807         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3808
3809         return 0;
3810 }
3811
3812 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
3813 {
3814         gfx_v8_0_cp_gfx_enable(adev, enable);
3815         gfx_v8_0_cp_compute_enable(adev, enable);
3816 }
3817
/**
 * gfx_v8_0_hw_init - bring up the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Programs the golden registers, runs the GPU init, then resumes the RLC
 * followed by the CP.
 *
 * Returns 0 on success, or the error code of the RLC/CP resume that failed.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
3837
3838 static int gfx_v8_0_hw_fini(void *handle)
3839 {
3840         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3841
3842         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3843         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3844         gfx_v8_0_cp_enable(adev, false);
3845         gfx_v8_0_rlc_stop(adev);
3846         gfx_v8_0_cp_compute_fini(adev);
3847
3848         return 0;
3849 }
3850
/**
 * gfx_v8_0_suspend - suspend callback for the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Suspend is implemented as a plain hardware teardown; returns whatever
 * gfx_v8_0_hw_fini() returns.
 */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
3857
/**
 * gfx_v8_0_resume - resume callback for the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Resume is implemented as a full hardware init; returns whatever
 * gfx_v8_0_hw_init() returns.
 */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
3864
3865 static bool gfx_v8_0_is_idle(void *handle)
3866 {
3867         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3868
3869         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3870                 return false;
3871         else
3872                 return true;
3873 }
3874
3875 static int gfx_v8_0_wait_for_idle(void *handle)
3876 {
3877         unsigned i;
3878         u32 tmp;
3879         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3880
3881         for (i = 0; i < adev->usec_timeout; i++) {
3882                 /* read MC_STATUS */
3883                 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3884
3885                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3886                         return 0;
3887                 udelay(1);
3888         }
3889         return -ETIMEDOUT;
3890 }
3891
3892 static void gfx_v8_0_print_status(void *handle)
3893 {
3894         int i;
3895         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3896
3897         dev_info(adev->dev, "GFX 8.x registers\n");
3898         dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
3899                  RREG32(mmGRBM_STATUS));
3900         dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
3901                  RREG32(mmGRBM_STATUS2));
3902         dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3903                  RREG32(mmGRBM_STATUS_SE0));
3904         dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3905                  RREG32(mmGRBM_STATUS_SE1));
3906         dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3907                  RREG32(mmGRBM_STATUS_SE2));
3908         dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3909                  RREG32(mmGRBM_STATUS_SE3));
3910         dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
3911         dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3912                  RREG32(mmCP_STALLED_STAT1));
3913         dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3914                  RREG32(mmCP_STALLED_STAT2));
3915         dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3916                  RREG32(mmCP_STALLED_STAT3));
3917         dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3918                  RREG32(mmCP_CPF_BUSY_STAT));
3919         dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3920                  RREG32(mmCP_CPF_STALLED_STAT1));
3921         dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
3922         dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
3923         dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3924                  RREG32(mmCP_CPC_STALLED_STAT1));
3925         dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
3926
3927         for (i = 0; i < 32; i++) {
3928                 dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
3929                          i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
3930         }
3931         for (i = 0; i < 16; i++) {
3932                 dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
3933                          i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
3934         }
3935         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3936                 dev_info(adev->dev, "  se: %d\n", i);
3937                 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
3938                 dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
3939                          RREG32(mmPA_SC_RASTER_CONFIG));
3940                 dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
3941                          RREG32(mmPA_SC_RASTER_CONFIG_1));
3942         }
3943         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3944
3945         dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
3946                  RREG32(mmGB_ADDR_CONFIG));
3947         dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
3948                  RREG32(mmHDP_ADDR_CONFIG));
3949         dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
3950                  RREG32(mmDMIF_ADDR_CALC));
3951
3952         dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
3953                  RREG32(mmCP_MEQ_THRESHOLDS));
3954         dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
3955                  RREG32(mmSX_DEBUG_1));
3956         dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
3957                  RREG32(mmTA_CNTL_AUX));
3958         dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
3959                  RREG32(mmSPI_CONFIG_CNTL));
3960         dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
3961                  RREG32(mmSQ_CONFIG));
3962         dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
3963                  RREG32(mmDB_DEBUG));
3964         dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
3965                  RREG32(mmDB_DEBUG2));
3966         dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
3967                  RREG32(mmDB_DEBUG3));
3968         dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
3969                  RREG32(mmCB_HW_CONTROL));
3970         dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
3971                  RREG32(mmSPI_CONFIG_CNTL_1));
3972         dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
3973                  RREG32(mmPA_SC_FIFO_SIZE));
3974         dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
3975                  RREG32(mmVGT_NUM_INSTANCES));
3976         dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
3977                  RREG32(mmCP_PERFMON_CNTL));
3978         dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
3979                  RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
3980         dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
3981                  RREG32(mmVGT_CACHE_INVALIDATION));
3982         dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
3983                  RREG32(mmVGT_GS_VERTEX_REUSE));
3984         dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
3985                  RREG32(mmPA_SC_LINE_STIPPLE_STATE));
3986         dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
3987                  RREG32(mmPA_CL_ENHANCE));
3988         dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
3989                  RREG32(mmPA_SC_ENHANCE));
3990
3991         dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
3992                  RREG32(mmCP_ME_CNTL));
3993         dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
3994                  RREG32(mmCP_MAX_CONTEXT));
3995         dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
3996                  RREG32(mmCP_ENDIAN_SWAP));
3997         dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
3998                  RREG32(mmCP_DEVICE_ID));
3999
4000         dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
4001                  RREG32(mmCP_SEM_WAIT_TIMER));
4002
4003         dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
4004                  RREG32(mmCP_RB_WPTR_DELAY));
4005         dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
4006                  RREG32(mmCP_RB_VMID));
4007         dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4008                  RREG32(mmCP_RB0_CNTL));
4009         dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
4010                  RREG32(mmCP_RB0_WPTR));
4011         dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
4012                  RREG32(mmCP_RB0_RPTR_ADDR));
4013         dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
4014                  RREG32(mmCP_RB0_RPTR_ADDR_HI));
4015         dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4016                  RREG32(mmCP_RB0_CNTL));
4017         dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
4018                  RREG32(mmCP_RB0_BASE));
4019         dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
4020                  RREG32(mmCP_RB0_BASE_HI));
4021         dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
4022                  RREG32(mmCP_MEC_CNTL));
4023         dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
4024                  RREG32(mmCP_CPF_DEBUG));
4025
4026         dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
4027                  RREG32(mmSCRATCH_ADDR));
4028         dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
4029                  RREG32(mmSCRATCH_UMSK));
4030
4031         dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
4032                  RREG32(mmCP_INT_CNTL_RING0));
4033         dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4034                  RREG32(mmRLC_LB_CNTL));
4035         dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
4036                  RREG32(mmRLC_CNTL));
4037         dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
4038                  RREG32(mmRLC_CGCG_CGLS_CTRL));
4039         dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
4040                  RREG32(mmRLC_LB_CNTR_INIT));
4041         dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
4042                  RREG32(mmRLC_LB_CNTR_MAX));
4043         dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
4044                  RREG32(mmRLC_LB_INIT_CU_MASK));
4045         dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
4046                  RREG32(mmRLC_LB_PARAMS));
4047         dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4048                  RREG32(mmRLC_LB_CNTL));
4049         dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
4050                  RREG32(mmRLC_MC_CNTL));
4051         dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
4052                  RREG32(mmRLC_UCODE_CNTL));
4053
4054         mutex_lock(&adev->srbm_mutex);
4055         for (i = 0; i < 16; i++) {
4056                 vi_srbm_select(adev, 0, 0, 0, i);
4057                 dev_info(adev->dev, "  VM %d:\n", i);
4058                 dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
4059                          RREG32(mmSH_MEM_CONFIG));
4060                 dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
4061                          RREG32(mmSH_MEM_APE1_BASE));
4062                 dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
4063                          RREG32(mmSH_MEM_APE1_LIMIT));
4064                 dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
4065                          RREG32(mmSH_MEM_BASES));
4066         }
4067         vi_srbm_select(adev, 0, 0, 0, 0);
4068         mutex_unlock(&adev->srbm_mutex);
4069 }
4070
4071 static int gfx_v8_0_soft_reset(void *handle)
4072 {
4073         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4074         u32 tmp;
4075         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4076
4077         /* GRBM_STATUS */
4078         tmp = RREG32(mmGRBM_STATUS);
4079         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4080                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4081                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4082                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4083                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4084                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4085                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4086                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4087                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4088                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4089         }
4090
4091         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4092                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4093                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4094                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4095                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4096         }
4097
4098         /* GRBM_STATUS2 */
4099         tmp = RREG32(mmGRBM_STATUS2);
4100         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4101                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4102                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4103
4104         /* SRBM_STATUS */
4105         tmp = RREG32(mmSRBM_STATUS);
4106         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4107                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4108                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4109
4110         if (grbm_soft_reset || srbm_soft_reset) {
4111                 gfx_v8_0_print_status((void *)adev);
4112                 /* stop the rlc */
4113                 gfx_v8_0_rlc_stop(adev);
4114
4115                 /* Disable GFX parsing/prefetching */
4116                 gfx_v8_0_cp_gfx_enable(adev, false);
4117
4118                 /* Disable MEC parsing/prefetching */
4119                 gfx_v8_0_cp_compute_enable(adev, false);
4120
4121                 if (grbm_soft_reset || srbm_soft_reset) {
4122                         tmp = RREG32(mmGMCON_DEBUG);
4123                         tmp = REG_SET_FIELD(tmp,
4124                                             GMCON_DEBUG, GFX_STALL, 1);
4125                         tmp = REG_SET_FIELD(tmp,
4126                                             GMCON_DEBUG, GFX_CLEAR, 1);
4127                         WREG32(mmGMCON_DEBUG, tmp);
4128
4129                         udelay(50);
4130                 }
4131
4132                 if (grbm_soft_reset) {
4133                         tmp = RREG32(mmGRBM_SOFT_RESET);
4134                         tmp |= grbm_soft_reset;
4135                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4136                         WREG32(mmGRBM_SOFT_RESET, tmp);
4137                         tmp = RREG32(mmGRBM_SOFT_RESET);
4138
4139                         udelay(50);
4140
4141                         tmp &= ~grbm_soft_reset;
4142                         WREG32(mmGRBM_SOFT_RESET, tmp);
4143                         tmp = RREG32(mmGRBM_SOFT_RESET);
4144                 }
4145
4146                 if (srbm_soft_reset) {
4147                         tmp = RREG32(mmSRBM_SOFT_RESET);
4148                         tmp |= srbm_soft_reset;
4149                         dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4150                         WREG32(mmSRBM_SOFT_RESET, tmp);
4151                         tmp = RREG32(mmSRBM_SOFT_RESET);
4152
4153                         udelay(50);
4154
4155                         tmp &= ~srbm_soft_reset;
4156                         WREG32(mmSRBM_SOFT_RESET, tmp);
4157                         tmp = RREG32(mmSRBM_SOFT_RESET);
4158                 }
4159
4160                 if (grbm_soft_reset || srbm_soft_reset) {
4161                         tmp = RREG32(mmGMCON_DEBUG);
4162                         tmp = REG_SET_FIELD(tmp,
4163                                             GMCON_DEBUG, GFX_STALL, 0);
4164                         tmp = REG_SET_FIELD(tmp,
4165                                             GMCON_DEBUG, GFX_CLEAR, 0);
4166                         WREG32(mmGMCON_DEBUG, tmp);
4167                 }
4168
4169                 /* Wait a little for things to settle down */
4170                 udelay(50);
4171                 gfx_v8_0_print_status((void *)adev);
4172         }
4173         return 0;
4174 }
4175
4176 /**
4177  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4178  *
4179  * @adev: amdgpu_device pointer
4180  *
4181  * Fetches a GPU clock counter snapshot.
4182  * Returns the 64 bit clock counter snapshot.
4183  */
4184 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4185 {
4186         uint64_t clock;
4187
4188         mutex_lock(&adev->gfx.gpu_clock_mutex);
4189         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4190         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4191                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4192         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4193         return clock;
4194 }
4195
4196 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4197                                           uint32_t vmid,
4198                                           uint32_t gds_base, uint32_t gds_size,
4199                                           uint32_t gws_base, uint32_t gws_size,
4200                                           uint32_t oa_base, uint32_t oa_size)
4201 {
4202         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4203         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4204
4205         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4206         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4207
4208         oa_base = oa_base >> AMDGPU_OA_SHIFT;
4209         oa_size = oa_size >> AMDGPU_OA_SHIFT;
4210
4211         /* GDS Base */
4212         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4213         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4214                                 WRITE_DATA_DST_SEL(0)));
4215         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4216         amdgpu_ring_write(ring, 0);
4217         amdgpu_ring_write(ring, gds_base);
4218
4219         /* GDS Size */
4220         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4221         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4222                                 WRITE_DATA_DST_SEL(0)));
4223         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4224         amdgpu_ring_write(ring, 0);
4225         amdgpu_ring_write(ring, gds_size);
4226
4227         /* GWS */
4228         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4229         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4230                                 WRITE_DATA_DST_SEL(0)));
4231         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4232         amdgpu_ring_write(ring, 0);
4233         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4234
4235         /* OA */
4236         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4237         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4238                                 WRITE_DATA_DST_SEL(0)));
4239         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4240         amdgpu_ring_write(ring, 0);
4241         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4242 }
4243
4244 static int gfx_v8_0_early_init(void *handle)
4245 {
4246         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4247
4248         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
4249         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
4250         gfx_v8_0_set_ring_funcs(adev);
4251         gfx_v8_0_set_irq_funcs(adev);
4252         gfx_v8_0_set_gds_init(adev);
4253
4254         return 0;
4255 }
4256
4257 static int gfx_v8_0_late_init(void *handle)
4258 {
4259         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4260         int r;
4261
4262         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4263         if (r)
4264                 return r;
4265
4266         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4267         if (r)
4268                 return r;
4269
4270         /* requires IBs so do in late init after IB pool is initialized */
4271         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
4272         if (r)
4273                 return r;
4274
4275         return 0;
4276 }
4277
4278 static int gfx_v8_0_set_powergating_state(void *handle,
4279                                           enum amd_powergating_state state)
4280 {
4281         return 0;
4282 }
4283
4284 static void fiji_send_serdes_cmd(struct amdgpu_device *adev,
4285                 uint32_t reg_addr, uint32_t cmd)
4286 {
4287         uint32_t data;
4288
4289         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4290
4291         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
4292         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
4293
4294         data = RREG32(mmRLC_SERDES_WR_CTRL);
4295         data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
4296                         RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
4297                         RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
4298                         RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
4299                         RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
4300                         RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
4301                         RLC_SERDES_WR_CTRL__POWER_UP_MASK |
4302                         RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
4303                         RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
4304                         RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
4305                         RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
4306         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
4307                         (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
4308                         (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
4309                         (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
4310
4311         WREG32(mmRLC_SERDES_WR_CTRL, data);
4312 }
4313
4314 static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4315                 bool enable)
4316 {
4317         uint32_t temp, data;
4318
4319         /* It is disabled by HW by default */
4320         if (enable) {
4321                 /* 1 - RLC memory Light sleep */
4322                 temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
4323                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4324                 if (temp != data)
4325                         WREG32(mmRLC_MEM_SLP_CNTL, data);
4326
4327                 /* 2 - CP memory Light sleep */
4328                 temp = data = RREG32(mmCP_MEM_SLP_CNTL);
4329                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4330                 if (temp != data)
4331                         WREG32(mmCP_MEM_SLP_CNTL, data);
4332
4333                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
4334                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4335                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4336                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4337                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
4338                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
4339
4340                 if (temp != data)
4341                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4342
4343                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4344                 gfx_v8_0_wait_for_rlc_serdes(adev);
4345
4346                 /* 5 - clear mgcg override */
4347                 fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
4348
4349                 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
4350                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
4351                 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
4352                 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
4353                 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
4354                 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
4355                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
4356                 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
4357                 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
4358                 if (temp != data)
4359                         WREG32(mmCGTS_SM_CTRL_REG, data);
4360                 udelay(50);
4361
4362                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4363                 gfx_v8_0_wait_for_rlc_serdes(adev);
4364         } else {
4365                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
4366                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4367                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4368                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4369                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
4370                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
4371                 if (temp != data)
4372                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4373
4374                 /* 2 - disable MGLS in RLC */
4375                 data = RREG32(mmRLC_MEM_SLP_CNTL);
4376                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4377                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4378                         WREG32(mmRLC_MEM_SLP_CNTL, data);
4379                 }
4380
4381                 /* 3 - disable MGLS in CP */
4382                 data = RREG32(mmCP_MEM_SLP_CNTL);
4383                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4384                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4385                         WREG32(mmCP_MEM_SLP_CNTL, data);
4386                 }
4387
4388                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
4389                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
4390                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
4391                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
4392                 if (temp != data)
4393                         WREG32(mmCGTS_SM_CTRL_REG, data);
4394
4395                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4396                 gfx_v8_0_wait_for_rlc_serdes(adev);
4397
4398                 /* 6 - set mgcg override */
4399                 fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
4400
4401                 udelay(50);
4402
4403                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4404                 gfx_v8_0_wait_for_rlc_serdes(adev);
4405         }
4406 }
4407
4408 static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4409                 bool enable)
4410 {
4411         uint32_t temp, temp1, data, data1;
4412
4413         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
4414
4415         if (enable) {
4416                 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
4417                  * Cmp_busy/GFX_Idle interrupts
4418                  */
4419                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4420
4421                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4422                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
4423                 if (temp1 != data1)
4424                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4425
4426                 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4427                 gfx_v8_0_wait_for_rlc_serdes(adev);
4428
4429                 /* 3 - clear cgcg override */
4430                 fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
4431
4432                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4433                 gfx_v8_0_wait_for_rlc_serdes(adev);
4434
4435                 /* 4 - write cmd to set CGLS */
4436                 fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
4437
4438                 /* 5 - enable cgcg */
4439                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4440
4441                 /* enable cgls*/
4442                 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4443
4444                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4445                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
4446
4447                 if (temp1 != data1)
4448                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4449
4450                 if (temp != data)
4451                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
4452         } else {
4453                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
4454                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4455
4456                 /* TEST CGCG */
4457                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4458                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
4459                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
4460                 if (temp1 != data1)
4461                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4462
4463                 /* read gfx register to wake up cgcg */
4464                 RREG32(mmCB_CGTT_SCLK_CTRL);
4465                 RREG32(mmCB_CGTT_SCLK_CTRL);
4466                 RREG32(mmCB_CGTT_SCLK_CTRL);
4467                 RREG32(mmCB_CGTT_SCLK_CTRL);
4468
4469                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4470                 gfx_v8_0_wait_for_rlc_serdes(adev);
4471
4472                 /* write cmd to Set CGCG Overrride */
4473                 fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
4474
4475                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4476                 gfx_v8_0_wait_for_rlc_serdes(adev);
4477
4478                 /* write cmd to Clear CGLS */
4479                 fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
4480
4481                 /* disable cgcg, cgls should be disabled too. */
4482                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4483                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4484                 if (temp != data)
4485                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
4486         }
4487 }
4488 static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev,
4489                 bool enable)
4490 {
4491         if (enable) {
4492                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
4493                  * ===  MGCG + MGLS + TS(CG/LS) ===
4494                  */
4495                 fiji_update_medium_grain_clock_gating(adev, enable);
4496                 fiji_update_coarse_grain_clock_gating(adev, enable);
4497         } else {
4498                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
4499                  * ===  CGCG + CGLS ===
4500                  */
4501                 fiji_update_coarse_grain_clock_gating(adev, enable);
4502                 fiji_update_medium_grain_clock_gating(adev, enable);
4503         }
4504         return 0;
4505 }
4506
4507 static int gfx_v8_0_set_clockgating_state(void *handle,
4508                                           enum amd_clockgating_state state)
4509 {
4510         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4511
4512         switch (adev->asic_type) {
4513         case CHIP_FIJI:
4514                 fiji_update_gfx_clock_gating(adev,
4515                                 state == AMD_CG_STATE_GATE ? true : false);
4516                 break;
4517         default:
4518                 break;
4519         }
4520         return 0;
4521 }
4522
4523 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4524 {
4525         u32 rptr;
4526
4527         rptr = ring->adev->wb.wb[ring->rptr_offs];
4528
4529         return rptr;
4530 }
4531
4532 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4533 {
4534         struct amdgpu_device *adev = ring->adev;
4535         u32 wptr;
4536
4537         if (ring->use_doorbell)
4538                 /* XXX check if swapping is necessary on BE */
4539                 wptr = ring->adev->wb.wb[ring->wptr_offs];
4540         else
4541                 wptr = RREG32(mmCP_RB0_WPTR);
4542
4543         return wptr;
4544 }
4545
4546 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4547 {
4548         struct amdgpu_device *adev = ring->adev;
4549
4550         if (ring->use_doorbell) {
4551                 /* XXX check if swapping is necessary on BE */
4552                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
4553                 WDOORBELL32(ring->doorbell_index, ring->wptr);
4554         } else {
4555                 WREG32(mmCP_RB0_WPTR, ring->wptr);
4556                 (void)RREG32(mmCP_RB0_WPTR);
4557         }
4558 }
4559
4560 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4561 {
4562         u32 ref_and_mask, reg_mem_engine;
4563
4564         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
4565                 switch (ring->me) {
4566                 case 1:
4567                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
4568                         break;
4569                 case 2:
4570                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
4571                         break;
4572                 default:
4573                         return;
4574                 }
4575                 reg_mem_engine = 0;
4576         } else {
4577                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
4578                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
4579         }
4580
4581         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4582         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
4583                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
4584                                  reg_mem_engine));
4585         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
4586         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
4587         amdgpu_ring_write(ring, ref_and_mask);
4588         amdgpu_ring_write(ring, ref_and_mask);
4589         amdgpu_ring_write(ring, 0x20); /* poll interval */
4590 }
4591
4592 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
4593 {
4594         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4595         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4596                                  WRITE_DATA_DST_SEL(0) |
4597                                  WR_CONFIRM));
4598         amdgpu_ring_write(ring, mmHDP_DEBUG0);
4599         amdgpu_ring_write(ring, 0);
4600         amdgpu_ring_write(ring, 1);
4601
4602 }
4603
4604 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4605                                   struct amdgpu_ib *ib)
4606 {
4607         bool need_ctx_switch = ring->current_ctx != ib->ctx;
4608         u32 header, control = 0;
4609         u32 next_rptr = ring->wptr + 5;
4610
4611         /* drop the CE preamble IB for the same context */
4612         if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
4613                 return;
4614
4615         if (need_ctx_switch)
4616                 next_rptr += 2;
4617
4618         next_rptr += 4;
4619         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4620         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4621         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4622         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4623         amdgpu_ring_write(ring, next_rptr);
4624
4625         /* insert SWITCH_BUFFER packet before first IB in the ring frame */
4626         if (need_ctx_switch) {
4627                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4628                 amdgpu_ring_write(ring, 0);
4629         }
4630
4631         if (ib->flags & AMDGPU_IB_FLAG_CE)
4632                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4633         else
4634                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4635
4636         control |= ib->length_dw | (ib->vm_id << 24);
4637
4638         amdgpu_ring_write(ring, header);
4639         amdgpu_ring_write(ring,
4640 #ifdef __BIG_ENDIAN
4641                           (2 << 0) |
4642 #endif
4643                           (ib->gpu_addr & 0xFFFFFFFC));
4644         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4645         amdgpu_ring_write(ring, control);
4646 }
4647
4648 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4649                                   struct amdgpu_ib *ib)
4650 {
4651         u32 header, control = 0;
4652         u32 next_rptr = ring->wptr + 5;
4653
4654         control |= INDIRECT_BUFFER_VALID;
4655
4656         next_rptr += 4;
4657         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4658         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4659         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4660         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4661         amdgpu_ring_write(ring, next_rptr);
4662
4663         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4664
4665         control |= ib->length_dw | (ib->vm_id << 24);
4666
4667         amdgpu_ring_write(ring, header);
4668         amdgpu_ring_write(ring,
4669 #ifdef __BIG_ENDIAN
4670                                           (2 << 0) |
4671 #endif
4672                                           (ib->gpu_addr & 0xFFFFFFFC));
4673         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4674         amdgpu_ring_write(ring, control);
4675 }
4676
4677 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
4678                                          u64 seq, unsigned flags)
4679 {
4680         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4681         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4682
4683         /* EVENT_WRITE_EOP - flush caches, send int */
4684         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
4685         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4686                                  EOP_TC_ACTION_EN |
4687                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4688                                  EVENT_INDEX(5)));
4689         amdgpu_ring_write(ring, addr & 0xfffffffc);
4690         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
4691                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4692         amdgpu_ring_write(ring, lower_32_bits(seq));
4693         amdgpu_ring_write(ring, upper_32_bits(seq));
4694
4695 }
4696
4697 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4698 {
4699         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
4700         uint32_t seq = ring->fence_drv.sync_seq;
4701         uint64_t addr = ring->fence_drv.gpu_addr;
4702
4703         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4704         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
4705                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
4706                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
4707         amdgpu_ring_write(ring, addr & 0xfffffffc);
4708         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4709         amdgpu_ring_write(ring, seq);
4710         amdgpu_ring_write(ring, 0xffffffff);
4711         amdgpu_ring_write(ring, 4); /* poll interval */
4712
4713         if (usepfp) {
4714                 /* synce CE with ME to prevent CE fetch CEIB before context switch done */
4715                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4716                 amdgpu_ring_write(ring, 0);
4717                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4718                 amdgpu_ring_write(ring, 0);
4719         }
4720 }
4721
4722 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4723                                         unsigned vm_id, uint64_t pd_addr)
4724 {
4725         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
4726
4727         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4728         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
4729                                  WRITE_DATA_DST_SEL(0)) |
4730                                  WR_CONFIRM);
4731         if (vm_id < 8) {
4732                 amdgpu_ring_write(ring,
4733                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
4734         } else {
4735                 amdgpu_ring_write(ring,
4736                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
4737         }
4738         amdgpu_ring_write(ring, 0);
4739         amdgpu_ring_write(ring, pd_addr >> 12);
4740
4741         /* bits 0-15 are the VM contexts0-15 */
4742         /* invalidate the cache */
4743         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4744         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4745                                  WRITE_DATA_DST_SEL(0)));
4746         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4747         amdgpu_ring_write(ring, 0);
4748         amdgpu_ring_write(ring, 1 << vm_id);
4749
4750         /* wait for the invalidate to complete */
4751         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4752         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
4753                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
4754                                  WAIT_REG_MEM_ENGINE(0))); /* me */
4755         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4756         amdgpu_ring_write(ring, 0);
4757         amdgpu_ring_write(ring, 0); /* ref */
4758         amdgpu_ring_write(ring, 0); /* mask */
4759         amdgpu_ring_write(ring, 0x20); /* poll interval */
4760
4761         /* compute doesn't have PFP */
4762         if (usepfp) {
4763                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4764                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4765                 amdgpu_ring_write(ring, 0x0);
4766                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4767                 amdgpu_ring_write(ring, 0);
4768                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4769                 amdgpu_ring_write(ring, 0);
4770         }
4771 }
4772
4773 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4774 {
4775         return ring->adev->wb.wb[ring->rptr_offs];
4776 }
4777
4778 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4779 {
4780         return ring->adev->wb.wb[ring->wptr_offs];
4781 }
4782
4783 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4784 {
4785         struct amdgpu_device *adev = ring->adev;
4786
4787         /* XXX check if swapping is necessary on BE */
4788         adev->wb.wb[ring->wptr_offs] = ring->wptr;
4789         WDOORBELL32(ring->doorbell_index, ring->wptr);
4790 }
4791
4792 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
4793                                              u64 addr, u64 seq,
4794                                              unsigned flags)
4795 {
4796         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4797         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4798
4799         /* RELEASE_MEM - flush caches, send int */
4800         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
4801         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4802                                  EOP_TC_ACTION_EN |
4803                                  EOP_TC_WB_ACTION_EN |
4804                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4805                                  EVENT_INDEX(5)));
4806         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4807         amdgpu_ring_write(ring, addr & 0xfffffffc);
4808         amdgpu_ring_write(ring, upper_32_bits(addr));
4809         amdgpu_ring_write(ring, lower_32_bits(seq));
4810         amdgpu_ring_write(ring, upper_32_bits(seq));
4811 }
4812
4813 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4814                                                  enum amdgpu_interrupt_state state)
4815 {
4816         u32 cp_int_cntl;
4817
4818         switch (state) {
4819         case AMDGPU_IRQ_STATE_DISABLE:
4820                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4821                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4822                                             TIME_STAMP_INT_ENABLE, 0);
4823                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4824                 break;
4825         case AMDGPU_IRQ_STATE_ENABLE:
4826                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4827                 cp_int_cntl =
4828                         REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4829                                       TIME_STAMP_INT_ENABLE, 1);
4830                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4831                 break;
4832         default:
4833                 break;
4834         }
4835 }
4836
4837 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4838                                                      int me, int pipe,
4839                                                      enum amdgpu_interrupt_state state)
4840 {
4841         u32 mec_int_cntl, mec_int_cntl_reg;
4842
4843         /*
4844          * amdgpu controls only pipe 0 of MEC1. That's why this function only
4845          * handles the setting of interrupts for this specific pipe. All other
4846          * pipes' interrupts are set by amdkfd.
4847          */
4848
4849         if (me == 1) {
4850                 switch (pipe) {
4851                 case 0:
4852                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4853                         break;
4854                 default:
4855                         DRM_DEBUG("invalid pipe %d\n", pipe);
4856                         return;
4857                 }
4858         } else {
4859                 DRM_DEBUG("invalid me %d\n", me);
4860                 return;
4861         }
4862
4863         switch (state) {
4864         case AMDGPU_IRQ_STATE_DISABLE:
4865                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4866                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4867                                              TIME_STAMP_INT_ENABLE, 0);
4868                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4869                 break;
4870         case AMDGPU_IRQ_STATE_ENABLE:
4871                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4872                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4873                                              TIME_STAMP_INT_ENABLE, 1);
4874                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4875                 break;
4876         default:
4877                 break;
4878         }
4879 }
4880
4881 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4882                                              struct amdgpu_irq_src *source,
4883                                              unsigned type,
4884                                              enum amdgpu_interrupt_state state)
4885 {
4886         u32 cp_int_cntl;
4887
4888         switch (state) {
4889         case AMDGPU_IRQ_STATE_DISABLE:
4890                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4891                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4892                                             PRIV_REG_INT_ENABLE, 0);
4893                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4894                 break;
4895         case AMDGPU_IRQ_STATE_ENABLE:
4896                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4897                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4898                                             PRIV_REG_INT_ENABLE, 1);
4899                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4900                 break;
4901         default:
4902                 break;
4903         }
4904
4905         return 0;
4906 }
4907
4908 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4909                                               struct amdgpu_irq_src *source,
4910                                               unsigned type,
4911                                               enum amdgpu_interrupt_state state)
4912 {
4913         u32 cp_int_cntl;
4914
4915         switch (state) {
4916         case AMDGPU_IRQ_STATE_DISABLE:
4917                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4918                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4919                                             PRIV_INSTR_INT_ENABLE, 0);
4920                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4921                 break;
4922         case AMDGPU_IRQ_STATE_ENABLE:
4923                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4924                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4925                                             PRIV_INSTR_INT_ENABLE, 1);
4926                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4927                 break;
4928         default:
4929                 break;
4930         }
4931
4932         return 0;
4933 }
4934
4935 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4936                                             struct amdgpu_irq_src *src,
4937                                             unsigned type,
4938                                             enum amdgpu_interrupt_state state)
4939 {
4940         switch (type) {
4941         case AMDGPU_CP_IRQ_GFX_EOP:
4942                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
4943                 break;
4944         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4945                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4946                 break;
4947         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4948                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4949                 break;
4950         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4951                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4952                 break;
4953         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4954                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4955                 break;
4956         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4957                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4958                 break;
4959         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4960                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4961                 break;
4962         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4963                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4964                 break;
4965         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4966                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
4967                 break;
4968         default:
4969                 break;
4970         }
4971         return 0;
4972 }
4973
4974 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
4975                             struct amdgpu_irq_src *source,
4976                             struct amdgpu_iv_entry *entry)
4977 {
4978         int i;
4979         u8 me_id, pipe_id, queue_id;
4980         struct amdgpu_ring *ring;
4981
4982         DRM_DEBUG("IH: CP EOP\n");
4983         me_id = (entry->ring_id & 0x0c) >> 2;
4984         pipe_id = (entry->ring_id & 0x03) >> 0;
4985         queue_id = (entry->ring_id & 0x70) >> 4;
4986
4987         switch (me_id) {
4988         case 0:
4989                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4990                 break;
4991         case 1:
4992         case 2:
4993                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4994                         ring = &adev->gfx.compute_ring[i];
4995                         /* Per-queue interrupt is supported for MEC starting from VI.
4996                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
4997                           */
4998                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
4999                                 amdgpu_fence_process(ring);
5000                 }
5001                 break;
5002         }
5003         return 0;
5004 }
5005
5006 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
5007                                  struct amdgpu_irq_src *source,
5008                                  struct amdgpu_iv_entry *entry)
5009 {
5010         DRM_ERROR("Illegal register access in command stream\n");
5011         schedule_work(&adev->reset_work);
5012         return 0;
5013 }
5014
5015 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
5016                                   struct amdgpu_irq_src *source,
5017                                   struct amdgpu_iv_entry *entry)
5018 {
5019         DRM_ERROR("Illegal instruction in command stream\n");
5020         schedule_work(&adev->reset_work);
5021         return 0;
5022 }
5023
5024 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
5025         .early_init = gfx_v8_0_early_init,
5026         .late_init = gfx_v8_0_late_init,
5027         .sw_init = gfx_v8_0_sw_init,
5028         .sw_fini = gfx_v8_0_sw_fini,
5029         .hw_init = gfx_v8_0_hw_init,
5030         .hw_fini = gfx_v8_0_hw_fini,
5031         .suspend = gfx_v8_0_suspend,
5032         .resume = gfx_v8_0_resume,
5033         .is_idle = gfx_v8_0_is_idle,
5034         .wait_for_idle = gfx_v8_0_wait_for_idle,
5035         .soft_reset = gfx_v8_0_soft_reset,
5036         .print_status = gfx_v8_0_print_status,
5037         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
5038         .set_powergating_state = gfx_v8_0_set_powergating_state,
5039 };
5040
5041 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
5042         .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
5043         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
5044         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
5045         .parse_cs = NULL,
5046         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
5047         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
5048         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
5049         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5050         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5051         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5052         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
5053         .test_ring = gfx_v8_0_ring_test_ring,
5054         .test_ib = gfx_v8_0_ring_test_ib,
5055         .insert_nop = amdgpu_ring_insert_nop,
5056         .pad_ib = amdgpu_ring_generic_pad_ib,
5057 };
5058
5059 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
5060         .get_rptr = gfx_v8_0_ring_get_rptr_compute,
5061         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
5062         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
5063         .parse_cs = NULL,
5064         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
5065         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
5066         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
5067         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5068         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5069         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5070         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
5071         .test_ring = gfx_v8_0_ring_test_ring,
5072         .test_ib = gfx_v8_0_ring_test_ib,
5073         .insert_nop = amdgpu_ring_insert_nop,
5074         .pad_ib = amdgpu_ring_generic_pad_ib,
5075 };
5076
5077 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
5078 {
5079         int i;
5080
5081         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5082                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
5083
5084         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5085                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
5086 }
5087
5088 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
5089         .set = gfx_v8_0_set_eop_interrupt_state,
5090         .process = gfx_v8_0_eop_irq,
5091 };
5092
5093 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
5094         .set = gfx_v8_0_set_priv_reg_fault_state,
5095         .process = gfx_v8_0_priv_reg_irq,
5096 };
5097
5098 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
5099         .set = gfx_v8_0_set_priv_inst_fault_state,
5100         .process = gfx_v8_0_priv_inst_irq,
5101 };
5102
5103 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
5104 {
5105         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5106         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
5107
5108         adev->gfx.priv_reg_irq.num_types = 1;
5109         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
5110
5111         adev->gfx.priv_inst_irq.num_types = 1;
5112         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
5113 }
5114
5115 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
5116 {
5117         /* init asci gds info */
5118         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
5119         adev->gds.gws.total_size = 64;
5120         adev->gds.oa.total_size = 16;
5121
5122         if (adev->gds.mem.total_size == 64 * 1024) {
5123                 adev->gds.mem.gfx_partition_size = 4096;
5124                 adev->gds.mem.cs_partition_size = 4096;
5125
5126                 adev->gds.gws.gfx_partition_size = 4;
5127                 adev->gds.gws.cs_partition_size = 4;
5128
5129                 adev->gds.oa.gfx_partition_size = 4;
5130                 adev->gds.oa.cs_partition_size = 1;
5131         } else {
5132                 adev->gds.mem.gfx_partition_size = 1024;
5133                 adev->gds.mem.cs_partition_size = 1024;
5134
5135                 adev->gds.gws.gfx_partition_size = 16;
5136                 adev->gds.gws.cs_partition_size = 16;
5137
5138                 adev->gds.oa.gfx_partition_size = 4;
5139                 adev->gds.oa.cs_partition_size = 4;
5140         }
5141 }
5142
5143 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5144 {
5145         u32 data, mask;
5146
5147         data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5148         data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
5149
5150         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5151         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5152
5153         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
5154
5155         return (~data) & mask;
5156 }
5157
5158 int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
5159                          struct amdgpu_cu_info *cu_info)
5160 {
5161         int i, j, k, counter, active_cu_number = 0;
5162         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5163
5164         if (!adev || !cu_info)
5165                 return -EINVAL;
5166
5167         memset(cu_info, 0, sizeof(*cu_info));
5168
5169         mutex_lock(&adev->grbm_idx_mutex);
5170         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5171                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5172                         mask = 1;
5173                         ao_bitmap = 0;
5174                         counter = 0;
5175                         gfx_v8_0_select_se_sh(adev, i, j);
5176                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
5177                         cu_info->bitmap[i][j] = bitmap;
5178
5179                         for (k = 0; k < 16; k ++) {
5180                                 if (bitmap & mask) {
5181                                         if (counter < 2)
5182                                                 ao_bitmap |= mask;
5183                                         counter ++;
5184                                 }
5185                                 mask <<= 1;
5186                         }
5187                         active_cu_number += counter;
5188                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5189                 }
5190         }
5191         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5192         mutex_unlock(&adev->grbm_idx_mutex);
5193
5194         cu_info->number = active_cu_number;
5195         cu_info->ao_cu_mask = ao_cu_mask;
5196
5197         return 0;
5198 }