/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/firmware.h>

#include "amdgpu_gfx.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"
/* Ring counts exposed by the GFX v8 block: one gfx ring, eight compute rings. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* Per-ASIC golden values for GB_ADDR_CONFIG (memory addressing layout). */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Helpers to place a field value at its shift position in the tiling-mode registers. */
#define ARRAY_MODE(x)	((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)	((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)	((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)	((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)	((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)	((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)	((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)	((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)	((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-block override bits in RLC_CGTT_MGCG_OVERRIDE (clock-gating override). */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* Commands sent to the RLC serdes interface: set vs. clear a BPM register. */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum bpm_reg {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

/* Number of entries in the RLC "direct register list" format. */
#define RLC_FormatDirectRegListLength        14
91 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
92 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
94 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
98 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
99 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
100 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
101 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
104 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
105 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
107 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
111 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
112 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
113 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
114 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
117 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
118 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
119 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
120 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
124 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
125 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
126 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
127 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
131 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
132 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
133 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
138 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
140 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
141 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
142 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
143 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
144 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
145 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
146 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
147 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
148 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
149 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
150 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
151 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
152 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
153 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
154 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
155 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
158 static const u32 golden_settings_tonga_a11[] =
160 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
161 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
162 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
163 mmGB_GPU_ID, 0x0000000f, 0x00000000,
164 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
165 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
166 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
167 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
168 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
169 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
170 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
171 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
172 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
173 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
174 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
177 static const u32 tonga_golden_common_all[] =
179 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
180 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
181 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
182 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
183 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
184 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
185 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
186 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
189 static const u32 tonga_mgcg_cgcg_init[] =
191 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
192 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
193 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
194 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
195 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
196 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
197 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
198 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
199 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
200 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
201 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
202 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
203 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
204 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
205 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
206 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
207 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
208 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
209 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
210 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
211 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
212 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
213 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
214 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
215 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
216 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
217 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
218 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
219 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
220 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
221 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
222 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
223 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
224 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
225 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
226 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
227 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
228 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
229 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
230 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
231 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
232 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
233 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
234 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
235 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
236 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
237 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
238 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
239 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
240 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
241 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
242 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
243 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
244 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
245 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
246 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
247 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
248 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
249 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
250 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
251 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
252 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
253 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
254 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
255 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
256 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
257 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
258 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
259 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
260 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
261 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
262 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
263 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
264 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
265 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
268 static const u32 golden_settings_polaris11_a11[] =
270 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
271 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
272 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
273 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
274 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
275 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
276 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
277 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
278 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
279 mmSQ_CONFIG, 0x07f80000, 0x07180000,
280 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
281 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
282 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
283 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
284 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
287 static const u32 polaris11_golden_common_all[] =
289 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
290 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
291 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
292 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
293 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
294 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
297 static const u32 golden_settings_polaris10_a11[] =
299 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
300 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
301 mmCB_HW_CONTROL_2, 0, 0x0f000000,
302 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
303 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
304 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
305 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
306 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
307 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
308 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
309 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
310 mmSQ_CONFIG, 0x07f80000, 0x07180000,
311 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
312 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
313 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
314 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
317 static const u32 polaris10_golden_common_all[] =
319 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
320 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
321 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
322 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
323 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
324 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
325 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
326 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
329 static const u32 fiji_golden_common_all[] =
331 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
332 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
333 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
334 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
335 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
336 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
337 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
338 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
339 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
340 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
343 static const u32 golden_settings_fiji_a10[] =
345 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
346 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
347 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
348 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
349 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
350 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
351 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
352 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
353 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
354 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
355 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
358 static const u32 fiji_mgcg_cgcg_init[] =
360 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
361 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
362 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
363 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
364 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
365 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
366 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
367 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
368 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
369 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
370 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
371 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
372 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
373 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
374 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
375 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
376 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
377 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
378 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
379 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
380 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
381 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
382 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
383 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
384 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
385 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
386 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
387 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
388 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
389 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
390 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
391 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
392 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
393 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
394 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
397 static const u32 golden_settings_iceland_a11[] =
399 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
400 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
401 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
402 mmGB_GPU_ID, 0x0000000f, 0x00000000,
403 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
404 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
405 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
406 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
407 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
408 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
409 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
410 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
411 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
412 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
413 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
416 static const u32 iceland_golden_common_all[] =
418 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
419 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
420 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
421 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
422 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
423 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
424 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
425 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
428 static const u32 iceland_mgcg_cgcg_init[] =
430 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
431 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
432 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
433 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
434 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
435 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
436 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
437 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
438 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
439 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
440 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
441 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
442 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
443 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
444 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
445 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
446 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
447 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
448 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
449 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
450 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
451 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
452 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
453 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
454 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
455 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
456 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
457 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
458 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
459 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
460 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
461 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
462 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
463 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
464 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
465 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
466 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
467 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
468 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
469 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
470 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
471 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
472 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
473 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
474 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
475 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
476 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
477 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
478 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
479 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
480 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
481 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
482 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
483 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
484 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
485 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
486 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
487 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
488 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
489 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
490 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
491 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
492 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
493 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
496 static const u32 cz_golden_settings_a11[] =
498 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
499 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
500 mmGB_GPU_ID, 0x0000000f, 0x00000000,
501 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
502 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
503 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
504 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
505 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
506 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
507 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
510 static const u32 cz_golden_common_all[] =
512 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
513 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
514 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
515 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
516 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
517 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
518 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
519 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
522 static const u32 cz_mgcg_cgcg_init[] =
524 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
525 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
526 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
527 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
528 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
529 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
530 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
531 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
532 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
533 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
534 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
535 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
536 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
537 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
538 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
539 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
540 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
541 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
542 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
543 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
544 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
545 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
546 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
547 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
548 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
549 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
550 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
551 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
552 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
553 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
554 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
555 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
556 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
557 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
558 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
559 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
560 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
561 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
562 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
563 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
564 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
565 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
566 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
567 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
568 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
569 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
570 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
571 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
572 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
573 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
574 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
575 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
576 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
577 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
578 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
579 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
580 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
581 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
582 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
583 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
584 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
585 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
586 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
587 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
588 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
589 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
590 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
591 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
592 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
593 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
594 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
595 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
596 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
597 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
598 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
601 static const u32 stoney_golden_settings_a11[] =
603 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
604 mmGB_GPU_ID, 0x0000000f, 0x00000000,
605 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
606 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
607 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
608 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
609 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
610 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
611 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
612 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
615 static const u32 stoney_golden_common_all[] =
617 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
618 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
619 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
620 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
621 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
622 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
623 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
624 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
627 static const u32 stoney_mgcg_cgcg_init[] =
629 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
630 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
631 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
632 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
633 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
634 mmATC_MISC_CG, 0xffffffff, 0x000c0200,
637 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
638 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
639 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
640 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
641 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
642 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
644 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
646 switch (adev->asic_type) {
648 amdgpu_program_register_sequence(adev,
649 iceland_mgcg_cgcg_init,
650 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
651 amdgpu_program_register_sequence(adev,
652 golden_settings_iceland_a11,
653 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
654 amdgpu_program_register_sequence(adev,
655 iceland_golden_common_all,
656 (const u32)ARRAY_SIZE(iceland_golden_common_all));
659 amdgpu_program_register_sequence(adev,
661 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
662 amdgpu_program_register_sequence(adev,
663 golden_settings_fiji_a10,
664 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
665 amdgpu_program_register_sequence(adev,
666 fiji_golden_common_all,
667 (const u32)ARRAY_SIZE(fiji_golden_common_all));
671 amdgpu_program_register_sequence(adev,
672 tonga_mgcg_cgcg_init,
673 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
674 amdgpu_program_register_sequence(adev,
675 golden_settings_tonga_a11,
676 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
677 amdgpu_program_register_sequence(adev,
678 tonga_golden_common_all,
679 (const u32)ARRAY_SIZE(tonga_golden_common_all));
682 amdgpu_program_register_sequence(adev,
683 golden_settings_polaris11_a11,
684 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
685 amdgpu_program_register_sequence(adev,
686 polaris11_golden_common_all,
687 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
690 amdgpu_program_register_sequence(adev,
691 golden_settings_polaris10_a11,
692 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
693 amdgpu_program_register_sequence(adev,
694 polaris10_golden_common_all,
695 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
698 amdgpu_program_register_sequence(adev,
700 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
701 amdgpu_program_register_sequence(adev,
702 cz_golden_settings_a11,
703 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
704 amdgpu_program_register_sequence(adev,
705 cz_golden_common_all,
706 (const u32)ARRAY_SIZE(cz_golden_common_all));
709 amdgpu_program_register_sequence(adev,
710 stoney_mgcg_cgcg_init,
711 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
712 amdgpu_program_register_sequence(adev,
713 stoney_golden_settings_a11,
714 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
715 amdgpu_program_register_sequence(adev,
716 stoney_golden_common_all,
717 (const u32)ARRAY_SIZE(stoney_golden_common_all));
724 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
728 adev->gfx.scratch.num_reg = 7;
729 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
730 for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
731 adev->gfx.scratch.free[i] = true;
732 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
736 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
738 struct amdgpu_device *adev = ring->adev;
744 r = amdgpu_gfx_scratch_get(adev, &scratch);
746 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
749 WREG32(scratch, 0xCAFEDEAD);
750 r = amdgpu_ring_alloc(ring, 3);
752 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
754 amdgpu_gfx_scratch_free(adev, scratch);
757 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
758 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
759 amdgpu_ring_write(ring, 0xDEADBEEF);
760 amdgpu_ring_commit(ring);
762 for (i = 0; i < adev->usec_timeout; i++) {
763 tmp = RREG32(scratch);
764 if (tmp == 0xDEADBEEF)
768 if (i < adev->usec_timeout) {
769 DRM_INFO("ring test on %d succeeded in %d usecs\n",
772 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
773 ring->idx, scratch, tmp);
776 amdgpu_gfx_scratch_free(adev, scratch);
780 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
782 struct amdgpu_device *adev = ring->adev;
784 struct fence *f = NULL;
790 r = amdgpu_gfx_scratch_get(adev, &scratch);
792 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
795 WREG32(scratch, 0xCAFEDEAD);
796 memset(&ib, 0, sizeof(ib));
797 r = amdgpu_ib_get(adev, NULL, 256, &ib);
799 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
802 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
803 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
804 ib.ptr[2] = 0xDEADBEEF;
807 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
811 r = fence_wait(f, false);
813 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
816 for (i = 0; i < adev->usec_timeout; i++) {
817 tmp = RREG32(scratch);
818 if (tmp == 0xDEADBEEF)
822 if (i < adev->usec_timeout) {
823 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
827 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
833 amdgpu_ib_free(adev, &ib, NULL);
836 amdgpu_gfx_scratch_free(adev, scratch);
841 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
842 release_firmware(adev->gfx.pfp_fw);
843 adev->gfx.pfp_fw = NULL;
844 release_firmware(adev->gfx.me_fw);
845 adev->gfx.me_fw = NULL;
846 release_firmware(adev->gfx.ce_fw);
847 adev->gfx.ce_fw = NULL;
848 release_firmware(adev->gfx.rlc_fw);
849 adev->gfx.rlc_fw = NULL;
850 release_firmware(adev->gfx.mec_fw);
851 adev->gfx.mec_fw = NULL;
852 if ((adev->asic_type != CHIP_STONEY) &&
853 (adev->asic_type != CHIP_TOPAZ))
854 release_firmware(adev->gfx.mec2_fw);
855 adev->gfx.mec2_fw = NULL;
857 kfree(adev->gfx.rlc.register_list_format);
860 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
862 const char *chip_name;
865 struct amdgpu_firmware_info *info = NULL;
866 const struct common_firmware_header *header = NULL;
867 const struct gfx_firmware_header_v1_0 *cp_hdr;
868 const struct rlc_firmware_header_v2_0 *rlc_hdr;
869 unsigned int *tmp = NULL, i;
873 switch (adev->asic_type) {
881 chip_name = "carrizo";
887 chip_name = "polaris11";
890 chip_name = "polaris10";
893 chip_name = "stoney";
899 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
900 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
903 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
906 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
907 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
908 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
910 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
911 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
914 err = amdgpu_ucode_validate(adev->gfx.me_fw);
917 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
918 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
919 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
921 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
922 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
925 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
928 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
929 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
930 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
932 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
933 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
936 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
937 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
938 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
939 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
941 adev->gfx.rlc.save_and_restore_offset =
942 le32_to_cpu(rlc_hdr->save_and_restore_offset);
943 adev->gfx.rlc.clear_state_descriptor_offset =
944 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
945 adev->gfx.rlc.avail_scratch_ram_locations =
946 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
947 adev->gfx.rlc.reg_restore_list_size =
948 le32_to_cpu(rlc_hdr->reg_restore_list_size);
949 adev->gfx.rlc.reg_list_format_start =
950 le32_to_cpu(rlc_hdr->reg_list_format_start);
951 adev->gfx.rlc.reg_list_format_separate_start =
952 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
953 adev->gfx.rlc.starting_offsets_start =
954 le32_to_cpu(rlc_hdr->starting_offsets_start);
955 adev->gfx.rlc.reg_list_format_size_bytes =
956 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
957 adev->gfx.rlc.reg_list_size_bytes =
958 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
960 adev->gfx.rlc.register_list_format =
961 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
962 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
964 if (!adev->gfx.rlc.register_list_format) {
969 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
970 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
971 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
972 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
974 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
976 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
977 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
978 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
979 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
981 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
982 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
985 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
988 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
989 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
990 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
992 if ((adev->asic_type != CHIP_STONEY) &&
993 (adev->asic_type != CHIP_TOPAZ)) {
994 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
995 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
997 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1000 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1001 adev->gfx.mec2_fw->data;
1002 adev->gfx.mec2_fw_version =
1003 le32_to_cpu(cp_hdr->header.ucode_version);
1004 adev->gfx.mec2_feature_version =
1005 le32_to_cpu(cp_hdr->ucode_feature_version);
1008 adev->gfx.mec2_fw = NULL;
1012 if (adev->firmware.smu_load) {
1013 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1014 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1015 info->fw = adev->gfx.pfp_fw;
1016 header = (const struct common_firmware_header *)info->fw->data;
1017 adev->firmware.fw_size +=
1018 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1020 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1021 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1022 info->fw = adev->gfx.me_fw;
1023 header = (const struct common_firmware_header *)info->fw->data;
1024 adev->firmware.fw_size +=
1025 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1027 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1028 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1029 info->fw = adev->gfx.ce_fw;
1030 header = (const struct common_firmware_header *)info->fw->data;
1031 adev->firmware.fw_size +=
1032 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1034 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1035 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1036 info->fw = adev->gfx.rlc_fw;
1037 header = (const struct common_firmware_header *)info->fw->data;
1038 adev->firmware.fw_size +=
1039 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1041 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1042 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1043 info->fw = adev->gfx.mec_fw;
1044 header = (const struct common_firmware_header *)info->fw->data;
1045 adev->firmware.fw_size +=
1046 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1048 if (adev->gfx.mec2_fw) {
1049 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1050 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1051 info->fw = adev->gfx.mec2_fw;
1052 header = (const struct common_firmware_header *)info->fw->data;
1053 adev->firmware.fw_size +=
1054 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1062 "gfx8: Failed to load firmware \"%s\"\n",
1064 release_firmware(adev->gfx.pfp_fw);
1065 adev->gfx.pfp_fw = NULL;
1066 release_firmware(adev->gfx.me_fw);
1067 adev->gfx.me_fw = NULL;
1068 release_firmware(adev->gfx.ce_fw);
1069 adev->gfx.ce_fw = NULL;
1070 release_firmware(adev->gfx.rlc_fw);
1071 adev->gfx.rlc_fw = NULL;
1072 release_firmware(adev->gfx.mec_fw);
1073 adev->gfx.mec_fw = NULL;
1074 release_firmware(adev->gfx.mec2_fw);
1075 adev->gfx.mec2_fw = NULL;
1080 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1081 volatile u32 *buffer)
1084 const struct cs_section_def *sect = NULL;
1085 const struct cs_extent_def *ext = NULL;
1087 if (adev->gfx.rlc.cs_data == NULL)
1092 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1093 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1095 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1096 buffer[count++] = cpu_to_le32(0x80000000);
1097 buffer[count++] = cpu_to_le32(0x80000000);
1099 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1100 for (ext = sect->section; ext->extent != NULL; ++ext) {
1101 if (sect->id == SECT_CONTEXT) {
1103 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1104 buffer[count++] = cpu_to_le32(ext->reg_index -
1105 PACKET3_SET_CONTEXT_REG_START);
1106 for (i = 0; i < ext->reg_count; i++)
1107 buffer[count++] = cpu_to_le32(ext->extent[i]);
1114 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1115 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1116 PACKET3_SET_CONTEXT_REG_START);
1117 switch (adev->asic_type) {
1119 case CHIP_POLARIS10:
1120 buffer[count++] = cpu_to_le32(0x16000012);
1121 buffer[count++] = cpu_to_le32(0x0000002A);
1123 case CHIP_POLARIS11:
1124 buffer[count++] = cpu_to_le32(0x16000012);
1125 buffer[count++] = cpu_to_le32(0x00000000);
1128 buffer[count++] = cpu_to_le32(0x3a00161a);
1129 buffer[count++] = cpu_to_le32(0x0000002e);
1133 buffer[count++] = cpu_to_le32(0x00000002);
1134 buffer[count++] = cpu_to_le32(0x00000000);
1137 buffer[count++] = cpu_to_le32(0x00000000);
1138 buffer[count++] = cpu_to_le32(0x00000000);
1141 buffer[count++] = cpu_to_le32(0x00000000);
1142 buffer[count++] = cpu_to_le32(0x00000000);
1146 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1147 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1149 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1150 buffer[count++] = cpu_to_le32(0);
1153 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1155 const __le32 *fw_data;
1156 volatile u32 *dst_ptr;
1157 int me, i, max_me = 4;
1159 u32 table_offset, table_size;
1161 if (adev->asic_type == CHIP_CARRIZO)
1164 /* write the cp table buffer */
1165 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1166 for (me = 0; me < max_me; me++) {
1168 const struct gfx_firmware_header_v1_0 *hdr =
1169 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1170 fw_data = (const __le32 *)
1171 (adev->gfx.ce_fw->data +
1172 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1173 table_offset = le32_to_cpu(hdr->jt_offset);
1174 table_size = le32_to_cpu(hdr->jt_size);
1175 } else if (me == 1) {
1176 const struct gfx_firmware_header_v1_0 *hdr =
1177 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1178 fw_data = (const __le32 *)
1179 (adev->gfx.pfp_fw->data +
1180 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1181 table_offset = le32_to_cpu(hdr->jt_offset);
1182 table_size = le32_to_cpu(hdr->jt_size);
1183 } else if (me == 2) {
1184 const struct gfx_firmware_header_v1_0 *hdr =
1185 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1186 fw_data = (const __le32 *)
1187 (adev->gfx.me_fw->data +
1188 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1189 table_offset = le32_to_cpu(hdr->jt_offset);
1190 table_size = le32_to_cpu(hdr->jt_size);
1191 } else if (me == 3) {
1192 const struct gfx_firmware_header_v1_0 *hdr =
1193 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1194 fw_data = (const __le32 *)
1195 (adev->gfx.mec_fw->data +
1196 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1197 table_offset = le32_to_cpu(hdr->jt_offset);
1198 table_size = le32_to_cpu(hdr->jt_size);
1199 } else if (me == 4) {
1200 const struct gfx_firmware_header_v1_0 *hdr =
1201 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1202 fw_data = (const __le32 *)
1203 (adev->gfx.mec2_fw->data +
1204 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1205 table_offset = le32_to_cpu(hdr->jt_offset);
1206 table_size = le32_to_cpu(hdr->jt_size);
1209 for (i = 0; i < table_size; i ++) {
1210 dst_ptr[bo_offset + i] =
1211 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1214 bo_offset += table_size;
1218 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1222 /* clear state block */
1223 if (adev->gfx.rlc.clear_state_obj) {
1224 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1225 if (unlikely(r != 0))
1226 dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1227 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1228 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1230 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1231 adev->gfx.rlc.clear_state_obj = NULL;
1234 /* jump table block */
1235 if (adev->gfx.rlc.cp_table_obj) {
1236 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1237 if (unlikely(r != 0))
1238 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1239 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1240 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1242 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1243 adev->gfx.rlc.cp_table_obj = NULL;
1247 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1249 volatile u32 *dst_ptr;
1251 const struct cs_section_def *cs_data;
1254 adev->gfx.rlc.cs_data = vi_cs_data;
1256 cs_data = adev->gfx.rlc.cs_data;
1259 /* clear state block */
1260 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1262 if (adev->gfx.rlc.clear_state_obj == NULL) {
1263 r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1264 AMDGPU_GEM_DOMAIN_VRAM,
1265 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1267 &adev->gfx.rlc.clear_state_obj);
1269 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1270 gfx_v8_0_rlc_fini(adev);
1274 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1275 if (unlikely(r != 0)) {
1276 gfx_v8_0_rlc_fini(adev);
1279 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1280 &adev->gfx.rlc.clear_state_gpu_addr);
1282 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1283 dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
1284 gfx_v8_0_rlc_fini(adev);
1288 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1290 dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
1291 gfx_v8_0_rlc_fini(adev);
1294 /* set up the cs buffer */
1295 dst_ptr = adev->gfx.rlc.cs_ptr;
1296 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1297 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1298 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1301 if ((adev->asic_type == CHIP_CARRIZO) ||
1302 (adev->asic_type == CHIP_STONEY)) {
1303 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1304 if (adev->gfx.rlc.cp_table_obj == NULL) {
1305 r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1306 AMDGPU_GEM_DOMAIN_VRAM,
1307 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1309 &adev->gfx.rlc.cp_table_obj);
1311 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1316 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1317 if (unlikely(r != 0)) {
1318 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1321 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1322 &adev->gfx.rlc.cp_table_gpu_addr);
1324 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1325 dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r);
1328 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1330 dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1334 cz_init_cp_jump_table(adev);
1336 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1337 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1344 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1348 if (adev->gfx.mec.hpd_eop_obj) {
1349 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1350 if (unlikely(r != 0))
1351 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1352 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1353 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1355 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1356 adev->gfx.mec.hpd_eop_obj = NULL;
1360 #define MEC_HPD_SIZE 2048
1362 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1368 * we assign only 1 pipe because all other pipes will
1371 adev->gfx.mec.num_mec = 1;
1372 adev->gfx.mec.num_pipe = 1;
1373 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1375 if (adev->gfx.mec.hpd_eop_obj == NULL) {
1376 r = amdgpu_bo_create(adev,
1377 adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1379 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1380 &adev->gfx.mec.hpd_eop_obj);
1382 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1387 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1388 if (unlikely(r != 0)) {
1389 gfx_v8_0_mec_fini(adev);
1392 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1393 &adev->gfx.mec.hpd_eop_gpu_addr);
1395 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1396 gfx_v8_0_mec_fini(adev);
1399 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1401 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1402 gfx_v8_0_mec_fini(adev);
1406 memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1408 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1409 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1414 static const u32 vgpr_init_compute_shader[] =
1416 0x7e000209, 0x7e020208,
1417 0x7e040207, 0x7e060206,
1418 0x7e080205, 0x7e0a0204,
1419 0x7e0c0203, 0x7e0e0202,
1420 0x7e100201, 0x7e120200,
1421 0x7e140209, 0x7e160208,
1422 0x7e180207, 0x7e1a0206,
1423 0x7e1c0205, 0x7e1e0204,
1424 0x7e200203, 0x7e220202,
1425 0x7e240201, 0x7e260200,
1426 0x7e280209, 0x7e2a0208,
1427 0x7e2c0207, 0x7e2e0206,
1428 0x7e300205, 0x7e320204,
1429 0x7e340203, 0x7e360202,
1430 0x7e380201, 0x7e3a0200,
1431 0x7e3c0209, 0x7e3e0208,
1432 0x7e400207, 0x7e420206,
1433 0x7e440205, 0x7e460204,
1434 0x7e480203, 0x7e4a0202,
1435 0x7e4c0201, 0x7e4e0200,
1436 0x7e500209, 0x7e520208,
1437 0x7e540207, 0x7e560206,
1438 0x7e580205, 0x7e5a0204,
1439 0x7e5c0203, 0x7e5e0202,
1440 0x7e600201, 0x7e620200,
1441 0x7e640209, 0x7e660208,
1442 0x7e680207, 0x7e6a0206,
1443 0x7e6c0205, 0x7e6e0204,
1444 0x7e700203, 0x7e720202,
1445 0x7e740201, 0x7e760200,
1446 0x7e780209, 0x7e7a0208,
1447 0x7e7c0207, 0x7e7e0206,
1448 0xbf8a0000, 0xbf810000,
1451 static const u32 sgpr_init_compute_shader[] =
1453 0xbe8a0100, 0xbe8c0102,
1454 0xbe8e0104, 0xbe900106,
1455 0xbe920108, 0xbe940100,
1456 0xbe960102, 0xbe980104,
1457 0xbe9a0106, 0xbe9c0108,
1458 0xbe9e0100, 0xbea00102,
1459 0xbea20104, 0xbea40106,
1460 0xbea60108, 0xbea80100,
1461 0xbeaa0102, 0xbeac0104,
1462 0xbeae0106, 0xbeb00108,
1463 0xbeb20100, 0xbeb40102,
1464 0xbeb60104, 0xbeb80106,
1465 0xbeba0108, 0xbebc0100,
1466 0xbebe0102, 0xbec00104,
1467 0xbec20106, 0xbec40108,
1468 0xbec60100, 0xbec80102,
1469 0xbee60004, 0xbee70005,
1470 0xbeea0006, 0xbeeb0007,
1471 0xbee80008, 0xbee90009,
1472 0xbefc0000, 0xbf8a0000,
1473 0xbf810000, 0x00000000,
1476 static const u32 vgpr_init_regs[] =
1478 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1479 mmCOMPUTE_RESOURCE_LIMITS, 0,
1480 mmCOMPUTE_NUM_THREAD_X, 256*4,
1481 mmCOMPUTE_NUM_THREAD_Y, 1,
1482 mmCOMPUTE_NUM_THREAD_Z, 1,
1483 mmCOMPUTE_PGM_RSRC2, 20,
1484 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1485 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1486 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1487 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1488 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1489 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1490 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1491 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1492 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1493 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1496 static const u32 sgpr1_init_regs[] =
1498 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1499 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1500 mmCOMPUTE_NUM_THREAD_X, 256*5,
1501 mmCOMPUTE_NUM_THREAD_Y, 1,
1502 mmCOMPUTE_NUM_THREAD_Z, 1,
1503 mmCOMPUTE_PGM_RSRC2, 20,
1504 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1505 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1506 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1507 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1508 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1509 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1510 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1511 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1512 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1513 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1516 static const u32 sgpr2_init_regs[] =
1518 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1519 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1520 mmCOMPUTE_NUM_THREAD_X, 256*5,
1521 mmCOMPUTE_NUM_THREAD_Y, 1,
1522 mmCOMPUTE_NUM_THREAD_Z, 1,
1523 mmCOMPUTE_PGM_RSRC2, 20,
1524 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1525 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1526 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1527 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1528 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1529 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1530 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1531 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1532 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1533 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1536 static const u32 sec_ded_counter_registers[] =
1539 mmCPC_EDC_SCRATCH_CNT,
1540 mmCPC_EDC_UCODE_CNT,
1547 mmDC_EDC_CSINVOC_CNT,
1548 mmDC_EDC_RESTORE_CNT,
1554 mmSQC_ATC_EDC_GATCL1_CNT,
1560 mmTCP_ATC_EDC_GATCL1_CNT,
1565 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1567 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1568 struct amdgpu_ib ib;
1569 struct fence *f = NULL;
1572 unsigned total_size, vgpr_offset, sgpr_offset;
1575 /* only supported on CZ */
1576 if (adev->asic_type != CHIP_CARRIZO)
1579 /* bail if the compute ring is not ready */
1583 tmp = RREG32(mmGB_EDC_MODE);
1584 WREG32(mmGB_EDC_MODE, 0);
1587 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1589 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1591 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1592 total_size = ALIGN(total_size, 256);
1593 vgpr_offset = total_size;
1594 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1595 sgpr_offset = total_size;
1596 total_size += sizeof(sgpr_init_compute_shader);
1598 /* allocate an indirect buffer to put the commands in */
1599 memset(&ib, 0, sizeof(ib));
1600 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1602 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1606 /* load the compute shaders */
1607 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1608 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1610 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1611 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1613 /* init the ib length to 0 */
1617 /* write the register state for the compute dispatch */
1618 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1619 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1620 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1621 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1623 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1624 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1625 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1626 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1627 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1628 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1630 /* write dispatch packet */
1631 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1632 ib.ptr[ib.length_dw++] = 8; /* x */
1633 ib.ptr[ib.length_dw++] = 1; /* y */
1634 ib.ptr[ib.length_dw++] = 1; /* z */
1635 ib.ptr[ib.length_dw++] =
1636 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1638 /* write CS partial flush packet */
1639 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1640 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1643 /* write the register state for the compute dispatch */
1644 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1645 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1646 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1647 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1649 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1650 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1651 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1652 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1653 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1654 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1656 /* write dispatch packet */
1657 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1658 ib.ptr[ib.length_dw++] = 8; /* x */
1659 ib.ptr[ib.length_dw++] = 1; /* y */
1660 ib.ptr[ib.length_dw++] = 1; /* z */
1661 ib.ptr[ib.length_dw++] =
1662 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1664 /* write CS partial flush packet */
1665 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1666 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1669 /* write the register state for the compute dispatch */
1670 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1671 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1672 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1673 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1675 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1676 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1677 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1678 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1679 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1680 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1682 /* write dispatch packet */
1683 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1684 ib.ptr[ib.length_dw++] = 8; /* x */
1685 ib.ptr[ib.length_dw++] = 1; /* y */
1686 ib.ptr[ib.length_dw++] = 1; /* z */
1687 ib.ptr[ib.length_dw++] =
1688 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1690 /* write CS partial flush packet */
1691 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1692 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1694 /* shedule the ib on the ring */
1695 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1697 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1701 /* wait for the GPU to finish processing the IB */
1702 r = fence_wait(f, false);
1704 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1708 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1709 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1710 WREG32(mmGB_EDC_MODE, tmp);
1712 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1713 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1714 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1717 /* read back registers to clear the counters */
1718 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1719 RREG32(sec_ded_counter_registers[i]);
1723 amdgpu_ib_free(adev, &ib, NULL);
1729 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1732 u32 mc_shared_chmap, mc_arb_ramcfg;
1733 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1737 switch (adev->asic_type) {
1739 adev->gfx.config.max_shader_engines = 1;
1740 adev->gfx.config.max_tile_pipes = 2;
1741 adev->gfx.config.max_cu_per_sh = 6;
1742 adev->gfx.config.max_sh_per_se = 1;
1743 adev->gfx.config.max_backends_per_se = 2;
1744 adev->gfx.config.max_texture_channel_caches = 2;
1745 adev->gfx.config.max_gprs = 256;
1746 adev->gfx.config.max_gs_threads = 32;
1747 adev->gfx.config.max_hw_contexts = 8;
1749 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1750 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1751 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1752 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1753 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1756 adev->gfx.config.max_shader_engines = 4;
1757 adev->gfx.config.max_tile_pipes = 16;
1758 adev->gfx.config.max_cu_per_sh = 16;
1759 adev->gfx.config.max_sh_per_se = 1;
1760 adev->gfx.config.max_backends_per_se = 4;
1761 adev->gfx.config.max_texture_channel_caches = 16;
1762 adev->gfx.config.max_gprs = 256;
1763 adev->gfx.config.max_gs_threads = 32;
1764 adev->gfx.config.max_hw_contexts = 8;
1766 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1767 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1768 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1769 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1770 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1772 case CHIP_POLARIS11:
1773 ret = amdgpu_atombios_get_gfx_info(adev);
1776 adev->gfx.config.max_gprs = 256;
1777 adev->gfx.config.max_gs_threads = 32;
1778 adev->gfx.config.max_hw_contexts = 8;
1780 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1781 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1782 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1783 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1784 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1786 case CHIP_POLARIS10:
1787 ret = amdgpu_atombios_get_gfx_info(adev);
1790 adev->gfx.config.max_gprs = 256;
1791 adev->gfx.config.max_gs_threads = 32;
1792 adev->gfx.config.max_hw_contexts = 8;
1794 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1795 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1796 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1797 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1798 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1801 adev->gfx.config.max_shader_engines = 4;
1802 adev->gfx.config.max_tile_pipes = 8;
1803 adev->gfx.config.max_cu_per_sh = 8;
1804 adev->gfx.config.max_sh_per_se = 1;
1805 adev->gfx.config.max_backends_per_se = 2;
1806 adev->gfx.config.max_texture_channel_caches = 8;
1807 adev->gfx.config.max_gprs = 256;
1808 adev->gfx.config.max_gs_threads = 32;
1809 adev->gfx.config.max_hw_contexts = 8;
1811 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1812 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1813 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1814 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1815 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1818 adev->gfx.config.max_shader_engines = 1;
1819 adev->gfx.config.max_tile_pipes = 2;
1820 adev->gfx.config.max_sh_per_se = 1;
1821 adev->gfx.config.max_backends_per_se = 2;
1823 switch (adev->pdev->revision) {
1831 adev->gfx.config.max_cu_per_sh = 8;
1841 adev->gfx.config.max_cu_per_sh = 6;
1848 adev->gfx.config.max_cu_per_sh = 6;
1857 adev->gfx.config.max_cu_per_sh = 4;
1861 adev->gfx.config.max_texture_channel_caches = 2;
1862 adev->gfx.config.max_gprs = 256;
1863 adev->gfx.config.max_gs_threads = 32;
1864 adev->gfx.config.max_hw_contexts = 8;
1866 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1867 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1868 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1869 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1870 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1873 adev->gfx.config.max_shader_engines = 1;
1874 adev->gfx.config.max_tile_pipes = 2;
1875 adev->gfx.config.max_sh_per_se = 1;
1876 adev->gfx.config.max_backends_per_se = 1;
1878 switch (adev->pdev->revision) {
1885 adev->gfx.config.max_cu_per_sh = 3;
1891 adev->gfx.config.max_cu_per_sh = 2;
1895 adev->gfx.config.max_texture_channel_caches = 2;
1896 adev->gfx.config.max_gprs = 256;
1897 adev->gfx.config.max_gs_threads = 16;
1898 adev->gfx.config.max_hw_contexts = 8;
1900 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1901 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1902 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1903 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1904 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1907 adev->gfx.config.max_shader_engines = 2;
1908 adev->gfx.config.max_tile_pipes = 4;
1909 adev->gfx.config.max_cu_per_sh = 2;
1910 adev->gfx.config.max_sh_per_se = 1;
1911 adev->gfx.config.max_backends_per_se = 2;
1912 adev->gfx.config.max_texture_channel_caches = 4;
1913 adev->gfx.config.max_gprs = 256;
1914 adev->gfx.config.max_gs_threads = 32;
1915 adev->gfx.config.max_hw_contexts = 8;
1917 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1918 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1919 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1920 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1921 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1925 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1926 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1927 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1929 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1930 adev->gfx.config.mem_max_burst_length_bytes = 256;
1931 if (adev->flags & AMD_IS_APU) {
1932 /* Get memory bank mapping mode. */
1933 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1934 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1935 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1937 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1938 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1939 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1941 /* Validate settings in case only one DIMM installed. */
1942 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1943 dimm00_addr_map = 0;
1944 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1945 dimm01_addr_map = 0;
1946 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1947 dimm10_addr_map = 0;
1948 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1949 dimm11_addr_map = 0;
1951 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1952 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1953 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1954 adev->gfx.config.mem_row_size_in_kb = 2;
1956 adev->gfx.config.mem_row_size_in_kb = 1;
1958 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1959 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1960 if (adev->gfx.config.mem_row_size_in_kb > 4)
1961 adev->gfx.config.mem_row_size_in_kb = 4;
1964 adev->gfx.config.shader_engine_tile_size = 32;
1965 adev->gfx.config.num_gpus = 1;
1966 adev->gfx.config.multi_gpu_tile_size = 64;
1968 /* fix up row size */
1969 switch (adev->gfx.config.mem_row_size_in_kb) {
1972 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1975 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1978 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1981 adev->gfx.config.gb_addr_config = gb_addr_config;
/*
 * gfx_v8_0_sw_init - software-side setup for the GFX v8 IP block.
 *
 * @handle: opaque IP-block handle; cast to struct amdgpu_device.
 *
 * Registers the EOP / privileged-register / privileged-instruction
 * interrupt sources, loads microcode, initializes the RLC and MEC BOs,
 * creates the gfx and compute rings, and reserves the GDS/GWS/OA
 * buffer objects. Returns 0 on success or a negative error code.
 *
 * NOTE(review): this extraction is missing several short lines from the
 * original file (opening brace, local declarations such as `int i, r;`,
 * the `if (r) return r;` checks after each failable call, loop closing
 * braces, and the final `return 0;`). Code tokens below are kept
 * byte-identical to the visible source; confirm against the upstream
 * drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c before relying on this body.
 */
1986 static int gfx_v8_0_sw_init(void *handle)
1989 struct amdgpu_ring *ring;
1990 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* EOP (end-of-pipe) interrupt source; 181 is the GFX EOP client id */
1993 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1997 /* Privileged reg */
1998 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
2002 /* Privileged inst */
2003 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2007 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2009 gfx_v8_0_scratch_init(adev);
/* firmware load must precede RLC/MEC init, which consume the ucode */
2011 r = gfx_v8_0_init_microcode(adev);
2013 DRM_ERROR("Failed to load gfx firmware!\n");
2017 r = gfx_v8_0_rlc_init(adev);
2019 DRM_ERROR("Failed to init rlc BOs!\n");
2023 r = gfx_v8_0_mec_init(adev);
2025 DRM_ERROR("Failed to init MEC BOs!\n");
2029 /* set up the gfx ring */
2030 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2031 ring = &adev->gfx.gfx_ring[i];
2032 ring->ring_obj = NULL;
2033 sprintf(ring->name, "gfx");
2034 /* no gfx doorbells on iceland */
2035 if (adev->asic_type != CHIP_TOPAZ) {
2036 ring->use_doorbell = true;
2037 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
/* ring size 1024 dwords; NOP packet used as filler, align mask 0xf */
2040 r = amdgpu_ring_init(adev, ring, 1024,
2041 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2042 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
2043 AMDGPU_RING_TYPE_GFX);
2048 /* set up the compute queues */
2049 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2052 /* max 32 queues per MEC */
2053 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2054 DRM_ERROR("Too many (%d) compute rings!\n", i);
2057 ring = &adev->gfx.compute_ring[i];
2058 ring->ring_obj = NULL;
2059 ring->use_doorbell = true;
2060 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2061 ring->me = 1; /* first MEC */
/* 8 queues per pipe on this generation; queue index wraps mod 8 */
2063 ring->queue = i % 8;
2064 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2065 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2066 /* type-2 packets are deprecated on MEC, use type-3 instead */
2067 r = amdgpu_ring_init(adev, ring, 1024,
2068 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2069 &adev->gfx.eop_irq, irq_type,
2070 AMDGPU_RING_TYPE_COMPUTE);
2075 /* reserve GDS, GWS and OA resource for gfx */
2076 r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
2078 AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
2079 NULL, &adev->gds.gds_gfx_bo);
2083 r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
2085 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
2086 NULL, &adev->gds.gws_gfx_bo);
2090 r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
2092 AMDGPU_GEM_DOMAIN_OA, 0, NULL,
2093 NULL, &adev->gds.oa_gfx_bo);
/* constant-engine RAM size is fixed at 32 KiB on gfx8 */
2097 adev->gfx.ce_ram_size = 0x8000;
2099 r = gfx_v8_0_gpu_early_init(adev);
/*
 * gfx_v8_0_sw_fini - software-side teardown for the GFX v8 IP block.
 *
 * @handle: opaque IP-block handle; cast to struct amdgpu_device.
 *
 * Releases everything gfx_v8_0_sw_init() created, in reverse order:
 * the GDS/GWS/OA buffer objects, then the gfx and compute rings, then
 * the MEC and RLC BOs, and finally the loaded microcode.
 *
 * NOTE(review): the extraction dropped short lines here too (opening
 * brace, `int i;`, `return 0;`, closing brace). Code tokens are kept
 * byte-identical to the visible source; verify against the upstream
 * file before relying on this body.
 */
2106 static int gfx_v8_0_sw_fini(void *handle)
2109 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* drop the GDS-domain reservations taken in sw_init (reverse order) */
2111 amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
2112 amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
2113 amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
/* tear down all gfx and compute rings created by amdgpu_ring_init() */
2115 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2116 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2117 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2118 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2120 gfx_v8_0_mec_fini(adev);
2122 gfx_v8_0_rlc_fini(adev);
2124 gfx_v8_0_free_microcode(adev);
2129 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2131 uint32_t *modearray, *mod2array;
2132 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2133 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2136 modearray = adev->gfx.config.tile_mode_array;
2137 mod2array = adev->gfx.config.macrotile_mode_array;
2139 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2140 modearray[reg_offset] = 0;
2142 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2143 mod2array[reg_offset] = 0;
2145 switch (adev->asic_type) {
2147 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2148 PIPE_CONFIG(ADDR_SURF_P2) |
2149 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2150 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2151 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2152 PIPE_CONFIG(ADDR_SURF_P2) |
2153 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2154 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2155 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2156 PIPE_CONFIG(ADDR_SURF_P2) |
2157 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2158 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2159 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2160 PIPE_CONFIG(ADDR_SURF_P2) |
2161 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2162 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2163 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2164 PIPE_CONFIG(ADDR_SURF_P2) |
2165 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2166 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2167 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2168 PIPE_CONFIG(ADDR_SURF_P2) |
2169 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2170 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2171 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2172 PIPE_CONFIG(ADDR_SURF_P2) |
2173 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2174 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2175 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2176 PIPE_CONFIG(ADDR_SURF_P2));
2177 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2178 PIPE_CONFIG(ADDR_SURF_P2) |
2179 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2180 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2181 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2182 PIPE_CONFIG(ADDR_SURF_P2) |
2183 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2184 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2185 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2186 PIPE_CONFIG(ADDR_SURF_P2) |
2187 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2188 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2189 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2190 PIPE_CONFIG(ADDR_SURF_P2) |
2191 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2192 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2193 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2194 PIPE_CONFIG(ADDR_SURF_P2) |
2195 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2197 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2198 PIPE_CONFIG(ADDR_SURF_P2) |
2199 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2200 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2201 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2202 PIPE_CONFIG(ADDR_SURF_P2) |
2203 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2204 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2205 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2206 PIPE_CONFIG(ADDR_SURF_P2) |
2207 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2209 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2210 PIPE_CONFIG(ADDR_SURF_P2) |
2211 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2212 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2213 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2214 PIPE_CONFIG(ADDR_SURF_P2) |
2215 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2216 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2217 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2218 PIPE_CONFIG(ADDR_SURF_P2) |
2219 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2220 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2221 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2222 PIPE_CONFIG(ADDR_SURF_P2) |
2223 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2224 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2225 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2226 PIPE_CONFIG(ADDR_SURF_P2) |
2227 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2228 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2229 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2230 PIPE_CONFIG(ADDR_SURF_P2) |
2231 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2232 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2233 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2234 PIPE_CONFIG(ADDR_SURF_P2) |
2235 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2236 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2237 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2238 PIPE_CONFIG(ADDR_SURF_P2) |
2239 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2240 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2241 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2242 PIPE_CONFIG(ADDR_SURF_P2) |
2243 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2244 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2245 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2246 PIPE_CONFIG(ADDR_SURF_P2) |
2247 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2248 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2250 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2251 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2252 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2253 NUM_BANKS(ADDR_SURF_8_BANK));
2254 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2257 NUM_BANKS(ADDR_SURF_8_BANK));
2258 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2259 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2260 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2261 NUM_BANKS(ADDR_SURF_8_BANK));
2262 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2263 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2264 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2265 NUM_BANKS(ADDR_SURF_8_BANK));
2266 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2267 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2268 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2269 NUM_BANKS(ADDR_SURF_8_BANK));
2270 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2271 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2272 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2273 NUM_BANKS(ADDR_SURF_8_BANK));
2274 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2275 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2276 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2277 NUM_BANKS(ADDR_SURF_8_BANK));
2278 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2279 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2280 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2281 NUM_BANKS(ADDR_SURF_16_BANK));
2282 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2283 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2284 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2285 NUM_BANKS(ADDR_SURF_16_BANK));
2286 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2287 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2288 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2289 NUM_BANKS(ADDR_SURF_16_BANK));
2290 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2291 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2292 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2293 NUM_BANKS(ADDR_SURF_16_BANK));
2294 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2296 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2297 NUM_BANKS(ADDR_SURF_16_BANK));
2298 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2299 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2300 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2301 NUM_BANKS(ADDR_SURF_16_BANK));
2302 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2303 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2304 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2305 NUM_BANKS(ADDR_SURF_8_BANK));
2307 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2308 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2310 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2312 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2313 if (reg_offset != 7)
2314 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2318 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2319 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2321 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2322 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2325 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2326 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2329 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2330 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2333 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2334 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2335 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2337 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2338 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2339 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2340 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2341 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2342 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2343 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2344 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2345 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2346 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2347 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2348 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2349 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2350 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2351 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2352 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2353 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2355 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2356 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2357 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2359 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2360 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2361 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2363 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2364 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2365 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2366 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2367 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2368 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2369 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2371 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2372 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2373 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2375 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2376 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2377 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2379 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2380 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2381 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2383 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2384 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2385 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2386 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2387 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2388 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2389 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2391 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2392 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2393 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2395 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2396 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2397 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2399 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2400 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2401 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2403 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2404 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2405 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2407 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2408 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2409 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2410 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2411 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2412 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2413 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2415 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2416 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2417 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2419 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2420 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2421 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2423 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2424 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2425 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2427 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2428 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2431 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2433 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2435 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2436 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2438 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2439 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2441 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2443 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444 NUM_BANKS(ADDR_SURF_8_BANK));
2445 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448 NUM_BANKS(ADDR_SURF_8_BANK));
2449 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452 NUM_BANKS(ADDR_SURF_8_BANK));
2453 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2455 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456 NUM_BANKS(ADDR_SURF_8_BANK));
2457 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2459 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460 NUM_BANKS(ADDR_SURF_8_BANK));
2461 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464 NUM_BANKS(ADDR_SURF_8_BANK));
2465 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468 NUM_BANKS(ADDR_SURF_8_BANK));
2469 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2471 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2472 NUM_BANKS(ADDR_SURF_8_BANK));
2473 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2475 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476 NUM_BANKS(ADDR_SURF_8_BANK));
2477 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2479 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2480 NUM_BANKS(ADDR_SURF_8_BANK));
2481 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484 NUM_BANKS(ADDR_SURF_8_BANK));
2485 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2487 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2488 NUM_BANKS(ADDR_SURF_8_BANK));
2489 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2492 NUM_BANKS(ADDR_SURF_8_BANK));
2493 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496 NUM_BANKS(ADDR_SURF_4_BANK));
2498 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2499 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2501 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2502 if (reg_offset != 7)
2503 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2507 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2508 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2510 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2511 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2513 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2514 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2515 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2517 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2518 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2519 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2520 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2522 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2524 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2525 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2526 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2527 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2528 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2529 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2530 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2531 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2532 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2533 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2534 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2535 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2536 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2537 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2538 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2539 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2540 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2541 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2542 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2544 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2545 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2548 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2549 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2550 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2552 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2553 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2554 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2555 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2556 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2557 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2560 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2561 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2562 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2564 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2565 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2566 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2568 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2569 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2570 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2572 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2573 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2574 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2575 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2576 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2577 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2578 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2580 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2581 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2582 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2584 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2585 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2586 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2588 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2589 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2590 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2592 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2593 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2594 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2596 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2597 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2598 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2599 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2600 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2601 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2602 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2604 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2605 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2606 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2608 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2609 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2610 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2612 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2613 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2614 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2616 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2617 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2620 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2621 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2622 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2623 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2624 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2625 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2626 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2627 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2628 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2630 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2632 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2633 NUM_BANKS(ADDR_SURF_16_BANK));
2634 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2636 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2637 NUM_BANKS(ADDR_SURF_16_BANK));
2638 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2640 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2641 NUM_BANKS(ADDR_SURF_16_BANK));
2642 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2644 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2645 NUM_BANKS(ADDR_SURF_16_BANK));
2646 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2648 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2649 NUM_BANKS(ADDR_SURF_16_BANK));
2650 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2652 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2653 NUM_BANKS(ADDR_SURF_16_BANK));
2654 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2656 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2657 NUM_BANKS(ADDR_SURF_16_BANK));
2658 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2660 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2661 NUM_BANKS(ADDR_SURF_16_BANK));
2662 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2663 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2664 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2665 NUM_BANKS(ADDR_SURF_16_BANK));
2666 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2668 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2669 NUM_BANKS(ADDR_SURF_16_BANK));
2670 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2672 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2673 NUM_BANKS(ADDR_SURF_16_BANK));
2674 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2675 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2676 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2677 NUM_BANKS(ADDR_SURF_8_BANK));
2678 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2681 NUM_BANKS(ADDR_SURF_4_BANK));
2682 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2683 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2684 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2685 NUM_BANKS(ADDR_SURF_4_BANK));
2687 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2688 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2690 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2691 if (reg_offset != 7)
2692 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2695 case CHIP_POLARIS11:
2696 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2697 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2699 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2700 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2701 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2703 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2704 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2707 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2708 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2710 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2711 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2712 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2713 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2714 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2715 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2716 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2717 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2719 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2720 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2721 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2723 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2724 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2725 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2727 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2728 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2729 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2730 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2731 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2733 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2734 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2735 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2736 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2737 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2738 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2739 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2740 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2741 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2742 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2743 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2744 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2745 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2746 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2747 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2748 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2749 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2750 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2751 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2752 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2753 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2754 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2755 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2756 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2757 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2758 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2759 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2760 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2761 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2762 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2763 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2764 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2765 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2766 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2767 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2768 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2769 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2770 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2771 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2772 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2773 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2774 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2775 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2776 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2777 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2778 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2779 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2780 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2781 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2782 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2783 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2784 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2785 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2786 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2787 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2788 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2789 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2790 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2791 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2792 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2793 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2794 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2795 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2796 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2797 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2798 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2799 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2800 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2801 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2802 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2803 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2804 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2806 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2807 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2808 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2810 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2811 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2812 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2813 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2814 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2815 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2816 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2817 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2819 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2820 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2821 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2822 NUM_BANKS(ADDR_SURF_16_BANK));
2824 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2826 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2827 NUM_BANKS(ADDR_SURF_16_BANK));
2829 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2831 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2832 NUM_BANKS(ADDR_SURF_16_BANK));
2834 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2836 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2837 NUM_BANKS(ADDR_SURF_16_BANK));
2839 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2840 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2841 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2842 NUM_BANKS(ADDR_SURF_16_BANK));
2844 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847 NUM_BANKS(ADDR_SURF_16_BANK));
2849 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2852 NUM_BANKS(ADDR_SURF_16_BANK));
2854 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2855 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2856 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2857 NUM_BANKS(ADDR_SURF_16_BANK));
2859 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2860 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2861 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2862 NUM_BANKS(ADDR_SURF_16_BANK));
2864 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2865 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2866 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2867 NUM_BANKS(ADDR_SURF_16_BANK));
2869 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2871 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2872 NUM_BANKS(ADDR_SURF_16_BANK));
2874 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2875 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2876 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2877 NUM_BANKS(ADDR_SURF_16_BANK));
2879 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2880 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2881 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2882 NUM_BANKS(ADDR_SURF_8_BANK));
2884 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2885 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2886 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2887 NUM_BANKS(ADDR_SURF_4_BANK));
2889 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2890 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2892 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2893 if (reg_offset != 7)
2894 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2897 case CHIP_POLARIS10:
2898 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2899 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2900 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2901 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2902 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2903 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2904 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2905 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2906 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2907 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2908 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2909 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2910 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2912 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2913 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2914 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2915 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2916 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2917 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2918 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2919 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2920 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2921 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2922 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2923 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2924 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2925 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2926 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2927 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2928 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2929 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2930 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2931 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2932 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2933 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2934 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2935 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2937 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2938 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2939 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2941 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2942 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2943 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2944 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2945 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2946 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2947 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2948 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2949 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2950 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2951 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2952 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2953 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2954 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2955 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2956 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2957 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2958 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2959 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2960 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2961 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2962 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2963 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2964 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2965 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2966 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2967 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2968 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2969 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2970 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2971 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2972 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2973 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2974 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2975 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2976 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2977 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2978 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2979 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2980 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2981 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2982 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2983 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2984 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2985 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2986 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2987 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2988 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2989 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2990 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2991 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2992 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2993 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2994 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2995 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2996 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2997 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2998 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2999 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3000 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3001 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3002 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3003 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3004 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3005 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3006 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3007 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3008 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3009 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3010 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3011 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3012 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3013 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3014 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3015 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3016 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3017 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3018 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3019 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3021 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3023 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3024 NUM_BANKS(ADDR_SURF_16_BANK));
3026 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3027 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3028 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3029 NUM_BANKS(ADDR_SURF_16_BANK));
3031 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3032 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3033 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3034 NUM_BANKS(ADDR_SURF_16_BANK));
3036 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3038 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3039 NUM_BANKS(ADDR_SURF_16_BANK));
3041 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3042 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3043 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3044 NUM_BANKS(ADDR_SURF_16_BANK));
3046 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3048 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3049 NUM_BANKS(ADDR_SURF_16_BANK));
3051 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3052 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3053 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3054 NUM_BANKS(ADDR_SURF_16_BANK));
3056 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3057 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3058 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3059 NUM_BANKS(ADDR_SURF_16_BANK));
3061 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3062 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3063 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3064 NUM_BANKS(ADDR_SURF_16_BANK));
3066 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3067 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3068 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3069 NUM_BANKS(ADDR_SURF_16_BANK));
3071 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3072 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3073 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3074 NUM_BANKS(ADDR_SURF_16_BANK));
3076 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3077 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3078 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3079 NUM_BANKS(ADDR_SURF_8_BANK));
3081 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3083 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3084 NUM_BANKS(ADDR_SURF_4_BANK));
3086 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3087 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3088 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3089 NUM_BANKS(ADDR_SURF_4_BANK));
3091 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3092 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3094 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3095 if (reg_offset != 7)
3096 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3100 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3101 PIPE_CONFIG(ADDR_SURF_P2) |
3102 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3103 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3104 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3105 PIPE_CONFIG(ADDR_SURF_P2) |
3106 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3107 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3108 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3109 PIPE_CONFIG(ADDR_SURF_P2) |
3110 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3111 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3112 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3113 PIPE_CONFIG(ADDR_SURF_P2) |
3114 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3115 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3116 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3117 PIPE_CONFIG(ADDR_SURF_P2) |
3118 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3119 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3120 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3121 PIPE_CONFIG(ADDR_SURF_P2) |
3122 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3123 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3124 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3125 PIPE_CONFIG(ADDR_SURF_P2) |
3126 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3127 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3128 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3129 PIPE_CONFIG(ADDR_SURF_P2));
3130 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3131 PIPE_CONFIG(ADDR_SURF_P2) |
3132 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3133 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3134 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3135 PIPE_CONFIG(ADDR_SURF_P2) |
3136 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3137 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3138 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3139 PIPE_CONFIG(ADDR_SURF_P2) |
3140 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3141 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3142 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3143 PIPE_CONFIG(ADDR_SURF_P2) |
3144 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3145 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3146 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3147 PIPE_CONFIG(ADDR_SURF_P2) |
3148 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3149 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3150 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3151 PIPE_CONFIG(ADDR_SURF_P2) |
3152 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3153 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3154 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3155 PIPE_CONFIG(ADDR_SURF_P2) |
3156 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3157 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3158 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3159 PIPE_CONFIG(ADDR_SURF_P2) |
3160 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3161 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3162 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3163 PIPE_CONFIG(ADDR_SURF_P2) |
3164 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3165 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3166 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3167 PIPE_CONFIG(ADDR_SURF_P2) |
3168 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3169 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3170 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3171 PIPE_CONFIG(ADDR_SURF_P2) |
3172 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3173 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3174 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3175 PIPE_CONFIG(ADDR_SURF_P2) |
3176 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3177 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3178 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3179 PIPE_CONFIG(ADDR_SURF_P2) |
3180 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3181 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3182 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3183 PIPE_CONFIG(ADDR_SURF_P2) |
3184 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3185 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3186 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3187 PIPE_CONFIG(ADDR_SURF_P2) |
3188 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3189 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3190 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3191 PIPE_CONFIG(ADDR_SURF_P2) |
3192 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3193 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3194 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3195 PIPE_CONFIG(ADDR_SURF_P2) |
3196 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3197 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3198 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3199 PIPE_CONFIG(ADDR_SURF_P2) |
3200 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3201 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3203 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3204 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3205 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3206 NUM_BANKS(ADDR_SURF_8_BANK));
3207 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3208 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3209 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3210 NUM_BANKS(ADDR_SURF_8_BANK));
3211 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3212 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3213 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3214 NUM_BANKS(ADDR_SURF_8_BANK));
3215 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3216 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3217 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3218 NUM_BANKS(ADDR_SURF_8_BANK));
3219 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3220 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3221 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3222 NUM_BANKS(ADDR_SURF_8_BANK));
3223 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3224 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3225 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3226 NUM_BANKS(ADDR_SURF_8_BANK));
3227 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3228 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3229 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3230 NUM_BANKS(ADDR_SURF_8_BANK));
3231 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3232 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3233 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3234 NUM_BANKS(ADDR_SURF_16_BANK));
3235 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3236 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3237 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3238 NUM_BANKS(ADDR_SURF_16_BANK));
3239 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3240 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3241 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3242 NUM_BANKS(ADDR_SURF_16_BANK));
3243 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3244 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3245 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3246 NUM_BANKS(ADDR_SURF_16_BANK));
3247 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3248 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3249 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3250 NUM_BANKS(ADDR_SURF_16_BANK));
3251 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3252 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3253 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3254 NUM_BANKS(ADDR_SURF_16_BANK));
3255 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3256 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3257 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3258 NUM_BANKS(ADDR_SURF_8_BANK));
3260 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3261 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3263 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3265 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3266 if (reg_offset != 7)
3267 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3272 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3276 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3277 PIPE_CONFIG(ADDR_SURF_P2) |
3278 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3279 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3280 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3281 PIPE_CONFIG(ADDR_SURF_P2) |
3282 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3283 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3284 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3285 PIPE_CONFIG(ADDR_SURF_P2) |
3286 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3287 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3288 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3289 PIPE_CONFIG(ADDR_SURF_P2) |
3290 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3291 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3292 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3293 PIPE_CONFIG(ADDR_SURF_P2) |
3294 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3295 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3296 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3297 PIPE_CONFIG(ADDR_SURF_P2) |
3298 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3299 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3300 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3301 PIPE_CONFIG(ADDR_SURF_P2) |
3302 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3303 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3304 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3305 PIPE_CONFIG(ADDR_SURF_P2));
3306 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3307 PIPE_CONFIG(ADDR_SURF_P2) |
3308 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3310 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3311 PIPE_CONFIG(ADDR_SURF_P2) |
3312 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3314 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3315 PIPE_CONFIG(ADDR_SURF_P2) |
3316 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3318 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3319 PIPE_CONFIG(ADDR_SURF_P2) |
3320 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3322 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3323 PIPE_CONFIG(ADDR_SURF_P2) |
3324 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3326 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3327 PIPE_CONFIG(ADDR_SURF_P2) |
3328 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3330 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3331 PIPE_CONFIG(ADDR_SURF_P2) |
3332 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3333 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3334 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3335 PIPE_CONFIG(ADDR_SURF_P2) |
3336 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3337 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3338 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3339 PIPE_CONFIG(ADDR_SURF_P2) |
3340 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3341 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3342 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3343 PIPE_CONFIG(ADDR_SURF_P2) |
3344 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3345 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3346 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3347 PIPE_CONFIG(ADDR_SURF_P2) |
3348 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3349 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3350 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3351 PIPE_CONFIG(ADDR_SURF_P2) |
3352 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3353 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3354 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3355 PIPE_CONFIG(ADDR_SURF_P2) |
3356 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3357 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3358 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3359 PIPE_CONFIG(ADDR_SURF_P2) |
3360 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3361 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3362 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3363 PIPE_CONFIG(ADDR_SURF_P2) |
3364 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3365 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3366 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3367 PIPE_CONFIG(ADDR_SURF_P2) |
3368 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3369 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3370 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3371 PIPE_CONFIG(ADDR_SURF_P2) |
3372 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3373 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3374 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3375 PIPE_CONFIG(ADDR_SURF_P2) |
3376 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3377 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3379 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3382 NUM_BANKS(ADDR_SURF_8_BANK));
3383 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3386 NUM_BANKS(ADDR_SURF_8_BANK));
3387 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3388 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3389 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3390 NUM_BANKS(ADDR_SURF_8_BANK));
3391 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3392 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3393 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3394 NUM_BANKS(ADDR_SURF_8_BANK));
3395 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3396 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3397 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3398 NUM_BANKS(ADDR_SURF_8_BANK));
3399 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3400 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3401 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3402 NUM_BANKS(ADDR_SURF_8_BANK));
3403 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3404 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3405 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3406 NUM_BANKS(ADDR_SURF_8_BANK));
3407 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3408 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3409 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3410 NUM_BANKS(ADDR_SURF_16_BANK));
3411 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3412 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3413 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3414 NUM_BANKS(ADDR_SURF_16_BANK));
3415 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3416 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3417 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3418 NUM_BANKS(ADDR_SURF_16_BANK));
3419 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3420 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3421 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3422 NUM_BANKS(ADDR_SURF_16_BANK));
3423 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3424 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3425 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3426 NUM_BANKS(ADDR_SURF_16_BANK));
3427 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3428 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3429 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3430 NUM_BANKS(ADDR_SURF_16_BANK));
3431 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3432 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3433 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3434 NUM_BANKS(ADDR_SURF_8_BANK));
3436 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3437 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3439 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3441 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3442 if (reg_offset != 7)
3443 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3449 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
3451 u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3453 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3454 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3455 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3456 } else if (se_num == 0xffffffff) {
3457 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3458 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3459 } else if (sh_num == 0xffffffff) {
3460 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3461 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3463 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3464 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3466 WREG32(mmGRBM_GFX_INDEX, data);
3469 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3471 return (u32)((1ULL << bit_width) - 1);
3474 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3478 data = RREG32(mmCC_RB_BACKEND_DISABLE);
3479 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3481 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3482 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3484 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3485 adev->gfx.config.max_sh_per_se);
3487 return (~data) & mask;
3490 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3495 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3496 adev->gfx.config.max_sh_per_se;
3498 mutex_lock(&adev->grbm_idx_mutex);
3499 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3500 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3501 gfx_v8_0_select_se_sh(adev, i, j);
3502 data = gfx_v8_0_get_rb_active_bitmap(adev);
3503 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3504 rb_bitmap_width_per_sh);
3507 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3508 mutex_unlock(&adev->grbm_idx_mutex);
3510 adev->gfx.config.backend_enable_mask = active_rbs;
3511 adev->gfx.config.num_rbs = hweight32(active_rbs);
3515 * gfx_v8_0_init_compute_vmid - gart enable
3517 * @rdev: amdgpu_device pointer
3519 * Initialize compute vmid sh_mem registers
3522 #define DEFAULT_SH_MEM_BASES (0x6000)
3523 #define FIRST_COMPUTE_VMID (8)
3524 #define LAST_COMPUTE_VMID (16)
3525 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3528 uint32_t sh_mem_config;
3529 uint32_t sh_mem_bases;
3532 * Configure apertures:
3533 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3534 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3535 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3537 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3539 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3540 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3541 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3542 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3543 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3544 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3546 mutex_lock(&adev->srbm_mutex);
3547 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3548 vi_srbm_select(adev, 0, 0, 0, i);
3549 /* CP and shaders */
3550 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3551 WREG32(mmSH_MEM_APE1_BASE, 1);
3552 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3553 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3555 vi_srbm_select(adev, 0, 0, 0, 0);
3556 mutex_unlock(&adev->srbm_mutex);
3559 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3564 tmp = RREG32(mmGRBM_CNTL);
3565 tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3566 WREG32(mmGRBM_CNTL, tmp);
3568 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3569 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3570 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3572 gfx_v8_0_tiling_mode_table_init(adev);
3574 gfx_v8_0_setup_rb(adev);
3575 gfx_v8_0_get_cu_info(adev);
3577 /* XXX SH_MEM regs */
3578 /* where to put LDS, scratch, GPUVM in FSA64 space */
3579 mutex_lock(&adev->srbm_mutex);
3580 for (i = 0; i < 16; i++) {
3581 vi_srbm_select(adev, 0, 0, 0, i);
3582 /* CP and shaders */
3584 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3585 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3586 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3587 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3588 WREG32(mmSH_MEM_CONFIG, tmp);
3590 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3591 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3592 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3593 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3594 WREG32(mmSH_MEM_CONFIG, tmp);
3597 WREG32(mmSH_MEM_APE1_BASE, 1);
3598 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3599 WREG32(mmSH_MEM_BASES, 0);
3601 vi_srbm_select(adev, 0, 0, 0, 0);
3602 mutex_unlock(&adev->srbm_mutex);
3604 gfx_v8_0_init_compute_vmid(adev);
3606 mutex_lock(&adev->grbm_idx_mutex);
3608 * making sure that the following register writes will be broadcasted
3609 * to all the shaders
3611 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3613 WREG32(mmPA_SC_FIFO_SIZE,
3614 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3615 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3616 (adev->gfx.config.sc_prim_fifo_size_backend <<
3617 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3618 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3619 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3620 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3621 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3622 mutex_unlock(&adev->grbm_idx_mutex);
3626 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3631 mutex_lock(&adev->grbm_idx_mutex);
3632 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3633 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3634 gfx_v8_0_select_se_sh(adev, i, j);
3635 for (k = 0; k < adev->usec_timeout; k++) {
3636 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3642 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3643 mutex_unlock(&adev->grbm_idx_mutex);
3645 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3646 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3647 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3648 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3649 for (k = 0; k < adev->usec_timeout; k++) {
3650 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3656 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3659 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3661 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3662 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3663 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3664 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3666 WREG32(mmCP_INT_CNTL_RING0, tmp);
3669 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3672 WREG32(mmRLC_CSIB_ADDR_HI,
3673 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3674 WREG32(mmRLC_CSIB_ADDR_LO,
3675 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3676 WREG32(mmRLC_CSIB_LENGTH,
3677 adev->gfx.rlc.clear_state_size);
3680 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3683 int *unique_indices,
3686 int *ind_start_offsets,
3691 bool new_entry = true;
3693 for (; ind_offset < list_size; ind_offset++) {
3697 ind_start_offsets[*offset_count] = ind_offset;
3698 *offset_count = *offset_count + 1;
3699 BUG_ON(*offset_count >= max_offset);
3702 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3709 /* look for the matching indice */
3711 indices < *indices_count;
3713 if (unique_indices[indices] ==
3714 register_list_format[ind_offset])
3718 if (indices >= *indices_count) {
3719 unique_indices[*indices_count] =
3720 register_list_format[ind_offset];
3721 indices = *indices_count;
3722 *indices_count = *indices_count + 1;
3723 BUG_ON(*indices_count >= max_indices);
3726 register_list_format[ind_offset] = indices;
3730 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3733 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3734 int indices_count = 0;
3735 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3736 int offset_count = 0;
3739 unsigned int *register_list_format =
3740 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3741 if (register_list_format == NULL)
3743 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3744 adev->gfx.rlc.reg_list_format_size_bytes);
3746 gfx_v8_0_parse_ind_reg_list(register_list_format,
3747 RLC_FormatDirectRegListLength,
3748 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3751 sizeof(unique_indices) / sizeof(int),
3752 indirect_start_offsets,
3754 sizeof(indirect_start_offsets)/sizeof(int));
3756 /* save and restore list */
3757 temp = RREG32(mmRLC_SRM_CNTL);
3758 temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
3759 WREG32(mmRLC_SRM_CNTL, temp);
3761 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3762 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3763 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3766 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3767 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3768 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3770 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3771 list_size = list_size >> 1;
3772 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3773 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3775 /* starting offsets starts */
3776 WREG32(mmRLC_GPM_SCRATCH_ADDR,
3777 adev->gfx.rlc.starting_offsets_start);
3778 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3779 WREG32(mmRLC_GPM_SCRATCH_DATA,
3780 indirect_start_offsets[i]);
3782 /* unique indices */
3783 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3784 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3785 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3786 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3787 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3789 kfree(register_list_format);
3794 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3798 data = RREG32(mmRLC_SRM_CNTL);
3799 data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3800 WREG32(mmRLC_SRM_CNTL, data);
3803 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3807 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3808 AMD_PG_SUPPORT_GFX_SMG |
3809 AMD_PG_SUPPORT_GFX_DMG)) {
3810 data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3811 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3812 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3813 WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3816 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
3817 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
3818 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
3819 data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
3820 WREG32(mmRLC_PG_DELAY, data);
3822 data = RREG32(mmRLC_PG_DELAY_2);
3823 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
3824 data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
3825 WREG32(mmRLC_PG_DELAY_2, data);
3827 data = RREG32(mmRLC_AUTO_PG_CTRL);
3828 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3829 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3830 WREG32(mmRLC_AUTO_PG_CTRL, data);
3834 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3839 orig = data = RREG32(mmRLC_PG_CNTL);
3842 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3844 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3847 WREG32(mmRLC_PG_CNTL, data);
3850 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3855 orig = data = RREG32(mmRLC_PG_CNTL);
3858 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3860 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3863 WREG32(mmRLC_PG_CNTL, data);
3866 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3870 orig = data = RREG32(mmRLC_PG_CNTL);
3873 data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK;
3875 data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
3878 WREG32(mmRLC_PG_CNTL, data);
3881 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3883 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3884 AMD_PG_SUPPORT_GFX_SMG |
3885 AMD_PG_SUPPORT_GFX_DMG |
3887 AMD_PG_SUPPORT_GDS |
3888 AMD_PG_SUPPORT_RLC_SMU_HS)) {
3889 gfx_v8_0_init_csb(adev);
3890 gfx_v8_0_init_save_restore_list(adev);
3891 gfx_v8_0_enable_save_restore_machine(adev);
3893 if ((adev->asic_type == CHIP_CARRIZO) ||
3894 (adev->asic_type == CHIP_STONEY)) {
3895 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3896 gfx_v8_0_init_power_gating(adev);
3897 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3898 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
3899 cz_enable_sck_slow_down_on_power_up(adev, true);
3900 cz_enable_sck_slow_down_on_power_down(adev, true);
3902 cz_enable_sck_slow_down_on_power_up(adev, false);
3903 cz_enable_sck_slow_down_on_power_down(adev, false);
3905 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
3906 cz_enable_cp_power_gating(adev, true);
3908 cz_enable_cp_power_gating(adev, false);
3909 } else if (adev->asic_type == CHIP_POLARIS11) {
3910 gfx_v8_0_init_power_gating(adev);
3915 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3917 u32 tmp = RREG32(mmRLC_CNTL);
3919 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3920 WREG32(mmRLC_CNTL, tmp);
3922 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3924 gfx_v8_0_wait_for_rlc_serdes(adev);
3927 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3929 u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3931 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3932 WREG32(mmGRBM_SOFT_RESET, tmp);
3934 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3935 WREG32(mmGRBM_SOFT_RESET, tmp);
3939 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3941 u32 tmp = RREG32(mmRLC_CNTL);
3943 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3944 WREG32(mmRLC_CNTL, tmp);
3946 /* carrizo do enable cp interrupt after cp inited */
3947 if (!(adev->flags & AMD_IS_APU))
3948 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3953 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3955 const struct rlc_firmware_header_v2_0 *hdr;
3956 const __le32 *fw_data;
3957 unsigned i, fw_size;
3959 if (!adev->gfx.rlc_fw)
3962 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3963 amdgpu_ucode_print_rlc_hdr(&hdr->header);
3965 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3966 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3967 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3969 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3970 for (i = 0; i < fw_size; i++)
3971 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3972 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3977 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3981 gfx_v8_0_rlc_stop(adev);
3984 WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
3985 if (adev->asic_type == CHIP_POLARIS11 ||
3986 adev->asic_type == CHIP_POLARIS10)
3987 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
3990 WREG32(mmRLC_PG_CNTL, 0);
3992 gfx_v8_0_rlc_reset(adev);
3994 gfx_v8_0_init_pg(adev);
3996 if (!adev->pp_enabled) {
3997 if (!adev->firmware.smu_load) {
3998 /* legacy rlc firmware loading */
3999 r = gfx_v8_0_rlc_load_microcode(adev);
4003 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4004 AMDGPU_UCODE_ID_RLC_G);
4010 gfx_v8_0_rlc_start(adev);
4015 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4018 u32 tmp = RREG32(mmCP_ME_CNTL);
4021 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4022 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4023 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4025 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4026 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4027 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4028 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4029 adev->gfx.gfx_ring[i].ready = false;
4031 WREG32(mmCP_ME_CNTL, tmp);
4035 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4037 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4038 const struct gfx_firmware_header_v1_0 *ce_hdr;
4039 const struct gfx_firmware_header_v1_0 *me_hdr;
4040 const __le32 *fw_data;
4041 unsigned i, fw_size;
4043 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4046 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4047 adev->gfx.pfp_fw->data;
4048 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4049 adev->gfx.ce_fw->data;
4050 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4051 adev->gfx.me_fw->data;
4053 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4054 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4055 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4057 gfx_v8_0_cp_gfx_enable(adev, false);
4060 fw_data = (const __le32 *)
4061 (adev->gfx.pfp_fw->data +
4062 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4063 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4064 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4065 for (i = 0; i < fw_size; i++)
4066 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4067 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4070 fw_data = (const __le32 *)
4071 (adev->gfx.ce_fw->data +
4072 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4073 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4074 WREG32(mmCP_CE_UCODE_ADDR, 0);
4075 for (i = 0; i < fw_size; i++)
4076 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4077 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4080 fw_data = (const __le32 *)
4081 (adev->gfx.me_fw->data +
4082 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4083 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4084 WREG32(mmCP_ME_RAM_WADDR, 0);
4085 for (i = 0; i < fw_size; i++)
4086 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4087 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4092 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4095 const struct cs_section_def *sect = NULL;
4096 const struct cs_extent_def *ext = NULL;
4098 /* begin clear state */
4100 /* context control state */
4103 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4104 for (ext = sect->section; ext->extent != NULL; ++ext) {
4105 if (sect->id == SECT_CONTEXT)
4106 count += 2 + ext->reg_count;
4111 /* pa_sc_raster_config/pa_sc_raster_config1 */
4113 /* end clear state */
4121 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4123 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4124 const struct cs_section_def *sect = NULL;
4125 const struct cs_extent_def *ext = NULL;
4129 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4130 WREG32(mmCP_ENDIAN_SWAP, 0);
4131 WREG32(mmCP_DEVICE_ID, 1);
4133 gfx_v8_0_cp_gfx_enable(adev, true);
4135 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4137 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4141 /* clear state buffer */
4142 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4143 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4145 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4146 amdgpu_ring_write(ring, 0x80000000);
4147 amdgpu_ring_write(ring, 0x80000000);
4149 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4150 for (ext = sect->section; ext->extent != NULL; ++ext) {
4151 if (sect->id == SECT_CONTEXT) {
4152 amdgpu_ring_write(ring,
4153 PACKET3(PACKET3_SET_CONTEXT_REG,
4155 amdgpu_ring_write(ring,
4156 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4157 for (i = 0; i < ext->reg_count; i++)
4158 amdgpu_ring_write(ring, ext->extent[i]);
4163 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4164 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4165 switch (adev->asic_type) {
4167 case CHIP_POLARIS10:
4168 amdgpu_ring_write(ring, 0x16000012);
4169 amdgpu_ring_write(ring, 0x0000002A);
4171 case CHIP_POLARIS11:
4172 amdgpu_ring_write(ring, 0x16000012);
4173 amdgpu_ring_write(ring, 0x00000000);
4176 amdgpu_ring_write(ring, 0x3a00161a);
4177 amdgpu_ring_write(ring, 0x0000002e);
4180 amdgpu_ring_write(ring, 0x00000002);
4181 amdgpu_ring_write(ring, 0x00000000);
4184 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4185 0x00000000 : 0x00000002);
4186 amdgpu_ring_write(ring, 0x00000000);
4189 amdgpu_ring_write(ring, 0x00000000);
4190 amdgpu_ring_write(ring, 0x00000000);
4196 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4197 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4199 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4200 amdgpu_ring_write(ring, 0);
4202 /* init the CE partitions */
4203 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4204 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4205 amdgpu_ring_write(ring, 0x8000);
4206 amdgpu_ring_write(ring, 0x8000);
4208 amdgpu_ring_commit(ring);
4213 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4215 struct amdgpu_ring *ring;
4218 u64 rb_addr, rptr_addr;
4221 /* Set the write pointer delay */
4222 WREG32(mmCP_RB_WPTR_DELAY, 0);
4224 /* set the RB to use vmid 0 */
4225 WREG32(mmCP_RB_VMID, 0);
4227 /* Set ring buffer size */
4228 ring = &adev->gfx.gfx_ring[0];
4229 rb_bufsz = order_base_2(ring->ring_size / 8);
4230 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4231 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4232 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4233 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4235 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4237 WREG32(mmCP_RB0_CNTL, tmp);
4239 /* Initialize the ring buffer's read and write pointers */
4240 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4242 WREG32(mmCP_RB0_WPTR, ring->wptr);
4244 /* set the wb address wether it's enabled or not */
4245 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4246 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4247 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4250 WREG32(mmCP_RB0_CNTL, tmp);
4252 rb_addr = ring->gpu_addr >> 8;
4253 WREG32(mmCP_RB0_BASE, rb_addr);
4254 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4256 /* no gfx doorbells on iceland */
4257 if (adev->asic_type != CHIP_TOPAZ) {
4258 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4259 if (ring->use_doorbell) {
4260 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4261 DOORBELL_OFFSET, ring->doorbell_index);
4262 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4264 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4267 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4270 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4272 if (adev->asic_type == CHIP_TONGA) {
4273 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4274 DOORBELL_RANGE_LOWER,
4275 AMDGPU_DOORBELL_GFX_RING0);
4276 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4278 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4279 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4284 /* start the ring */
4285 gfx_v8_0_cp_gfx_start(adev);
4287 r = amdgpu_ring_test_ring(ring);
4289 ring->ready = false;
4296 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4301 WREG32(mmCP_MEC_CNTL, 0);
4303 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4304 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4305 adev->gfx.compute_ring[i].ready = false;
4310 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4312 const struct gfx_firmware_header_v1_0 *mec_hdr;
4313 const __le32 *fw_data;
4314 unsigned i, fw_size;
4316 if (!adev->gfx.mec_fw)
4319 gfx_v8_0_cp_compute_enable(adev, false);
4321 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4322 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4324 fw_data = (const __le32 *)
4325 (adev->gfx.mec_fw->data +
4326 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4327 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4330 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4331 for (i = 0; i < fw_size; i++)
4332 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4333 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4335 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4336 if (adev->gfx.mec2_fw) {
4337 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4339 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4340 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4342 fw_data = (const __le32 *)
4343 (adev->gfx.mec2_fw->data +
4344 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4345 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4347 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4348 for (i = 0; i < fw_size; i++)
4349 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4350 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4357 uint32_t header; /* ordinal0 */
4358 uint32_t compute_dispatch_initiator; /* ordinal1 */
4359 uint32_t compute_dim_x; /* ordinal2 */
4360 uint32_t compute_dim_y; /* ordinal3 */
4361 uint32_t compute_dim_z; /* ordinal4 */
4362 uint32_t compute_start_x; /* ordinal5 */
4363 uint32_t compute_start_y; /* ordinal6 */
4364 uint32_t compute_start_z; /* ordinal7 */
4365 uint32_t compute_num_thread_x; /* ordinal8 */
4366 uint32_t compute_num_thread_y; /* ordinal9 */
4367 uint32_t compute_num_thread_z; /* ordinal10 */
4368 uint32_t compute_pipelinestat_enable; /* ordinal11 */
4369 uint32_t compute_perfcount_enable; /* ordinal12 */
4370 uint32_t compute_pgm_lo; /* ordinal13 */
4371 uint32_t compute_pgm_hi; /* ordinal14 */
4372 uint32_t compute_tba_lo; /* ordinal15 */
4373 uint32_t compute_tba_hi; /* ordinal16 */
4374 uint32_t compute_tma_lo; /* ordinal17 */
4375 uint32_t compute_tma_hi; /* ordinal18 */
4376 uint32_t compute_pgm_rsrc1; /* ordinal19 */
4377 uint32_t compute_pgm_rsrc2; /* ordinal20 */
4378 uint32_t compute_vmid; /* ordinal21 */
4379 uint32_t compute_resource_limits; /* ordinal22 */
4380 uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */
4381 uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */
4382 uint32_t compute_tmpring_size; /* ordinal25 */
4383 uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */
4384 uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */
4385 uint32_t compute_restart_x; /* ordinal28 */
4386 uint32_t compute_restart_y; /* ordinal29 */
4387 uint32_t compute_restart_z; /* ordinal30 */
4388 uint32_t compute_thread_trace_enable; /* ordinal31 */
4389 uint32_t compute_misc_reserved; /* ordinal32 */
4390 uint32_t compute_dispatch_id; /* ordinal33 */
4391 uint32_t compute_threadgroup_id; /* ordinal34 */
4392 uint32_t compute_relaunch; /* ordinal35 */
4393 uint32_t compute_wave_restore_addr_lo; /* ordinal36 */
4394 uint32_t compute_wave_restore_addr_hi; /* ordinal37 */
4395 uint32_t compute_wave_restore_control; /* ordinal38 */
4396 uint32_t reserved9; /* ordinal39 */
4397 uint32_t reserved10; /* ordinal40 */
4398 uint32_t reserved11; /* ordinal41 */
4399 uint32_t reserved12; /* ordinal42 */
4400 uint32_t reserved13; /* ordinal43 */
4401 uint32_t reserved14; /* ordinal44 */
4402 uint32_t reserved15; /* ordinal45 */
4403 uint32_t reserved16; /* ordinal46 */
4404 uint32_t reserved17; /* ordinal47 */
4405 uint32_t reserved18; /* ordinal48 */
4406 uint32_t reserved19; /* ordinal49 */
4407 uint32_t reserved20; /* ordinal50 */
4408 uint32_t reserved21; /* ordinal51 */
4409 uint32_t reserved22; /* ordinal52 */
4410 uint32_t reserved23; /* ordinal53 */
4411 uint32_t reserved24; /* ordinal54 */
4412 uint32_t reserved25; /* ordinal55 */
4413 uint32_t reserved26; /* ordinal56 */
4414 uint32_t reserved27; /* ordinal57 */
4415 uint32_t reserved28; /* ordinal58 */
4416 uint32_t reserved29; /* ordinal59 */
4417 uint32_t reserved30; /* ordinal60 */
4418 uint32_t reserved31; /* ordinal61 */
4419 uint32_t reserved32; /* ordinal62 */
4420 uint32_t reserved33; /* ordinal63 */
4421 uint32_t reserved34; /* ordinal64 */
4422 uint32_t compute_user_data_0; /* ordinal65 */
4423 uint32_t compute_user_data_1; /* ordinal66 */
4424 uint32_t compute_user_data_2; /* ordinal67 */
4425 uint32_t compute_user_data_3; /* ordinal68 */
4426 uint32_t compute_user_data_4; /* ordinal69 */
4427 uint32_t compute_user_data_5; /* ordinal70 */
4428 uint32_t compute_user_data_6; /* ordinal71 */
4429 uint32_t compute_user_data_7; /* ordinal72 */
4430 uint32_t compute_user_data_8; /* ordinal73 */
4431 uint32_t compute_user_data_9; /* ordinal74 */
4432 uint32_t compute_user_data_10; /* ordinal75 */
4433 uint32_t compute_user_data_11; /* ordinal76 */
4434 uint32_t compute_user_data_12; /* ordinal77 */
4435 uint32_t compute_user_data_13; /* ordinal78 */
4436 uint32_t compute_user_data_14; /* ordinal79 */
4437 uint32_t compute_user_data_15; /* ordinal80 */
4438 uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */
4439 uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */
4440 uint32_t reserved35; /* ordinal83 */
4441 uint32_t reserved36; /* ordinal84 */
4442 uint32_t reserved37; /* ordinal85 */
4443 uint32_t cp_mqd_query_time_lo; /* ordinal86 */
4444 uint32_t cp_mqd_query_time_hi; /* ordinal87 */
4445 uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */
4446 uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */
4447 uint32_t cp_mqd_connect_end_time_lo; /* ordinal90 */
4448 uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */
4449 uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */
4450 uint32_t cp_mqd_connect_end_pq_rptr; /* ordinal93 */
4451 uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */
4452 uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */
4453 uint32_t reserved38; /* ordinal96 */
4454 uint32_t reserved39; /* ordinal97 */
4455 uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */
4456 uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */
4457 uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */
4458 uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */
4459 uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */
4460 uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */
4461 uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */
4462 uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */
4463 uint32_t reserved40; /* ordinal106 */
4464 uint32_t reserved41; /* ordinal107 */
4465 uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */
4466 uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */
4467 uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */
4468 uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */
4469 uint32_t reserved42; /* ordinal112 */
4470 uint32_t reserved43; /* ordinal113 */
4471 uint32_t cp_pq_exe_status_lo; /* ordinal114 */
4472 uint32_t cp_pq_exe_status_hi; /* ordinal115 */
4473 uint32_t cp_packet_id_lo; /* ordinal116 */
4474 uint32_t cp_packet_id_hi; /* ordinal117 */
4475 uint32_t cp_packet_exe_status_lo; /* ordinal118 */
4476 uint32_t cp_packet_exe_status_hi; /* ordinal119 */
4477 uint32_t gds_save_base_addr_lo; /* ordinal120 */
4478 uint32_t gds_save_base_addr_hi; /* ordinal121 */
4479 uint32_t gds_save_mask_lo; /* ordinal122 */
4480 uint32_t gds_save_mask_hi; /* ordinal123 */
4481 uint32_t ctx_save_base_addr_lo; /* ordinal124 */
4482 uint32_t ctx_save_base_addr_hi; /* ordinal125 */
4483 uint32_t reserved44; /* ordinal126 */
4484 uint32_t reserved45; /* ordinal127 */
4485 uint32_t cp_mqd_base_addr_lo; /* ordinal128 */
4486 uint32_t cp_mqd_base_addr_hi; /* ordinal129 */
4487 uint32_t cp_hqd_active; /* ordinal130 */
4488 uint32_t cp_hqd_vmid; /* ordinal131 */
4489 uint32_t cp_hqd_persistent_state; /* ordinal132 */
4490 uint32_t cp_hqd_pipe_priority; /* ordinal133 */
4491 uint32_t cp_hqd_queue_priority; /* ordinal134 */
4492 uint32_t cp_hqd_quantum; /* ordinal135 */
4493 uint32_t cp_hqd_pq_base_lo; /* ordinal136 */
4494 uint32_t cp_hqd_pq_base_hi; /* ordinal137 */
4495 uint32_t cp_hqd_pq_rptr; /* ordinal138 */
4496 uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */
4497 uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */
4498 uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */
4499 uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */
4500 uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */
4501 uint32_t cp_hqd_pq_wptr; /* ordinal144 */
4502 uint32_t cp_hqd_pq_control; /* ordinal145 */
4503 uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */
4504 uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */
4505 uint32_t cp_hqd_ib_rptr; /* ordinal148 */
4506 uint32_t cp_hqd_ib_control; /* ordinal149 */
4507 uint32_t cp_hqd_iq_timer; /* ordinal150 */
4508 uint32_t cp_hqd_iq_rptr; /* ordinal151 */
4509 uint32_t cp_hqd_dequeue_request; /* ordinal152 */
4510 uint32_t cp_hqd_dma_offload; /* ordinal153 */
4511 uint32_t cp_hqd_sema_cmd; /* ordinal154 */
4512 uint32_t cp_hqd_msg_type; /* ordinal155 */
4513 uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */
4514 uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */
4515 uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */
4516 uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */
4517 uint32_t cp_hqd_hq_status0; /* ordinal160 */
4518 uint32_t cp_hqd_hq_control0; /* ordinal161 */
4519 uint32_t cp_mqd_control; /* ordinal162 */
4520 uint32_t cp_hqd_hq_status1; /* ordinal163 */
4521 uint32_t cp_hqd_hq_control1; /* ordinal164 */
4522 uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */
4523 uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */
4524 uint32_t cp_hqd_eop_control; /* ordinal167 */
4525 uint32_t cp_hqd_eop_rptr; /* ordinal168 */
4526 uint32_t cp_hqd_eop_wptr; /* ordinal169 */
4527 uint32_t cp_hqd_eop_done_events; /* ordinal170 */
4528 uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */
4529 uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */
4530 uint32_t cp_hqd_ctx_save_control; /* ordinal173 */
4531 uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */
4532 uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */
4533 uint32_t cp_hqd_wg_state_offset; /* ordinal176 */
4534 uint32_t cp_hqd_ctx_save_size; /* ordinal177 */
4535 uint32_t cp_hqd_gds_resource_state; /* ordinal178 */
4536 uint32_t cp_hqd_error; /* ordinal179 */
4537 uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */
4538 uint32_t cp_hqd_eop_dones; /* ordinal181 */
4539 uint32_t reserved46; /* ordinal182 */
4540 uint32_t reserved47; /* ordinal183 */
4541 uint32_t reserved48; /* ordinal184 */
4542 uint32_t reserved49; /* ordinal185 */
4543 uint32_t reserved50; /* ordinal186 */
4544 uint32_t reserved51; /* ordinal187 */
4545 uint32_t reserved52; /* ordinal188 */
4546 uint32_t reserved53; /* ordinal189 */
4547 uint32_t reserved54; /* ordinal190 */
4548 uint32_t reserved55; /* ordinal191 */
4549 uint32_t iqtimer_pkt_header; /* ordinal192 */
4550 uint32_t iqtimer_pkt_dw0; /* ordinal193 */
4551 uint32_t iqtimer_pkt_dw1; /* ordinal194 */
4552 uint32_t iqtimer_pkt_dw2; /* ordinal195 */
4553 uint32_t iqtimer_pkt_dw3; /* ordinal196 */
4554 uint32_t iqtimer_pkt_dw4; /* ordinal197 */
4555 uint32_t iqtimer_pkt_dw5; /* ordinal198 */
4556 uint32_t iqtimer_pkt_dw6; /* ordinal199 */
4557 uint32_t iqtimer_pkt_dw7; /* ordinal200 */
4558 uint32_t iqtimer_pkt_dw8; /* ordinal201 */
4559 uint32_t iqtimer_pkt_dw9; /* ordinal202 */
4560 uint32_t iqtimer_pkt_dw10; /* ordinal203 */
4561 uint32_t iqtimer_pkt_dw11; /* ordinal204 */
4562 uint32_t iqtimer_pkt_dw12; /* ordinal205 */
4563 uint32_t iqtimer_pkt_dw13; /* ordinal206 */
4564 uint32_t iqtimer_pkt_dw14; /* ordinal207 */
4565 uint32_t iqtimer_pkt_dw15; /* ordinal208 */
4566 uint32_t iqtimer_pkt_dw16; /* ordinal209 */
4567 uint32_t iqtimer_pkt_dw17; /* ordinal210 */
4568 uint32_t iqtimer_pkt_dw18; /* ordinal211 */
4569 uint32_t iqtimer_pkt_dw19; /* ordinal212 */
4570 uint32_t iqtimer_pkt_dw20; /* ordinal213 */
4571 uint32_t iqtimer_pkt_dw21; /* ordinal214 */
4572 uint32_t iqtimer_pkt_dw22; /* ordinal215 */
4573 uint32_t iqtimer_pkt_dw23; /* ordinal216 */
4574 uint32_t iqtimer_pkt_dw24; /* ordinal217 */
4575 uint32_t iqtimer_pkt_dw25; /* ordinal218 */
4576 uint32_t iqtimer_pkt_dw26; /* ordinal219 */
4577 uint32_t iqtimer_pkt_dw27; /* ordinal220 */
4578 uint32_t iqtimer_pkt_dw28; /* ordinal221 */
4579 uint32_t iqtimer_pkt_dw29; /* ordinal222 */
4580 uint32_t iqtimer_pkt_dw30; /* ordinal223 */
4581 uint32_t iqtimer_pkt_dw31; /* ordinal224 */
4582 uint32_t reserved56; /* ordinal225 */
4583 uint32_t reserved57; /* ordinal226 */
4584 uint32_t reserved58; /* ordinal227 */
4585 uint32_t set_resources_header; /* ordinal228 */
4586 uint32_t set_resources_dw1; /* ordinal229 */
4587 uint32_t set_resources_dw2; /* ordinal230 */
4588 uint32_t set_resources_dw3; /* ordinal231 */
4589 uint32_t set_resources_dw4; /* ordinal232 */
4590 uint32_t set_resources_dw5; /* ordinal233 */
4591 uint32_t set_resources_dw6; /* ordinal234 */
4592 uint32_t set_resources_dw7; /* ordinal235 */
4593 uint32_t reserved59; /* ordinal236 */
4594 uint32_t reserved60; /* ordinal237 */
4595 uint32_t reserved61; /* ordinal238 */
4596 uint32_t reserved62; /* ordinal239 */
4597 uint32_t reserved63; /* ordinal240 */
4598 uint32_t reserved64; /* ordinal241 */
4599 uint32_t reserved65; /* ordinal242 */
4600 uint32_t reserved66; /* ordinal243 */
4601 uint32_t reserved67; /* ordinal244 */
4602 uint32_t reserved68; /* ordinal245 */
4603 uint32_t reserved69; /* ordinal246 */
4604 uint32_t reserved70; /* ordinal247 */
4605 uint32_t reserved71; /* ordinal248 */
4606 uint32_t reserved72; /* ordinal249 */
4607 uint32_t reserved73; /* ordinal250 */
4608 uint32_t reserved74; /* ordinal251 */
4609 uint32_t reserved75; /* ordinal252 */
4610 uint32_t reserved76; /* ordinal253 */
4611 uint32_t reserved77; /* ordinal254 */
4612 uint32_t reserved78; /* ordinal255 */
4614 uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4617 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
/* Tear down the per-ring MQD (memory queue descriptor) buffer objects for
 * every compute ring: reserve, unpin, unreserve and drop the last reference
 * on each BO, then clear the pointer so a later resume re-allocates it. */
4621 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4622 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4624 if (ring->mqd_obj) {
4625 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4626 if (unlikely(r != 0))
/* Reservation failure is only warned about; teardown continues
 * best-effort with unpin/unreserve below. */
4627 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4629 amdgpu_bo_unpin(ring->mqd_obj);
4630 amdgpu_bo_unreserve(ring->mqd_obj);
4632 amdgpu_bo_unref(&ring->mqd_obj);
4633 ring->mqd_obj = NULL;
4638 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
/* Bring the compute (MEC) queues back up: program the per-pipe EOP buffers,
 * then for each compute ring allocate/pin/map an MQD BO, fill in the MQD,
 * mirror it into the CP_HQD_* registers under srbm_mutex, activate the
 * queue, and finally ring-test each compute ring.
 * Returns 0 on success or a negative error code (error paths call
 * gfx_v8_0_cp_compute_fini() to release already-created MQDs). */
4642 bool use_doorbell = true;
4650 /* init the pipes */
4651 mutex_lock(&adev->srbm_mutex);
4652 for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
/* Flat pipe index i maps to ME 1 pipes 0-3, then ME 2 pipes 0-3. */
4653 int me = (i < 4) ? 1 : 2;
4654 int pipe = (i < 4) ? i : (i - 4);
4656 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
/* vi_srbm_select() routes the CP_HQD_* register window to this me/pipe. */
4659 vi_srbm_select(adev, me, pipe, 0, 0);
4661 /* write the EOP addr */
4662 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4663 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4665 /* set the VMID assigned */
4666 WREG32(mmCP_HQD_VMID, 0);
4668 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4669 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4670 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4671 (order_base_2(MEC_HPD_SIZE / 4) - 1));
4672 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
/* Restore default (0,0,0,0) register routing before dropping the lock. */
4674 vi_srbm_select(adev, 0, 0, 0, 0);
4675 mutex_unlock(&adev->srbm_mutex);
4677 /* init the queues. Just two for now. */
4678 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4679 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4681 if (ring->mqd_obj == NULL) {
/* MQD lives in GTT so the CP can fetch it by GPU address. */
4682 r = amdgpu_bo_create(adev,
4683 sizeof(struct vi_mqd),
4685 AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4686 NULL, &ring->mqd_obj);
4688 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4693 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4694 if (unlikely(r != 0)) {
4695 gfx_v8_0_cp_compute_fini(adev);
4698 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4701 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4702 gfx_v8_0_cp_compute_fini(adev);
4705 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4707 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4708 gfx_v8_0_cp_compute_fini(adev);
4712 /* init the mqd struct */
4713 memset(buf, 0, sizeof(struct vi_mqd));
4715 mqd = (struct vi_mqd *)buf;
/* 0xC0310800 is the MQD header magic expected by the CP firmware. */
4716 mqd->header = 0xC0310800;
4717 mqd->compute_pipelinestat_enable = 0x00000001;
/* All CUs on all shader engines enabled for this queue. */
4718 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4719 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4720 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4721 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4722 mqd->compute_misc_reserved = 0x00000003;
4724 mutex_lock(&adev->srbm_mutex);
4725 vi_srbm_select(adev, ring->me,
4729 /* disable wptr polling */
4730 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4731 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4732 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
/* Capture the EOP base programmed in the per-pipe loop above into the MQD. */
4734 mqd->cp_hqd_eop_base_addr_lo =
4735 RREG32(mmCP_HQD_EOP_BASE_ADDR);
4736 mqd->cp_hqd_eop_base_addr_hi =
4737 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4739 /* enable doorbell? */
4740 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4742 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4744 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4746 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4747 mqd->cp_hqd_pq_doorbell_control = tmp;
4749 /* disable the queue if it's active */
4750 mqd->cp_hqd_dequeue_request = 0;
4751 mqd->cp_hqd_pq_rptr = 0;
4752 mqd->cp_hqd_pq_wptr= 0;
4753 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
/* Request dequeue and poll HQD_ACTIVE until the queue drains
 * (bounded by adev->usec_timeout). */
4754 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4755 for (j = 0; j < adev->usec_timeout; j++) {
4756 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4760 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4761 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4762 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4765 /* set the pointer to the MQD */
/* MQD GPU address must be 256B aligned; low bits are masked off. */
4766 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4767 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4768 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4769 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4771 /* set MQD vmid to 0 */
4772 tmp = RREG32(mmCP_MQD_CONTROL);
4773 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4774 WREG32(mmCP_MQD_CONTROL, tmp);
4775 mqd->cp_mqd_control = tmp;
4777 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
/* PQ base is stored in units of 256 bytes, hence the >> 8. */
4778 hqd_gpu_addr = ring->gpu_addr >> 8;
4779 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4780 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4781 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4782 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4784 /* set up the HQD, this is similar to CP_RB0_CNTL */
4785 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4786 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4787 (order_base_2(ring->ring_size / 4) - 1));
4788 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4789 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4791 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4793 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4794 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
/* PRIV_STATE/KMD_QUEUE: queue is kernel-owned and privileged. */
4795 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4796 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4797 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4798 mqd->cp_hqd_pq_control = tmp;
4800 /* set the wb address whether it's enabled or not */
4801 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4802 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4803 mqd->cp_hqd_pq_rptr_report_addr_hi =
4804 upper_32_bits(wb_gpu_addr) & 0xffff;
4805 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4806 mqd->cp_hqd_pq_rptr_report_addr_lo);
4807 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4808 mqd->cp_hqd_pq_rptr_report_addr_hi);
4810 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4811 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4812 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4813 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4814 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4815 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4816 mqd->cp_hqd_pq_wptr_poll_addr_hi);
4818 /* enable the doorbell if requested */
/* Only these ASICs get the MEC doorbell range + per-queue doorbell setup. */
4820 if ((adev->asic_type == CHIP_CARRIZO) ||
4821 (adev->asic_type == CHIP_FIJI) ||
4822 (adev->asic_type == CHIP_STONEY) ||
4823 (adev->asic_type == CHIP_POLARIS11) ||
4824 (adev->asic_type == CHIP_POLARIS10)) {
4825 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4826 AMDGPU_DOORBELL_KIQ << 2);
4827 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4828 AMDGPU_DOORBELL_MEC_RING7 << 2);
4830 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4831 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4832 DOORBELL_OFFSET, ring->doorbell_index);
4833 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4834 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4835 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4836 mqd->cp_hqd_pq_doorbell_control = tmp;
4839 mqd->cp_hqd_pq_doorbell_control = 0;
4841 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4842 mqd->cp_hqd_pq_doorbell_control);
4844 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4846 mqd->cp_hqd_pq_wptr = ring->wptr;
4847 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4848 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4850 /* set the vmid for the queue */
4851 mqd->cp_hqd_vmid = 0;
4852 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4854 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4855 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4856 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4857 mqd->cp_hqd_persistent_state = tmp;
4858 if (adev->asic_type == CHIP_STONEY ||
4859 adev->asic_type == CHIP_POLARIS11 ||
4860 adev->asic_type == CHIP_POLARIS10) {
4861 tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4862 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4863 WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4866 /* activate the queue */
4867 mqd->cp_hqd_active = 1;
4868 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4870 vi_srbm_select(adev, 0, 0, 0, 0);
4871 mutex_unlock(&adev->srbm_mutex);
4873 amdgpu_bo_kunmap(ring->mqd_obj);
4874 amdgpu_bo_unreserve(ring->mqd_obj);
/* Globally enable doorbell delivery to the CP queues. */
4878 tmp = RREG32(mmCP_PQ_STATUS);
4879 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4880 WREG32(mmCP_PQ_STATUS, tmp);
4883 gfx_v8_0_cp_compute_enable(adev, true);
/* Smoke-test every compute ring; a failed test marks the ring not ready. */
4885 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4886 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4889 r = amdgpu_ring_test_ring(ring);
4891 ring->ready = false;
4897 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
/* Load the CP microcode (directly via MMIO, or by waiting on the SMU's
 * firmware loader) and then resume both the gfx and compute rings.
 * Returns 0 on success or a negative error code. */
4901 if (!(adev->flags & AMD_IS_APU))
/* Mask GUI-idle interrupts on dGPUs while the CP is being reloaded. */
4902 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4904 if (!adev->pp_enabled) {
4905 if (!adev->firmware.smu_load) {
4906 /* legacy firmware loading */
4907 r = gfx_v8_0_cp_gfx_load_microcode(adev);
4911 r = gfx_v8_0_cp_compute_load_microcode(adev);
/* SMU-managed loading: wait for CE/PFP/ME firmware to finish. */
4915 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4916 AMDGPU_UCODE_ID_CP_CE);
4920 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4921 AMDGPU_UCODE_ID_CP_PFP);
4925 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4926 AMDGPU_UCODE_ID_CP_ME);
/* Topaz has no SMU-loaded MEC fw; load it directly instead. */
4930 if (adev->asic_type == CHIP_TOPAZ) {
4931 r = gfx_v8_0_cp_compute_load_microcode(adev);
4935 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4936 AMDGPU_UCODE_ID_CP_MEC1);
4943 r = gfx_v8_0_cp_gfx_resume(adev);
4947 r = gfx_v8_0_cp_compute_resume(adev);
4951 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4956 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
/* Enable or disable both halves of the command processor (gfx + compute). */
4958 gfx_v8_0_cp_gfx_enable(adev, enable);
4959 gfx_v8_0_cp_compute_enable(adev, enable);
4962 static int gfx_v8_0_hw_init(void *handle)
/* IP-block hw_init hook: program golden registers, initialize the GPU's
 * gfx config, then bring up the RLC and the CP.
 * Returns 0 on success or a negative error code. */
4965 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4967 gfx_v8_0_init_golden_registers(adev);
4969 gfx_v8_0_gpu_init(adev);
4971 r = gfx_v8_0_rlc_resume(adev);
4975 r = gfx_v8_0_cp_resume(adev);
4982 static int gfx_v8_0_hw_fini(void *handle)
/* IP-block hw_fini hook: drop the priv reg/inst interrupt references, stop
 * the CP and RLC, free the compute MQDs, and ungate GFX power gating. */
4984 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4986 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4987 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4988 gfx_v8_0_cp_enable(adev, false);
4989 gfx_v8_0_rlc_stop(adev);
4990 gfx_v8_0_cp_compute_fini(adev);
/* Ungate so the block is fully powered before teardown completes. */
4992 amdgpu_set_powergating_state(adev,
4993 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
4998 static int gfx_v8_0_suspend(void *handle)
/* Suspend is identical to hw_fini for this IP block. */
5000 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5002 return gfx_v8_0_hw_fini(adev);
5005 static int gfx_v8_0_resume(void *handle)
/* Resume is identical to hw_init for this IP block. */
5007 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5009 return gfx_v8_0_hw_init(adev);
5012 static bool gfx_v8_0_is_idle(void *handle)
/* Report whether the GFX block is idle based on GRBM_STATUS.GUI_ACTIVE. */
5014 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5016 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5022 static int gfx_v8_0_wait_for_idle(void *handle)
/* Poll GRBM_STATUS.GUI_ACTIVE until the GFX block goes idle, bounded by
 * adev->usec_timeout iterations. */
5026 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5028 for (i = 0; i < adev->usec_timeout; i++) {
5029 /* read GRBM_STATUS and mask down to the GUI_ACTIVE bit */
5030 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
5032 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
5039 static int gfx_v8_0_soft_reset(void *handle)
/* Soft-reset the GFX block. Inspect GRBM/SRBM status registers to decide
 * which reset bits (CP, GFX, RLC, GRBM) are needed; if any are, stop the
 * RLC and CP, stall/clear via GMCON_DEBUG, pulse the GRBM and SRBM soft
 * reset registers, then release the stall. */
5041 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5043 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* Any busy graphics-pipeline unit means CP + GFX need a reset. */
5046 tmp = RREG32(mmGRBM_STATUS);
5047 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5048 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5049 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5050 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5051 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5052 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
5053 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5054 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5055 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5056 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
/* A busy CP additionally requires resetting GRBM itself via SRBM. */
5059 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5060 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5061 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5062 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5063 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5067 tmp = RREG32(mmGRBM_STATUS2);
5068 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5069 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5070 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5073 tmp = RREG32(mmSRBM_STATUS);
5074 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5075 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5076 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5078 if (grbm_soft_reset || srbm_soft_reset) {
5080 gfx_v8_0_rlc_stop(adev);
5082 /* Disable GFX parsing/prefetching */
5083 gfx_v8_0_cp_gfx_enable(adev, false);
5085 /* Disable MEC parsing/prefetching */
5086 gfx_v8_0_cp_compute_enable(adev, false);
5088 if (grbm_soft_reset || srbm_soft_reset) {
/* Stall and clear the GFX<->memory-controller interface before reset. */
5089 tmp = RREG32(mmGMCON_DEBUG);
5090 tmp = REG_SET_FIELD(tmp,
5091 GMCON_DEBUG, GFX_STALL, 1);
5092 tmp = REG_SET_FIELD(tmp,
5093 GMCON_DEBUG, GFX_CLEAR, 1);
5094 WREG32(mmGMCON_DEBUG, tmp);
5099 if (grbm_soft_reset) {
/* Assert the reset bits, read back to post the write, then deassert. */
5100 tmp = RREG32(mmGRBM_SOFT_RESET);
5101 tmp |= grbm_soft_reset;
5102 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5103 WREG32(mmGRBM_SOFT_RESET, tmp);
5104 tmp = RREG32(mmGRBM_SOFT_RESET);
5108 tmp &= ~grbm_soft_reset;
5109 WREG32(mmGRBM_SOFT_RESET, tmp);
5110 tmp = RREG32(mmGRBM_SOFT_RESET);
5113 if (srbm_soft_reset) {
5114 tmp = RREG32(mmSRBM_SOFT_RESET);
5115 tmp |= srbm_soft_reset;
5116 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5117 WREG32(mmSRBM_SOFT_RESET, tmp);
5118 tmp = RREG32(mmSRBM_SOFT_RESET);
5122 tmp &= ~srbm_soft_reset;
5123 WREG32(mmSRBM_SOFT_RESET, tmp);
5124 tmp = RREG32(mmSRBM_SOFT_RESET);
5127 if (grbm_soft_reset || srbm_soft_reset) {
/* Release the GMCON stall/clear asserted before the reset. */
5128 tmp = RREG32(mmGMCON_DEBUG);
5129 tmp = REG_SET_FIELD(tmp,
5130 GMCON_DEBUG, GFX_STALL, 0);
5131 tmp = REG_SET_FIELD(tmp,
5132 GMCON_DEBUG, GFX_CLEAR, 0);
5133 WREG32(mmGMCON_DEBUG, tmp);
5136 /* Wait a little for things to settle down */
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
/* gpu_clock_mutex serializes the capture-then-read sequence so the
 * LSB/MSB pair comes from a single latched snapshot. */
5154 mutex_lock(&adev->gfx.gpu_clock_mutex);
5155 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5156 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5157 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5158 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5162 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5164 uint32_t gds_base, uint32_t gds_size,
5165 uint32_t gws_base, uint32_t gws_size,
5166 uint32_t oa_base, uint32_t oa_size)
/* Emit WRITE_DATA packets that program the per-VMID GDS, GWS and OA
 * allocation registers on the ring. Sizes/bases arrive in bytes and are
 * converted to hardware units via the AMDGPU_*_SHIFT constants. */
5168 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5169 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5171 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5172 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5174 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5175 oa_size = oa_size >> AMDGPU_OA_SHIFT;
/* GDS base for this vmid */
5178 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5179 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5180 WRITE_DATA_DST_SEL(0)));
5181 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5182 amdgpu_ring_write(ring, 0);
5183 amdgpu_ring_write(ring, gds_base);
/* GDS size for this vmid */
5186 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5187 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5188 WRITE_DATA_DST_SEL(0)));
5189 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5190 amdgpu_ring_write(ring, 0);
5191 amdgpu_ring_write(ring, gds_size);
/* GWS: size and base packed into one register write. */
5194 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5195 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5196 WRITE_DATA_DST_SEL(0)));
5197 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5198 amdgpu_ring_write(ring, 0);
5199 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
/* OA: contiguous bitmask of oa_size bits starting at bit oa_base. */
5202 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5203 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5204 WRITE_DATA_DST_SEL(0)));
5205 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5206 amdgpu_ring_write(ring, 0);
5207 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5210 static int gfx_v8_0_early_init(void *handle)
/* IP-block early_init hook: set ring counts and install the ring, irq,
 * GDS and RLC function tables for GFX8. */
5212 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5214 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5215 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5216 gfx_v8_0_set_ring_funcs(adev);
5217 gfx_v8_0_set_irq_funcs(adev);
5218 gfx_v8_0_set_gds_init(adev);
5219 gfx_v8_0_set_rlc_funcs(adev);
5224 static int gfx_v8_0_late_init(void *handle)
/* IP-block late_init hook: take references on the privileged register and
 * instruction fault interrupts, run the EDC GPR workarounds (which need a
 * working IB pool, hence late init), then gate GFX power gating. */
5226 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5229 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5233 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5237 /* requires IBs so do in late init after IB pool is initialized */
5238 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5242 amdgpu_set_powergating_state(adev,
5243 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5248 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
/* Toggle static per-CU medium-grain power gating in RLC_PG_CNTL.
 * On Polaris11 this is additionally signalled to the SMU via powerplay. */
5251 uint32_t data, temp;
5253 if (adev->asic_type == CHIP_POLARIS11)
5254 /* Send msg to SMU via Powerplay */
5255 amdgpu_set_powergating_state(adev,
5256 AMD_IP_BLOCK_TYPE_SMC,
5258 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5260 temp = data = RREG32(mmRLC_PG_CNTL);
5261 /* Enable static MGPG */
5263 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5265 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5268 WREG32(mmRLC_PG_CNTL, data);
5271 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
/* Toggle dynamic per-CU medium-grain power gating in RLC_PG_CNTL. */
5274 uint32_t data, temp;
5276 temp = data = RREG32(mmRLC_PG_CNTL);
5277 /* Enable dynamic MGPG */
5279 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5281 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5284 WREG32(mmRLC_PG_CNTL, data);
5287 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
/* Toggle Polaris11 quick medium-grain power gating in RLC_PG_CNTL. */
5290 uint32_t data, temp;
5292 temp = data = RREG32(mmRLC_PG_CNTL);
5293 /* Enable quick PG */
5295 data |= RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
5297 data &= ~RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
5300 WREG32(mmRLC_PG_CNTL, data);
5303 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
/* Toggle the coarse-grain GFX power-gating enable bit in RLC_PG_CNTL
 * (Carrizo/Stoney path). */
5308 orig = data = RREG32(mmRLC_PG_CNTL);
5311 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5313 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5316 WREG32(mmRLC_PG_CNTL, data);
5319 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
/* Toggle GFX pipeline power gating in RLC_PG_CNTL (Carrizo/Stoney path). */
5324 orig = data = RREG32(mmRLC_PG_CNTL);
5327 data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
5329 data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
5332 WREG32(mmRLC_PG_CNTL, data);
5334 /* Read any GFX register to wake up GFX. */
5336 data = RREG32(mmDB_RENDER_CONTROL);
5339 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
/* Apply CG power gating for Carrizo-class parts: when enabling and GFX_PG
 * is supported, turn on CG gating (plus pipeline gating if supported);
 * otherwise turn both off. */
5342 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5343 cz_enable_gfx_cg_power_gating(adev, true);
5344 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5345 cz_enable_gfx_pipeline_power_gating(adev, true);
5347 cz_enable_gfx_cg_power_gating(adev, false);
5348 cz_enable_gfx_pipeline_power_gating(adev, false);
5352 static int gfx_v8_0_set_powergating_state(void *handle,
5353 enum amd_powergating_state state)
/* IP-block set_powergating_state hook: dispatch on ASIC type and enable or
 * disable the supported power-gating features (CG, static/dynamic MG,
 * Polaris11 quick MG) according to adev->pg_flags. */
5355 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5356 bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
/* Nothing to do when GFX power gating is not supported at all. */
5358 if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5361 switch (adev->asic_type) {
5364 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5365 cz_update_gfx_cg_power_gating(adev, enable)；
5367 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5368 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5370 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5372 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5373 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5375 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5377 case CHIP_POLARIS11:
5378 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5379 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5381 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5383 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5384 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5386 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5388 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5389 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5391 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5400 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5401 uint32_t reg_addr, uint32_t cmd)
/* Broadcast a BPM serdes command to all CUs: select all SE/SH, set the
 * CU/non-CU master masks to all-ones, then compose RLC_SERDES_WR_CTRL with
 * the given command and register address. Stoney uses a short-format
 * control word without the BPM_DATA/REG_ADDR fields. */
5405 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5407 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5408 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5410 data = RREG32(mmRLC_SERDES_WR_CTRL);
5411 if (adev->asic_type == CHIP_STONEY)
5412 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5413 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5414 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5415 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5416 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5417 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5418 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5419 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5420 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5422 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5423 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5424 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5425 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5426 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5427 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5428 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5429 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5430 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5431 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5432 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
/* 0xff in BPM_ADDR broadcasts to every BPM instance. */
5433 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5434 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5435 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5436 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5438 WREG32(mmRLC_SERDES_WR_CTRL, data);
/*
 * RLC safe-mode handshake encoding for the RLC_GPR_REG2 register used by the
 * CZ enter/exit helpers below: REQ is bit 0, MESSAGE occupies bits 1..4.
 * Field masks are defined locally here rather than in the register headers.
 */
5441 #define MSG_ENTER_RLC_SAFE_MODE 1
5442 #define MSG_EXIT_RLC_SAFE_MODE 0
5444 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5445 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5446 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
/*
 * Request RLC safe mode on Carrizo-class parts via the RLC_GPR_REG2 message
 * protocol. Skipped when the RLC F32 core is not running, or when none of the
 * relevant CG/PG features are enabled. Waits (bounded by usec_timeout) for
 * GFX clock and power status, then for the REQ bit to be acknowledged.
 */
5448 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5453 data = RREG32(mmRLC_CNTL);
/* RLC not running: nothing to request */
5454 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5457 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5458 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5459 AMD_PG_SUPPORT_GFX_DMG))) {
5460 data |= RLC_GPR_REG2__REQ_MASK;
5461 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5462 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5463 WREG32(mmRLC_GPR_REG2, data);
/* wait for both GFX clock and power status to report active */
5465 for (i = 0; i < adev->usec_timeout; i++) {
5466 if ((RREG32(mmRLC_GPM_STAT) &
5467 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5468 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5469 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5470 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
/* wait for the RLC to clear the REQ bit (request acknowledged) */
5475 for (i = 0; i < adev->usec_timeout; i++) {
5476 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5480 adev->gfx.rlc.in_safe_mode = true;
/*
 * Release RLC safe mode on Carrizo-class parts: mirror of
 * cz_enter_rlc_safe_mode(), sending MSG_EXIT_RLC_SAFE_MODE and then polling
 * for the REQ bit to clear.
 */
5484 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5489 data = RREG32(mmRLC_CNTL);
/* RLC not running: nothing to release */
5490 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5493 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5494 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5495 AMD_PG_SUPPORT_GFX_DMG))) {
5496 data |= RLC_GPR_REG2__REQ_MASK;
5497 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5498 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5499 WREG32(mmRLC_GPR_REG2, data);
5500 adev->gfx.rlc.in_safe_mode = false;
/* wait for the RLC to acknowledge by clearing REQ */
5503 for (i = 0; i < adev->usec_timeout; i++) {
5504 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
/*
 * Request RLC safe mode on Iceland-class parts via the dedicated
 * RLC_SAFE_MODE register (CMD bit + MESSAGE field) instead of the
 * RLC_GPR_REG2 protocol used by CZ. Gated on CGCG/MGCG support only.
 */
5510 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5515 data = RREG32(mmRLC_CNTL);
/* RLC not running: nothing to request */
5516 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5519 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5520 data |= RLC_SAFE_MODE__CMD_MASK;
5521 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5522 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5523 WREG32(mmRLC_SAFE_MODE, data);
/* wait for both GFX clock and power status to report active */
5525 for (i = 0; i < adev->usec_timeout; i++) {
5526 if ((RREG32(mmRLC_GPM_STAT) &
5527 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5528 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5529 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5530 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
/* wait for the CMD bit to be acknowledged (cleared by the RLC) */
5535 for (i = 0; i < adev->usec_timeout; i++) {
5536 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5540 adev->gfx.rlc.in_safe_mode = true;
/*
 * Release RLC safe mode on Iceland-class parts. Writes CMD with a zero
 * MESSAGE field (exit), but only if safe mode was actually entered
 * (in_safe_mode tracks that), then polls for CMD to clear.
 */
5544 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5549 data = RREG32(mmRLC_CNTL);
/* RLC not running: nothing to release */
5550 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5553 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5554 if (adev->gfx.rlc.in_safe_mode) {
5555 data |= RLC_SAFE_MODE__CMD_MASK;
5556 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5557 WREG32(mmRLC_SAFE_MODE, data);
5558 adev->gfx.rlc.in_safe_mode = false;
/* wait for the CMD bit to be acknowledged (cleared by the RLC) */
5562 for (i = 0; i < adev->usec_timeout; i++) {
5563 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
/* No-op safe-mode entry for ASICs that need no handshake; only tracks state. */
5569 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5571 adev->gfx.rlc.in_safe_mode = true;
/* No-op safe-mode exit counterpart; only tracks state. */
5574 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5576 adev->gfx.rlc.in_safe_mode = false;
/* RLC safe-mode ops for Carrizo-class parts (RLC_GPR_REG2 protocol). */
5579 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5580 .enter_safe_mode = cz_enter_rlc_safe_mode,
5581 .exit_safe_mode = cz_exit_rlc_safe_mode
/* RLC safe-mode ops for Iceland-class parts (RLC_SAFE_MODE register). */
5584 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5585 .enter_safe_mode = iceland_enter_rlc_safe_mode,
5586 .exit_safe_mode = iceland_exit_rlc_safe_mode
/* Fallback no-op RLC safe-mode ops (state tracking only). */
5589 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5590 .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5591 .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
/*
 * Enable/disable medium-grain clock gating (MGCG) plus the related
 * light-sleep (MGLS) and tree-shade (CGTS) features. The numbered steps in
 * the inline comments form an ordered hardware sequence: memory light sleep,
 * MGCG override register, SerDes idle waits and BPM SerDes commands. The
 * whole sequence runs under RLC safe mode. Enable and disable paths apply
 * the steps in opposite order (enable: LS first, override cleared last;
 * disable: override set first, LS torn down after).
 */
5594 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5597 uint32_t temp, data;
5599 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5601 /* It is disabled by HW by default */
5602 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5603 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5604 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5605 /* 1 - RLC memory Light sleep */
5606 temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
5607 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5609 WREG32(mmRLC_MEM_SLP_CNTL, data);
5612 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5613 /* 2 - CP memory Light sleep */
5614 temp = data = RREG32(mmCP_MEM_SLP_CNTL);
5615 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5617 WREG32(mmCP_MEM_SLP_CNTL, data);
5621 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5622 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
/* APUs keep the GRBM override bit set; dGPUs clear it as well */
5623 if (adev->flags & AMD_IS_APU)
5624 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5625 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5626 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5628 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5629 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5630 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5631 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5634 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5636 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5637 gfx_v8_0_wait_for_rlc_serdes(adev);
5639 /* 5 - clear mgcg override */
5640 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5642 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5643 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5644 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5645 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5646 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5647 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5648 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
/* LS override is only lifted when both MGLS and CGTS_LS are supported */
5649 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5650 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5651 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5652 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5653 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5655 WREG32(mmCGTS_SM_CTRL_REG, data);
5659 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5660 gfx_v8_0_wait_for_rlc_serdes(adev);
5662 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5663 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5664 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5665 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5666 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5667 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5669 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5671 /* 2 - disable MGLS in RLC */
5672 data = RREG32(mmRLC_MEM_SLP_CNTL);
5673 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5674 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5675 WREG32(mmRLC_MEM_SLP_CNTL, data);
5678 /* 3 - disable MGLS in CP */
5679 data = RREG32(mmCP_MEM_SLP_CNTL);
5680 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5681 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5682 WREG32(mmCP_MEM_SLP_CNTL, data);
5685 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5686 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5687 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5688 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5690 WREG32(mmCGTS_SM_CTRL_REG, data);
5692 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5693 gfx_v8_0_wait_for_rlc_serdes(adev);
5695 /* 6 - set mgcg override */
5696 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5700 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5701 gfx_v8_0_wait_for_rlc_serdes(adev);
5704 adev->gfx.rlc.funcs->exit_safe_mode(adev);
/*
 * Enable/disable coarse-grain clock gating (CGCG) and, when supported,
 * coarse-grain light sleep (CGLS). Runs under RLC safe mode; the BPM SerDes
 * commands and RLC_CGTT_MGCG_OVERRIDE bits are sequenced around SerDes idle
 * waits. On disable, the GUI idle interrupt is turned off first and dummy
 * CB_CGTT_SCLK_CTRL reads wake the clock before the override is re-applied.
 */
5707 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5710 uint32_t temp, temp1, data, data1;
5712 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5714 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5716 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5717 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
5718 * Cmp_busy/GFX_Idle interrupts
5720 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5722 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5723 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5725 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5727 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5728 gfx_v8_0_wait_for_rlc_serdes(adev);
5730 /* 3 - clear cgcg override */
5731 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5733 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5734 gfx_v8_0_wait_for_rlc_serdes(adev);
5736 /* 4 - write cmd to set CGLS */
5737 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5739 /* 5 - enable cgcg */
5740 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
/* CGLS is enabled only when the CG flag advertises it; otherwise cleared */
5742 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5744 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5746 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5747 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5750 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5752 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5756 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5758 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5759 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5762 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5763 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5764 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5766 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5768 /* read gfx register to wake up cgcg */
5769 RREG32(mmCB_CGTT_SCLK_CTRL);
5770 RREG32(mmCB_CGTT_SCLK_CTRL);
5771 RREG32(mmCB_CGTT_SCLK_CTRL);
5772 RREG32(mmCB_CGTT_SCLK_CTRL);
5774 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5775 gfx_v8_0_wait_for_rlc_serdes(adev);
5777 /* write cmd to Set CGCG Override */
5778 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5780 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5781 gfx_v8_0_wait_for_rlc_serdes(adev);
5783 /* write cmd to Clear CGLS */
5784 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5786 /* disable cgcg, cgls should be disabled too. */
5787 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5788 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5790 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5793 gfx_v8_0_wait_for_rlc_serdes(adev);
5795 adev->gfx.rlc.funcs->exit_safe_mode(adev);
/*
 * Top-level GFX clock-gating toggle. Ordering matters: on enable, MGCG/MGLS
 * must be programmed before CGCG/CGLS; on disable, CGCG/CGLS must be torn
 * down before MGCG/MGLS — hence the two call orders below.
 */
5797 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5801 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5802 * === MGCG + MGLS + TS(CG/LS) ===
5804 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5805 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5807 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5808 * === CGCG + CGLS ===
5810 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5811 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
/*
 * amd_ip_funcs .set_clockgating_state hook: dispatch on ASIC type and apply
 * full GFX clock gating when the requested state is AMD_CG_STATE_GATE.
 */
5816 static int gfx_v8_0_set_clockgating_state(void *handle,
5817 enum amd_clockgating_state state)
5819 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5821 switch (adev->asic_type) {
5825 gfx_v8_0_update_gfx_clock_gating(adev,
5826 state == AMD_CG_STATE_GATE ? true : false);
/* Read the GFX ring read pointer from its writeback slot. */
5834 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5838 rptr = ring->adev->wb.wb[ring->rptr_offs];
/*
 * Read the GFX ring write pointer: from the writeback slot when doorbells
 * are in use, otherwise directly from the CP_RB0_WPTR register.
 */
5843 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5845 struct amdgpu_device *adev = ring->adev;
5848 if (ring->use_doorbell)
5849 /* XXX check if swapping is necessary on BE */
5850 wptr = ring->adev->wb.wb[ring->wptr_offs];
5852 wptr = RREG32(mmCP_RB0_WPTR);
/*
 * Commit the GFX ring write pointer: via writeback slot + doorbell when
 * doorbells are in use, otherwise via CP_RB0_WPTR (with a read-back to
 * flush the write).
 */
5857 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5859 struct amdgpu_device *adev = ring->adev;
5861 if (ring->use_doorbell) {
5862 /* XXX check if swapping is necessary on BE */
5863 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5864 WDOORBELL32(ring->doorbell_index, ring->wptr);
5866 WREG32(mmCP_RB0_WPTR, ring->wptr);
/* read back to confirm the register write has landed */
5867 (void)RREG32(mmCP_RB0_WPTR);
/*
 * Emit an HDP flush into the ring: a WAIT_REG_MEM packet that writes
 * GPU_HDP_FLUSH_REQ and polls GPU_HDP_FLUSH_DONE for the per-engine
 * completion bit. Compute rings select a CP mask shifted by pipe; the GFX
 * ring uses CP0 and the PFP engine.
 */
5871 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5873 u32 ref_and_mask, reg_mem_engine;
5875 if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
5878 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5881 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5888 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
5889 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
5892 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5893 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
5894 WAIT_REG_MEM_FUNCTION(3) | /* == */
5896 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
5897 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
5898 amdgpu_ring_write(ring, ref_and_mask);
5899 amdgpu_ring_write(ring, ref_and_mask);
5900 amdgpu_ring_write(ring, 0x20); /* poll interval */
/*
 * Emit an HDP cache invalidate: a WRITE_DATA packet writing 1 to
 * HDP_DEBUG0, which triggers the invalidation on VI parts.
 */
5903 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
5905 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5906 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5907 WRITE_DATA_DST_SEL(0) |
5909 amdgpu_ring_write(ring, mmHDP_DEBUG0);
5910 amdgpu_ring_write(ring, 0);
5911 amdgpu_ring_write(ring, 1);
/*
 * Emit an indirect buffer on the GFX ring. Writes the predicted next rptr
 * to the ring's next_rptr writeback slot, optionally emits a SWITCH_BUFFER
 * before the first IB of a frame, then the INDIRECT_BUFFER packet itself
 * (CONST variant for constant-engine IBs). vm_id is encoded in bits 24+ of
 * the control word.
 */
5915 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5916 struct amdgpu_ib *ib,
5917 unsigned vm_id, bool ctx_switch)
5919 u32 header, control = 0;
/* 5 dwords of WRITE_DATA below precede the IB packet */
5920 u32 next_rptr = ring->wptr + 5;
5926 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5927 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5928 amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5929 amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5930 amdgpu_ring_write(ring, next_rptr);
5932 /* insert SWITCH_BUFFER packet before first IB in the ring frame */
5934 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5935 amdgpu_ring_write(ring, 0);
5938 if (ib->flags & AMDGPU_IB_FLAG_CE)
5939 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5941 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5943 control |= ib->length_dw | (vm_id << 24);
5945 amdgpu_ring_write(ring, header);
5946 amdgpu_ring_write(ring,
5950 (ib->gpu_addr & 0xFFFFFFFC));
5951 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5952 amdgpu_ring_write(ring, control);
/*
 * Emit an indirect buffer on a compute ring. Same next-rptr writeback as
 * the GFX variant, but always uses a plain INDIRECT_BUFFER packet (no CE,
 * no SWITCH_BUFFER) with INDIRECT_BUFFER_VALID set in the control word.
 */
5955 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5956 struct amdgpu_ib *ib,
5957 unsigned vm_id, bool ctx_switch)
5959 u32 header, control = 0;
/* 5 dwords of WRITE_DATA below precede the IB packet */
5960 u32 next_rptr = ring->wptr + 5;
5962 control |= INDIRECT_BUFFER_VALID;
5965 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5966 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5967 amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5968 amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5969 amdgpu_ring_write(ring, next_rptr);
5971 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5973 control |= ib->length_dw | (vm_id << 24);
5975 amdgpu_ring_write(ring, header);
5976 amdgpu_ring_write(ring,
5980 (ib->gpu_addr & 0xFFFFFFFC));
5981 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5982 amdgpu_ring_write(ring, control);
/*
 * Emit a fence on the GFX ring via EVENT_WRITE_EOP: flush caches, write the
 * seq value (64-bit when AMDGPU_FENCE_FLAG_64BIT) to addr, and optionally
 * raise an interrupt (AMDGPU_FENCE_FLAG_INT).
 */
5985 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
5986 u64 seq, unsigned flags)
5988 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5989 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5991 /* EVENT_WRITE_EOP - flush caches, send int */
5992 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
5993 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5995 EOP_TC_WB_ACTION_EN |
5996 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5998 amdgpu_ring_write(ring, addr & 0xfffffffc);
5999 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6000 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6001 amdgpu_ring_write(ring, lower_32_bits(seq));
6002 amdgpu_ring_write(ring, upper_32_bits(seq));
/*
 * Emit a pipeline sync: WAIT_REG_MEM on the ring's own fence address until
 * the latest sync sequence number appears. GFX rings wait on the PFP engine
 * and then emit SWITCH_BUFFER packets to keep the CE from fetching ahead of
 * the context switch; compute rings wait on the ME.
 */
6006 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6008 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6009 uint32_t seq = ring->fence_drv.sync_seq;
6010 uint64_t addr = ring->fence_drv.gpu_addr;
6012 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6013 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6014 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6015 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6016 amdgpu_ring_write(ring, addr & 0xfffffffc);
6017 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6018 amdgpu_ring_write(ring, seq);
6019 amdgpu_ring_write(ring, 0xffffffff);
6020 amdgpu_ring_write(ring, 4); /* poll interval */
6023 /* sync CE with ME to prevent CE fetching CEIB before context switch done */
6024 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6025 amdgpu_ring_write(ring, 0);
6026 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6027 amdgpu_ring_write(ring, 0);
/*
 * Emit a VM flush: write the new page-directory base into the per-VMID
 * page-table base register (contexts 0-7 and 8-15 live in different
 * register ranges), request a TLB invalidate for that VMID, wait for the
 * invalidate to complete, and on GFX rings resynchronize PFP with ME plus
 * emit SWITCH_BUFFER packets.
 */
6031 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6032 unsigned vm_id, uint64_t pd_addr)
6034 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6036 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6037 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6038 WRITE_DATA_DST_SEL(0)) |
/* VMIDs 0-7 and 8-15 use separate base-address register banks */
6041 amdgpu_ring_write(ring,
6042 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6044 amdgpu_ring_write(ring,
6045 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6047 amdgpu_ring_write(ring, 0);
6048 amdgpu_ring_write(ring, pd_addr >> 12);
6050 /* bits 0-15 are the VM contexts0-15 */
6051 /* invalidate the cache */
6052 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6053 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6054 WRITE_DATA_DST_SEL(0)));
6055 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6056 amdgpu_ring_write(ring, 0);
6057 amdgpu_ring_write(ring, 1 << vm_id);
6059 /* wait for the invalidate to complete */
6060 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6061 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6062 WAIT_REG_MEM_FUNCTION(0) | /* always */
6063 WAIT_REG_MEM_ENGINE(0))); /* me */
6064 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6065 amdgpu_ring_write(ring, 0);
6066 amdgpu_ring_write(ring, 0); /* ref */
6067 amdgpu_ring_write(ring, 0); /* mask */
6068 amdgpu_ring_write(ring, 0x20); /* poll interval */
6070 /* compute doesn't have PFP */
6072 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6073 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6074 amdgpu_ring_write(ring, 0x0);
6075 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6076 amdgpu_ring_write(ring, 0);
6077 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6078 amdgpu_ring_write(ring, 0);
/* Read a compute ring's read pointer from its writeback slot. */
6082 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
6084 return ring->adev->wb.wb[ring->rptr_offs];
/* Read a compute ring's write pointer from its writeback slot. */
6087 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6089 return ring->adev->wb.wb[ring->wptr_offs];
/*
 * Commit a compute ring's write pointer: update the writeback slot and ring
 * the doorbell (compute rings always use doorbells here).
 */
6092 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6094 struct amdgpu_device *adev = ring->adev;
6096 /* XXX check if swapping is necessary on BE */
6097 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6098 WDOORBELL32(ring->doorbell_index, ring->wptr);
/*
 * Emit a fence on a compute ring via RELEASE_MEM (compute has no
 * EVENT_WRITE_EOP): flush caches, write seq to addr (64-bit when
 * AMDGPU_FENCE_FLAG_64BIT), optionally raise an interrupt.
 */
6101 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6105 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6106 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6108 /* RELEASE_MEM - flush caches, send int */
6109 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6110 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6112 EOP_TC_WB_ACTION_EN |
6113 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6115 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6116 amdgpu_ring_write(ring, addr & 0xfffffffc);
6117 amdgpu_ring_write(ring, upper_32_bits(addr));
6118 amdgpu_ring_write(ring, lower_32_bits(seq));
6119 amdgpu_ring_write(ring, upper_32_bits(seq));
/*
 * Enable/disable the GFX ring end-of-pipe (timestamp) interrupt by toggling
 * TIME_STAMP_INT_ENABLE in CP_INT_CNTL_RING0.
 */
6122 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6123 enum amdgpu_interrupt_state state)
6128 case AMDGPU_IRQ_STATE_DISABLE:
6129 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6130 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6131 TIME_STAMP_INT_ENABLE, 0);
6132 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6134 case AMDGPU_IRQ_STATE_ENABLE:
6135 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6137 REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6138 TIME_STAMP_INT_ENABLE, 1);
6139 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
/*
 * Enable/disable the compute end-of-pipe interrupt for a given ME/pipe by
 * toggling TIME_STAMP_INT_ENABLE in the matching CP_ME*_PIPE*_INT_CNTL.
 * Only MEC1 pipe 0 is handled here (see the inline comment); other pipes
 * are owned by amdkfd.
 */
6146 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6148 enum amdgpu_interrupt_state state)
6150 u32 mec_int_cntl, mec_int_cntl_reg;
6153 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6154 * handles the setting of interrupts for this specific pipe. All other
6155 * pipes' interrupts are set by amdkfd.
6161 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6164 DRM_DEBUG("invalid pipe %d\n", pipe);
6168 DRM_DEBUG("invalid me %d\n", me);
6173 case AMDGPU_IRQ_STATE_DISABLE:
6174 mec_int_cntl = RREG32(mec_int_cntl_reg);
6175 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6176 TIME_STAMP_INT_ENABLE, 0);
6177 WREG32(mec_int_cntl_reg, mec_int_cntl);
6179 case AMDGPU_IRQ_STATE_ENABLE:
6180 mec_int_cntl = RREG32(mec_int_cntl_reg);
6181 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6182 TIME_STAMP_INT_ENABLE, 1);
6183 WREG32(mec_int_cntl_reg, mec_int_cntl);
/*
 * Enable/disable the privileged-register-access fault interrupt
 * (PRIV_REG_INT_ENABLE in CP_INT_CNTL_RING0).
 */
6190 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6191 struct amdgpu_irq_src *source,
6193 enum amdgpu_interrupt_state state)
6198 case AMDGPU_IRQ_STATE_DISABLE:
6199 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6200 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6201 PRIV_REG_INT_ENABLE, 0);
6202 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6204 case AMDGPU_IRQ_STATE_ENABLE:
6205 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6206 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6207 PRIV_REG_INT_ENABLE, 1);
6208 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
/*
 * Enable/disable the privileged-instruction fault interrupt
 * (PRIV_INSTR_INT_ENABLE in CP_INT_CNTL_RING0).
 */
6217 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6218 struct amdgpu_irq_src *source,
6220 enum amdgpu_interrupt_state state)
6225 case AMDGPU_IRQ_STATE_DISABLE:
6226 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6227 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6228 PRIV_INSTR_INT_ENABLE, 0);
6229 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6231 case AMDGPU_IRQ_STATE_ENABLE:
6232 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6233 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6234 PRIV_INSTR_INT_ENABLE, 1);
6235 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
/*
 * IRQ source .set hook: route an EOP interrupt-state change to the GFX ring
 * helper or to the per-(ME, pipe) compute helper based on the IRQ type.
 */
6244 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6245 struct amdgpu_irq_src *src,
6247 enum amdgpu_interrupt_state state)
6250 case AMDGPU_CP_IRQ_GFX_EOP:
6251 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6253 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6254 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6256 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6257 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6259 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6260 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6262 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6263 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6265 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6266 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6268 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6269 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6271 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6272 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6274 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6275 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
/*
 * EOP interrupt handler: decode ME/pipe/queue from the IV ring_id and run
 * fence processing on the matching ring (GFX ring for ME 0, otherwise scan
 * the compute rings for an exact me/pipe/queue match).
 */
6283 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6284 struct amdgpu_irq_src *source,
6285 struct amdgpu_iv_entry *entry)
6288 u8 me_id, pipe_id, queue_id;
6289 struct amdgpu_ring *ring;
6291 DRM_DEBUG("IH: CP EOP\n");
/* ring_id encoding: bits 2-3 = ME, bits 0-1 = pipe, bits 4-6 = queue */
6292 me_id = (entry->ring_id & 0x0c) >> 2;
6293 pipe_id = (entry->ring_id & 0x03) >> 0;
6294 queue_id = (entry->ring_id & 0x70) >> 4;
6298 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6302 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6303 ring = &adev->gfx.compute_ring[i];
6304 /* Per-queue interrupt is supported for MEC starting from VI.
6305 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6307 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6308 amdgpu_fence_process(ring);
/* Privileged-register fault handler: log and schedule a GPU reset. */
6315 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6316 struct amdgpu_irq_src *source,
6317 struct amdgpu_iv_entry *entry)
6319 DRM_ERROR("Illegal register access in command stream\n");
6320 schedule_work(&adev->reset_work);
/* Privileged-instruction fault handler: log and schedule a GPU reset. */
6324 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6325 struct amdgpu_irq_src *source,
6326 struct amdgpu_iv_entry *entry)
6328 DRM_ERROR("Illegal instruction in command stream\n");
6329 schedule_work(&adev->reset_work);
/* IP-block callback table wiring the GFX v8 lifecycle into the amdgpu core. */
6333 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6335 .early_init = gfx_v8_0_early_init,
6336 .late_init = gfx_v8_0_late_init,
6337 .sw_init = gfx_v8_0_sw_init,
6338 .sw_fini = gfx_v8_0_sw_fini,
6339 .hw_init = gfx_v8_0_hw_init,
6340 .hw_fini = gfx_v8_0_hw_fini,
6341 .suspend = gfx_v8_0_suspend,
6342 .resume = gfx_v8_0_resume,
6343 .is_idle = gfx_v8_0_is_idle,
6344 .wait_for_idle = gfx_v8_0_wait_for_idle,
6345 .soft_reset = gfx_v8_0_soft_reset,
6346 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6347 .set_powergating_state = gfx_v8_0_set_powergating_state,
/* Ring op table for the GFX ring. */
6350 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6351 .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
6352 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6353 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6355 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6356 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6357 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6358 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6359 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6360 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6361 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6362 .test_ring = gfx_v8_0_ring_test_ring,
6363 .test_ib = gfx_v8_0_ring_test_ib,
6364 .insert_nop = amdgpu_ring_insert_nop,
6365 .pad_ib = amdgpu_ring_generic_pad_ib,
/* Ring op table for compute rings (compute-specific ib/fence/wptr hooks). */
6368 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6369 .get_rptr = gfx_v8_0_ring_get_rptr_compute,
6370 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6371 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6373 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6374 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6375 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6376 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6377 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6378 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6379 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6380 .test_ring = gfx_v8_0_ring_test_ring,
6381 .test_ib = gfx_v8_0_ring_test_ib,
6382 .insert_nop = amdgpu_ring_insert_nop,
6383 .pad_ib = amdgpu_ring_generic_pad_ib,
/* Attach the GFX/compute op tables to every ring on the device. */
6386 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6390 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6391 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6393 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6394 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
/* IRQ source ops for CP end-of-pipe interrupts. */
6397 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6398 .set = gfx_v8_0_set_eop_interrupt_state,
6399 .process = gfx_v8_0_eop_irq,
/* IRQ source ops for privileged-register fault interrupts. */
6402 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6403 .set = gfx_v8_0_set_priv_reg_fault_state,
6404 .process = gfx_v8_0_priv_reg_irq,
/* IRQ source ops for privileged-instruction fault interrupts. */
6407 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6408 .set = gfx_v8_0_set_priv_inst_fault_state,
6409 .process = gfx_v8_0_priv_inst_irq,
/* Register the three GFX interrupt sources with their op tables. */
6412 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6414 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6415 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6417 adev->gfx.priv_reg_irq.num_types = 1;
6418 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6420 adev->gfx.priv_inst_irq.num_types = 1;
6421 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
/* Select the RLC safe-mode op table (Iceland / CZ / no-op) per ASIC type. */
6424 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6426 switch (adev->asic_type) {
6428 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6432 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6435 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
/*
 * Initialize GDS (global data share) sizing: total memory from the
 * GDS_VMID0_SIZE register, fixed GWS/OA totals, and per-partition sizes
 * chosen by whether the part reports 64 KiB of GDS memory.
 */
6440 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6442 /* init ASIC gds info */
6443 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6444 adev->gds.gws.total_size = 64;
6445 adev->gds.oa.total_size = 16;
6447 if (adev->gds.mem.total_size == 64 * 1024) {
6448 adev->gds.mem.gfx_partition_size = 4096;
6449 adev->gds.mem.cs_partition_size = 4096;
6451 adev->gds.gws.gfx_partition_size = 4;
6452 adev->gds.gws.cs_partition_size = 4;
6454 adev->gds.oa.gfx_partition_size = 4;
6455 adev->gds.oa.cs_partition_size = 1;
6457 adev->gds.mem.gfx_partition_size = 1024;
6458 adev->gds.mem.cs_partition_size = 1024;
6460 adev->gds.gws.gfx_partition_size = 16;
6461 adev->gds.gws.cs_partition_size = 16;
6463 adev->gds.oa.gfx_partition_size = 4;
6464 adev->gds.oa.cs_partition_size = 4;
/*
 * Write a user-requested inactive-CU bitmap into
 * GC_USER_SHADER_ARRAY_CONFIG for the currently selected SE/SH.
 */
6468 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6476 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6477 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6479 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
/*
 * Return the active-CU bitmap for the currently selected SE/SH: the union
 * of fuse-disabled (CC_) and user-disabled (GC_USER_) CUs, inverted and
 * masked to max_cu_per_sh bits.
 */
6482 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6486 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
6487 data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6489 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6490 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6492 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6494 return (~data) & mask;
6497 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6499 int i, j, k, counter, active_cu_number = 0;
6500 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6501 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6502 unsigned disable_masks[4 * 2];
6504 memset(cu_info, 0, sizeof(*cu_info));
6506 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6508 mutex_lock(&adev->grbm_idx_mutex);
6509 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6510 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6514 gfx_v8_0_select_se_sh(adev, i, j);
6516 gfx_v8_0_set_user_cu_inactive_bitmap(
6517 adev, disable_masks[i * 2 + j]);
6518 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6519 cu_info->bitmap[i][j] = bitmap;
6521 for (k = 0; k < 16; k ++) {
6522 if (bitmap & mask) {
6529 active_cu_number += counter;
6530 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6533 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
6534 mutex_unlock(&adev->grbm_idx_mutex);
6536 cu_info->number = active_cu_number;
6537 cu_info->ao_cu_mask = ao_cu_mask;