FrogAi 659adb6457 openpilot v0.9.7 release
date: 2024-03-17T10:14:38
master commit: 7e9a909e0e57ecb31df4c87c5b9a06b1204fd034
2024-05-24 17:43:27 -07:00

81 lines
2.4 KiB
ArmAsm

.global _start
_start:
.rodata
.align 0x10
.global code.kd
.type code.kd,STT_OBJECT
# amd_kernel_code_t (must be at 0x440 for kernel_code_entry_byte_offset to be right)
code.kd:
# amd_kernel_..., amd_machine_...
.long 0,0,0,0
# kernel_code_entry_byte_offset, kernel_code_prefetch_byte_offset
.long 0x00000bc0,0x00000000,0x00000000,0x00000000
# kernel_code_prefetch_byte_size, max_scratch_backing_memory_byte_size
.long 0,0,0,0
# compute_pgm_rsrc1, compute_pgm_rsrc2, kernel_code_properties, workitem_private_segment_byte_size
.long 0x60af0000,0x0000009e,0x00000408,0x00000000
# compute_pgm_rsrc1 |= AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32 | AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64
# compute_pgm_rsrc1 |= AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP | AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE
# compute_pgm_rsrc2 |= AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT = 0xF
# compute_pgm_rsrc2 |= AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X
# kernel_code_properties |= AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR = 1
# kernel_code_properties |= AMD_KERNEL_CODE_PROPERTIES_RESERVED1 = 1
.text
.global code
.type code,STT_FUNC
code:
# https://llvm.org/docs/AMDGPUUsage.html#initial-kernel-execution-state
# s[0:1] contains the kernarg_address
# TODO: can we use s[2:3] if this was really a wave since we only alloced 2 SGPRs?
s_load_b64 s[2:3], s[0:1], null
s_mov_b32 s8, 0
loop:
s_addk_i32 s8, 1
s_cmp_eq_u32 s8, 100000
// FLOPS
s_cbranch_scc0 loop
# wait for the s_load_b64
s_waitcnt lgkmcnt(0)
v_dual_mov_b32 v0, 4 :: v_dual_mov_b32 v1, 2.0
global_store_b32 v0, v1, s[2:3]
# Deallocate all VGPRs for this wave. Use only when next instruction is S_ENDPGM.
s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
s_endpgm
s_code_end
.amdgpu_metadata
amdhsa.kernels:
- .args:
- .address_space: global
.name: a
.offset: 0
.size: 8
.type_name: 'float*'
.value_kind: global_buffer
.group_segment_fixed_size: 0
.kernarg_segment_align: 8
.kernarg_segment_size: 8
.language: OpenCL C
.language_version:
- 1
- 2
.max_flat_workgroup_size: 256
.name: code
.private_segment_fixed_size: 0
.sgpr_count: 2
.sgpr_spill_count: 0
.symbol: code.kd
.uses_dynamic_stack: false
.vgpr_count: 256
.vgpr_spill_count: 0
.wavefront_size: 32
amdhsa.target: amdgcn-amd-amdhsa--gfx1100
amdhsa.version:
- 1
- 2
.end_amdgpu_metadata