FrogAi 659adb6457 openpilot v0.9.7 release
date: 2024-03-17T10:14:38
master commit: 7e9a909e0e57ecb31df4c87c5b9a06b1204fd034
2024-05-24 17:43:27 -07:00

25 lines
1.2 KiB
Python

import functools
from tinygrad.renderer.cstyle import uops_to_cstyle, CStyleLanguage
class CUDALanguage(CStyleLanguage):
kernel_prefix = "__global__ "
smem_prefix = "__shared__ "
smem_prefix_for_cast = False
arg_int_prefix = "const int"
barrier = "__syncthreads();"
float4 = "make_float4"
gid = [f'blockIdx.{chr(120+i)}' for i in range(3)]
lid = [f'threadIdx.{chr(120+i)}' for i in range(3)]
xid = [f'(blockIdx.{chr(120+i)}*blockDim.{chr(120+i)}+threadIdx.{chr(120+i)})' for i in range(3)]
half_prekernel = """
#include <cuda_fp16.h>
#include <mma.h>
using namespace nvcuda;
struct __align__(8) half4 {
half2 x, y;
__device__ __forceinline__ explicit half4(const float4& a): x(make_half2(__float2half(a.x), __float2half(a.y))), y(make_half2(__float2half(a.z),__float2half(a.w))) {}
__device__ __forceinline__ explicit operator float4() const {return make_float4(__half2float(x.x), __half2float(x.y), __half2float(y.x), __half2float(y.y)); }
};
""" # if not getenv("PTX") else fromimport("tinygrad.renderer.assembly_ptx", "uops_to_ptx_asm") # assembly_ptx currently isn't supported
CUDARenderer = functools.partial(uops_to_cstyle, CUDALanguage())