carrot/tinygrad_repo/test/external/external_test_hip_compile.py

40 lines
1.6 KiB
Python
Raw Normal View History

import time, unittest
from tinygrad.runtime.support.hip_comgr import compile_hip
from tinygrad import Tensor
from tinygrad.device import Device
from tinygrad.engine.schedule import create_schedule
from tinygrad.codegen.kernel import Kernel
class TestHIPCompileSpeed(unittest.TestCase):
@unittest.skipIf(Device.DEFAULT != "HIP", "only run on HIP")
def test_hip_compile(self):
a, b = Tensor([1,2,3,4,5]), Tensor([1,2,3,4,5])
out = a + b
lin = Kernel(create_schedule([out.lazydata])[-1].ast[0])
lin.linearize()
reference = """
#include <hip/hip_common.h>
typedef long unsigned int size_t;
extern "C" __attribute__((device)) __attribute__((const)) size_t __ockl_get_local_id(unsigned int);
extern "C" __attribute__((device)) __attribute__((const)) size_t __ockl_get_group_id(unsigned int);
extern "C" __attribute__((device)) __attribute__((const)) size_t __ockl_get_local_size(unsigned int);
extern "C" __attribute__((global))void {name}(int* data0, const int* data1, const int* data2) {{
int gidx0 = __ockl_get_group_id(0); /* 5 */
int val0 = data1[gidx0];
int val1 = data2[gidx0];
data0[gidx0] = (val0+val1);
}}
"""
def time_compile(code):
st = time.perf_counter()
compile_hip(code)
return (time.perf_counter() - st) * 1000
tinygrad_tm = min([time_compile(Device[Device.DEFAULT].renderer.render(f"test{i}", lin.uops)) for i in range(10)])
ref_tm = min([time_compile(reference.format(name=f"test{i}")) for i in range(10)])
print(f"tinygrad {tinygrad_tm:6.2f} ms")
print(f"reference {ref_tm:6.2f} ms")
assert (tinygrad_tm - ref_tm) <= 10