
* fix.. speed_limit error... * draw tpms settings. * fix.. traffic light stopping only.. * fix.. waze cam * fix.. waze... * add setting (Enable comma connect ) * auto detect LFA2 * fix.. cruisespeed1 * vff2 driving model. * fix.. * agnos 12.3 * fix.. * ff * ff * test * ff * fix.. drawTurnInfo.. * Update drive_helpers.py * fix.. support eng voice eng sounds fix settings... english fix.. mph.. fix.. roadlimit speed bug.. * new vff model.. 250608 * fix soundd.. * fix safe exit speed.. * fix.. sounds. * fix.. radar timeStep.. * KerryGold model * Update drive_helpers.py * fix.. model. * fix.. * fix.. * Revert "fix.." This reverts commit b09ec459afb855c533d47fd7e8a1a6b1a09466e7. * Revert "fix.." This reverts commit 290bec6b83a4554ca232d531a911edccf94a2156. * fix esim * add more acc table. 10kph * kg update.. * fix cruisebutton mode3 * test atc..cond. * fix.. canfd * fix.. angle control limit
38 lines
1.2 KiB
Python
38 lines
1.2 KiB
Python
from tinygrad import Tensor, dtypes, Device
|
|
from tinygrad.helpers import getenv, DEBUG
|
|
from tinygrad.codegen.kernel import Kernel, Opt, OptOps
|
|
from tinygrad.engine.realize import CompiledRunner, ExecItem
|
|
from dataclasses import replace
|
|
|
|
# Base dimension for the matmul operands built below.
N = 4096

if __name__ == "__main__":
    # GEMV is read once; a truthy value selects the matrix-vector style
    # problem and its matching optimization list.
    gemv = getenv("GEMV")

    if gemv:
        # (1, N) float row times a transposed (14336, N) float16 matrix.
        A, B = Tensor.empty(1, N, dtype=dtypes.float), Tensor.empty(14336, N, dtype=dtypes.float16).T
    else:
        # Square (N, N) float16 operands.
        A, B = Tensor.empty(N, N, dtype=dtypes.float16), Tensor.empty(N, N, dtype=dtypes.float16)

    C = A.matmul(B)

    # Use the last item of the schedule for C.
    # NOTE(review): presumably this is the matmul kernel itself - confirm.
    si = C.schedule()[-1]
    ast = si.ast

    # Build a kernel for that AST using the default device's renderer.
    k = Kernel(ast, opts=Device[Device.DEFAULT].renderer)

    # Hand-picked optimizations, applied in list order.
    if gemv:
        opts = [
            Opt(op=OptOps.UNROLL, axis=0, amt=8),
            Opt(op=OptOps.GROUP, axis=0, amt=32),
        ]
    else:
        opts = [
            Opt(op=OptOps.TC, axis=0, amt=0),
            Opt(op=OptOps.UPCAST, axis=0, amt=4),
            Opt(op=OptOps.UPCAST, axis=1, amt=8),
            Opt(op=OptOps.LOCAL, axis=0, amt=2),
            Opt(op=OptOps.LOCAL, axis=1, amt=2),
            Opt(op=OptOps.LOCAL, axis=0, amt=2),
        ]
    k.apply_opts(opts)

    prg = k.to_program()
    new_src = prg.src
    # can mod source here
    # The program is rebuilt with `new_src`, so any edit made to it above
    # is what actually gets compiled and run.
    prg = replace(prg, src=new_src)

    # Allocate every buffer of the schedule item before wrapping the
    # program in an executable item.
    ei = ExecItem(CompiledRunner(prg), [x.ensure_allocated() for x in si.bufs], si.metadata)

    # Run the kernel five times, waiting on each run.
    for _ in range(5):
        ei.run(wait=True)