
* fix.. speed_limit error... * draw tpms settings. * fix.. traffic light stopping only.. * fix.. waze cam * fix.. waze... * add setting (Enable comma connect ) * auto detect LFA2 * fix.. cruisespeed1 * vff2 driving model. * fix.. * agnos 12.3 * fix.. * ff * ff * test * ff * fix.. drawTurnInfo.. * Update drive_helpers.py * fix.. support eng voice eng sounds fix settings... english fix.. mph.. fix.. roadlimit speed bug.. * new vff model.. 250608 * fix soundd.. * fix safe exit speed.. * fix.. sounds. * fix.. radar timeStep.. * KerryGold model * Update drive_helpers.py * fix.. model. * fix.. * fix.. * Revert "fix.." This reverts commit b09ec459afb855c533d47fd7e8a1a6b1a09466e7. * Revert "fix.." This reverts commit 290bec6b83a4554ca232d531a911edccf94a2156. * fix esim * add more acc table. 10kph * kg update.. * fix cruisebutton mode3 * test atc..cond. * fix.. canfd * fix.. angle control limit
28 lines
1.0 KiB
Python
28 lines
1.0 KiB
Python
"""Softmax experiment script for tinygrad.

Runs a last-axis half-precision softmax over an (81920, 4096) tensor three
ways: the default path, the `_single_kernel=True` path, and a hand-optimized
kernel built from the `_single_kernel=True` schedule with explicit opts.
"""
from tinygrad import Tensor, dtypes, GlobalCounters

if __name__ == "__main__":
  t = Tensor.empty(81920, 4096, dtype=dtypes.half)

  # 1) default softmax; counters are reset first so each run starts from zero
  GlobalCounters.reset()
  t.softmax(-1, dtype="half").realize()

  # 2) same softmax with the `_single_kernel` flag enabled
  GlobalCounters.reset()
  t.softmax(-1, dtype="half", _single_kernel=True).realize()

  # 3) schedule the `_single_kernel` variant ourselves and apply opts by hand
  from tinygrad.codegen.kernel import Kernel, Opt, OptOps
  from tinygrad.helpers import get_single_element
  GlobalCounters.reset()
  # NOTE(review): presumably get_single_element requires the schedule to hold
  # exactly one item -- confirm against tinygrad.helpers
  si = get_single_element(t.softmax(-1, dtype="half", _single_kernel=True).schedule())
  k = Kernel(si.ast)

  # Opts are applied strictly in this order. Disabled alternatives from earlier
  # experiments are kept as comments at the position where they were tried.
  hand_opts = (
    # Opt(OptOps.UPCAST, 0, 4),
    Opt(OptOps.UPCAST, 1, 4),
    Opt(OptOps.LOCAL, 1, 32),
    # Opt(OptOps.LOCAL, 0, 8),
    Opt(OptOps.UNROLL, 1, 4),
    Opt(OptOps.UNROLL, 0, 4),
    # Opt(OptOps.GROUP, 1, 256),
    # Opt(OptOps.GROUP, 0, 32),
    # Opt(OptOps.GROUP, 1, 32),
    # Opt(OptOps.GROUP, 0, 32),
  )
  for opt in hand_opts:
    k.apply_opt(opt)

  # turn the optimized kernel into a program and run it on the scheduled buffers
  from tinygrad.engine.realize import CompiledRunner, ExecItem
  prg = k.to_program()
  run = CompiledRunner(prg)
  ExecItem(run, si.bufs).run()