
* fix.. speed_limit error... * draw tpms settings. * fix.. traffic light stopping only.. * fix.. waze cam * fix.. waze... * add setting (Enable comma connect ) * auto detect LFA2 * fix.. cruisespeed1 * vff2 driving model. * fix.. * agnos 12.3 * fix.. * ff * ff * test * ff * fix.. drawTurnInfo.. * Update drive_helpers.py * fix.. support eng voice eng sounds fix settings... english fix.. mph.. fix.. roadlimit speed bug.. * new vff model.. 250608 * fix soundd.. * fix safe exit speed.. * fix.. sounds. * fix.. radar timeStep.. * KerryGold model * Update drive_helpers.py * fix.. model. * fix.. * fix.. * Revert "fix.." This reverts commit b09ec459afb855c533d47fd7e8a1a6b1a09466e7. * Revert "fix.." This reverts commit 290bec6b83a4554ca232d531a911edccf94a2156. * fix esim * add more acc table. 10kph * kg update.. * fix cruisebutton mode3 * test atc..cond. * fix.. canfd * fix.. angle control limit
76 lines
2.7 KiB
Python
76 lines
2.7 KiB
Python
import pickle, sys
|
|
from dataclasses import replace
|
|
from tinygrad import Device, Context, Tensor, GlobalCounters
|
|
from tinygrad.device import Buffer
|
|
from tinygrad.helpers import getenv, BEAM
|
|
from tinygrad.engine.jit import TinyJit
|
|
from tinygrad.engine.realize import CompiledRunner, ExecItem, ScheduleItem, lower_schedule_item
|
|
from tinygrad.renderer import ProgramSpec
|
|
from tinygrad.codegen.kernel import Kernel, Opt, OptOps
|
|
from tinygrad.codegen.heuristic import hand_coded_optimizations
|
|
import numpy as np
|
|
|
|
def move_jit_captured_to_dev(captured, device="DSP"):
    """Retarget a captured JIT so that every buffer it references lives on ``device``.

    The object is modified in place:

    * each tuple in ``captured.expected_st_vars_dtype_device`` keeps its first
      three fields and gets ``device`` as its fourth;
    * every non-``None`` buffer referenced from ``captured.jit_cache`` is
      replaced by a new ``Buffer`` on ``device``.  A buffer without a base is
      recreated with the same size/dtype and, if the original was allocated,
      its contents are copied over.  A buffer with a base is recreated as a
      view (same offset) on top of the moved base.

    Args:
        captured: Captured JIT object exposing ``expected_st_vars_dtype_device``
            and ``jit_cache`` (items with ``prg`` and ``bufs``).
        device: Name of the device the buffers are moved to.

    Returns:
        The same ``captured`` object that was passed in.
    """
    captured.expected_st_vars_dtype_device = [
        x[:3] + (device,) for x in captured.expected_st_vars_dtype_device
    ]

    # Original buffer -> replacement buffer.  Guarantees that a buffer (or a
    # base shared by several views) is moved exactly once.
    assign = {}

    def move_buffer(b):
        """Return the replacement for ``b`` on ``device``, creating it on first use."""
        if b in assign:
            return assign[b]

        if b._base is not None:
            # View: rebuild it on the moved base; no copy is issued for the view itself.
            newbuf = Buffer(device, b.size, b.dtype, base=move_buffer(b._base), offset=b.offset)
        else:
            newbuf = Buffer(device, b.size, b.dtype)
            if b.is_allocated():
                newbuf.ensure_allocated().copyin(b.as_buffer())
        assign[b] = newbuf
        return newbuf

    for item in captured.jit_cache:
        for b in item.bufs:
            if b is not None:
                move_buffer(b)

    # Swap only the buffer list.  dataclasses.replace keeps every other field
    # of the exec item, which rebuilding it from (prg, bufs) alone would reset
    # to its default.
    captured.jit_cache = [
        replace(item, bufs=[assign.get(b, b) for b in item.bufs])
        for item in captured.jit_cache
    ]
    return captured
|
|
|
|
if __name__ == "__main__":
    # Replay a pickled TinyJit on the DSP device: move its buffers to DSP,
    # rebuild each compiled kernel with the hand-coded optimizations, run it,
    # and optionally validate the result / run the rebuilt JIT end to end.
    #
    # Environment switches (read through getenv):
    #   KNUM=<n>    only rebuild the n-th compiled kernel (0 = all, 1-based)
    #   VALIDATE=1  compare each rebuilt kernel against a NOOPT reference run
    #   NOOPT=1     do not apply the hand-coded optimizations
    #   RUN_JIT=1   afterwards run the whole JIT using the rebuilt kernels
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <pickled_jit>")

    with Context(DEBUG=0):
        # NOTE(security): pickle.load can execute arbitrary code contained in
        # the file -- only use this on pickles from a trusted source.
        with open(sys.argv[1], "rb") as f:
            fxn: TinyJit = pickle.load(f)
            print(f"{f.tell()/1e6:.2f}M loaded")
        print(type(fxn))

    # Move all buffers to DSP device.
    fxn.captured = move_jit_captured_to_dev(fxn.captured, "DSP")
    new_jit = []

    # These do not change while the loop runs, so look them up once.
    pknum = getenv("KNUM", 0)
    validate = getenv("VALIDATE")
    noopt = getenv("NOOPT")

    knum = 1
    for ei in fxn.captured.jit_cache:
        # Only compiled kernels whose buffers are all present are rebuilt; this
        # skips non-kernel items (e.g. copies) and kernels that reference a
        # None buffer (presumably JIT input placeholders -- TODO confirm).
        if isinstance(ei.prg, CompiledRunner) and all(x is not None for x in ei.bufs):
            if knum == pknum or pknum == 0:
                p: ProgramSpec = ei.prg.p
                k = Kernel(p.ast, Device["DSP"].renderer)

                if validate:
                    # Reference result: run the same AST with NOOPT=1 in the
                    # context, then zero the output buffer so the rebuilt
                    # kernel has to produce the values itself.
                    with Context(NOOPT=1):
                        lower_schedule_item(ScheduleItem(p.ast, ei.bufs)).run()
                    correct = ei.bufs[0].numpy()
                    ei.bufs[0].copyin(memoryview(bytearray(b'\x00'*ei.bufs[0].nbytes)))
                    # Undo the kernel-count bump from the reference run.
                    GlobalCounters.kernel_count -= 1

                if not noopt:
                    k.apply_opts(hand_coded_optimizations(k))
                p2 = k.to_program()
                new_ei = replace(ei, prg=CompiledRunner(p2))
                new_ei.run()
                new_jit.append(new_ei)
                # Read back the output of the rebuilt kernel (compared below
                # only when VALIDATE is set).
                test = ei.bufs[0].numpy()

                if validate:
                    np.testing.assert_allclose(correct, test, rtol=1e-3, atol=1e-3)
            # Counts every eligible kernel, selected or not, so KNUM is a
            # stable 1-based index into the compiled kernels.
            knum += 1

    if getenv("RUN_JIT", 0):
        fxn.captured.free_intermediates()
        fxn.captured.jit_cache = new_jit
        fxn(input=Tensor(np.zeros((1, 3, 224, 224), dtype=np.float32), device="DSP"))
|