
* fix.. speed_limit error... * draw tpms settings. * fix.. traffic light stopping only.. * fix.. waze cam * fix.. waze... * add setting (Enable comma connect ) * auto detect LFA2 * fix.. cruisespeed1 * vff2 driving model. * fix.. * agnos 12.3 * fix.. * ff * ff * test * ff * fix.. drawTurnInfo.. * Update drive_helpers.py * fix.. support eng voice eng sounds fix settings... english fix.. mph.. fix.. roadlimit speed bug.. * new vff model.. 250608 * fix soundd.. * fix safe exit speed.. * fix.. sounds. * fix.. radar timeStep.. * KerryGold model * Update drive_helpers.py * fix.. model. * fix.. * fix.. * Revert "fix.." This reverts commit b09ec459afb855c533d47fd7e8a1a6b1a09466e7. * Revert "fix.." This reverts commit 290bec6b83a4554ca232d531a911edccf94a2156. * fix esim * add more acc table. 10kph * kg update.. * fix cruisebutton mode3 * test atc..cond. * fix.. canfd * fix.. angle control limit
39 lines
1.9 KiB
Python
39 lines
1.9 KiB
Python
from typing import List, Dict, cast
|
|
import ctypes
|
|
from tinygrad.helpers import dedup, cpu_time_execution, DEBUG
|
|
from tinygrad.engine.jit import GraphRunner, GraphException
|
|
from tinygrad.device import Buffer, Device
|
|
from tinygrad.engine.realize import ExecItem, CompiledRunner
|
|
from tinygrad.uop.ops import Variable
|
|
from tinygrad.runtime.ops_cpu import ClangProgram
|
|
from tinygrad.renderer.cstyle import ClangRenderer
|
|
# Renderer method used below to turn a buffer's dtype into the type string that is
# spliced into the generated C signature and pointer casts.
render_dtype = ClangRenderer().render_dtype
|
|
|
|
class ClangGraph(GraphRunner):
  """Graph runner that folds a whole JIT cache into one compiled C entry point.

  Construction joins the de-duplicated source of every kernel in the cache and appends a
  generated ``batched`` function that calls each kernel once, in cache order. Calling the
  instance invokes that ``batched`` function.
  """

  def __init__(self, jit_cache: List[ExecItem], input_rawbuffers: List[Buffer], var_vals: Dict[Variable, int]):
    """Generate, compile and load the ``batched`` function for ``jit_cache``.

    Args:
      jit_cache: the items to batch; every ``prg`` must be a ``CompiledRunner``.
      input_rawbuffers: buffers that become the ``arg{i}`` pointer parameters of ``batched``.
      var_vals: variables that become the ``int`` parameters of ``batched``.

    Raises:
      GraphException: if any item's program is not a ``CompiledRunner``.
    """
    super().__init__(jit_cache, input_rawbuffers, var_vals)
    if any(not isinstance(item.prg, CompiledRunner) for item in jit_cache): raise GraphException

    kernel_srcs = '\n'.join(dedup([cast(CompiledRunner, item.prg).p.src for item in jit_cache]))

    # signature: one typed pointer per input buffer, then the variables as ints in sorted order
    buffer_params = [f"{render_dtype(rb.dtype)}* arg{pos}" for pos, rb in enumerate(input_rawbuffers)]
    variable_params = sorted([f"int {var.expr}" for var in var_vals])
    lines = ["void batched("+','.join(buffer_params + variable_params)+") {"]

    for item in jit_cache:
      runner = cast(CompiledRunner, item.prg)
      call_args = []
      for buf in item.bufs:
        assert buf is not None
        if buf in input_rawbuffers:
          # input buffers are forwarded from the matching parameter of batched
          call_args.append(f"arg{input_rawbuffers.index(buf)}")
        else:
          # any other buffer is referenced by its current address, written into the source as a constant
          call_args.append(f"({render_dtype(buf.dtype)}*)0x{ctypes.addressof(buf._buf):X}")
      call_args.extend(var.expr for var in runner.p.vars)
      lines.append(f" {runner.p.function_name}({','.join(call_args)});")
    lines.append("}")

    batched_src = "\n".join(lines)
    if DEBUG >= 4: print(batched_src)

    compiler = Device["CPU"].compiler
    assert compiler is not None
    self._prg = ClangProgram("batched", compiler.compile(kernel_srcs+"\n"+batched_src)) # no point in caching the pointers

  def __call__(self, rawbufs: List[Buffer], var_vals: Dict[Variable, int], wait=False):
    """Invoke the compiled ``batched`` function.

    Args:
      rawbufs: buffers whose ``_buf`` handles are passed positionally, in list order.
      var_vals: variable values, passed after the buffers and ordered by each variable's ``expr``
        (the same ordering the generated signature uses).
      wait: forwarded to ``cpu_time_execution`` as ``enable``.

    Returns:
      Whatever ``cpu_time_execution`` returns (presumably the elapsed time when ``wait`` is set; confirm against that helper).
    """
    def launch():
      ordered_vals = [val for _, val in sorted(var_vals.items(), key=lambda kv: kv[0].expr)]
      return self._prg(*(rb._buf for rb in rawbufs), *ordered_vals)

    return cpu_time_execution(launch, enable=wait)