
* fix.. speed_limit error... * draw tpms settings. * fix.. traffic light stopping only.. * fix.. waze cam * fix.. waze... * add setting (Enable comma connect ) * auto detect LFA2 * fix.. cruisespeed1 * vff2 driving model. * fix.. * agnos 12.3 * fix.. * ff * ff * test * ff * fix.. drawTurnInfo.. * Update drive_helpers.py * fix.. support eng voice eng sounds fix settings... english fix.. mph.. fix.. roadlimit speed bug.. * new vff model.. 250608 * fix soundd.. * fix safe exit speed.. * fix.. sounds. * fix.. radar timeStep.. * KerryGold model * Update drive_helpers.py * fix.. model. * fix.. * fix.. * Revert "fix.." This reverts commit b09ec459afb855c533d47fd7e8a1a6b1a09466e7. * Revert "fix.." This reverts commit 290bec6b83a4554ca232d531a911edccf94a2156. * fix esim * add more acc table. 10kph * kg update.. * fix cruisebutton mode3 * test atc..cond. * fix.. canfd * fix.. angle control limit
47 lines
1.3 KiB
Python
47 lines
1.3 KiB
Python
# https://tvm.apache.org/docs/tutorial/tensor_expr_get_started.html#example-2-manually-optimizing-matrix-multiplication-with-te

# Problem size shared by both halves of this script:
# A is (M, K), B is (K, N) and the product C is (M, N).
M, N, K = 1024, 1024, 1024

# TVM version. Everything that needs TVM lives inside the try-suite so the
# script can continue without it when the package is not installed.
try:
    import tvm
    from tvm import te

    # Debug aid (list the available target kinds):
    # print(tvm.target.Target.list_kinds())

    # c, opencl
    target = tvm.target.Target(target="c")

    # TVM Matrix Multiplication using TE
    # `k` is the reduction axis running over the shared K dimension.
    k = te.reduce_axis((0, K), "k")
    A = te.placeholder((M, K), name="A")
    B = te.placeholder((K, N), name="B")
    # C[x, y] = sum over k of A[x, k] * B[k, y]
    C = te.compute((M, N), lambda x, y: te.sum(A[x, k] * B[k, y], axis=k), name="C")

    # Default schedule
    s = te.create_schedule(C.op)
    # Debug aid (show the lowered form):
    # print(tvm.lower(s, [A, B, C], simple_mode=True))

    # Output C code: build for the selected target and print the generated
    # source of the function named "mmult".
    func = tvm.build(s, [A, B, C], target=target, name="mmult")
    print(func.get_source())
except ImportError:
    # Only a missing/unimportable TVM is tolerated; other errors propagate.
    print("** please install TVM for TVM output")

# tinygrad version
#
# Builds the same (M, K) x (K, N) product with tinygrad, lowers the last
# scheduled item to uops and prints the source rendered for it under the
# name "mmult". Relies on the module-level M, N, K defined earlier in this file.
# NOTE(review): the tinygrad import paths used below (codegen.kernel,
# device.CompilerOptions, runtime.ops_cpu.renderer) look version-specific —
# confirm they match the installed tinygrad.

import os  # not used in the visible part of this script; kept as-is
from tinygrad.tensor import Tensor

# define the compute
A = Tensor.rand(M, K, device="CPU")
B = Tensor.rand(K, N, device="CPU")
# Matmul written as a broadcast multiply followed by a reduction:
# (M, 1, K) * (1, N, K), summed over the last axis.
C = (A.reshape(M, 1, K) * B.permute(1,0).reshape(1, N, K)).sum(axis=2)

sched = C.schedule()
from tinygrad.codegen.kernel import Kernel
from tinygrad.device import CompilerOptions
# Use the last item of the schedule (presumably the reduction kernel for C —
# TODO confirm), with local memory and float4 support switched off.
lin = Kernel(sched[-1].ast, CompilerOptions(has_local=False, supports_float4=False))
lin.linearize()
from tinygrad.runtime.ops_cpu import renderer
src = renderer("mmult", lin.uops)
print(src)