KerryGoldModel, AGNOS 12.3, ButtonMode3, autoDetectLFA2 (#181)

* fix.. speed_limit error...

* draw tpms settings.

* fix.. traffic light stopping only..

* fix.. waze cam

* fix.. waze...

* add setting (Enable comma connect )

* auto detect LFA2

* fix.. cruisespeed1

* vff2 driving model.

* fix..

* agnos 12.3

* fix..

* ff

* ff

* test

* ff

* fix.. drawTurnInfo..

* Update drive_helpers.py

* fix..

support eng voice

eng sounds

fix settings... english

fix.. mph..

fix.. roadlimit speed bug..

* new vff model.. 250608

* fix soundd..

* fix safe exit speed..

* fix.. sounds.

* fix.. radar timeStep..

* KerryGold model

* Update drive_helpers.py

* fix.. model.

* fix..

* fix..

* Revert "fix.."

This reverts commit b09ec459afb855c533d47fd7e8a1a6b1a09466e7.

* Revert "fix.."

This reverts commit 290bec6b83a4554ca232d531a911edccf94a2156.

* fix esim

* add more acc table. 10kph

* kg update..

* fix cruisebutton mode3

* test atc..cond.

* fix.. canfd

* fix.. angle control limit
Author: carrot, 2025-06-13 15:59:36 +09:00 (committed by GitHub)
Commit: efee1712aa (parent: 15e6ed3e3b)
GPG Key ID: B5690EEEBB952194 (no known key found for this signature in database)
432 changed files with 25034 additions and 1096533 deletions

View File

@ -1,9 +1,11 @@
import os
import capnp
from importlib.resources import as_file, files
CEREAL_PATH = os.path.dirname(os.path.abspath(__file__))
capnp.remove_import_hook()
with as_file(files("cereal")) as fspath:
CEREAL_PATH = fspath.as_posix()
log = capnp.load(os.path.join(CEREAL_PATH, "log.capnp"))
car = capnp.load(os.path.join(CEREAL_PATH, "car.capnp"))
custom = capnp.load(os.path.join(CEREAL_PATH, "custom.capnp"))

View File

@ -48,6 +48,7 @@ struct OnroadEvent @0xc4fa6047f024e718 {
preEnableStandstill @12; # added during pre-enable state with brake
gasPressedOverride @13; # added when user is pressing gas with no disengage on gas
steerOverride @14;
steerDisengage @94; # exits active state
cruiseDisabled @15;
speedTooLow @16;
outOfSpace @17;
@ -126,10 +127,11 @@ struct OnroadEvent @0xc4fa6047f024e718 {
espActive @90;
personalityChanged @91;
aeb @92;
userFlag @95;
softHold @115;
trafficStopping @94;
audioPrompt @95;
trafficStopping @116;
audioPrompt @117;
audioRefuse @96;
stopStop @97;
audioLaneChange @98;
@ -512,6 +514,7 @@ struct DeviceState @0xa4d8b5af2aa492eb {
# device thermals
cpuTempC @26 :List(Float32);
gpuTempC @27 :List(Float32);
dspTempC @49 :Float32;
memoryTempC @28 :Float32;
nvmeTempC @35 :List(Float32);
modemTempC @36 :List(Float32);

View File

@ -140,6 +140,7 @@ inline static std::unordered_map<std::string, uint32_t> keys = {
{"SupportedCars", PERSISTENT},
{"SupportedCars_gm", PERSISTENT},
{"ShowDebugUI", PERSISTENT},
{"ShowTpms", PERSISTENT},
{"ShowDateTime", PERSISTENT},
{"ShowPathEnd", PERSISTENT},
{"ShowCustomBrightness", PERSISTENT},
@ -203,6 +204,7 @@ inline static std::unordered_map<std::string, uint32_t> keys = {
{"TrafficLightDetectMode", PERSISTENT},
{"SteerActuatorDelay", PERSISTENT},
{"CruiseOnDist", PERSISTENT},
{"CruiseMaxVals0", PERSISTENT},
{"CruiseMaxVals1", PERSISTENT},
{"CruiseMaxVals2", PERSISTENT},
{"CruiseMaxVals3", PERSISTENT},
@ -262,6 +264,7 @@ inline static std::unordered_map<std::string, uint32_t> keys = {
{"SpeedFromPCM", PERSISTENT},
{"MaxTimeOffroadMin", PERSISTENT},
{"DisableDM", PERSISTENT},
{"EnableConnect", PERSISTENT},
{"MuteDoor", PERSISTENT},
{"MuteSeatbelt", PERSISTENT},
{"CarrotException", CLEAR_ON_MANAGER_START},

View File

@ -1,3 +1,25 @@
import os
import subprocess
def sudo_write(val: str, path: str) -> None:
try:
with open(path, 'w') as f:
f.write(str(val))
except PermissionError:
os.system(f"sudo chmod a+w {path}")
try:
with open(path, 'w') as f:
f.write(str(val))
except PermissionError:
# fallback for debugfs files
os.system(f"sudo su -c 'echo {val} > {path}'")
def sudo_read(path: str) -> str:
try:
return subprocess.check_output(f"sudo cat {path}", shell=True, encoding='utf8').strip()
except Exception:
return ""
class MovingAverage:
def __init__(self, window_size: int):
self.window_size: int = window_size
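A brief usage sketch for the sudo_write/sudo_read helpers added in this hunk; it assumes both functions are in scope, and the sysfs path below is a made-up example rather than one referenced by this commit.

```python
# usage sketch only; path is hypothetical
path = "/sys/class/thermal/thermal_zone0/trip_point_0_temp"
sudo_write("95000", path)            # tries a direct write, escalates via sudo on PermissionError
print(sudo_read(path) or "<empty>")  # returns "" if even the sudo-backed read fails
```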

View File

@ -7,7 +7,7 @@ export OPENBLAS_NUM_THREADS=1
export VECLIB_MAXIMUM_THREADS=1
if [ -z "$AGNOS_VERSION" ]; then
export AGNOS_VERSION="12.1"
export AGNOS_VERSION="12.3"
fi
export STAGING_ROOT="/data/safe_staging"

View File

@ -798,5 +798,5 @@ struct CarParams {
maxSteeringAngleDegDEPRECATED @54 :Float32;
longitudinalActuatorDelayLowerBoundDEPRECATED @61 :Float32;
stoppingControlDEPRECATED @31 :Bool; # Does the car allow full control even at lows speeds when stopping
radarTimeStepDEPRECATED @45: Float32 = 0.05; # time delta between radar updates, 20Hz is very standard
radarTimeStep @45: Float32; # time delta between radar updates, 20Hz is very standard
}

View File

@ -450,7 +450,7 @@ class CarState(CarStateBase):
self.hda_info_4a3 = copy.copy(cp.vl.get("HDA_INFO_4A3", {}))
speedLimit = self.hda_info_4a3["SPEED_LIMIT"]
if not self.is_metric:
speedLimit = CV.MPH_TO_KPH
speedLimit *= CV.MPH_TO_KPH
ret.speedLimit = speedLimit if speedLimit < 255 else 0
if int(self.hda_info_4a3["NEW_SIGNAL_4"]) == 17:
speed_limit_cam = True
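The one-character fix above turns an assignment into a multiplication: on non-metric cars the value read from HDA_INFO_4A3 is in mph and must be scaled to km/h, not overwritten by the conversion constant. A minimal numeric sketch (the constant is the standard conversion factor, assumed to match CV.MPH_TO_KPH):

```python
MPH_TO_KPH = 1.609344  # standard mph -> km/h factor, assumed equal to CV.MPH_TO_KPH

speed_limit = 45              # raw CAN value, in mph on a non-metric car
# old (buggy): speed_limit = MPH_TO_KPH   -> 1.6, effectively no usable limit
# new (fixed): speed_limit *= MPH_TO_KPH  -> ~72.4 km/h
speed_limit *= MPH_TO_KPH
print(round(speed_limit, 1))  # 72.4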

View File

@ -57,6 +57,10 @@ class CarInterface(CarInterfaceBase):
if 0x4a3 in fingerprint[CAN.ECAN]:
ret.extFlags |= HyundaiExtFlags.CANFD_4A3.value
if 203 in fingerprint[CAN.CAM]: # LFA_ALT
print("##### Anglecontrol detected (LFA_ALT)")
ret.flags |= HyundaiFlags.ANGLE_CONTROL.value
# detect HDA2 with ADAS Driving ECU
if hda2:
print("$$$CANFD HDA2")
@ -188,6 +192,8 @@ class CarInterface(CarInterfaceBase):
#ret.radarUnavailable = False # TODO: canfd... carrot, hyundai cars have radar
ret.radarTimeStep = 0.05 if params.get_int("EnableRadarTracks") > 0 else 0.02
ret.pcmCruise = not ret.openpilotLongitudinalControl
ret.startingState = False # True # carrot
ret.vEgoStarting = 0.1

View File

@ -16,7 +16,7 @@ class CarControllerParams:
ACCEL_MAX = 2.5 # m/s
ANGLE_LIMITS: AngleSteeringLimits = AngleSteeringLimits(
# LKAS angle command is unlimited, but LFA is limited to 176.7 deg (but does not fault if requesting above)
180, # deg
175, # deg
# stock comma's
#([0, 9, 16, 25], [1.4, 0.6, 0.4, 0.1]),
#([0, 9, 16, 25], [1.4, 0.7, 0.5, 0.1]),

View File

@ -154,8 +154,12 @@ class RadarInterfaceBase(ABC):
self.init_done = False
def estimate_dt(self, rcv_time):
if len(self.init_samples) > 50:
estimated_dt = np.mean(np.diff(self.init_samples[20:]))
if self.CP.radarTimeStep > 0.0:
self.dt = self.CP.radarTimeStep
self.init_done = True
print(f"Using radar dt: {self.dt} sec")
elif len(self.init_samples) > 100:
estimated_dt = np.mean(np.diff(self.init_samples[50:]))
self.dt = estimated_dt
self.init_done = True
print(f"Estimated radar dt: {self.dt} sec")

View File

@ -260,6 +260,7 @@ BO_ 352 ADRV_0x160: 16 ADRV
SG_ SET_ME_FC : 72|8@1+ (1,0) [0|255] "" XXX
SG_ SET_ME_9 : 80|8@1+ (1,0) [0|255] "" XXX
SG_ NEW_SIGNAL_3 : 95|8@0+ (1,0) [0|255] "" XXX
SG_ NEW_SIGNAL_7 : 102|1@0+ (1,0) [0|1] "" XXX
BO_ 353 ADRV_0x161: 32 ADRV
SG_ CHECKSUM : 0|16@1+ (1,0) [0|65535] "" XXX

Binary files not shown (35 files).

View File

@ -236,7 +236,7 @@ class VCruiseCarrot:
#self.event = event
self._log_timer = self._log_timeout
def update_params(self):
def update_params(self, is_metric):
if self.frame % 10 == 0:
self.autoCruiseControl = self.params.get_int("AutoCruiseControl")
self.autoGasTokSpeed = self.params.get_int("AutoGasTokSpeed")
@ -252,17 +252,25 @@ class VCruiseCarrot:
self._paddle_mode = self.params.get_int("PaddleMode")
self._cruise_button_mode = self.params.get_int("CruiseButtonMode")
self._lfa_button_mode = self.params.get_int("LfaButtonMode")
self.autoRoadSpeedLimitOffset = self.params.get_int("AutoRoadSpeedLimitOffset")
self.autoNaviSpeedSafetyFactor = self.params.get_float("AutoNaviSpeedSafetyFactor") * 0.01
self.cruiseOnDist = self.params.get_float("CruiseOnDist") * 0.01
cruiseSpeed1 = self.params.get_float("CruiseSpeed1")
cruiseSpeed2 = self.params.get_float("CruiseSpeed2")
cruiseSpeed3 = self.params.get_float("CruiseSpeed3")
cruiseSpeed4 = self.params.get_float("CruiseSpeed4")
cruiseSpeed5 = self.params.get_float("CruiseSpeed5")
unit_factor = 1.0 if is_metric else CV.MPH_TO_KPH
cruiseSpeed1 = self.params.get_float("CruiseSpeed1") * unit_factor
cruiseSpeed2 = self.params.get_float("CruiseSpeed2") * unit_factor
cruiseSpeed3 = self.params.get_float("CruiseSpeed3") * unit_factor
cruiseSpeed4 = self.params.get_float("CruiseSpeed4") * unit_factor
cruiseSpeed5 = self.params.get_float("CruiseSpeed5") * unit_factor
if cruiseSpeed1 <= 0:
if self.autoRoadSpeedLimitOffset < 0:
cruiseSpeed1 = self.nRoadLimitSpeed * self.autoNaviSpeedSafetyFactor
else:
cruiseSpeed1 = self.nRoadLimitSpeed + self.autoRoadSpeedLimitOffset
self._cruise_speed_table = [cruiseSpeed1, cruiseSpeed2, cruiseSpeed3, cruiseSpeed4, cruiseSpeed5]
def update_v_cruise(self, CS, sm, is_metric):
self._add_log("")
self.update_params()
self.update_params(is_metric)
self.frame += 1
if CS.gearShifter != GearShifter.drive:
self.autoCruiseControl_cancel_timer = 20 * 100 # 20 sec
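A hedged sketch of the new CruiseSpeed1 behaviour introduced above: a stored value of 0 derives the first table entry from the current road speed limit (offset or safety-factor path), and non-metric users get their entries converted from mph. Function and variable names here are illustrative.

```python
MPH_TO_KPH = 1.609344  # assumed to match CV.MPH_TO_KPH

def build_cruise_speed_table(raw_speeds, is_metric, road_limit, offset, safety_factor):
    unit_factor = 1.0 if is_metric else MPH_TO_KPH
    speeds = [s * unit_factor for s in raw_speeds]
    if speeds[0] <= 0:
        # a negative offset selects the percentage path, otherwise add a fixed offset
        speeds[0] = road_limit * safety_factor if offset < 0 else road_limit + offset
    return speeds

print(build_cruise_speed_table([0, 50, 80, 110, 130], True, 60, 10, 1.05))  # first entry 70 (60 + 10)
print(build_cruise_speed_table([0, 50, 80, 110, 130], True, 60, -1, 1.05))  # first entry 63.0 (60 * 1.05)
```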

View File

@ -39,7 +39,7 @@ class TrafficState(Enum):
def __str__(self):
return self.name
A_CRUISE_MAX_BP_CARROT = [0., 40 * CV.KPH_TO_MS, 60 * CV.KPH_TO_MS, 80 * CV.KPH_TO_MS, 110 * CV.KPH_TO_MS, 140 * CV.KPH_TO_MS]
A_CRUISE_MAX_BP_CARROT = [0., 10 * CV.KPH_TO_MS, 40 * CV.KPH_TO_MS, 60 * CV.KPH_TO_MS, 80 * CV.KPH_TO_MS, 110 * CV.KPH_TO_MS, 140 * CV.KPH_TO_MS]
class CarrotPlanner:
def __init__(self):
@ -102,6 +102,7 @@ class CarrotPlanner:
self.dynamicTFollow = 0.0
self.dynamicTFollowLC = 0.0
self.cruiseMaxVals0 = 1.6
self.cruiseMaxVals1 = 1.6
self.cruiseMaxVals2 = 1.2
self.cruiseMaxVals3 = 1.0
@ -162,6 +163,7 @@ class CarrotPlanner:
self.dynamicTFollow = self.params.get_float("DynamicTFollow") / 100.
self.dynamicTFollowLC = self.params.get_float("DynamicTFollowLC") / 100.
elif self.params_count == 30:
self.cruiseMaxVals0 = self.params.get_float("CruiseMaxVals0") / 100.
self.cruiseMaxVals1 = self.params.get_float("CruiseMaxVals1") / 100.
self.cruiseMaxVals2 = self.params.get_float("CruiseMaxVals2") / 100.
self.cruiseMaxVals3 = self.params.get_float("CruiseMaxVals3") / 100.
@ -179,7 +181,7 @@ class CarrotPlanner:
self.params_count = 0
def get_carrot_accel(self, v_ego):
cruiseMaxVals = [self.cruiseMaxVals1, self.cruiseMaxVals2, self.cruiseMaxVals3, self.cruiseMaxVals4, self.cruiseMaxVals5, self.cruiseMaxVals6]
cruiseMaxVals = [self.cruiseMaxVals0, self.cruiseMaxVals1, self.cruiseMaxVals2, self.cruiseMaxVals3, self.cruiseMaxVals4, self.cruiseMaxVals5, self.cruiseMaxVals6]
factor = self.myHighModeFactor if self.myDrivingMode == DrivingMode.High else self.mySafeFactor
return np.interp(v_ego, A_CRUISE_MAX_BP_CARROT, cruiseMaxVals) * factor
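The new CruiseMaxVals0 entry pairs with the extra 10 km/h breakpoint added to A_CRUISE_MAX_BP_CARROT, keeping the breakpoint and value tables the same length. A small interpolation sketch using the default accelerations seen elsewhere in this commit:

```python
import numpy as np

KPH_TO_MS = 1.0 / 3.6
A_CRUISE_MAX_BP_CARROT = [0., 10 * KPH_TO_MS, 40 * KPH_TO_MS, 60 * KPH_TO_MS,
                          80 * KPH_TO_MS, 110 * KPH_TO_MS, 140 * KPH_TO_MS]
# cruiseMaxVals0..6; the settings store them as x0.01 m/s^2, scaled here
cruise_max_vals = [1.6, 1.6, 1.2, 1.0, 0.8, 0.7, 0.6]

v_ego = 25 * KPH_TO_MS
print(np.interp(v_ego, A_CRUISE_MAX_BP_CARROT, cruise_max_vals))  # ~1.4, halfway between the 10 and 40 km/h points
```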
@ -387,8 +389,6 @@ class CarrotPlanner:
if self.myDrivingMode == DrivingMode.High or self.trafficLightDetectMode == 0:
self.trafficState = TrafficState.off
if self.trafficState == TrafficState.green and self.trafficLightDetectMode == 1: # Stopping only
self.trafficState = TrafficState.off
if abs(carstate.steeringAngleDeg) > 20:
self.trafficState = TrafficState.off
@ -407,7 +407,7 @@ class CarrotPlanner:
elif lead_detected and (radarstate.leadOne.dRel - stop_model_x) < 2.0:
self.xState = XState.lead
elif self.stopping_count == 0:
if self.trafficState == TrafficState.green and not self.carrot_stay_stop and not carstate.leftBlinker:
if self.trafficState == TrafficState.green and not self.carrot_stay_stop and not carstate.leftBlinker and self.trafficLightDetectMode != 1:
self.xState = XState.e2ePrepare
self.events.add(EventName.trafficSignGreen)
self.stopping_count = max(0, self.stopping_count - 1)
@ -501,7 +501,7 @@ class DrivingModeDetector:
self.speed_threshold = 2 # (km/h)
self.accel_threshold = 1.5 # (m/s^2)
self.distance_threshold = 12 # (m)
self.lead_speed_exit_threshold = 20 # (km/h)
self.lead_speed_exit_threshold = 35 # (km/h)
def update_data(self, my_speed, lead_speed, my_accel, lead_accel, distance):
# 1. Congestion condition: the lead car is close and stopped

View File

@ -853,6 +853,7 @@ class CarrotMan:
host = '0.0.0.0' # or another host address
port = 7709 # port number
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind((host, port))
s.listen()
@ -915,7 +916,9 @@ class CarrotMan:
except Exception as e:
print(e)
except Exception as e:
print("################# CarrotMan route server error #####################")
print(e)
def carrot_curve_speed_params(self):
self.autoCurveSpeedFactor = self.params.get_int("AutoCurveSpeedFactor")*0.01
@ -1553,7 +1556,7 @@ class CarrotServ:
print(f"{id_str}: {distance} m")
xSpdType = -1
if 'camera' in id_str:
xSpdType = 1
xSpdType = 101 # 101: waze speed cam, 100: police
elif 'police' in id_str:
xSpdType = 100
@ -1616,7 +1619,7 @@ class CarrotServ:
self.nGoPosDist = 0
self.update_nav_instruction(sm)
if self.xSpdType < 0 or (self.xSpdType != 100 and self.xSpdDist <= 0) or (self.xSpdType == 100 and self.xSpdDist < -250):
if self.xSpdType < 0 or (self.xSpdType not in [100,101] and self.xSpdDist <= 0) or (self.xSpdType in [100,101] and self.xSpdDist < -250):
self.xSpdType = -1
self.xSpdDist = self.xSpdLimit = 0
if self.xTurnInfo < 0 or self.xDistToTurn < -50:
@ -1628,12 +1631,12 @@ class CarrotServ:
sdi_speed = 250
hda_active = False
### Speed cameras, speed bumps
if (self.xSpdDist > 0 or self.xSpdType == 100) and self.active_carrot > 0:
if (self.xSpdDist > 0 or self.xSpdType in [100, 101]) and self.active_carrot > 0:
safe_sec = self.autoNaviSpeedBumpTime if self.xSpdType == 22 else self.autoNaviSpeedCtrlEnd
decel = self.autoNaviSpeedDecelRate
sdi_speed = min(sdi_speed, self.calculate_current_speed(self.xSpdDist, self.xSpdLimit, safe_sec, decel))
self.active_carrot = 5 if self.xSpdType == 22 else 3
if self.xSpdType == 4 or (self.xSpdType == 100 and self.xSpdDist <= 0):
if self.xSpdType == 4 or (self.xSpdType in [100, 101] and self.xSpdDist <= 0):
sdi_speed = self.xSpdLimit
self.active_carrot = 4
elif CS is not None and CS.speedLimit > 0 and CS.speedLimitDistance > 0:
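With the change above, Waze speed cameras get their own type (101) and share the police-camera behaviour: the point stays active until the vehicle is 250 m past it, instead of clearing as soon as the distance reaches zero. A hedged sketch of the clearing rule, with illustrative names:

```python
def should_clear_speed_point(x_spd_type: int, x_spd_dist: float) -> bool:
    # 100: police, 101: Waze speed cam -- both are kept alive up to 250 m past the point
    if x_spd_type < 0:
        return True
    if x_spd_type not in (100, 101):
        return x_spd_dist <= 0
    return x_spd_dist < -250

print(should_clear_speed_point(101, -100))  # False: Waze cam still enforced shortly after passing
print(should_clear_speed_point(1, -1))      # True: an ordinary camera is cleared once passed
```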

View File

@ -302,11 +302,24 @@
},
{
"group": "가속설정",
"name": "CruiseMaxVals1",
"title": "가속설정1:0km/h(160)",
"name": "CruiseMaxVals0",
"title": "가속설정0:0km/h(160)",
"descr": "",
"egroup": "ACCEL",
"etitle": "AccelSet1:0km/h(160)",
"etitle": "AccelSet0:0km/h(160)",
"edescr": "",
"min": 1,
"max": 250,
"default": 160,
"unit": 5
},
{
"group": "가속설정",
"name": "CruiseMaxVals1",
"title": "가속설정1:10km/h(160)",
"descr": "",
"egroup": "ACCEL",
"etitle": "AccelSet1:10km/h(160)",
"edescr": "",
"min": 1,
"max": 250,
@ -642,11 +655,11 @@
"group": "버튼설정",
"name": "CruiseSpeed1",
"title": "크루즈속도1(30)",
"descr": "버튼모드3, 단계별속도",
"descr": "버튼모드3, 단계별속도, 0:RoadLimitSpeed+offset",
"egroup": "BUTN",
"etitle": "Cruise Speed1(30)",
"edescr": "CruiseButtonMode2, prefer speed",
"min": 1,
"edescr": "CruiseButtonMode3, prefer speed, 0:RoadLimitSpeed+offset",
"min": 0,
"max": 160,
"default": 10,
"unit": 10
@ -658,7 +671,7 @@
"descr": "버튼모드3, 단계별속도",
"egroup": "BUTN",
"etitle": "Cruise Speed2(50)",
"edescr": "CruiseButtonMode2, prefer speed",
"edescr": "CruiseButtonMode3, prefer speed",
"min": 1,
"max": 160,
"default": 10,
@ -671,7 +684,7 @@
"descr": "버튼모드3, 단계별속도",
"egroup": "BUTN",
"etitle": "Cruise Speed3(80)",
"edescr": "CruiseButtonMode2, prefer speed",
"edescr": "CruiseButtonMode3, prefer speed",
"min": 1,
"max": 160,
"default": 10,
@ -684,7 +697,7 @@
"descr": "버튼모드3, 단계별속도",
"egroup": "BUTN",
"etitle": "Cruise Speed4(110)",
"edescr": "CruiseButtonMode2, prefer speed",
"edescr": "CruiseButtonMode3, prefer speed",
"min": 1,
"max": 160,
"default": 10,
@ -697,7 +710,7 @@
"descr": "버튼모드3, 단계별속도",
"egroup": "BUTN",
"etitle": "Cruise Speed5(130)",
"edescr": "CruiseButtonMode2, prefer speed",
"edescr": "CruiseButtonMode3, prefer speed",
"min": 1,
"max": 160,
"default": 10,
@ -837,10 +850,23 @@
"group": "시작",
"name": "DisableDM",
"title": "DisableDM",
"descr": "",
"descr": "Reboot필요",
"egroup": "START",
"etitle": "DisableDM",
"edescr": "",
"edescr": "Reboot required",
"min": 0,
"max": 1,
"default": 0,
"unit": 1
},
{
"group": "시작",
"name": "EnableConnect",
"title": "EnableConnect",
"descr": "Reboot, 콤마 Connect에 연결합니다. 상황에 따라 BAN될수 있습니다.",
"egroup": "START",
"etitle": "EnableConnect",
"edescr": "Reboot required, Enable comma connect, Your device may be banned by Comma",
"min": 0,
"max": 1,
"default": 0,
@ -925,6 +951,19 @@
"default": 1,
"unit": 1
},
{
"group": "화면",
"name": "ShowTpms",
"title": "TPMS정보 표시",
"descr": "0:None, 1: 위, 2: 아래, 3: 둘다",
"egroup": "DISP",
"etitle": "Show TPMS Info",
"edescr": "0:None, 1: Upper, 2:Lower, 3: Both",
"min": 0,
"max": 3,
"default": 1,
"unit": 1
},
{
"group": "화면",
"name": "ShowDateTime",

View File

@ -149,7 +149,7 @@ class DesireHelper:
modeldata.laneLines[2], modeldata.roadEdges[1])
self.lane_exist_left_count.update(lane_prob_left)
self.lane_exist_right_count.update(lane_prob_right)
min_lane_width = 2.0
min_lane_width = 2.8
self.lane_width_left_count.update(self.lane_width_left > min_lane_width)
self.lane_width_right_count.update(self.lane_width_right > min_lane_width)
self.road_edge_left_count.update(self.distance_to_road_edge_left > min_lane_width)
@ -269,7 +269,7 @@ class DesireHelper:
auto_lane_change_available = lane_available
else:
auto_lane_change_blocked = ((atc_blinker_state == BLINKER_LEFT) and (driver_blinker_state != BLINKER_LEFT))
auto_lane_change_available = not auto_lane_change_blocked and lane_availabled and edge_availabled and not side_object_detected
auto_lane_change_available = not auto_lane_change_blocked and (lane_availabled or edge_availabled or lane_appeared) and not side_object_detected
if not lateral_active or self.lane_change_timer > LANE_CHANGE_TIME_MAX:
#print("Desire canceled")

View File

@ -49,12 +49,12 @@ def get_lag_adjusted_curvature(CP, v_ego, psis, curvatures, steer_actuator_delay
#average_curvature_desired = psi / (v_ego * delay)
# curve -> straight or reverse curve
if abs(current_curvature_desired) > 0.002 and \
if v_ego > 5 and abs(current_curvature_desired) > 0.002 and \
(abs(future_curvature_desired) < 0.001 or np.sign(current_curvature_desired) != np.sign(future_curvature_desired)):
psis_damping = 0.2
else:
psis_damping = 1.0
psi *= psis_damping
#psi *= psis_damping
average_curvature_desired = psi / distance

View File

@ -1,3 +1,4 @@
import os
import glob
Import('env', 'envCython', 'arch', 'cereal', 'messaging', 'common', 'gpucommon', 'visionipc', 'transformations')
@ -13,7 +14,6 @@ common_src = [
"transforms/transform.cc",
]
# OpenCL is a framework on Mac
if arch == "Darwin":
frameworks += ['OpenCL']
@ -38,17 +38,35 @@ for model_name in ['driving_vision', 'driving_policy']:
cmd = f'python3 {Dir("#selfdrive/modeld").abspath}/get_model_metadata.py {fn}.onnx'
lenv.Command(fn + "_metadata.pkl", [fn + ".onnx"] + tinygrad_files + script_files, cmd)
# Compile tinygrad model
def tg_compile(flags, model_name):
pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"'
if arch == 'larch64':
device_string = 'QCOM=1'
elif arch == 'Darwin':
device_string = 'CLANG=1 IMAGE=0'
else:
device_string = 'LLVM=1 LLVMOPT=1 BEAM=0 IMAGE=0'
for model_name in ['driving_vision', 'driving_policy', 'dmonitoring_model']:
fn = File(f"models/{model_name}").abspath
cmd = f'{pythonpath_string} {device_string} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {fn}_tinygrad.pkl'
lenv.Command(fn + "_tinygrad.pkl", [fn + ".onnx"] + tinygrad_files, cmd)
return lenv.Command(
fn + "_tinygrad.pkl",
[fn + ".onnx"] + tinygrad_files,
f'{pythonpath_string} {flags} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {fn}_tinygrad.pkl'
)
# Compile small models
for model_name in ['driving_vision', 'driving_policy', 'dmonitoring_model']:
flags = {
'larch64': 'QCOM=1',
'Darwin': 'CPU=1 IMAGE=0 JIT=2',
}.get(arch, 'LLVM=1 LLVMOPT=1 BEAM=0 IMAGE=0 JIT=2')
tg_compile(flags, model_name)
# Compile BIG model if USB GPU is available
import subprocess
from tinygrad import Device
# because tg doesn't support multi-process
devs = subprocess.check_output('python3 -c "from tinygrad import Device; print(list(Device.get_available_devices()))"', shell=True)
if b"AMD" in devs:
del Device
print("USB GPU detected... building")
flags = "AMD=1 AMD_IFACE=USB AMD_LLVM=1 NOLOCALS=0 IMAGE=0"
bp = tg_compile(flags, "big_driving_policy")
bv = tg_compile(flags, "big_driving_vision")
lenv.SideEffect('lock', [bp, bv]) # tg doesn't support multi-process so build serially
else:
print("USB GPU not detected... skipping")

View File

@ -173,9 +173,6 @@ def main():
model_output, gpu_execution_time = model.run(buf, calib, model_transform)
t2 = time.perf_counter()
# run one more time, just for the load
model.run(buf, calib, model_transform)
pm.send("driverStateV2", get_driverstate_packet(model_output, vipc_client.frame_id, vipc_client.timestamp_sof, t2 - t1, gpu_execution_time))

View File

@ -1,13 +1,18 @@
#!/usr/bin/env python3
import os
from openpilot.system.hardware import TICI
from tinygrad.tensor import Tensor
from tinygrad.dtype import dtypes
if TICI:
USBGPU = "USBGPU" in os.environ
if USBGPU:
os.environ['AMD'] = '1'
os.environ['AMD_IFACE'] = 'USB'
elif TICI:
from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
os.environ['QCOM'] = '1'
else:
os.environ['LLVM'] = '1'
os.environ['JIT'] = '2'
from tinygrad.tensor import Tensor
from tinygrad.dtype import dtypes
import time
import pickle
import numpy as np
@ -33,8 +38,6 @@ from openpilot.selfdrive.modeld.constants import ModelConstants, Plan
from openpilot.selfdrive.modeld.models.commonmodel_pyx import DrivingModelFrame, CLContext
PROCESS_NAME = "selfdrive.modeld.modeld"
SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
@ -45,10 +48,11 @@ POLICY_METADATA_PATH = Path(__file__).parent / 'models/driving_policy_metadata.p
LAT_SMOOTH_SECONDS = 0.13
LONG_SMOOTH_SECONDS = 0.3
MIN_LAT_CONTROL_SPEED = 0.3
def get_action_from_model(model_output: dict[str, np.ndarray], prev_action: log.ModelDataV2.Action,
lat_action_t: float, long_action_t: float, lat_smooth_seconds: float) -> log.ModelDataV2.Action:
lat_action_t: float, long_action_t: float, v_ego: float, lat_smooth_seconds: float) -> log.ModelDataV2.Action:
plan = model_output['plan'][0]
desired_accel, should_stop = get_accel_from_plan(plan[:,Plan.VELOCITY][:,0],
plan[:,Plan.ACCELERATION][:,0],
@ -57,7 +61,10 @@ def get_action_from_model(model_output: dict[str, np.ndarray], prev_action: log.
desired_accel = smooth_value(desired_accel, prev_action.desiredAcceleration, LONG_SMOOTH_SECONDS)
desired_curvature = model_output['desired_curvature'][0, 0]
if v_ego > MIN_LAT_CONTROL_SPEED:
desired_curvature = smooth_value(desired_curvature, prev_action.desiredCurvature, lat_smooth_seconds)
else:
desired_curvature = prev_action.desiredCurvature
return log.ModelDataV2.Action(desiredCurvature=float(desired_curvature),
desiredAcceleration=float(desired_accel),
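The v_ego gate above stops the desired curvature from being smoothed toward the model output at very low speed; below the threshold the previous command is simply held. A minimal sketch, with a plain exponential blend standing in for smooth_value (whose real signature is not reproduced here):

```python
MIN_LAT_CONTROL_SPEED = 0.3  # m/s, from the diff above

def blend(new, prev, alpha=0.5):
    # stand-in for smooth_value(); the real helper smooths over lat_smooth_seconds
    return alpha * new + (1 - alpha) * prev

def next_desired_curvature(model_curv, prev_curv, v_ego):
    if v_ego > MIN_LAT_CONTROL_SPEED:
        return blend(model_curv, prev_curv)
    return prev_curv  # hold the last command when nearly stopped

print(next_desired_curvature(0.02, 0.0, 5.0))  # 0.01, smoothed toward the model output
print(next_desired_curvature(0.02, 0.0, 0.1))  # 0.0, held
```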
@ -79,10 +86,20 @@ class ModelState:
prev_desire: np.ndarray # for tracking the rising edge of the pulse
def __init__(self, context: CLContext):
self.frames = {
'input_imgs': DrivingModelFrame(context, ModelConstants.TEMPORAL_SKIP),
'big_input_imgs': DrivingModelFrame(context, ModelConstants.TEMPORAL_SKIP)
}
with open(VISION_METADATA_PATH, 'rb') as f:
vision_metadata = pickle.load(f)
self.vision_input_shapes = vision_metadata['input_shapes']
self.vision_input_names = list(self.vision_input_shapes.keys())
self.vision_output_slices = vision_metadata['output_slices']
vision_output_size = vision_metadata['output_shapes']['outputs'][1]
with open(POLICY_METADATA_PATH, 'rb') as f:
policy_metadata = pickle.load(f)
self.policy_input_shapes = policy_metadata['input_shapes']
self.policy_output_slices = policy_metadata['output_slices']
policy_output_size = policy_metadata['output_shapes']['outputs'][1]
self.frames = {name: DrivingModelFrame(context, ModelConstants.TEMPORAL_SKIP) for name in self.vision_input_names}
self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
self.full_features_buffer = np.zeros((1, ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32)
@ -99,18 +116,6 @@ class ModelState:
'features_buffer': np.zeros((1, ModelConstants.INPUT_HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32),
}
with open(VISION_METADATA_PATH, 'rb') as f:
vision_metadata = pickle.load(f)
self.vision_input_shapes = vision_metadata['input_shapes']
self.vision_output_slices = vision_metadata['output_slices']
vision_output_size = vision_metadata['output_shapes']['outputs'][1]
with open(POLICY_METADATA_PATH, 'rb') as f:
policy_metadata = pickle.load(f)
self.policy_input_shapes = policy_metadata['input_shapes']
self.policy_output_slices = policy_metadata['output_slices']
policy_output_size = policy_metadata['output_shapes']['outputs'][1]
# img buffers are managed in openCL transform code
self.vision_inputs: dict[str, Tensor] = {}
self.vision_output = np.zeros(vision_output_size, dtype=np.float32)
@ -128,7 +133,7 @@ class ModelState:
parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in output_slices.items()}
return parsed_model_outputs
def run(self, buf: VisionBuf, wbuf: VisionBuf, transform: np.ndarray, transform_wide: np.ndarray,
def run(self, bufs: dict[str, VisionBuf], transforms: dict[str, np.ndarray],
inputs: dict[str, np.ndarray], prepare_only: bool) -> dict[str, np.ndarray] | None:
# Model decides when action is completed, so desire input is just a pulse triggered on rising edge
inputs['desire'][0] = 0
@ -141,10 +146,9 @@ class ModelState:
self.numpy_inputs['traffic_convention'][:] = inputs['traffic_convention']
self.numpy_inputs['lateral_control_params'][:] = inputs['lateral_control_params']
imgs_cl = {'input_imgs': self.frames['input_imgs'].prepare(buf, transform.flatten()),
'big_input_imgs': self.frames['big_input_imgs'].prepare(wbuf, transform_wide.flatten())}
imgs_cl = {name: self.frames[name].prepare(bufs[name], transforms[name].flatten()) for name in self.vision_input_names}
if TICI:
if TICI and not USBGPU:
# The imgs tensors are backed by opencl memory, only need init once
for key in imgs_cl:
if key not in self.vision_inputs:
@ -185,13 +189,17 @@ def main(demo=False):
sentry.set_tag("daemon", PROCESS_NAME)
cloudlog.bind(daemon=PROCESS_NAME)
setproctitle(PROCESS_NAME)
if not USBGPU:
# USB GPU currently saturates a core so can't do this yet,
# also need to move the aux USB interrupts for good timings
config_realtime_process(7, 54)
st = time.monotonic()
cloudlog.warning("setting up CL context")
cl_context = CLContext()
cloudlog.warning("CL context ready; loading model")
model = ModelState(cl_context)
cloudlog.warning("models loaded, modeld starting")
cloudlog.warning(f"models loaded in {time.monotonic() - st:.1f}s, modeld starting")
# visionipc clients
while True:
@ -330,6 +338,8 @@ def main(demo=False):
if prepare_only:
cloudlog.error(f"skipping model eval. Dropped {vipc_dropped_frames} frames")
bufs = {name: buf_extra if 'big' in name else buf_main for name in model.vision_input_names}
transforms = {name: model_transform_extra if 'big' in name else model_transform_main for name in model.vision_input_names}
inputs:dict[str, np.ndarray] = {
'desire': vec_desire,
'traffic_convention': traffic_convention,
@ -337,7 +347,7 @@ def main(demo=False):
}
mt1 = time.perf_counter()
model_output = model.run(buf_main, buf_extra, model_transform_main, model_transform_extra, inputs, prepare_only)
model_output = model.run(bufs, transforms, inputs, prepare_only)
mt2 = time.perf_counter()
model_execution_time = mt2 - mt1
@ -346,7 +356,7 @@ def main(demo=False):
drivingdata_send = messaging.new_message('drivingModelData')
posenet_send = messaging.new_message('cameraOdometry')
action = get_action_from_model(model_output, prev_action, lat_delay + DT_MDL, long_delay + DT_MDL, lat_smooth_seconds)
action = get_action_from_model(model_output, prev_action, lat_delay + DT_MDL, long_delay + DT_MDL, v_ego, lat_smooth_seconds)
prev_action = action
fill_model_msg(drivingdata_send, modelv2_send, model_output, action,
publish_state, meta_main.frame_id, meta_extra.frame_id, frame_id,

View File

@ -0,0 +1 @@
driving_policy.onnx

View File

@ -0,0 +1 @@
driving_vision.onnx

View File

@ -2677,14 +2677,22 @@ void ui_draw(UIState *s, ModelRenderer* model_renderer, int w, int h) {
drawCarrot.drawDateTime(s);
//drawCarrot.drawConnInfo(s);
drawCarrot.drawDeviceInfo(s);
int show_tpms = params.getInt("ShowTpms");
switch (show_tpms) {
case 0: break;
case 1:
drawCarrot.drawTpms2(s);
int draw_turn_info = drawTurnInfo.draw(s);
if (draw_turn_info < 0) {
break;
case 2:
drawCarrot.drawTpms3(s);
break;
case 3:
drawCarrot.drawTpms2(s);
drawCarrot.drawTpms3(s);
break;
}
drawTurnInfo.draw(s);
ui_draw_text_a2(s);
ui_draw_alert(s);

View File

@ -702,14 +702,15 @@ CarrotPanel::CarrotPanel(QWidget* parent) : QWidget(parent) {
latLongToggles->addItem(new CValueControl("StopDistanceCarrot", "LONG: StopDistance (600)cm", "", "../assets/offroad/icon_logic.png", 300, 1000, 10));
//latLongToggles->addItem(new CValueControl("TraffStopDistanceAdjust", "LONG: TrafficStopDistance adjust(150)cm", "", "../assets/offroad/icon_road.png", -1000, 1000, 10));
latLongToggles->addItem(new CValueControl("JLeadFactor3", "LONG: Jerk Lead Factor (0)", "x0.01", "../assets/offroad/icon_logic.png", 0, 100, 5));
latLongToggles->addItem(new CValueControl("CruiseMaxVals1", "ACCEL:0km/h(160)", "속도별 가속도를 지정합니다.(x0.01m/s^2)", "../assets/offroad/icon_logic.png", 1, 250, 5));
latLongToggles->addItem(new CValueControl("CruiseMaxVals2", "ACCEL:40km/h(120)", "속도별 가속도를 지정합니다.(x0.01m/s^2)", "../assets/offroad/icon_logic.png", 1, 250, 5));
latLongToggles->addItem(new CValueControl("CruiseMaxVals3", "ACCEL:60km/h(100)", "속도별 가속도를 지정합니다.(x0.01m/s^2)", "../assets/offroad/icon_logic.png", 1, 250, 5));
latLongToggles->addItem(new CValueControl("CruiseMaxVals4", "ACCEL:80km/h(80)", "속도별 가속도를 지정합니다.(x0.01m/s^2)", "../assets/offroad/icon_logic.png", 1, 250, 5));
latLongToggles->addItem(new CValueControl("CruiseMaxVals5", "ACCEL:110km/h(70)", "속도별 가속도를 지정합니다.(x0.01m/s^2)", "../assets/offroad/icon_logic.png", 1, 250, 5));
latLongToggles->addItem(new CValueControl("CruiseMaxVals6", "ACCEL:140km/h(60)", "속도별 가속도를 지정합니다.(x0.01m/s^2)", "../assets/offroad/icon_logic.png", 1, 250, 5));
//latLongToggles->addItem(new CValueControl("CruiseMinVals", "DECEL:(120)", "감속도를 설정합니다.(x0.01m/s^2)", "../assets/offroad/icon_road.png", 50, 250, 5));
latLongToggles->addItem(new CValueControl("MaxAngleFrames", "MaxAngleFrames(89)", "89:기본, 스티어계기판에러시 85~87", "../assets/offroad/icon_logic.png", 80, 100, 1));
latLongToggles->addItem(new CValueControl("CruiseMaxVals0", "ACCEL:0km/h(160)", "Acceleration needed at specified speed.(x0.01m/s^2)", "../assets/offroad/icon_logic.png", 1, 250, 5));
latLongToggles->addItem(new CValueControl("CruiseMaxVals1", "ACCEL:10km/h(160)", "Acceleration needed at specified speed.(x0.01m/s^2)", "../assets/offroad/icon_logic.png", 1, 250, 5));
latLongToggles->addItem(new CValueControl("CruiseMaxVals2", "ACCEL:40km/h(120)", "Acceleration needed at specified speed.(x0.01m/s^2)", "../assets/offroad/icon_logic.png", 1, 250, 5));
latLongToggles->addItem(new CValueControl("CruiseMaxVals3", "ACCEL:60km/h(100)", "Acceleration needed at specified speed.(x0.01m/s^2)", "../assets/offroad/icon_logic.png", 1, 250, 5));
latLongToggles->addItem(new CValueControl("CruiseMaxVals4", "ACCEL:80km/h(80)", "Acceleration needed at specified speed.(x0.01m/s^2)", "../assets/offroad/icon_logic.png", 1, 250, 5));
latLongToggles->addItem(new CValueControl("CruiseMaxVals5", "ACCEL:110km/h(70)", "Acceleration needed at specified speed.(x0.01m/s^2)", "../assets/offroad/icon_logic.png", 1, 250, 5));
latLongToggles->addItem(new CValueControl("CruiseMaxVals6", "ACCEL:140km/h(60)", "Acceleration needed at specified speed.(x0.01m/s^2)", "../assets/offroad/icon_logic.png", 1, 250, 5));
//latLongToggles->addItem(new CValueControl("CruiseMinVals", "DECEL:(120)", "Sets the deceleration rate.(x0.01m/s^2)", "../assets/offroad/icon_road.png", 50, 250, 5));
latLongToggles->addItem(new CValueControl("MaxAngleFrames", "MaxAngleFrames(89)", "89:Basic, steering instrument panel error 85~87", "../assets/offroad/icon_logic.png", 80, 100, 1));
latLongToggles->addItem(new CValueControl("SteerActuatorDelay", "LAT:SteerActuatorDelay(30)", "x0.01, 0:LiveDelay", "../assets/offroad/icon_logic.png", 0, 100, 1));
latLongToggles->addItem(new CValueControl("LateralTorqueCustom", "LAT: TorqueCustom(0)", "", "../assets/offroad/icon_logic.png", 0, 2, 1));
latLongToggles->addItem(new CValueControl("LateralTorqueAccelFactor", "LAT: TorqueAccelFactor(2500)", "", "../assets/offroad/icon_logic.png", 1000, 6000, 10));
@ -721,6 +722,7 @@ CarrotPanel::CarrotPanel(QWidget* parent) : QWidget(parent) {
dispToggles = new ListWidget(this);
//dispToggles->addItem(new CValueControl("ShowHudMode", "DISP:Display Mode", "0:Frog,1:APilot,2:Bottom,3:Top,4:Left,5:Left-Bottom", "../assets/offroad/icon_shell.png", 0, 5, 1));
dispToggles->addItem(new CValueControl("ShowDebugUI", "DISP:Debug Info", "", "../assets/offroad/icon_shell.png", 0, 2, 1));
dispToggles->addItem(new CValueControl("ShowTpms", "DISP:Tpms Info", "", "../assets/offroad/icon_shell.png", 0, 3, 1));
dispToggles->addItem(new CValueControl("ShowDateTime", "DISP:Time Info", "0:None,1:Time/Date,2:Time,3:Date", "../assets/offroad/icon_calendar.png", 0, 3, 1));
//dispToggles->addItem(new CValueControl("ShowSteerRotate", "DISP:Handle rotate", "0:None,1:Rotate", "../assets/offroad/icon_shell.png", 0, 1, 1));
dispToggles->addItem(new CValueControl("ShowPathEnd", "DISP:Path End", "0:None,1:Display", "../assets/offroad/icon_shell.png", 0, 1, 1));
@ -809,7 +811,7 @@ CarrotPanel::CarrotPanel(QWidget* parent) : QWidget(parent) {
startToggles->addItem(new CValueControl("AutoCruiseControl", "Auto Cruise control", "Softhold, Auto Cruise ON/OFF control", "../assets/offroad/icon_road.png", 0, 3, 1));
startToggles->addItem(new CValueControl("CruiseOnDist", "CRUISE: Auto ON distance(0cm)", "When GAS/Brake is OFF, Cruise ON when the lead car gets closer.", "../assets/offroad/icon_road.png", 0, 2500, 50));
startToggles->addItem(new CValueControl("AutoEngage", "Auto Engage control on start", "1:SteerEnable, 2:Steer/Cruise Engage", "../assets/offroad/icon_road.png", 0, 2, 1));
startToggles->addItem(new ParamControl("DisableMinSteerSpeed", "Disable Min.SteerSpeed", "", "../assets/offroad/icon_road.png", this));
startToggles->addItem(new ParamControl("DisableMinSteerSpeed", "Disable Min.SteerSpeed (Eg. SMDPS", "", "../assets/offroad/icon_road.png", this));
startToggles->addItem(new CValueControl("AutoGasTokSpeed", "Auto AccelTok speed", "Gas(Accel)Tok enable speed", "../assets/offroad/icon_road.png", 0, 200, 5));
startToggles->addItem(new ParamControl("AutoGasSyncSpeed", "Auto update Cruise speed", "", "../assets/offroad/icon_road.png", this));
startToggles->addItem(new CValueControl("SpeedFromPCM", "Read Cruise Speed from PCM", "Toyota must set to 1, Honda 3", "../assets/offroad/icon_road.png", 0, 3, 1));
@ -817,6 +819,7 @@ CarrotPanel::CarrotPanel(QWidget* parent) : QWidget(parent) {
startToggles->addItem(new CValueControl("SoundVolumeAdjustEngage", "Sound Volume, Engage(10%)", "", "../assets/offroad/icon_sound.png", 5, 200, 5));
startToggles->addItem(new CValueControl("MaxTimeOffroadMin", "Power off time (min)", "", "../assets/offroad/icon_sandtimer.png", 1, 600, 10));
startToggles->addItem(new ParamControl("DisableDM", "Disable DM", "", "../assets/img_driver_face_static_x.png", this));
startToggles->addItem(new CValueControl("EnableConnect", "EnableConnect", "Your device may be banned by Comma", "../assets/offroad/icon_sandtimer.png", 0, 1, 1));
//startToggles->addItem(new CValueControl("CarrotCountDownSpeed", "NaviCountDown Speed(10)", "", "../assets/offroad/icon_shell.png", 0, 200, 5));
startToggles->addItem(new CValueControl("MapboxStyle", "Mapbox Style(0)", "", "../assets/offroad/icon_shell.png", 0, 2, 1));
startToggles->addItem(new CValueControl("RecordRoadCam", "Record Road camera(0)", "1:RoadCam, 2:RoadCam+WideRoadCam", "../assets/offroad/icon_shell.png", 0, 2, 1));
@ -828,12 +831,12 @@ CarrotPanel::CarrotPanel(QWidget* parent) : QWidget(parent) {
//startToggles->addItem(new CValueControl("LaneChangeLaneCheck", "LaneChange: Check lane exist", "(0:No,1:Lane,2:+Edge)", "../assets/offroad/icon_shell.png", 0, 2, 1));
speedToggles = new ListWidget(this);
speedToggles->addItem(new CValueControl("AutoCurveSpeedLowerLimit", "CURVE: Lower limit speed(30)", "곡선도로를 만나면 속도를 줄여줍니다. 최저속도", "../assets/offroad/icon_road.png", 30, 200, 5));
speedToggles->addItem(new CValueControl("AutoCurveSpeedLowerLimit", "CURVE: Lower limit speed(30)", "When you approach a curve, reduce your speed. Minimum speed", "../assets/offroad/icon_road.png", 30, 200, 5));
speedToggles->addItem(new CValueControl("AutoCurveSpeedFactor", "CURVE: Auto Control ratio(100%)", "", "../assets/offroad/icon_road.png", 50, 300, 1));
speedToggles->addItem(new CValueControl("AutoCurveSpeedAggressiveness", "CURVE: Aggressiveness (100%)", "", "../assets/offroad/icon_road.png", 50, 300, 1));
speedToggles->addItem(new CValueControl("AutoNaviSpeedCtrlEnd", "SpeedCameraDecelEnd(6s)", "감속완료시점을 설정합니다.값이 크면 카메라에서 멀리 감속 완료", "../assets/offroad/icon_road.png", 3, 20, 1));
speedToggles->addItem(new CValueControl("AutoNaviSpeedCtrlMode", "NaviSpeedControlMode(2)", "0:감속안함,1:과속카메라,2:+사고방지턱,3:+이동식카메라", "../assets/offroad/icon_road.png", 0, 3, 1));
speedToggles->addItem(new CValueControl("AutoNaviSpeedDecelRate", "SpeedCameraDecelRatex0.01m/s^2(80)", "낮으면 멀리서부터 감속함", "../assets/offroad/icon_road.png", 10, 200, 10));
speedToggles->addItem(new CValueControl("AutoNaviSpeedCtrlEnd", "SpeedCameraDecelEnd(6s)", "Sets the deceleration completion point. A larger value completes deceleration farther away from the camera.", "../assets/offroad/icon_road.png", 3, 20, 1));
speedToggles->addItem(new CValueControl("AutoNaviSpeedCtrlMode", "NaviSpeedControlMode(2)", "0:No slowdown, 1: speed camera, 2: + accident prevention bump, 3: + mobile camera", "../assets/offroad/icon_road.png", 0, 3, 1));
speedToggles->addItem(new CValueControl("AutoNaviSpeedDecelRate", "SpeedCameraDecelRatex0.01m/s^2(80)", "Lower number, slows down from a greater distance", "../assets/offroad/icon_road.png", 10, 200, 10));
speedToggles->addItem(new CValueControl("AutoNaviSpeedSafetyFactor", "SpeedCameraSafetyFactor(105%)", "", "../assets/offroad/icon_road.png", 80, 120, 1));
speedToggles->addItem(new CValueControl("AutoNaviSpeedBumpTime", "SpeedBumpTimeDistance(1s)", "", "../assets/offroad/icon_road.png", 1, 50, 1));
speedToggles->addItem(new CValueControl("AutoNaviSpeedBumpSpeed", "SpeedBumpSpeed(35Km/h)", "", "../assets/offroad/icon_road.png", 10, 100, 5));
@ -841,8 +844,8 @@ CarrotPanel::CarrotPanel(QWidget* parent) : QWidget(parent) {
speedToggles->addItem(new CValueControl("AutoNaviCountDownMode", "NaviCountDown mode(2)", "0: off, 1:tbt+camera, 2:tbt+camera+bump", "../assets/offroad/icon_road.png", 0, 2, 1));
speedToggles->addItem(new CValueControl("TurnSpeedControlMode", "Turn Speed control mode(1)", "0: off, 1:vision, 2:vision+route, 3: route", "../assets/offroad/icon_road.png", 0, 3, 1));
speedToggles->addItem(new CValueControl("MapTurnSpeedFactor", "Map TurnSpeed Factor(100)", "", "../assets/offroad/icon_map.png", 50, 300, 5));
speedToggles->addItem(new CValueControl("AutoTurnControl", "ATC: Auto turn control(0)", "0:없음,1:차선변경,2:차선변경+속도,3:속도", "../assets/offroad/icon_road.png", 0, 3, 1));
speedToggles->addItem(new CValueControl("AutoTurnControlSpeedTurn", "ATC: Turn Speed (20)", "0:없음, 턴속도", "../assets/offroad/icon_road.png", 0, 100, 5));
speedToggles->addItem(new CValueControl("AutoTurnControl", "ATC: Auto turn control(0)", "0:None, 1: lane change, 2: lane change + speed, 3: speed", "../assets/offroad/icon_road.png", 0, 3, 1));
speedToggles->addItem(new CValueControl("AutoTurnControlSpeedTurn", "ATC: Turn Speed (20)", "0:None, turn speed", "../assets/offroad/icon_road.png", 0, 100, 5));
speedToggles->addItem(new CValueControl("AutoTurnControlTurnEnd", "ATC: Turn CtrlDistTime (6)", "dist=speed*time", "../assets/offroad/icon_road.png", 0, 30, 1));
speedToggles->addItem(new CValueControl("AutoRoadSpeedAdjust", "Auto Roadlimit Speed adjust (50%)", "", "../assets/offroad/icon_road.png", 0, 100, 10));
speedToggles->addItem(new CValueControl("AutoTurnMapChange", "ATC Auto Map Change(0)", "", "../assets/offroad/icon_road.png", 0, 1, 1));

View File

@ -108,6 +108,7 @@ class Soundd:
self.soundVolumeAdjust = 1.0
self.carrot_count_down = 0
self.lang = self.params.get('LanguageSetting', encoding='utf8')
self.load_sounds()
self.current_alert = AudibleAlert.none
@ -118,6 +119,7 @@ class Soundd:
self.spl_filter_weighted = FirstOrderFilter(0, 2.5, FILTER_DT, initialized=False)
def load_sounds(self):
self.loaded_sounds: dict[int, np.ndarray] = {}
@ -125,7 +127,10 @@ class Soundd:
for sound in sound_list:
filename, play_count, volume = sound_list[sound]
if self.lang == "main_ko":
wavefile = wave.open(BASEDIR + "/selfdrive/assets/sounds/" + filename, 'r')
else:
wavefile = wave.open(BASEDIR + "/selfdrive/assets/sounds_eng/" + filename, 'r')
#assert wavefile.getnchannels() == 1
assert wavefile.getsampwidth() == 2
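A minimal sketch of the language-dependent sound directory selection added above, assuming BASEDIR points at the openpilot checkout and LanguageSetting uses the value seen in the diff ("main_ko" keeps the stock Korean voice files, everything else falls back to the English set):

```python
import wave

def open_alert_sound(basedir: str, lang: str, filename: str) -> wave.Wave_read:
    # Korean UI keeps the stock sound set; any other language loads the English recordings
    subdir = "sounds" if lang == "main_ko" else "sounds_eng"
    return wave.open(f"{basedir}/selfdrive/assets/{subdir}/{filename}", 'r')
```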

View File

@ -6,6 +6,19 @@ from cereal import log
NetworkType = log.DeviceState.NetworkType
class LPAError(RuntimeError):
pass
class LPAProfileNotFoundError(LPAError):
pass
@dataclass
class Profile:
iccid: str
nickname: str
enabled: bool
provider: str
@dataclass
class ThermalZone:
# a zone from /sys/class/thermal/thermal_zone*
@ -33,6 +46,7 @@ class ThermalZone:
class ThermalConfig:
cpu: list[ThermalZone] | None = None
gpu: list[ThermalZone] | None = None
dsp: ThermalZone | None = None
pmic: list[ThermalZone] | None = None
memory: ThermalZone | None = None
intake: ThermalZone | None = None
@ -50,6 +64,31 @@ class ThermalConfig:
ret[f.name + "TempC"] = v.read()
return ret
class LPABase(ABC):
@abstractmethod
def list_profiles(self) -> list[Profile]:
pass
@abstractmethod
def get_active_profile(self) -> Profile | None:
pass
@abstractmethod
def delete_profile(self, iccid: str) -> None:
pass
@abstractmethod
def download_profile(self, qr: str, nickname: str | None = None) -> None:
pass
@abstractmethod
def nickname_profile(self, iccid: str, nickname: str) -> None:
pass
@abstractmethod
def switch_profile(self, iccid: str) -> None:
pass
class HardwareBase(ABC):
@staticmethod
def get_cmdline() -> dict[str, str]:
@ -104,6 +143,10 @@ class HardwareBase(ABC):
def get_sim_info(self):
pass
@abstractmethod
def get_sim_lpa(self) -> LPABase:
pass
@abstractmethod
def get_network_strength(self, network_type):
pass
@ -130,6 +173,9 @@ class HardwareBase(ABC):
def get_thermal_config(self):
return ThermalConfig()
def set_display_power(self, on: bool):
pass
@abstractmethod
def set_screen_brightness(self, percentage):
pass
@ -164,6 +210,9 @@ class HardwareBase(ABC):
def configure_modem(self):
pass
def reboot_modem(self):
pass
@abstractmethod
def get_networks(self):
pass

system/hardware/esim.py (new file, 45 lines)
View File

@ -0,0 +1,45 @@
#!/usr/bin/env python3
import argparse
import time
from openpilot.system.hardware import HARDWARE
if __name__ == '__main__':
parser = argparse.ArgumentParser(prog='esim.py', description='manage eSIM profiles on your comma device', epilog='comma.ai')
parser.add_argument('--backend', choices=['qmi', 'at'], default='qmi', help='use the specified backend, defaults to qmi')
parser.add_argument('--switch', metavar='iccid', help='switch to profile')
parser.add_argument('--delete', metavar='iccid', help='delete profile (warning: this cannot be undone)')
parser.add_argument('--download', nargs=2, metavar=('qr', 'name'), help='download a profile using QR code (format: LPA:1$rsp.truphone.com$QRF-SPEEDTEST)')
parser.add_argument('--nickname', nargs=2, metavar=('iccid', 'name'), help='update the nickname for a profile')
args = parser.parse_args()
mutated = False
lpa = HARDWARE.get_sim_lpa()
if args.switch:
lpa.switch_profile(args.switch)
mutated = True
elif args.delete:
confirm = input('are you sure you want to delete this profile? (y/N) ')
if confirm == 'y':
lpa.delete_profile(args.delete)
mutated = True
else:
print('cancelled')
exit(0)
elif args.download:
lpa.download_profile(args.download[0], args.download[1])
elif args.nickname:
lpa.nickname_profile(args.nickname[0], args.nickname[1])
else:
parser.print_help()
if mutated:
HARDWARE.reboot_modem()
# eUICC needs a small delay post-reboot before querying profiles
time.sleep(.5)
profiles = lpa.list_profiles()
print(f'\n{len(profiles)} profile{"s" if len(profiles) > 1 else ""}:')
for p in profiles:
print(f'- {p.iccid} (nickname: {p.nickname or "<none provided>"}) (provider: {p.provider}) - {"enabled" if p.enabled else "disabled"}')
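A hedged sketch of driving the same LPA interface directly rather than through esim.py, assuming this runs on a comma device whose HARDWARE implementation provides get_sim_lpa() as declared in the LPABase additions above:

```python
from openpilot.system.hardware import HARDWARE

lpa = HARDWARE.get_sim_lpa()
for p in lpa.list_profiles():
    status = "enabled" if p.enabled else "disabled"
    print(f"{p.iccid} (nickname: {p.nickname or '<none>'}, provider: {p.provider}) - {status}")
```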

View File

@ -1,12 +1,11 @@
import random
from cereal import log
from openpilot.system.hardware.base import HardwareBase
from openpilot.system.hardware.base import HardwareBase, LPABase
NetworkType = log.DeviceState.NetworkType
NetworkStrength = log.DeviceState.NetworkStrength
class Pc(HardwareBase):
def get_os_version(self):
return None
@ -41,6 +40,9 @@ class Pc(HardwareBase):
'data_connected': False
}
def get_sim_lpa(self) -> LPABase:
raise NotImplementedError("SIM LPA not implemented for PC")
def get_network_strength(self, network_type):
return NetworkStrength.unknown

View File

@ -1,25 +1,25 @@
[
{
"name": "xbl",
"url": "https://commadist.azureedge.net/agnosupdate/xbl-468f1ad6ab55e198647ff9191f91bd2918db9c0a3e27bae5673b4c5575c1254c.img.xz",
"hash": "468f1ad6ab55e198647ff9191f91bd2918db9c0a3e27bae5673b4c5575c1254c",
"hash_raw": "468f1ad6ab55e198647ff9191f91bd2918db9c0a3e27bae5673b4c5575c1254c",
"url": "https://commadist.azureedge.net/agnosupdate/xbl-6710967ca9701f205d7ab19c3a9b0dd2f547e65b3d96048b7c2b03755aafa0f1.img.xz",
"hash": "6710967ca9701f205d7ab19c3a9b0dd2f547e65b3d96048b7c2b03755aafa0f1",
"hash_raw": "6710967ca9701f205d7ab19c3a9b0dd2f547e65b3d96048b7c2b03755aafa0f1",
"size": 3282256,
"sparse": false,
"full_check": true,
"has_ab": true,
"ondevice_hash": "d35a86e7b8ddd9279b513a6f27da1521aa0f89fb93987ea74d57d0f0bbbbd247"
"ondevice_hash": "003a17ab1be68a696f7efe4c9938e8be511d4aacfc2f3211fc896bdc1681d089"
},
{
"name": "xbl_config",
"url": "https://commadist.azureedge.net/agnosupdate/xbl_config-92b675dc2862ed15c732d91d9eb307d7e852e349217db8bee8f8829db543686b.img.xz",
"hash": "92b675dc2862ed15c732d91d9eb307d7e852e349217db8bee8f8829db543686b",
"hash_raw": "92b675dc2862ed15c732d91d9eb307d7e852e349217db8bee8f8829db543686b",
"url": "https://commadist.azureedge.net/agnosupdate/xbl_config-63922cfbfdf4ab87986c4ba8f3a4df5bf28414b3f71a29ec5947336722215535.img.xz",
"hash": "63922cfbfdf4ab87986c4ba8f3a4df5bf28414b3f71a29ec5947336722215535",
"hash_raw": "63922cfbfdf4ab87986c4ba8f3a4df5bf28414b3f71a29ec5947336722215535",
"size": 98124,
"sparse": false,
"full_check": true,
"has_ab": true,
"ondevice_hash": "623f1568072ee2d687ba8449a3d894c1c83dc4131b2e79eff35696885f70a419"
"ondevice_hash": "2a855dd636cc94718b64bea83a44d0a31741ecaa8f72a63613ff348ec7404091"
},
{
"name": "abl",
@ -34,50 +34,50 @@
},
{
"name": "aop",
"url": "https://commadist.azureedge.net/agnosupdate/aop-f0fcf7611d0890a72984f15a516dd37fa532dfcb70d428a8406838cf74ce23d5.img.xz",
"hash": "f0fcf7611d0890a72984f15a516dd37fa532dfcb70d428a8406838cf74ce23d5",
"hash_raw": "f0fcf7611d0890a72984f15a516dd37fa532dfcb70d428a8406838cf74ce23d5",
"url": "https://commadist.azureedge.net/agnosupdate/aop-21370172e590bd4ea907a558bcd6df20dc7a6c7d38b8e62fdde18f4a512ba9e9.img.xz",
"hash": "21370172e590bd4ea907a558bcd6df20dc7a6c7d38b8e62fdde18f4a512ba9e9",
"hash_raw": "21370172e590bd4ea907a558bcd6df20dc7a6c7d38b8e62fdde18f4a512ba9e9",
"size": 184364,
"sparse": false,
"full_check": true,
"has_ab": true,
"ondevice_hash": "bf74feca486f650589f6b7c90eab73274e35a68b5e00bfc1de0ed5f5484d4b3d"
"ondevice_hash": "c1be2f4aac5b3af49b904b027faec418d05efd7bd5144eb4fdfcba602bcf2180"
},
{
"name": "devcfg",
"url": "https://commadist.azureedge.net/agnosupdate/devcfg-225b24ea7b1d2fee7f7d2da21386920ddacac2e33e9e938168436292f4eae180.img.xz",
"hash": "225b24ea7b1d2fee7f7d2da21386920ddacac2e33e9e938168436292f4eae180",
"hash_raw": "225b24ea7b1d2fee7f7d2da21386920ddacac2e33e9e938168436292f4eae180",
"url": "https://commadist.azureedge.net/agnosupdate/devcfg-d7d7e52963bbedbbf8a7e66847579ca106a0a729ce2cf60f4b8d8ea4b535d620.img.xz",
"hash": "d7d7e52963bbedbbf8a7e66847579ca106a0a729ce2cf60f4b8d8ea4b535d620",
"hash_raw": "d7d7e52963bbedbbf8a7e66847579ca106a0a729ce2cf60f4b8d8ea4b535d620",
"size": 40336,
"sparse": false,
"full_check": true,
"has_ab": true,
"ondevice_hash": "70f682b59ca0fe2f197d1486bd8be7b9b7e560798ad40ddef83b9f0a2f497938"
"ondevice_hash": "17b229668b20305ff8fa3cd5f94716a3aaa1e5bf9d1c24117eff7f2f81ae719f"
},
{
"name": "boot",
"url": "https://commadist.azureedge.net/agnosupdate/boot-3d8e848796924081f5a6b3d745808b1117ae2ec41c03f2d41ee2e75633bd6425.img.xz",
"hash": "3d8e848796924081f5a6b3d745808b1117ae2ec41c03f2d41ee2e75633bd6425",
"hash_raw": "3d8e848796924081f5a6b3d745808b1117ae2ec41c03f2d41ee2e75633bd6425",
"url": "https://commadist.azureedge.net/agnosupdate/boot-4143170bad94968fd9be870b1498b4100bf273ed0aec2a2601c9017991d4bd42.img.xz",
"hash": "4143170bad94968fd9be870b1498b4100bf273ed0aec2a2601c9017991d4bd42",
"hash_raw": "4143170bad94968fd9be870b1498b4100bf273ed0aec2a2601c9017991d4bd42",
"size": 18479104,
"sparse": false,
"full_check": true,
"has_ab": true,
"ondevice_hash": "2075104847d1c96a06f07e85efb9f48d0e792d75a059047eae7ba4b463ffeadf"
"ondevice_hash": "6b7b3371100ad36d8a5a9ff19a1663b9b9e2d5e99cbe3cf9255e9c3017291ce3"
},
{
"name": "system",
"url": "https://commadist.azureedge.net/agnosupdate/system-b22bd239c6f9527596fd49b98b7d521a563f99b90953ce021ee36567498e99c7.img.xz",
"hash": "cb9bfde1e995b97f728f5d5ad8d7a0f7a01544db5d138ead9b2350f222640939",
"hash_raw": "b22bd239c6f9527596fd49b98b7d521a563f99b90953ce021ee36567498e99c7",
"url": "https://commadist.azureedge.net/agnosupdate/system-c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa.img.xz",
"hash": "993d6a1cd2b684e2b1cf6ff840f8996f02a529011372d9c1471e4c80719e7da9",
"hash_raw": "c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa",
"size": 5368709120,
"sparse": true,
"full_check": false,
"has_ab": true,
"ondevice_hash": "e92a1f34158c60364c8d47b8ebbb6e59edf8d4865cd5edfeb2355d6f54f617fc",
"ondevice_hash": "59db25651da977eeb16a1af741fd01fc3d6b50d21544b1a7428b7c86b2cdef2d",
"alt": {
"hash": "b22bd239c6f9527596fd49b98b7d521a563f99b90953ce021ee36567498e99c7",
"url": "https://commadist.azureedge.net/agnosupdate/system-b22bd239c6f9527596fd49b98b7d521a563f99b90953ce021ee36567498e99c7.img",
"hash": "c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa",
"url": "https://commadist.azureedge.net/agnosupdate/system-c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa.img",
"size": 5368709120
}
}

View File

@ -130,25 +130,25 @@
},
{
"name": "xbl",
"url": "https://commadist.azureedge.net/agnosupdate/xbl-468f1ad6ab55e198647ff9191f91bd2918db9c0a3e27bae5673b4c5575c1254c.img.xz",
"hash": "468f1ad6ab55e198647ff9191f91bd2918db9c0a3e27bae5673b4c5575c1254c",
"hash_raw": "468f1ad6ab55e198647ff9191f91bd2918db9c0a3e27bae5673b4c5575c1254c",
"url": "https://commadist.azureedge.net/agnosupdate/xbl-6710967ca9701f205d7ab19c3a9b0dd2f547e65b3d96048b7c2b03755aafa0f1.img.xz",
"hash": "6710967ca9701f205d7ab19c3a9b0dd2f547e65b3d96048b7c2b03755aafa0f1",
"hash_raw": "6710967ca9701f205d7ab19c3a9b0dd2f547e65b3d96048b7c2b03755aafa0f1",
"size": 3282256,
"sparse": false,
"full_check": true,
"has_ab": true,
"ondevice_hash": "d35a86e7b8ddd9279b513a6f27da1521aa0f89fb93987ea74d57d0f0bbbbd247"
"ondevice_hash": "003a17ab1be68a696f7efe4c9938e8be511d4aacfc2f3211fc896bdc1681d089"
},
{
"name": "xbl_config",
"url": "https://commadist.azureedge.net/agnosupdate/xbl_config-92b675dc2862ed15c732d91d9eb307d7e852e349217db8bee8f8829db543686b.img.xz",
"hash": "92b675dc2862ed15c732d91d9eb307d7e852e349217db8bee8f8829db543686b",
"hash_raw": "92b675dc2862ed15c732d91d9eb307d7e852e349217db8bee8f8829db543686b",
"url": "https://commadist.azureedge.net/agnosupdate/xbl_config-63922cfbfdf4ab87986c4ba8f3a4df5bf28414b3f71a29ec5947336722215535.img.xz",
"hash": "63922cfbfdf4ab87986c4ba8f3a4df5bf28414b3f71a29ec5947336722215535",
"hash_raw": "63922cfbfdf4ab87986c4ba8f3a4df5bf28414b3f71a29ec5947336722215535",
"size": 98124,
"sparse": false,
"full_check": true,
"has_ab": true,
"ondevice_hash": "623f1568072ee2d687ba8449a3d894c1c83dc4131b2e79eff35696885f70a419"
"ondevice_hash": "2a855dd636cc94718b64bea83a44d0a31741ecaa8f72a63613ff348ec7404091"
},
{
"name": "abl",
@ -163,14 +163,14 @@
},
{
"name": "aop",
"url": "https://commadist.azureedge.net/agnosupdate/aop-f0fcf7611d0890a72984f15a516dd37fa532dfcb70d428a8406838cf74ce23d5.img.xz",
"hash": "f0fcf7611d0890a72984f15a516dd37fa532dfcb70d428a8406838cf74ce23d5",
"hash_raw": "f0fcf7611d0890a72984f15a516dd37fa532dfcb70d428a8406838cf74ce23d5",
"url": "https://commadist.azureedge.net/agnosupdate/aop-21370172e590bd4ea907a558bcd6df20dc7a6c7d38b8e62fdde18f4a512ba9e9.img.xz",
"hash": "21370172e590bd4ea907a558bcd6df20dc7a6c7d38b8e62fdde18f4a512ba9e9",
"hash_raw": "21370172e590bd4ea907a558bcd6df20dc7a6c7d38b8e62fdde18f4a512ba9e9",
"size": 184364,
"sparse": false,
"full_check": true,
"has_ab": true,
"ondevice_hash": "bf74feca486f650589f6b7c90eab73274e35a68b5e00bfc1de0ed5f5484d4b3d"
"ondevice_hash": "c1be2f4aac5b3af49b904b027faec418d05efd7bd5144eb4fdfcba602bcf2180"
},
{
"name": "bluetooth",
@ -207,14 +207,14 @@
},
{
"name": "devcfg",
"url": "https://commadist.azureedge.net/agnosupdate/devcfg-225b24ea7b1d2fee7f7d2da21386920ddacac2e33e9e938168436292f4eae180.img.xz",
"hash": "225b24ea7b1d2fee7f7d2da21386920ddacac2e33e9e938168436292f4eae180",
"hash_raw": "225b24ea7b1d2fee7f7d2da21386920ddacac2e33e9e938168436292f4eae180",
"url": "https://commadist.azureedge.net/agnosupdate/devcfg-d7d7e52963bbedbbf8a7e66847579ca106a0a729ce2cf60f4b8d8ea4b535d620.img.xz",
"hash": "d7d7e52963bbedbbf8a7e66847579ca106a0a729ce2cf60f4b8d8ea4b535d620",
"hash_raw": "d7d7e52963bbedbbf8a7e66847579ca106a0a729ce2cf60f4b8d8ea4b535d620",
"size": 40336,
"sparse": false,
"full_check": true,
"has_ab": true,
"ondevice_hash": "70f682b59ca0fe2f197d1486bd8be7b9b7e560798ad40ddef83b9f0a2f497938"
"ondevice_hash": "17b229668b20305ff8fa3cd5f94716a3aaa1e5bf9d1c24117eff7f2f81ae719f"
},
{
"name": "devinfo",
@ -339,62 +339,62 @@
},
{
"name": "boot",
"url": "https://commadist.azureedge.net/agnosupdate/boot-3d8e848796924081f5a6b3d745808b1117ae2ec41c03f2d41ee2e75633bd6425.img.xz",
"hash": "3d8e848796924081f5a6b3d745808b1117ae2ec41c03f2d41ee2e75633bd6425",
"hash_raw": "3d8e848796924081f5a6b3d745808b1117ae2ec41c03f2d41ee2e75633bd6425",
"url": "https://commadist.azureedge.net/agnosupdate/boot-4143170bad94968fd9be870b1498b4100bf273ed0aec2a2601c9017991d4bd42.img.xz",
"hash": "4143170bad94968fd9be870b1498b4100bf273ed0aec2a2601c9017991d4bd42",
"hash_raw": "4143170bad94968fd9be870b1498b4100bf273ed0aec2a2601c9017991d4bd42",
"size": 18479104,
"sparse": false,
"full_check": true,
"has_ab": true,
"ondevice_hash": "2075104847d1c96a06f07e85efb9f48d0e792d75a059047eae7ba4b463ffeadf"
"ondevice_hash": "6b7b3371100ad36d8a5a9ff19a1663b9b9e2d5e99cbe3cf9255e9c3017291ce3"
},
{
"name": "system",
"url": "https://commadist.azureedge.net/agnosupdate/system-b22bd239c6f9527596fd49b98b7d521a563f99b90953ce021ee36567498e99c7.img.xz",
"hash": "cb9bfde1e995b97f728f5d5ad8d7a0f7a01544db5d138ead9b2350f222640939",
"hash_raw": "b22bd239c6f9527596fd49b98b7d521a563f99b90953ce021ee36567498e99c7",
"url": "https://commadist.azureedge.net/agnosupdate/system-c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa.img.xz",
"hash": "993d6a1cd2b684e2b1cf6ff840f8996f02a529011372d9c1471e4c80719e7da9",
"hash_raw": "c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa",
"size": 5368709120,
"sparse": true,
"full_check": false,
"has_ab": true,
"ondevice_hash": "e92a1f34158c60364c8d47b8ebbb6e59edf8d4865cd5edfeb2355d6f54f617fc",
"ondevice_hash": "59db25651da977eeb16a1af741fd01fc3d6b50d21544b1a7428b7c86b2cdef2d",
"alt": {
"hash": "b22bd239c6f9527596fd49b98b7d521a563f99b90953ce021ee36567498e99c7",
"url": "https://commadist.azureedge.net/agnosupdate/system-b22bd239c6f9527596fd49b98b7d521a563f99b90953ce021ee36567498e99c7.img",
"hash": "c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa",
"url": "https://commadist.azureedge.net/agnosupdate/system-c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa.img",
"size": 5368709120
}
},
{
"name": "userdata_90",
"url": "https://commadist.azureedge.net/agnosupdate/userdata_90-4bb7239f7e82c846e4d2584c0c433f03c582a80950de4094e6c190563d6d84ac.img.xz",
"hash": "b18001a2a87caa070fabf6321f8215ac353d6444564e3f86329b4dccc039ce54",
"hash_raw": "4bb7239f7e82c846e4d2584c0c433f03c582a80950de4094e6c190563d6d84ac",
"url": "https://commadist.azureedge.net/agnosupdate/userdata_90-89a161f17b86637413fe10a641550110b626b699382f5138c02267b7866a8494.img.xz",
"hash": "99d9e6cf6755581c6879bbf442bd62212beb8a04116e965ab987135b8842188b",
"hash_raw": "89a161f17b86637413fe10a641550110b626b699382f5138c02267b7866a8494",
"size": 96636764160,
"sparse": true,
"full_check": true,
"has_ab": false,
"ondevice_hash": "15ce16f2349d5b4d5fec6ad1e36222b1ae744ed10b8930bc9af75bd244dccb3c"
"ondevice_hash": "24ea29ab9c4ecec0568a4aa83e38790fedfce694060e90f4bde725931386ff41"
},
{
"name": "userdata_89",
"url": "https://commadist.azureedge.net/agnosupdate/userdata_89-e36b59bf9ff755b6ca488df2ba1e20da8f7dab6b8843129f3fdcccd7ff2ff7d8.img.xz",
"hash": "12682cf54596ab1bd1c2464c4ca85888e4e06b47af5ff7d0432399e9907e2f64",
"hash_raw": "e36b59bf9ff755b6ca488df2ba1e20da8f7dab6b8843129f3fdcccd7ff2ff7d8",
"url": "https://commadist.azureedge.net/agnosupdate/userdata_89-cdd3401168819987c4840765bba1aa2217641b1a6a4165c412f44cac14ccfcbf.img.xz",
"hash": "5fbfa008a7f6b58ab01d4d171f3185924d4c9db69b54f4bfc0f214c6f17c2435",
"hash_raw": "cdd3401168819987c4840765bba1aa2217641b1a6a4165c412f44cac14ccfcbf",
"size": 95563022336,
"sparse": true,
"full_check": true,
"has_ab": false,
"ondevice_hash": "e4df9dea47ff04967d971263d50c17460ef240457e8d814e7c4f409f7493eb8a"
"ondevice_hash": "c07dc2e883a23d4a24d976cdf53a767a2fd699c8eeb476d60cdf18e84b417a52"
},
{
"name": "userdata_30",
"url": "https://commadist.azureedge.net/agnosupdate/userdata_30-fe1d86f5322c675c58b3ae9753a4670abf44a25746bf6ac822aed108bb577282.img.xz",
"hash": "fa471703be0f0647617d183312d5209d23407f1628e4ab0934e6ec54b1a6b263",
"hash_raw": "fe1d86f5322c675c58b3ae9753a4670abf44a25746bf6ac822aed108bb577282",
"url": "https://commadist.azureedge.net/agnosupdate/userdata_30-2a8e8278b3bb545e6d7292c2417ccebdca9b47507eb5924f7c1e068737a7edfd.img.xz",
"hash": "b3bc293c9c5e0480ef663e980c8ccb2fb83ffd230c85f8797830fb61b8f59360",
"hash_raw": "2a8e8278b3bb545e6d7292c2417ccebdca9b47507eb5924f7c1e068737a7edfd",
"size": 32212254720,
"sparse": true,
"full_check": true,
"has_ab": false,
"ondevice_hash": "0b5b2402c9caa1ed7b832818e66580c974251e735bda91f2f226c41499d5616e"
"ondevice_hash": "8dae1cda089828c750d1d646337774ccd9432f567ecefde19a06dc7feeda9cd3"
}
]

View File

@ -1,115 +1,106 @@
#!/usr/bin/env python3
import json
import os
import math
import time
import binascii
import requests
import serial
import shutil
import subprocess
from typing import Literal
from openpilot.system.hardware.base import LPABase, LPAError, LPAProfileNotFoundError, Profile
def post(url, payload):
print()
print("POST to", url)
r = requests.post(
url,
data=payload,
verify=False,
headers={
"Content-Type": "application/json",
"X-Admin-Protocol": "gsma/rsp/v2.2.0",
"charset": "utf-8",
"User-Agent": "gsma-rsp-lpad",
},
)
print("resp", r)
print("resp text", repr(r.text))
print()
r.raise_for_status()
class TiciLPA(LPABase):
def __init__(self, interface: Literal['qmi', 'at'] = 'qmi'):
self.env = os.environ.copy()
self.env['LPAC_APDU'] = interface
self.env['QMI_DEVICE'] = '/dev/cdc-wdm0'
self.env['AT_DEVICE'] = '/dev/ttyUSB2'
ret = f"HTTP/1.1 {r.status_code}"
ret += ''.join(f"{k}: {v}" for k, v in r.headers.items() if k != 'Connection')
return ret.encode() + r.content
self.timeout_sec = 45
if shutil.which('lpac') is None:
raise LPAError('lpac not found, must be installed!')
class LPA:
def __init__(self):
self.dev = serial.Serial('/dev/ttyUSB2', baudrate=57600, timeout=1, bytesize=8)
self.dev.reset_input_buffer()
self.dev.reset_output_buffer()
assert "OK" in self.at("AT")
def list_profiles(self) -> list[Profile]:
msgs = self._invoke('profile', 'list')
self._validate_successful(msgs)
return [Profile(
iccid=p['iccid'],
nickname=p['profileNickname'],
enabled=p['profileState'] == 'enabled',
provider=p['serviceProviderName']
) for p in msgs[-1]['payload']['data']]
def at(self, cmd):
print(f"==> {cmd}")
self.dev.write(cmd.encode() + b'\r\n')
def get_active_profile(self) -> Profile | None:
return next((p for p in self.list_profiles() if p.enabled), None)
r = b""
cnt = 0
while b"OK" not in r and b"ERROR" not in r and cnt < 20:
r += self.dev.read(8192).strip()
cnt += 1
r = r.decode()
print(f"<== {repr(r)}")
return r
def delete_profile(self, iccid: str) -> None:
self._validate_profile_exists(iccid)
latest = self.get_active_profile()
if latest is not None and latest.iccid == iccid:
raise LPAError('cannot delete active profile, switch to another profile first')
self._validate_successful(self._invoke('profile', 'delete', iccid))
self._process_notifications()
def download_ota(self, qr):
return self.at(f'AT+QESIM="ota","{qr}"')
def download_profile(self, qr: str, nickname: str | None = None) -> None:
msgs = self._invoke('profile', 'download', '-a', qr)
self._validate_successful(msgs)
new_profile = next((m for m in msgs if m['payload']['message'] == 'es8p_meatadata_parse'), None)
if new_profile is None:
raise LPAError('no new profile found')
if nickname:
self.nickname_profile(new_profile['payload']['data']['iccid'], nickname)
self._process_notifications()
def download(self, qr):
smdp = qr.split('$')[1]
out = self.at(f'AT+QESIM="download","{qr}"')
for _ in range(5):
line = out.split("+QESIM: ")[1].split("\r\n\r\nOK")[0]
def nickname_profile(self, iccid: str, nickname: str) -> None:
self._validate_profile_exists(iccid)
self._validate_successful(self._invoke('profile', 'nickname', iccid, nickname))
parts = [x.strip().strip('"') for x in line.split(',', maxsplit=4)]
print(repr(parts))
trans, ret, url, payloadlen, payload = parts
assert trans == "trans" and ret == "0"
assert len(payload) == int(payloadlen)
def switch_profile(self, iccid: str) -> None:
self._validate_profile_exists(iccid)
latest = self.get_active_profile()
if latest and latest.iccid == iccid:
return
self._validate_successful(self._invoke('profile', 'enable', iccid))
self._process_notifications()
r = post(f"https://{smdp}/{url}", payload)
to_send = binascii.hexlify(r).decode()
def _invoke(self, *cmd: str):
proc = subprocess.Popen(['sudo', '-E', 'lpac'] + list(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=self.env)
try:
out, err = proc.communicate(timeout=self.timeout_sec)
except subprocess.TimeoutExpired as e:
proc.kill()
raise LPAError(f"lpac {cmd} timed out after {self.timeout_sec} seconds") from e
chunk_len = 1400
for i in range(math.ceil(len(to_send) / chunk_len)):
state = 1 if (i+1)*chunk_len < len(to_send) else 0
data = to_send[i * chunk_len : (i+1)*chunk_len]
out = self.at(f'AT+QESIM="trans",{len(to_send)},{state},{i},{len(data)},"{data}"')
assert "OK" in out
messages = []
for line in out.decode().strip().splitlines():
if line.startswith('{'):
message = json.loads(line)
if '+QESIM:"download",1' in out:
raise Exception("profile install failed")
elif '+QESIM:"download",0' in out:
print("done, successfully loaded")
break
# lpac response format validations
assert 'type' in message, 'expected type in message'
assert message['type'] == 'lpa' or message['type'] == 'progress', 'expected lpa or progress message type'
assert 'payload' in message, 'expected payload in message'
assert 'code' in message['payload'], 'expected code in message payload'
assert 'data' in message['payload'], 'expected data in message payload'
def enable(self, iccid):
self.at(f'AT+QESIM="enable","{iccid}"')
msg_ret_code = message['payload']['code']
if msg_ret_code != 0:
raise LPAError(f"lpac {' '.join(cmd)} failed with code {msg_ret_code}: <{message['payload']['message']}> {message['payload']['data']}")
def disable(self, iccid):
self.at(f'AT+QESIM="disable","{iccid}"')
messages.append(message)
def delete(self, iccid):
self.at(f'AT+QESIM="delete","{iccid}"')
if len(messages) == 0:
raise LPAError(f"lpac {cmd} returned no messages")
def list_profiles(self):
out = self.at('AT+QESIM="list"')
return out.strip().splitlines()[1:]
return messages
def _process_notifications(self) -> None:
"""
Process notifications stored on the eUICC, typically to activate/deactivate the profile with the carrier.
"""
self._validate_successful(self._invoke('notification', 'process', '-a', '-r'))
if __name__ == "__main__":
import sys
def _validate_profile_exists(self, iccid: str) -> None:
if not any(p.iccid == iccid for p in self.list_profiles()):
raise LPAProfileNotFoundError(f'profile {iccid} does not exist')
if "RESTART" in os.environ:
subprocess.check_call("sudo systemctl stop ModemManager", shell=True)
subprocess.check_call("/usr/comma/lte/lte.sh stop_blocking", shell=True)
subprocess.check_call("/usr/comma/lte/lte.sh start", shell=True)
while not os.path.exists('/dev/ttyUSB2'):
time.sleep(1)
time.sleep(3)
lpa = LPA()
print(lpa.list_profiles())
if len(sys.argv) > 1:
lpa.download(sys.argv[1])
print(lpa.list_profiles())
def _validate_successful(self, msgs: list[dict]) -> None:
assert msgs[-1]['payload']['message'] == 'success', 'expected success notification'
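For context, a minimal usage sketch of the new lpac-based `TiciLPA` interface defined above. The activation string, SM-DP+ address, and nickname are placeholders, and it assumes the `lpac` binary and the QMI modem device are present:

```py
from openpilot.system.hardware.tici.esim import TiciLPA

lpa = TiciLPA(interface='qmi')
for p in lpa.list_profiles():
    print(p.iccid, p.nickname, p.provider, "active" if p.enabled else "inactive")

if lpa.get_active_profile() is None:
    # "LPA:1$<smdp>$<matching-id>" style activation code; values here are hypothetical
    lpa.download_profile("LPA:1$smdp.example.com$MATCHING-ID", nickname="work")
```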

View File

@ -9,9 +9,11 @@ from functools import cached_property, lru_cache
from pathlib import Path
from cereal import log
from openpilot.common.util import sudo_read, sudo_write
from openpilot.common.gpio import gpio_set, gpio_init, get_irqs_for_action
from openpilot.system.hardware.base import HardwareBase, ThermalConfig, ThermalZone
from openpilot.system.hardware.base import HardwareBase, LPABase, ThermalConfig, ThermalZone
from openpilot.system.hardware.tici import iwlist
from openpilot.system.hardware.tici.esim import TiciLPA
from openpilot.system.hardware.tici.pins import GPIO
from openpilot.system.hardware.tici.amplifier import Amplifier
@ -61,25 +63,6 @@ MM_MODEM_ACCESS_TECHNOLOGY_UMTS = 1 << 5
MM_MODEM_ACCESS_TECHNOLOGY_LTE = 1 << 14
def sudo_write(val, path):
try:
with open(path, 'w') as f:
f.write(str(val))
except PermissionError:
os.system(f"sudo chmod a+w {path}")
try:
with open(path, 'w') as f:
f.write(str(val))
except PermissionError:
# fallback for debugfs files
os.system(f"sudo su -c 'echo {val} > {path}'")
def sudo_read(path: str) -> str:
try:
return subprocess.check_output(f"sudo cat {path}", shell=True, encoding='utf8').strip()
except Exception:
return ""
def affine_irq(val, action):
irqs = get_irqs_for_action(action)
if len(irqs) == 0:
@ -198,6 +181,9 @@ class Tici(HardwareBase):
'data_connected': modem.Get(MM_MODEM, 'State', dbus_interface=DBUS_PROPS, timeout=TIMEOUT) == MM_MODEM_STATE.CONNECTED,
}
def get_sim_lpa(self) -> LPABase:
return TiciLPA()
def get_imei(self, slot):
if slot != 0:
return ""
@ -297,13 +283,11 @@ class Tici(HardwareBase):
return None
def get_modem_temperatures(self):
if self.get_device_type() == "mici":
return []
timeout = 0.2 # Default timeout is too short
try:
modem = self.get_modem()
temps = modem.Command("AT+QTEMP", math.ceil(timeout), dbus_interface=MM_MODEM, timeout=timeout)
return list(map(int, temps.split(' ')[1].split(',')))
return list(filter(lambda t: t != 255, map(int, temps.split(' ')[1].split(','))))
except Exception:
return []
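To illustrate the new filter: readings of 255 (presumably a sentinel for an unavailable sensor) are now dropped before the list is returned. A small sketch with a made-up response string, parsed the same way as above:

```py
# made-up AT+QTEMP response; 255 entries are filtered out
temps = "+QTEMP: 36,34,255"
vals = list(filter(lambda t: t != 255, map(int, temps.split(' ')[1].split(','))))
print(vals)   # [36, 34]
```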
@ -335,12 +319,20 @@ class Tici(HardwareBase):
return ThermalConfig(cpu=[ThermalZone(f"cpu{i}-silver-usr") for i in range(4)] +
[ThermalZone(f"cpu{i}-gold-usr") for i in range(4)],
gpu=[ThermalZone("gpu0-usr"), ThermalZone("gpu1-usr")],
dsp=ThermalZone("compute-hvx-usr"),
memory=ThermalZone("ddr-usr"),
pmic=[ThermalZone("pm8998_tz"), ThermalZone("pm8005_tz")],
intake=intake,
exhaust=exhaust,
case=case)
def set_display_power(self, on):
try:
with open("/sys/class/backlight/panel0-backlight/bl_power", "w") as f:
f.write("0" if on else "4")
except Exception:
pass
def set_screen_brightness(self, percentage):
try:
with open("/sys/class/backlight/panel0-backlight/max_brightness") as f:

View File

@ -89,7 +89,7 @@ class Uploader:
def list_upload_files(self, metered: bool) -> Iterator[tuple[str, str, str]]:
r = self.params.get("AthenadRecentlyViewedRoutes", encoding="utf8")
requested_routes = [] if r is None else r.split(",")
requested_routes = [] if r is None else [route for route in r.split(",") if route]
for logdir in listdir_by_creation(self.root):
path = os.path.join(self.root, logdir)
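For context on why the new filter matters: splitting an empty string still yields one element, so an empty `AthenadRecentlyViewedRoutes` param would otherwise produce a bogus empty route name. A quick illustration:

```py
r = ""                                              # param set but empty
print(r.split(","))                                 # ['']  -> one bogus empty route
print([route for route in r.split(",") if route])   # []    -> nothing requested
```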

View File

@ -1,16 +1,17 @@
import os
import errno
import xattr
_cached_attributes: dict[tuple, bytes | None] = {}
def getxattr(path: str, attr_name: str) -> bytes | None:
key = (path, attr_name)
if key not in _cached_attributes:
try:
response = os.getxattr(path, attr_name)
response = xattr.getxattr(path, attr_name)
except OSError as e:
# ENODATA means attribute hasn't been set
if e.errno == errno.ENODATA:
# ENODATA (Linux) or ENOATTR (macOS) means attribute hasn't been set
if e.errno == errno.ENODATA or (hasattr(errno, 'ENOATTR') and e.errno == errno.ENOATTR):
response = None
else:
raise
@ -19,4 +20,4 @@ def getxattr(path: str, attr_name: str) -> bytes | None:
def setxattr(path: str, attr_name: str, attr_value: bytes) -> None:
_cached_attributes.pop((path, attr_name), None)
return os.setxattr(path, attr_name, attr_value)
xattr.setxattr(path, attr_name, attr_value)
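A quick sketch of how the cached helpers behave. The module path is assumed, and it needs a filesystem with user-xattr support:

```py
from openpilot.system.loggerd.xattr_cache import getxattr, setxattr  # assumed module path

path = "example.bin"
open(path, "wb").close()
print(getxattr(path, "user.flag"))   # None the first time: ENODATA/ENOATTR means never set
setxattr(path, "user.flag", b"1")    # also drops the cached (path, attr) entry
print(getxattr(path, "user.flag"))   # b"1"
```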

View File

@ -40,6 +40,7 @@ def get_default_params():
("LongitudinalPersonalityMax", "3"),
("ShowDebugUI", "0"),
("ShowTpms", "1"),
("ShowDateTime", "1"),
("ShowPathEnd", "1"),
("ShowCustomBrightness", "100"),
@ -101,6 +102,7 @@ def get_default_params():
("MyDrivingMode", "3"),
("MyDrivingModeAuto", "0"),
("TrafficLightDetectMode", "2"),
("CruiseMaxVals0", "160"),
("CruiseMaxVals1", "200"),
("CruiseMaxVals2", "160"),
("CruiseMaxVals3", "130"),
@ -156,6 +158,7 @@ def get_default_params():
("SteerActuatorDelay", "0"),
("MaxTimeOffroadMin", "60"),
("DisableDM", "0"),
("EnableConnect", "0"),
("MuteDoor", "0"),
("MuteSeatbelt", "0"),
("RecordRoadCam", "0"),

View File

@ -72,6 +72,9 @@ def and_(*fns):
def enable_dm(started, params, CP: car.CarParams) -> bool:
return (started or params.get_bool("IsDriverViewEnabled")) and params.get_int("DisableDM") == 0
def enable_connect(started, params, CP: car.CarParams) -> bool:
return params.get_int("EnableConnect") >= 0
procs = [
DaemonProcess("manage_athenad", "system.athena.manage_athenad", "AthenadPid"),
@ -121,7 +124,7 @@ procs = [
PythonProcess("hardwared", "system.hardware.hardwared", always_run),
PythonProcess("tombstoned", "system.tombstoned", always_run, enabled=not PC),
PythonProcess("updated", "system.updated.updated", enable_updated, enabled=not PC),
#PythonProcess("uploader", "system.loggerd.uploader", always_run),
PythonProcess("uploader", "system.loggerd.uploader", enable_connect),
PythonProcess("statsd", "system.statsd", always_run),
# debug procs

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3
import numpy as np
from functools import cache
import threading
from cereal import messaging
from openpilot.common.realtime import Ratekeeper
@ -52,12 +53,18 @@ class Mic:
self.sound_pressure_weighted = 0
self.sound_pressure_level_weighted = 0
def update(self):
msg = messaging.new_message('microphone', valid=True)
msg.microphone.soundPressure = float(self.sound_pressure)
msg.microphone.soundPressureWeighted = float(self.sound_pressure_weighted)
self.lock = threading.Lock()
msg.microphone.soundPressureWeightedDb = float(self.sound_pressure_level_weighted)
def update(self):
with self.lock:
sound_pressure = self.sound_pressure
sound_pressure_weighted = self.sound_pressure_weighted
sound_pressure_level_weighted = self.sound_pressure_level_weighted
msg = messaging.new_message('microphone', valid=True)
msg.microphone.soundPressure = float(sound_pressure)
msg.microphone.soundPressureWeighted = float(sound_pressure_weighted)
msg.microphone.soundPressureWeightedDb = float(sound_pressure_level_weighted)
self.pm.send('microphone', msg)
self.rk.keep_time()
@ -69,7 +76,7 @@ class Mic:
Logged A-weighted equivalents are rough approximations of the human-perceived loudness.
"""
with self.lock:
self.measurements = np.concatenate((self.measurements, indata[:, 0]))
while self.measurements.size >= FFT_SAMPLES:
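A minimal sketch of the snapshot-under-lock pattern this change introduces: the audio callback writes the levels while the publisher copies them under the same lock and uses the copies outside it (simplified names, not the actual class):

```py
import threading

class Levels:
    def __init__(self):
        self.lock = threading.Lock()
        self.sound_pressure = 0.0
        self.sound_pressure_weighted = 0.0

    def callback(self, sp: float, spw: float):      # runs on the audio thread
        with self.lock:
            self.sound_pressure, self.sound_pressure_weighted = sp, spw

    def publish(self):                               # runs on the main loop
        with self.lock:
            sp, spw = self.sound_pressure, self.sound_pressure_weighted
        return sp, spw                               # snapshot used outside the lock
```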

View File

@ -9,7 +9,6 @@ import signal
import fcntl
import time
import threading
import gc
from collections import defaultdict
from pathlib import Path
@ -72,12 +71,8 @@ def read_time_from_param(params, param) -> datetime.datetime | None:
pass
return None
def run_org(cmd: list[str], cwd: str = None) -> str:
return subprocess.check_output(cmd, cwd=cwd, stderr=subprocess.STDOUT, encoding='utf8')
def run(cmd: list[str], cwd: str = None) -> str:
proc = subprocess.run(cmd, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, encoding='utf8', check=True)
return proc.stdout
return subprocess.check_output(cmd, cwd=cwd, stderr=subprocess.STDOUT, encoding='utf8')
def set_consistent_flag(consistent: bool) -> None:
@ -497,9 +492,6 @@ def main() -> None:
cloudlog.exception("uncaught updated exception, shouldn't happen")
exception = str(e)
OVERLAY_INIT.unlink(missing_ok=True)
finally:
gc.collect()
try:
params.put("UpdaterState", "idle")

View File

@ -21,11 +21,11 @@ tinygrad: For something between [PyTorch](https://github.com/pytorch/pytorch) an
---
This may not be the best deep learning framework, but it is a deep learning framework.
Despite tinygrad's size, it is a fully featured deep learning framework.
Due to its extreme simplicity, it aims to be the easiest framework to add new accelerators to, with support for both inference and training. If XLA is CISC, tinygrad is RISC.
Due to its extreme simplicity, it is the easiest framework to add new accelerators to, with support for both inference and training. If XLA is CISC, tinygrad is RISC.
tinygrad is still alpha software, but we [raised some money](https://geohot.github.io/blog/jekyll/update/2023/05/24/the-tiny-corp-raised-5M.html) to make it good. Someday, we will tape out chips.
tinygrad is now beta software, we [raised some money](https://geohot.github.io/blog/jekyll/update/2023/05/24/the-tiny-corp-raised-5M.html) to make it good. Someday, we will tape out chips.
## Features

View File

@ -35,7 +35,7 @@ def _try_dlopen_$name():
for candidate in PATHS_TO_TRY:
try: return ctypes.CDLL(candidate)
except OSError: pass
raise RuntimeError("library $name not found")
return None
EOF
}
@ -78,11 +78,11 @@ generate_kfd() {
clang2py /usr/include/linux/kfd_ioctl.h -o $BASE/kfd.py -k cdefstum
fixup $BASE/kfd.py
sed -i "s\import ctypes\import ctypes, os\g" $BASE/kfd.py
sed -i "s\import fcntl, functools\import functools" $BASE/kfd.py
sed -i "s\import ctypes,os\a from tinygrad.runtime.support import HWInterface\g" $BASE/kfd.py
sed -i "s\def _do_ioctl(__idir, __base, __nr, __user_struct, __fd, **kwargs):\def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:HWInterface, **kwargs):\g" $BASE/kfd.py
sed -i "s\fcntl.ioctl(__fd, (__idir<<30)\__fd.ioctl((__idir<<30)\g" $BASE/kfd.py
sed -i "s/import ctypes/import ctypes, os/g" $BASE/kfd.py
sed -i "s/import fcntl, functools/import functools/g" $BASE/kfd.py
sed -i "/import functools/a from tinygrad.runtime.support.hcq import FileIOInterface" $BASE/kfd.py
sed -i "s/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd, \*\*kwargs):/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:FileIOInterface, \*\*kwargs):/g" $BASE/kfd.py
sed -i "s/fcntl.ioctl(__fd, (__idir<<30)/__fd.ioctl((__idir<<30)/g" $BASE/kfd.py
python3 -c "import tinygrad.runtime.autogen.kfd"
}
@ -104,10 +104,10 @@ generate_nvrtc() {
}
generate_nv() {
NVKERN_COMMIT_HASH=d6b75a34094b0f56c2ccadf14e5d0bd515ed1ab6
NVKERN_COMMIT_HASH=81fe4fb417c8ac3b9bdcc1d56827d116743892a5
NVKERN_SRC=/tmp/open-gpu-kernel-modules-$NVKERN_COMMIT_HASH
if [ ! -d "$NVKERN_SRC" ]; then
git clone https://github.com/tinygrad/open-gpu-kernel-modules $NVKERN_SRC
git clone https://github.com/NVIDIA/open-gpu-kernel-modules $NVKERN_SRC
pushd .
cd $NVKERN_SRC
git reset --hard $NVKERN_COMMIT_HASH
@ -116,15 +116,19 @@ generate_nv() {
clang2py -k cdefstum \
extra/nv_gpu_driver/clc6c0qmd.h \
extra/nv_gpu_driver/clcec0qmd.h \
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/cl0080.h \
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/cl2080_notification.h \
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/clc56f.h \
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/clc56f.h \
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/clc56f.h \
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/clc86f.h \
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/clc96f.h \
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/clc761.h \
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/cl83de.h \
$NVKERN_SRC/src/nvidia/generated/g_allclasses.h \
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/clc6c0.h \
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/clcdc0.h \
$NVKERN_SRC/kernel-open/nvidia-uvm/clc6b5.h \
$NVKERN_SRC/kernel-open/nvidia-uvm/clc9b5.h \
$NVKERN_SRC/kernel-open/nvidia-uvm/uvm_ioctl.h \
$NVKERN_SRC/kernel-open/nvidia-uvm/uvm_linux_ioctl.h \
$NVKERN_SRC/kernel-open/nvidia-uvm/hwref/ampere/ga100/dev_fault.h \
@ -149,6 +153,7 @@ generate_nv() {
sed -i "s\import ctypes\import ctypes, os\g" $BASE/nv_gpu.py
sed -i 's/#\?\s\([A-Za-z0-9_]\+\) = MW ( \([0-9]\+\) : \([0-9]\+\) )/\1 = (\2 , \3)/' $BASE/nv_gpu.py # NVC6C0_QMDV03_00 processing
sed -i 's/#\sdef NVC6C0_QMD\([A-Za-z0-9_()]\+\):/def NVC6C0_QMD\1:/' $BASE/nv_gpu.py
sed -i 's/#\sdef NVCEC0_QMD\([A-Za-z0-9_()]\+\):/def NVCEC0_QMD\1:/' $BASE/nv_gpu.py
sed -i 's/#\s*return MW(\([0-9i()*+]\+\):\([0-9i()*+]\+\))/ return (\1 , \2)/' $BASE/nv_gpu.py
sed -i 's/#\?\s*\(.*\)\s*=\s*\(NV\)\?BIT\(32\)\?\s*(\s*\([0-9]\+\)\s*)/\1 = (1 << \4)/' $BASE/nv_gpu.py # name = BIT(x) -> name = (1 << x)
sed -i "s/UVM_\([A-Za-z0-9_]\+\) = \['i', '(', '\([0-9]\+\)', ')'\]/UVM_\1 = \2/" $BASE/nv_gpu.py # UVM_name = ['i', '(', '<num>', ')'] -> UVM_name = <num>
@ -287,7 +292,7 @@ generate_vfio() {
fixup $BASE/vfio.py
sed -i "s\import ctypes\import ctypes, os\g" $BASE/vfio.py
sed -i "s\import fcntl, functools\import functools" $BASE/vfio.py
sed -i "s\import ctypes,os\a from tinygrad.runtime.support import HWInterface\g" $BASE/vfio.py
sed -i "s\import ctypes,os\a from tinygrad.runtime.support import FileIOInterface\g" $BASE/vfio.py
sed -i "s\fcntl.ioctl(__fd, (__idir<<30)\return __fd.ioctl((__idir<<30)\g" $BASE/vfio.py
}
@ -349,50 +354,6 @@ generate_am() {
-o $BASE/am/soc24.py
fixup $BASE/am/soc24.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/mp/mp_13_0_0_offset.h \
$AMKERN_INC/asic_reg/mp/mp_13_0_0_sh_mask.h \
-o $BASE/am/mp_13_0_0.py
fixup $BASE/am/mp_13_0_0.py
# 14_0_3 reuses 14_0_2
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/mp/mp_14_0_2_offset.h \
$AMKERN_INC/asic_reg/mp/mp_14_0_2_sh_mask.h \
-o $BASE/am/mp_14_0_3.py
fixup $BASE/am/mp_14_0_3.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/mp/mp_11_0_offset.h \
$AMKERN_INC/asic_reg/mp/mp_11_0_sh_mask.h \
-o $BASE/am/mp_11_0.py
fixup $BASE/am/mp_11_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/gc/gc_9_4_3_offset.h \
$AMKERN_INC/asic_reg/gc/gc_9_4_3_sh_mask.h \
extra/amdpci/overlay/gc_9_4_3.h \
-o $BASE/am/gc_9_4_3.py
fixup $BASE/am/gc_9_4_3.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/gc/gc_10_3_0_offset.h \
$AMKERN_INC/asic_reg/gc/gc_10_3_0_sh_mask.h \
-o $BASE/am/gc_10_3_0.py
fixup $BASE/am/gc_10_3_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/gc/gc_11_0_0_offset.h \
$AMKERN_INC/asic_reg/gc/gc_11_0_0_sh_mask.h \
-o $BASE/am/gc_11_0_0.py
fixup $BASE/am/gc_11_0_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/gc/gc_12_0_0_offset.h \
$AMKERN_INC/asic_reg/gc/gc_12_0_0_sh_mask.h \
-o $BASE/am/gc_12_0_0.py
fixup $BASE/am/gc_12_0_0.py
clang2py -k cdefstum \
extra/hip_gpu_driver/sdma_registers.h \
$AMKERN_AMD/amdgpu/vega10_sdma_pkt_open.h \
@ -414,66 +375,6 @@ generate_am() {
-o $BASE/am/sdma_6_0_0.py
fixup $BASE/am/sdma_6_0_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/mmhub/mmhub_3_0_0_offset.h \
$AMKERN_INC/asic_reg/mmhub/mmhub_3_0_0_sh_mask.h \
-o $BASE/am/mmhub_3_0_0.py
fixup $BASE/am/mmhub_3_0_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/mmhub/mmhub_3_0_2_offset.h \
$AMKERN_INC/asic_reg/mmhub/mmhub_3_0_2_sh_mask.h \
-o $BASE/am/mmhub_3_0_2.py
fixup $BASE/am/mmhub_3_0_2.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/nbio/nbio_2_3_offset.h \
$AMKERN_INC/asic_reg/nbio/nbio_2_3_sh_mask.h \
-o $BASE/am/nbio_2_3_0.py
fixup $BASE/am/nbio_2_3_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/mmhub/mmhub_4_1_0_offset.h \
$AMKERN_INC/asic_reg/mmhub/mmhub_4_1_0_sh_mask.h \
-o $BASE/am/mmhub_4_1_0.py
fixup $BASE/am/mmhub_4_1_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/nbio/nbio_4_3_0_offset.h \
$AMKERN_INC/asic_reg/nbio/nbio_4_3_0_sh_mask.h \
-o $BASE/am/nbio_4_3_0.py
fixup $BASE/am/nbio_4_3_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/nbif/nbif_6_3_1_offset.h \
$AMKERN_INC/asic_reg/nbif/nbif_6_3_1_sh_mask.h \
-o $BASE/am/nbif_6_3_1.py
fixup $BASE/am/nbif_6_3_1.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/nbio/nbio_7_9_0_offset.h \
$AMKERN_INC/asic_reg/nbio/nbio_7_9_0_sh_mask.h \
-o $BASE/am/nbio_7_9_0.py
fixup $BASE/am/nbio_7_9_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/nbio/nbio_7_11_0_offset.h \
$AMKERN_INC/asic_reg/nbio/nbio_7_11_0_sh_mask.h \
-o $BASE/am/nbio_7_11_0.py
fixup $BASE/am/nbio_7_11_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/oss/osssys_6_0_0_offset.h \
$AMKERN_INC/asic_reg/oss/osssys_6_0_0_sh_mask.h \
-o $BASE/am/osssys_6_0_0.py
fixup $BASE/am/osssys_6_0_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/oss/osssys_7_0_0_offset.h \
$AMKERN_INC/asic_reg/oss/osssys_7_0_0_sh_mask.h \
-o $BASE/am/osssys_7_0_0.py
fixup $BASE/am/osssys_7_0_0.py
clang2py -k cdefstum \
$AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h \
$AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h \
@ -484,24 +385,11 @@ generate_am() {
clang2py -k cdefstum \
$AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h \
$AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h \
$AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h \
$AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0.h \
extra/amdpci/headers/amdgpu_smu.h \
--clang-args="-include stdint.h" \
-o $BASE/am/smu_v14_0_3.py
fixup $BASE/am/smu_v14_0_3.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/hdp/hdp_6_0_0_offset.h \
$AMKERN_INC/asic_reg/hdp/hdp_6_0_0_sh_mask.h \
-o $BASE/am/hdp_6_0_0.py
fixup $BASE/am/hdp_6_0_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/hdp/hdp_7_0_0_offset.h \
$AMKERN_INC/asic_reg/hdp/hdp_7_0_0_sh_mask.h \
-o $BASE/am/hdp_7_0_0.py
fixup $BASE/am/hdp_7_0_0.py
}
generate_sqtt() {
@ -523,6 +411,19 @@ generate_webgpu() {
python3 -c "import tinygrad.runtime.autogen.webgpu"
}
generate_libusb() {
clang2py -k cdefstum \
/usr/include/libusb-1.0/libusb.h \
-o $BASE/libusb.py
fixup $BASE/libusb.py
sed -i "s\import ctypes\import ctypes, os\g" $BASE/libusb.py
sed -i "s/FIXME_STUB/libusb/g" "$BASE/libusb.py"
sed -i "s/libusb_le16_to_cpu = libusb_cpu_to_le16//g" "$BASE/libusb.py"
sed -i "s/FunctionFactoryStub()/None if (lib_path:=os.getenv('LIBUSB_PATH', ctypes.util.find_library('usb-1.0'))) is None else ctypes.CDLL(lib_path)/g" "$BASE/libusb.py"
python3 -c "import tinygrad.runtime.autogen.libusb"
}
if [ "$1" == "opencl" ]; then generate_opencl
elif [ "$1" == "hip" ]; then generate_hip
elif [ "$1" == "comgr" ]; then generate_comgr
@ -543,6 +444,7 @@ elif [ "$1" == "adreno" ]; then generate_adreno
elif [ "$1" == "pci" ]; then generate_pci
elif [ "$1" == "vfio" ]; then generate_vfio
elif [ "$1" == "webgpu" ]; then generate_webgpu
elif [ "$1" == "libusb" ]; then generate_libusb
elif [ "$1" == "all" ]; then generate_opencl; generate_hip; generate_comgr; generate_cuda; generate_nvrtc; generate_hsa; generate_kfd; generate_nv; generate_amd; generate_io_uring; generate_libc; generate_am; generate_webgpu
else echo "usage: $0 <type>"
fi

View File

@ -1,7 +1,7 @@
# tinygrad is a tensor library, and as a tensor library it has multiple parts
# 1. a "runtime". this allows buffer management, compilation, and running programs
# 2. a "Device" that uses the runtime but specifies compute in an abstract way for all
# 3. a "LazyBuffer" that fuses the compute into kernels, using memory only when needed
# 3. a "UOp" that fuses the compute into kernels, using memory only when needed
# 4. a "Tensor" that provides an easy to use frontend with autograd ".backward()"
@ -39,7 +39,7 @@ DEVICE = "CPU" # NOTE: you can change this!
import struct
from tinygrad.dtype import dtypes
from tinygrad.device import Buffer, Device
from tinygrad.ops import UOp, Ops
from tinygrad.uop.ops import UOp, Ops
from tinygrad.shape.shapetracker import ShapeTracker
# allocate some buffers + load in values
@ -74,30 +74,47 @@ fxn.exec([out, a, b])
assert out.as_buffer().cast('I')[0] == 5
print("******** third, the LazyBuffer ***********")
print("******** third, the UOp ***********")
from tinygrad.engine.realize import run_schedule
from tinygrad.engine.schedule import create_schedule_with_vars
from tinygrad.engine.grouper import get_kernelize_map
# allocate some values + load in values
a = UOp.metaop(Ops.EMPTY, (1,), dtypes.int32, DEVICE)
b = UOp.metaop(Ops.EMPTY, (1,), dtypes.int32, DEVICE)
a = UOp.new_buffer(DEVICE, 1, dtypes.int32)
b = UOp.new_buffer(DEVICE, 1, dtypes.int32)
a.buffer.allocate().copyin(memoryview(bytearray(struct.pack("I", 2))))
b.buffer.allocate().copyin(memoryview(bytearray(struct.pack("I", 3))))
# describe the computation
out = a.alu(Ops.ADD, b)
out = a + b
s = UOp(Ops.SINK, dtypes.void, (out,))
# schedule the computation as a list of kernels
sched, _, becomes_map = create_schedule_with_vars(out.sink())
for si in sched: print(si.ast.op) # NOTE: the first two convert it to CPU
# NOTE: UOps are no longer mutable, the scheduler gives you a map to lookup which BUFFER the result was written to
out = becomes_map[out]
# group the computation into kernels
becomes_map = get_kernelize_map(s)
# the compute maps to an assign
assign = becomes_map[a+b]
# the first source is the output buffer (data)
assert assign.src[0].op is Ops.BUFFER
# the second source is the kernel (compute)
assert assign.src[1].op is Ops.KERNEL
# schedule the kernel graph in a linear list
s = UOp(Ops.SINK, dtypes.void, (assign,))
sched, _ = create_schedule_with_vars(s)
assert len(sched) == 1
# DEBUGGING: print the compute ast
print(sched[-1].ast)
# NOTE: sched[-1].ast is the same as st_0 above
# the output will be stored in a new buffer
out = assign.buf_uop
assert out.op is Ops.BUFFER and not out.buffer.is_allocated()
print(out)
# run that schedule
run_schedule(sched)

View File

@ -1,6 +1,6 @@
# AM Driver
AM driver is a userspace driver targeting AMD's 7900XTX. You only need tinygrad to send compute tasks to your GPU!
AM driver is a userspace driver targeting AMD's RDNA3/RDNA4. You only need tinygrad to send compute tasks to your GPU!
## How to run?
Make sure that amdgpu module is unloaded and just run tinygrad with `AMD=1`!

View File

@ -0,0 +1,109 @@
# Kernel Creation
Tinygrad lazily builds up a graph of Tensor operations. The Tensor graph includes a mix of:
- Buffer and Assignment Ops: `BUFFER`, `BUFFER_VIEW`, `COPY`, `ASSIGN`
- Movement Ops: `RESHAPE`, `EXPAND`, `PERMUTE`, `PAD`, `SHRINK`, `FLIP`
- Compute Ops: `ADD`, `MUL`, `REDUCE_AXIS`, ...
`Tensor.kernelize` creates the kernels and buffers needed to realize the output Tensor(s).
## Kernelize flow
Let's see how a multiply add Tensor graph becomes a fused elementwise kernel.
```py
# initialize 3 input buffers on the device
a = Tensor([1]).realize()
b = Tensor([2]).realize()
c = Tensor([3]).realize()
# create the Tensor graph
mul = a*b
out = mul+c
print(mul) # <Tensor <UOp METAL (1,) int (<Ops.MUL: 48>, None)> on METAL with grad None>
print(out) # <Tensor <UOp METAL (1,) int (<Ops.ADD: 52>, None)> on METAL with grad None>
out.kernelize()
print(mul) # <Tensor <UOp METAL (1,) int (<Ops.MUL: 48>, None)> on METAL with grad None>
print(out) # <Tensor <UOp METAL (1,) int (<Ops.ASSIGN: 66>, None)> on METAL with grad None>
```
The multiply Tensor stays the same because it is fused. The output Tensor's UOp becomes a new ASSIGN UOp:
```py
print(out.lazydata)
```
The first source is the output BUFFER:
```
UOp(Ops.BUFFER, dtypes.int, arg=1, src=(
UOp(Ops.DEVICE, dtypes.void, arg='METAL', src=()),
UOp(Ops.UNIQUE, dtypes.void, arg=6, src=()),))
```
And the second source is the KERNEL and its 4 buffer edges (output_buffer, a, b, c):
```
UOp(Ops.KERNEL, dtypes.void, arg=<Kernel 12 SINK(<Ops.STORE: 45>,) (__add__, __mul__)>, src=(
UOp(Ops.BUFFER, dtypes.int, arg=1, src=(
x1:=UOp(Ops.DEVICE, dtypes.void, arg='METAL', src=()),
UOp(Ops.UNIQUE, dtypes.void, arg=6, src=()),)),
UOp(Ops.BUFFER, dtypes.int, arg=1, src=(
x1,
UOp(Ops.UNIQUE, dtypes.void, arg=1, src=()),)),
UOp(Ops.BUFFER, dtypes.int, arg=1, src=(
x1,
UOp(Ops.UNIQUE, dtypes.void, arg=3, src=()),)),
UOp(Ops.BUFFER, dtypes.int, arg=1, src=(
x1,
UOp(Ops.UNIQUE, dtypes.void, arg=5, src=()),)),))
```
KERNEL describes the compute AST, metadata and memory dependencies.
BUFFER holds a reference to the device memory where the output will be stored.
Once a Tensor is kernelized, all children will LOAD its BUFFER, instead of fusing it:
```py
child = out+2
child.kernelize()
print(child.lazydata.src[1].arg.ast)
```
```
UOp(Ops.SINK, dtypes.void, arg=None, src=(
UOp(Ops.STORE, dtypes.void, arg=None, src=(
UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(1), arg=0, src=()),
x2:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1,), strides=(0,), offset=0, mask=None, contiguous=True),)), src=()),
UOp(Ops.ADD, dtypes.int, arg=None, src=(
UOp(Ops.LOAD, dtypes.int, arg=None, src=(
UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(1), arg=1, src=()),
x2,)),
UOp(Ops.CONST, dtypes.int, arg=2, src=(
x2,)),)),)),))
```
`Tensor.realize` will execute the kernels and write outputs to memory:
```py
Tensor.realize(out)
print(out) # <Tensor <UOp METAL (1,) int (<Ops.BUFFER: 23>, <buf real:True device:METAL size:1 dtype:dtypes.int offset:0>)> on METAL with grad None>
print(out.item()) # 5
```
<hr />
**Summary**
- The large Tensor graph is built from a mix of data, compute and movement Ops.
- `Tensor.kernelize` splits the Tensor graph into data (BUFFER), compute (KERNEL) and links dependencies with ASSIGN.
- `Tensor.realize` executes KERNELs on device and replaces the Tensor graph with just a BUFFER.
- Kernelize can be called multiple times on a Tensor. This allows for incrementally building the kernel fusion layout of a large Tensor graph, without having to call `realize` or `schedule`.
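Pulling the steps above together, a condensed sketch of the same flow end to end:

```py
from tinygrad import Tensor

a, b, c = Tensor([1]).realize(), Tensor([2]).realize(), Tensor([3]).realize()
out = a * b + c
out.kernelize()        # builds the BUFFER/KERNEL/ASSIGN graph; nothing runs yet
child = out + 2        # loads out's BUFFER instead of re-fusing the multiply-add
child.kernelize()
Tensor.realize(out)    # executes the kernels and writes the result to out's BUFFER
print(out.item())      # 5
```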

View File

@ -68,4 +68,4 @@ We have a simple framework in tinygrad for adding these ALU blocks and achieving
### Indexing
Indexing determines the address of the memory we need to load. GPUs often have less integer math resources than floating point math, so this can sometimes be the bottleneck. We have a symbolic math engine in our rewrite rules to simplifiy indexing before it's emitted to the kernel. Newer NVIDIA GPUs have a "Tensor Memory Accelerator" to assist with fast indexing, however, this is not supported in tinygrad yet.
Indexing determines the address of the memory we need to load. GPUs often have less integer math resources than floating point math, so this can sometimes be the bottleneck. We have a symbolic math engine in our rewrite rules to simplify indexing before it's emitted to the kernel. Newer NVIDIA GPUs have a "Tensor Memory Accelerator" to assist with fast indexing; however, this is not supported in tinygrad yet.

View File

@ -1,10 +1,10 @@
::: tinygrad.ops.UOp
::: tinygrad.uop.ops.UOp
options:
members: false
members_order: source
show_labels: false
::: tinygrad.ops.Ops
::: tinygrad.uop.ops.Ops
options:
members: true
members_order: source

View File

@ -30,7 +30,7 @@ These control the behavior of core tinygrad even when used as a library.
Variable | Possible Value(s) | Description
---|---|---
DEBUG | [1-6] | enable debugging output, with 4 you get operations, timings, speed, generated code and more
DEBUG | [1-7] | enable debugging output (operations, timings, speed, generated code and more)
GPU | [1] | enable the GPU (OpenCL) backend
CUDA | [1] | enable CUDA backend
AMD | [1] | enable AMD backend
@ -50,3 +50,16 @@ JIT | [0-2] | 0=disabled, 1=[jit enabled](quickstart.md#jit
VIZ | [1] | 0=disabled, 1=[viz enabled](https://github.com/tinygrad/tinygrad/tree/master/tinygrad/viz)
ALLOW_TF32 | [1] | enable TensorFloat-32 tensor cores on Ampere or newer GPUs.
WEBGPU_BACKEND | [WGPUBackendType_Metal, ...] | Force select a backend for WebGPU (Metal, DirectX, OpenGL, Vulkan...)
CUDA_PATH | str | Use `CUDA_PATH/include` for CUDA headers for CUDA and NV backends. If not set, TinyGrad will use `/usr/local/cuda/include`, `/usr/include` and `/opt/cuda/include`.
## Debug breakdown
Variable | Value | Description
---|---|---
DEBUG | >= 1 | Enables debugging and lists devices being used
DEBUG | >= 2 | Provides performance metrics for operations, including timing, memory usage, bandwidth for each kernel execution
DEBUG | >= 3 | Outputs buffers used for each kernel (shape, dtype and strides) and the applied optimizations at a kernel level
DEBUG | >= 4 | Outputs the generated kernel code
DEBUG | >= 5 | Displays the intermediate representation of the computation UOps (AST)
DEBUG | >= 6 | Displays the intermediate representation of the computation UOps in a linearized manner, detailing the operation sequence
DEBUG | >= 7 | Outputs the assembly code generated for the target hardware
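DEBUG is normally exported in the shell (e.g. `DEBUG=4 python3 script.py`); as a small sketch, it can also be raised for a single region with the `Context` helper:

```py
from tinygrad import Tensor
from tinygrad.helpers import Context

a, b = Tensor.rand(16, 16), Tensor.rand(16, 16)
with Context(DEBUG=4):      # timings, kernel stats and generated code for this block only
    (a @ b).realize()
```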

View File

@ -42,7 +42,7 @@ There's nothing special about a "Module" class in tinygrad, it's just a normal c
### tinygrad is functional
In tinygrad, you can do [`x.conv2d(w, b)`](tensor/ops.md/#tinygrad.Tensor.conv2d) or [`x.sparse_categorical_cross_entropy(y)`](tensor/ops.md/#tinygrad.Tensor.sparse_categorical_crossentropy). We do also have a [`Conv2D`](nn.md/#tinygrad.nn.Conv2d) class like PyTorch if you want a place to keep the state, but all stateless operations don't have classes.
In tinygrad, you can do [`x.conv2d(w, b)`](tensor/ops.md/#tinygrad.Tensor.conv2d) or [`x.sparse_categorical_crossentropy(y)`](tensor/ops.md/#tinygrad.Tensor.sparse_categorical_crossentropy). We do also have a [`Conv2D`](nn.md/#tinygrad.nn.Conv2d) class like PyTorch if you want a place to keep the state, but all stateless operations don't have classes.
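A short sketch of the two styles side by side, with made-up shapes:

```py
from tinygrad import Tensor, nn

x = Tensor.rand(1, 3, 8, 8)

# functional: stateless call with explicit weight and bias
w, b = Tensor.rand(4, 3, 3, 3), Tensor.rand(4)
y1 = x.conv2d(w, b)

# stateful: nn.Conv2d keeps the parameters for you
conv = nn.Conv2d(3, 4, 3)
y2 = conv(x)
print(y1.shape, y2.shape)   # both (1, 4, 6, 6)
```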
### tinygrad is lazy

View File

@ -5,7 +5,7 @@ tinygrad supports various runtimes, enabling your code to scale across a wide ra
| Runtime | Description | Requirements |
|---------|-------------|--------------|
| [NV](https://github.com/tinygrad/tinygrad/tree/master/tinygrad/runtime/ops_nv.py) | Provides acceleration for NVIDIA GPUs | Ampere/Ada series GPUs |
| [AMD](https://github.com/tinygrad/tinygrad/tree/master/tinygrad/runtime/ops_amd.py) | Provides acceleration for AMD GPUs | RDNA2/RDNA3 series GPUs |
| [AMD](https://github.com/tinygrad/tinygrad/tree/master/tinygrad/runtime/ops_amd.py) | Provides acceleration for AMD GPUs | RDNA2/RDNA3/RDNA4 series GPUs. You can select one of the interfaces for communication by setting `AMD_IFACE=(KFD|PCI)`. See [AMD interfaces](#amd-interfaces) for more details. |
| [QCOM](https://github.com/tinygrad/tinygrad/tree/master/tinygrad/runtime/ops_qcom.py) | Provides acceleration for QCOM GPUs | 6xx series GPUs |
| [METAL](https://github.com/tinygrad/tinygrad/tree/master/tinygrad/runtime/ops_metal.py) | Utilizes Metal for acceleration on Apple devices | M1+ Macs; Metal 3.0+ for `bfloat` support |
| [CUDA](https://github.com/tinygrad/tinygrad/tree/master/tinygrad/runtime/ops_cuda.py) | Utilizes CUDA for acceleration on NVIDIA GPUs | NVIDIA GPU with CUDA support |
@ -64,3 +64,11 @@ rawbuf_ptr = to_mv(cl_buf_desc_ptr, 0x100).cast('Q')[20] # offset 0xA0 is a raw
# create tiny tensor
tiny = Tensor.from_blob(rawbuf_ptr, (h*w*4,), dtype=dtypes.imagef((h,w)), device='QCOM')
```
## AMD Interfaces
AMD backend supports several interfaces for communicating with devices:
* `KFD`: uses the amdgpu driver
* `PCI`: uses the [AM driver](developer/am.md)
You can force an interface by setting `AMD_IFACE` to one of these values. In the case of `AMD_IFACE=PCI`, this may unbind your GPU from the amdgpu driver.
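As an illustration, the same selection can be made from Python by exporting the variables before tinygrad initializes the device (a sketch; normally these are set in the shell, e.g. `AMD=1 AMD_IFACE=PCI python3 script.py`):

```py
import os
os.environ["AMD_IFACE"] = "PCI"   # or "KFD" to go through the amdgpu driver
os.environ["AMD"] = "1"           # select the AMD backend as the default device

from tinygrad import Tensor
print((Tensor.ones(4) + 1).tolist())   # [2.0, 2.0, 2.0, 2.0]
```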

View File

@ -22,6 +22,7 @@
::: tinygrad.Tensor.rand
::: tinygrad.Tensor.rand_like
::: tinygrad.Tensor.randn
::: tinygrad.Tensor.randn_like
::: tinygrad.Tensor.randint
::: tinygrad.Tensor.normal
::: tinygrad.Tensor.uniform

View File

@ -18,6 +18,7 @@
::: tinygrad.Tensor.repeat_interleave
::: tinygrad.Tensor.split
::: tinygrad.Tensor.chunk
::: tinygrad.Tensor.unfold
::: tinygrad.Tensor.meshgrid
::: tinygrad.Tensor.squeeze
::: tinygrad.Tensor.unsqueeze

View File

@ -1,4 +1,4 @@
# model based off https://towardsdatascience.com/going-beyond-99-mnist-handwritten-digits-recognition-cfff96337392
# model based off https://medium.com/data-science/going-beyond-99-mnist-handwritten-digits-recognition-cfff96337392
from typing import List, Callable
from tinygrad import Tensor, TinyJit, nn, GlobalCounters
from tinygrad.helpers import getenv, colored, trange

View File

@ -1,12 +1,12 @@
import sys, onnx, time
from tinygrad import TinyJit, Device, GlobalCounters, fetch, getenv
import sys, onnx, time, pickle
from tinygrad import TinyJit, GlobalCounters, fetch, getenv
from tinygrad.frontend.onnx import OnnxRunner
from extra.onnx_helpers import get_example_inputs, validate
def load_onnx_model(onnx_file):
onnx_model = onnx.load(onnx_file)
run_onnx = OnnxRunner(onnx_model)
run_onnx_jit = TinyJit(lambda **kwargs: next(iter(run_onnx({k:v.to(Device.DEFAULT) for k,v in kwargs.items()}).values())), prune=True)
run_onnx_jit = TinyJit(lambda **kwargs: next(iter(run_onnx({k:v.to(None) for k,v in kwargs.items()}).values())), prune=True, optimize=True)
return run_onnx_jit, run_onnx.graph_inputs
if __name__ == "__main__":

View File

@ -34,8 +34,8 @@ if __name__ == "__main__":
part2 = nn.state.torch_load(fetch("https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B/resolve/main/pytorch_model-00002-of-00002.bin?download=true"))
with Timing("weights -> model: "):
nn.state.load_state_dict(model, fix_bf16(convert_from_huggingface(part1, model, 32, 8)), strict=False)
nn.state.load_state_dict(model, fix_bf16(convert_from_huggingface(part2, model, 32, 8)), strict=False)
nn.state.load_state_dict(model, fix_bf16(convert_from_huggingface(part1, 32, 32, 8)), strict=False)
nn.state.load_state_dict(model, fix_bf16(convert_from_huggingface(part2, 32, 32, 8)), strict=False)
if not os.path.isfile("/tmp/tokenizer.model"): create_fixed_tokenizer("/tmp/tokenizer.model")
spp = SentencePieceProcessor(model_file="/tmp/tokenizer.model")

View File

@ -3,10 +3,11 @@ import os, argparse, contextlib
from typing import Optional, Union
with contextlib.suppress(ImportError): import tiktoken
from tinygrad import Tensor, TinyJit, Device, GlobalCounters, Variable, dtypes
from tinygrad.ops import UOp
from tinygrad.uop.ops import UOp
from tinygrad.helpers import Timing, DEBUG, JIT, getenv, fetch, colored, trange
from tinygrad.nn import Embedding, Linear, LayerNorm
from tinygrad.nn.state import gguf_load, torch_load, load_state_dict, get_state_dict
from extra.bench_log import BenchEvent, WallTimeEvent
MAX_CONTEXT = getenv("MAX_CONTEXT", 128)
HALF = getenv("HALF")
@ -134,6 +135,7 @@ class GPT2:
# lm head and wte are tied
weights['lm_head.weight'] = weights['wte.weight']
with WallTimeEvent(BenchEvent.LOAD_WEIGHTS):
load_state_dict(model, weights)
if HALF:
@ -167,6 +169,7 @@ class GPT2:
return key
state_dict = { _remap_gguf_key(k): v for k, v in state_dict.items() }
model = Transformer(**gpt2_params)
with WallTimeEvent(BenchEvent.LOAD_WEIGHTS):
load_state_dict(model, state_dict)
return GPT2(model, tiktoken.get_encoding("gpt2"))
@ -185,11 +188,12 @@ class GPT2:
with Timing("ran model in ", on_exit=(lambda et: (f", {(GlobalCounters.time_sum_s-st)*1e3:.2f} ms on GPU" if DEBUG>=2 else "")+
f", {GlobalCounters.global_ops*1e-9:.2f} GOPS, {GlobalCounters.global_mem*1e-9:.2f} GB"+
(f", {GlobalCounters.global_mem*1e-9/(GlobalCounters.time_sum_s-st):.2f} GB/s" if DEBUG>=2 else "")) if DEBUG else None, enabled=timing):
with WallTimeEvent(BenchEvent.STEP):
if batch_size == 1 and len(toks[0][start_pos:]) == 1:
tokens = Variable("tokens", 0, VOCAB_SIZE).bind(toks[0][start_pos])
else:
tokens = Tensor([x[start_pos:] for x in toks])
tok = self.model(tokens, Variable("start_pos", 1 if start_pos else 0, MAX_CONTEXT).bind(start_pos), temperature).tolist()
tok = self.model(tokens, Variable("start_pos", 1 if start_pos else 0, MAX_CONTEXT-1).bind(start_pos), temperature).tolist()
start_pos = len(toks[0])
for i,t in enumerate(tok): toks[i].append(t)
return [self.tokenizer.decode(x) for x in toks]

View File

@ -3,7 +3,8 @@ from extra.mcts_search import mcts_search
from examples.mlperf.helpers import get_mlperf_bert_model
from tinygrad import Tensor, Device, dtypes, nn
from tinygrad.codegen.kernel import Kernel
from tinygrad.ops import Ops, sym_infer
from tinygrad.codegen.heuristic import hand_coded_optimizations
from tinygrad.uop.ops import Ops, sym_infer
from tinygrad.device import Compiled
from tinygrad.engine.search import beam_search, bufs_from_lin
from tinygrad.helpers import DEBUG, ansilen, getenv, colored, TRACEMETA
@ -83,7 +84,7 @@ if __name__ == "__main__":
# always try hand coded opt
lin = Kernel(si.ast, opts=device.renderer)
lin.hand_coded_optimizations()
lin.apply_opts(hand_coded_optimizations(lin))
lins.append((lin, "HC"))
# maybe try tensor cores

View File

@ -11,6 +11,7 @@ from tinygrad import nn, dtypes, Tensor, Device, GlobalCounters, TinyJit
from tinygrad.nn.state import get_state_dict, get_parameters
from tinygrad.nn import optim
from tinygrad.helpers import Context, BEAM, WINO, getenv, colored, prod
from extra.bench_log import BenchEvent, WallTimeEvent
cifar_mean = [0.4913997551666284, 0.48215855929893703, 0.4465309133731618]
cifar_std = [0.24703225141799082, 0.24348516474564, 0.26158783926049628]
@ -395,6 +396,8 @@ def train_cifar():
if STEPS == 0 or i == STEPS: break
GlobalCounters.reset()
with WallTimeEvent(BenchEvent.STEP):
X, Y = next(batcher)
if len(GPUS) > 1:
X.shard_(GPUS, axis=0)
@ -409,6 +412,7 @@ def train_cifar():
if model_ema is None:
model_ema = modelEMA(W, model)
model_ema.update(model, Tensor([projected_ema_decay_val*(i/STEPS)**hyp['ema']['decay_pow']]))
cl = time.monotonic()
device_str = loss.device if isinstance(loss.device, str) else f"{loss.device[0]} * {len(loss.device)}"
# 53 221.74 ms run, 2.22 ms python, 219.52 ms CL, 803.39 loss, 0.000807 LR, 4.66 GB used, 3042.49 GFLOPS, 674.65 GOPS
@ -424,4 +428,5 @@ def train_cifar():
raise ValueError(colored(f"{eval_acc_pct=} < {target}", "red"))
if __name__ == "__main__":
with WallTimeEvent(BenchEvent.FULL):
train_cifar()

View File

@ -13,6 +13,7 @@ from extra.models.llama import Transformer, convert_from_huggingface, fix_bf16
from sentencepiece import SentencePieceProcessor
import tiktoken, sys
from tiktoken.load import load_tiktoken_bpe
from extra.bench_log import BenchEvent, WallTimeEvent
MAX_CONTEXT = getenv("MAX_CONTEXT", 4096)
@ -206,16 +207,19 @@ class LLaMa:
model = Transformer(**params["args"], linear=linear, max_context=MAX_CONTEXT, jit=bool(JIT))
with WallTimeEvent(BenchEvent.LOAD_WEIGHTS):
if model_path.is_dir():
weights = concat_weights([load(filename) for filename in [f"{model_path}/consolidated.{i:02d}.pth" for i in range(params["files"])]], device[0] if isinstance(device, tuple) else device)
else:
weights = load(str(model_path))
if "model.embed_tokens.weight" in weights:
weights = convert_from_huggingface(weights, model, params["args"]["n_heads"], params["args"].get("n_kv_heads", params["args"]["n_heads"]))
weights = convert_from_huggingface(weights, params["args"]["n_layers"], params["args"]["n_heads"], params["args"].get("n_kv_heads", params["args"]["n_heads"]))
weights = fix_bf16(weights)
with Context(BEAM=0):
# prevent tracking model weights
# this is a part of a larger problem with BUFFER UOps and gc in TRACK_MATCH_STATS=2
with Context(BEAM=0, TRACK_MATCH_STATS=0):
# quantize
if quantize is not None:
weights = linear.quantize(weights, device)
@ -475,6 +479,7 @@ After you are done speaking, output [EOS]. You are not Chad.
next_tok = Tensor([toks[start_pos:]], device=device) if tok_tensor is None or (len(toks)-start_pos) > 1 else tok_tensor.reshape(1, 1)
with Profiling(enabled=args.profile):
with Timing("total ", enabled=args.timing, on_exit=lambda x: f", {1e9/x:.2f} tok/s, {GlobalCounters.global_mem/x:.2f} GB/s, param {param_bytes/x:.2f} GB/s"):
with WallTimeEvent(BenchEvent.STEP):
with Timing("enqueue in ", on_exit=(lambda et: (f", {(GlobalCounters.time_sum_s-st)*1e3:.2f} ms on GPU" if DEBUG>=2 else "")+
f", {GlobalCounters.global_ops*1e-9:.2f} GOPS, {GlobalCounters.global_mem*1e-9:.2f} GB"+
(f", {GlobalCounters.global_mem*1e-9/(GlobalCounters.time_sum_s-st):.2f} GB/s, param {param_bytes*1e-9/(GlobalCounters.time_sum_s-st):.2f} GB/s" if DEBUG>=2 else "")) if DEBUG else None, enabled=args.timing):

View File

@ -7,6 +7,7 @@ from extra.models.llama import Transformer, convert_from_huggingface, convert_fr
from tinygrad.nn.state import safe_load, torch_load, load_state_dict, get_parameters, gguf_load
from tinygrad import Tensor, dtypes, nn, Context, Device, GlobalCounters
from tinygrad.helpers import Profiling, Timing, DEBUG, colored, fetch, tqdm
from extra.bench_log import BenchEvent, WallTimeEvent
class Tokenizer:
pat_str = r"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"
@ -126,7 +127,8 @@ def NF4Linear(block_size):
return x.linear(unscaled.reshape(self.out_features, self.in_features).T)
@staticmethod
def quantize(state_dict: dict[str, Tensor], device, scale_dtype=dtypes.float16) -> dict[str, Tensor]:
def quantize(state_dict: dict[str, Tensor], device, scale_dtype=dtypes.float16, quantize_embeds=False) -> dict[str, Tensor]:
assert not quantize_embeds # TODO: support this?
new_state_dict = {}
for k, v in state_dict.items():
if "feed_forward" in k or "attention.w" in k:
@ -165,7 +167,9 @@ def build_transformer(model_path: Path, model_size="8B", quantize=None, scale_dt
model = Transformer(**MODEL_PARAMS[model_size]["args"], linear=linear, embedding=embedding, max_context=max_context, jit=True)
if not load_weights: return model
# load weights
with WallTimeEvent(BenchEvent.LOAD_WEIGHTS):
if model_path.is_dir():
if (model_path / "model.safetensors.index.json").exists(): weights = load(str(model_path / "model.safetensors.index.json"))
elif (model_path / "model.safetensors").exists(): weights = load(str(model_path / "model.safetensors"))
@ -173,9 +177,9 @@ def build_transformer(model_path: Path, model_size="8B", quantize=None, scale_dt
else:
weights = load(str(model_path))
if "model.embed_tokens.weight" in weights:
weights = convert_from_huggingface(weights, model, MODEL_PARAMS[model_size]["args"]["n_heads"], MODEL_PARAMS[model_size]["args"]["n_kv_heads"])
weights = convert_from_huggingface(weights, MODEL_PARAMS[model_size]["args"]["n_layers"], MODEL_PARAMS[model_size]["args"]["n_heads"], MODEL_PARAMS[model_size]["args"]["n_kv_heads"])
elif "token_embd.weight" in weights:
weights = convert_from_gguf(weights, model)
weights = convert_from_gguf(weights, MODEL_PARAMS[model_size]["args"]["n_layers"])
weights = fix_bf16(weights)
with Context(BEAM=0):
@ -434,6 +438,7 @@ if __name__ == "__main__":
st = GlobalCounters.time_sum_s
with Profiling(enabled=args.profile):
with Timing("total ", on_exit=lambda x: f", {1e9/x:.2f} tok/s, {GlobalCounters.global_mem/x:.2f} GB/s, param {param_bytes/x:.2f} GB/s"):
with WallTimeEvent(BenchEvent.STEP):
with Timing("enqueue in ", on_exit=(lambda et: (f", {(GlobalCounters.time_sum_s-st)*1e3:.2f} ms on GPU" if DEBUG>=2 else "")+
f", {GlobalCounters.global_ops*1e-9:.2f} GOPS, {GlobalCounters.global_mem*1e-9:.2f} GB"+
(f", {GlobalCounters.global_mem*1e-9/(GlobalCounters.time_sum_s-st):.2f} GB/s, param {param_bytes*1e-9/(GlobalCounters.time_sum_s-st):.2f} GB/s" if DEBUG>=2 else "")) if DEBUG else None):

View File

@ -7,7 +7,7 @@ from train_gpt2 import GPT, GPTConfig
from tinygrad.helpers import dedup, to_function_name, flatten, getenv, GlobalCounters, ansilen, to_function_name
from tinygrad.engine.realize import get_kernel, run_schedule
from tinygrad.engine.memory import memory_planner
from tinygrad.ops import Ops
from tinygrad.uop.ops import Ops
TIMING = getenv("TIMING")
@ -25,7 +25,7 @@ if __name__ == "__main__":
Tensor.training = True
optimizer = nn.optim.Adam(nn.state.get_parameters(model), lr=1e-4)
warmup_count = getenv("WARMUP", 3)
for i in range(warmup_count): # TODO: why does it take three and not two to stablize
for i in range(warmup_count): # TODO: why does it take three and not two to stabilize
GlobalCounters.reset()
X = Tensor.empty(4, 64, dtype=dtypes.int).reshape(B, T)
Y = Tensor.empty(4, 64, dtype=dtypes.int).reshape(B, T)

View File

@ -99,7 +99,7 @@ class GPT:
def __call__(self, idx:Tensor, targets=None):
b, t = idx.shape
pos = Tensor.arange(0, t)
pos = Tensor.arange(0, t, device=idx.device)
tok_emb = self.wte(idx) # token embeddings of shape (b, t, n_embd)
pos_emb = self.wpe(pos) # position embeddings of shape (t, n_embd)
@ -124,6 +124,7 @@ if __name__ == "__main__":
parser.add_argument("--batch_size", type=int, default=4, help="batch size")
parser.add_argument("--sequence_length", type=int, default=64, help="sequence length")
parser.add_argument("--skip_test", action="store_true", help="skip test")
parser.add_argument("--gpus", type=int, default=1, help="sequence length")
args = parser.parse_args()
B, T = args.batch_size, args.sequence_length
assert 1 <= T <= 1024
@ -131,6 +132,10 @@ if __name__ == "__main__":
model = GPT(GPTConfig(n_layer=12, n_head=12, n_embd=768))
model.load_pretrained()
if args.gpus > 1:
GPUS = tuple(f'{Device.DEFAULT}:{i}' for i in range(args.gpus))
for x in nn.state.get_parameters(model): x.to_(GPUS) # we put a copy of the model on every GPU
# init the tokenizer
enc = tiktoken.get_encoding("gpt2")
encode = lambda s: enc.encode(s, allowed_special={"<|endoftext|>"})
@ -165,23 +170,32 @@ if __name__ == "__main__":
x, y = next(data_iter) # we'll overfit this batch below
optimizer = nn.optim.AdamW(nn.state.get_parameters(model), lr=1e-4, weight_decay=0)
print(f"model state: {sum(x.nbytes() for x in nn.state.get_parameters(model))/1e9:.2f} GB")
print(f"optimizer state: {sum(x.nbytes() for x in nn.state.get_parameters(optimizer))/1e9:.2f} GB")
# shard the data on axis 0
if args.gpus > 1: x, y = x.shard(GPUS, axis=0), y.shard(GPUS, axis=0)
@TinyJit
def step(x, y):
@Tensor.train()
def step(x:Tensor, y:Tensor) -> Tensor:
_, loss = model(x, y)
optimizer.zero_grad()
loss.backward()
return loss.realize(*optimizer.schedule_step())
with Tensor.train():
for i in range(args.num_iterations):
GlobalCounters.reset()
t0 = time.time()
t0 = time.perf_counter()
loss = step(x.contiguous(), y.contiguous())
Device[Device.DEFAULT].synchronize()
t1 = time.time()
print(f"iteration {i}, loss: {loss.item():.6f}, time: {(t1-t0)*1000:.3f}ms, {int(B*T/(t1-t0))} tok/s")
t1 = time.perf_counter()
print(f"iteration {i}, loss: {loss.item():.6f}, time: {(t1-t0)*1000:.3f}ms, {int(B*T/(t1-t0))} tok/s, {GlobalCounters.global_mem/1e9:.2f} GB")
if not args.skip_test:
# copy back to single gpu for test
if args.gpus > 1:
for x in nn.state.get_parameters(model): x.to_(Device.DEFAULT)
start = "<|endoftext|>"
start_ids = encode(start)
x = (Tensor(start_ids)[None, ...])
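The multi-GPU path added above follows tinygrad's usual data-parallel pattern: replicate the parameters with `to_` and shard the batch with `shard(..., axis=0)`. A minimal standalone sketch (device count and layer sizes are placeholders):

```py
from tinygrad import Tensor, Device, nn

GPUS = tuple(f"{Device.DEFAULT}:{i}" for i in range(2))   # placeholder: two devices
layer = nn.Linear(64, 64)
for p in nn.state.get_parameters(layer):
    p.to_(GPUS)                                           # copy of the weights on every device

x = Tensor.rand(8, 64).shard(GPUS, axis=0)                # split the batch along axis 0
print(layer(x).shape)                                     # (8, 64)
```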

View File

@ -0,0 +1,157 @@
# much taken from https://github.com/cloneofsimo/minRF
from tinygrad import Tensor, nn, GlobalCounters, TinyJit
from tinygrad.helpers import getenv, trange
from extra.models.llama import Attention, FeedForward, precompute_freqs_cis
def modulate(x:Tensor, shift:Tensor, scale:Tensor) -> Tensor: return x * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1)
# TODO: why doesn't the TimestepEmbedder from minRF work?
class TimestepEmbedder:
def __init__(self, hidden_size): self.mlp = [nn.Linear(1, hidden_size), Tensor.silu, nn.Linear(hidden_size, hidden_size)]
def __call__(self, t:Tensor): return t.reshape(-1, 1).sequential(self.mlp)
class TransformerBlock:
def __init__(self, dim, n_heads, norm_eps=1e-5):
self.attention = Attention(dim, n_heads)
self.feed_forward = FeedForward(dim, 4*dim)
self.attention_norm = nn.LayerNorm(dim, eps=norm_eps)
self.ffn_norm = nn.LayerNorm(dim, eps=norm_eps)
self.adaLN_modulation = nn.Linear(dim, 6 * dim, bias=True)
def __call__(self, x:Tensor, freqs_cis:Tensor, adaln_input:Tensor):
shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.adaLN_modulation(adaln_input.silu()).chunk(6, dim=1)
x = x + gate_msa.unsqueeze(1) * self.attention(modulate(self.attention_norm(x), shift_msa, scale_msa), 0, freqs_cis)
x = x + gate_mlp.unsqueeze(1) * self.feed_forward(modulate(self.ffn_norm(x), shift_mlp, scale_mlp))
return x.contiguous().contiguous_backward()
class FinalLayer:
def __init__(self, dim, patch_size, out_channels):
self.norm_final = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6)
self.linear = nn.Linear(dim, patch_size*patch_size*out_channels, bias=True)
self.adaLN_modulation = nn.Linear(dim, 2 * dim, bias=True)
# init weights/bias to 0
self.linear.weight.replace(self.linear.weight.zeros_like().contiguous())
self.linear.bias.replace(self.linear.bias.zeros_like().contiguous())
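# zero-initializing the output projection makes the untrained model predict an all-zero velocity field (DiT-style final-layer init)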
def __call__(self, x:Tensor, c:Tensor):
shift, scale = self.adaLN_modulation(c.silu()).chunk(2, dim=1)
x = modulate(self.norm_final(x), shift, scale)
return self.linear(x)
# channels=1, input_size=32, dim=64, n_layers=6, n_heads=4, num_classes=10
class DiT_Llama:
def __init__(self, in_channels=1, dim=64, n_layers=6, n_heads=4, num_classes=10, patch_size=2):
self.patch_size = patch_size
self.out_channels = in_channels
self.num_classes = num_classes
self.init_conv_seq = [
nn.Conv2d(in_channels, dim // 2, kernel_size=5, padding=2, stride=1), Tensor.silu, nn.GroupNorm(32, dim//2),
nn.Conv2d(dim //2, dim // 2, kernel_size=5, padding=2, stride=1), Tensor.silu, nn.GroupNorm(32, dim//2),
]
self.x_embedder = nn.Linear(self.patch_size * self.patch_size * dim // 2, dim, bias=True)
self.t_embedder = TimestepEmbedder(dim)
self.y_embedder = nn.Embedding(num_classes+1, dim)
self.final_layer = FinalLayer(dim, self.patch_size, self.out_channels)
self.freqs_cis = precompute_freqs_cis(dim // n_heads, 4096)
self.layers = [TransformerBlock(dim, n_heads) for _ in range(n_layers)]
def unpatchify(self, x:Tensor):
c, p = self.out_channels, self.patch_size
h = w = int(x.shape[1] ** 0.5)
x = x.reshape(shape=(x.shape[0], h, w, p, p, c))
x = x.rearrange("n h w p q c -> n c h p w q")
return x.reshape(shape=(x.shape[0], c, h * p, h * p))
def patchify(self, x:Tensor):
B, C, H, W = x.shape
x = x.reshape(B, C, H // self.patch_size, self.patch_size, W // self.patch_size, self.patch_size)
x = x.permute(0, 2, 4, 1, 3, 5).flatten(-3).flatten(1, 2)
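# e.g. a (B, dim//2, 32, 32) feature map with patch_size=2 becomes (B, 16*16=256, (dim//2)*2*2), matching x_embedder's input width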
return x # B <H*W ish> <C*patch_size*patch_size>
def __call__(self, x:Tensor, t:Tensor, y:Tensor) -> Tensor:
x = x.sequential(self.init_conv_seq)
x = self.patchify(x)
x = self.x_embedder(x)
adaln_input = self.t_embedder(t) + self.y_embedder(y)
adaln_input = adaln_input.contiguous()
for layer in self.layers:
x = layer(x, self.freqs_cis[:, :x.size(1)], adaln_input=adaln_input)
x = self.final_layer(x, adaln_input)
return self.unpatchify(x)
def rf(self, x:Tensor, cond:Tensor):
b = x.shape[0]
# self.ln is True
t = Tensor.randn((b,)).sigmoid()
texp = t.view([b, *([1] * len(x.shape[1:]))])
# conditional dropout
dropout_prob = 0.1
cond = (Tensor.rand(cond.shape[0]) < dropout_prob).where(cond.full_like(self.num_classes), cond)
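# dropped labels are set to num_classes, the extra row in y_embedder, which serves as the null class for classifier-free guidance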
# this is rectified flow
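# zt = (1-t)*x + t*z1 moves from data (t=0) to noise (t=1); the velocity of that straight path is z1 - x, which vtheta is regressed onto below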
z1 = x.randn_like()
zt = (1 - texp) * x + texp * z1
vtheta = self(zt, t, cond)
# MSE loss
return ((z1 - x) - vtheta).square().mean()
def sample(self, z, cond, null_cond, sample_steps=50, cfg=2.0):
b = z.size(0)
dt = Tensor.full((b,)+(1,)*len(z.shape[1:]), fill_value=1.0/sample_steps).contiguous()
images = [z]
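# Euler-integrate the learned velocity field from t=1 (noise) back toward t=0 (data) in sample_steps steps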
for i in range(sample_steps, 0, -1):
t = Tensor.full((b,), fill_value=i/sample_steps).contiguous()
vc = self(z, t, cond)
vu = self(z, t, null_cond)
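# classifier-free guidance: extrapolate from the unconditional velocity toward the conditional one by a factor of cfg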
vc = vu + cfg * (vc - vu)
z = z - dt * vc
z = z.contiguous()
images.append(z)
return images
def mviz(t:Tensor):
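# prints a batch of 1x32x32 images side by side in the terminal as 256-color grayscale blocks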
assert len(t.shape) == 4 and t.shape[1] == 1
ft = t.permute(1,2,0,3).reshape(32, -1)
assert ft.shape[-1]%32 == 0
print("")
for y in ((ft+1)/2).clamp(0,1).tolist():
ln = [f"\033[38;5;{232+int(x*23)}m██" for x in y]
print(''.join(ln) + "\033[0m")
if __name__ == "__main__":
X_train, Y_train, X_test, Y_test = nn.datasets.mnist()
X_train = X_train.pad((2,2,2,2))
X_train = ((X_train.float()/255)-0.5)/0.5
Y_train = Y_train.int()
model = DiT_Llama(patch_size=getenv("PATCH_SIZE", 2))
for r in nn.state.get_parameters(model): r.realize()
optimizer = nn.optim.Adam(nn.state.get_parameters(model), lr=5e-4)
@TinyJit
@Tensor.train()
def train_step():
if getenv("OVERFIT"): samples = Tensor.zeros(getenv("BS", 256), dtype='int')
else: samples = Tensor.randint(getenv("BS", 256), high=X_train.shape[0])
optimizer.zero_grad()
loss = model.rf(X_train[samples], Y_train[samples])
loss.backward()
optimizer.step()
return loss
@TinyJit
@Tensor.test()
def sample(z:Tensor, cond:Tensor) -> Tensor:
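# label 10 (== num_classes) selects the reserved null embedding, i.e. the unconditional branch for CFG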
return model.sample(z, cond, Tensor.full_like(cond, 10), sample_steps=getenv("SAMPLE_STEPS", 20))[-1]
for steps in (t:=trange(getenv("STEPS", 5000))):
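# every 10 steps, sample three images conditioned on digits 5, 0 and 4 and render them with mviz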
if steps%10 == 0: mviz(sample(Tensor.randn(3, 1, 32, 32), Tensor([5,0,4], dtype='int')))
GlobalCounters.reset()
loss = train_step()
t.set_description(f"loss: {loss.item():9.2f}")

View File

@ -3,6 +3,7 @@ from tinygrad import Tensor, nn, Device, GlobalCounters, Variable
from tinygrad.helpers import Timing, Profiling, CI, tqdm
from tinygrad.nn.state import torch_load, get_state_dict
from extra.models.llama import FeedForward, Transformer
from extra.bench_log import BenchEvent, WallTimeEvent
class MixtureFeedForward:
def __init__(self, num_experts:int, dim:int, hidden_dim:int, linear=nn.Linear):
@ -30,6 +31,7 @@ if __name__ == "__main__":
help="Path to the downloaded weights")
args = parser.parse_args()
with WallTimeEvent(BenchEvent.LOAD_WEIGHTS):
state = torch_load(args.weights + "/consolidated.00.pth.b")
model = Transformer(n_layers=32, dim=4096, hidden_dim=14336, n_heads=32, n_kv_heads=8, norm_eps=1e-5, vocab_size=32000, feed_forward=functools.partial(MixtureFeedForward, 8), jit=False)
model_state_dict = get_state_dict(model)
@ -53,6 +55,7 @@ if __name__ == "__main__":
GlobalCounters.reset()
with Profiling(sort="time", frac=0.1, enabled=args.profile):
with Timing("total ", enabled=args.timing, on_exit=lambda x: f", {1e9/x:.2f} tok/sec"):
with WallTimeEvent(BenchEvent.STEP):
tok = model(Tensor([toks[start_pos:]]), 0 if start_pos == 0 else Variable("start_pos", 1, 1024).bind(start_pos), args.temperature).item()
toks.append(tok)
start_pos += 1

View File

@ -5,7 +5,7 @@ from multiprocessing import Queue, Process, shared_memory, connection, Lock, cpu
import numpy as np
from tinygrad import dtypes, Tensor
from tinygrad.helpers import getenv, prod, Context, round_up, tqdm
from tinygrad.helpers import getenv, prod, Context, round_up, tqdm, OSX
### ResNet
@ -129,14 +129,15 @@ def batch_load_resnet(batch_size=64, val=False, shuffle=True, seed=None, pad_fir
q_in, q_out = Queue(), Queue()
sz = (batch_size*BATCH_COUNT, 224, 224, 3)
if os.path.exists("/dev/shm/resnet_X"): os.unlink("/dev/shm/resnet_X")
shm = shared_memory.SharedMemory(name="resnet_X", create=True, size=prod(sz))
shm_name = "resnet_X_val" if val else "resnet_X_train"
if not OSX and os.path.exists(f"/dev/shm/{shm_name}"): os.unlink(f"/dev/shm/{shm_name}")
shm = shared_memory.SharedMemory(name=shm_name, create=True, size=prod(sz))
procs = []
try:
# disk:shm is slower
#X = Tensor.empty(*sz, dtype=dtypes.uint8, device=f"disk:shm:{shm.name}")
X = Tensor.empty(*sz, dtype=dtypes.uint8, device=f"disk:/dev/shm/resnet_X")
if OSX: X = Tensor.empty(*sz, dtype=dtypes.uint8, device=f"disk:shm:{shm.name}")
else: X = Tensor.empty(*sz, dtype=dtypes.uint8, device=f"disk:/dev/shm/{shm_name}")
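# macOS has no /dev/shm, so the shm: disk device backs the tensor there instead of a /dev/shm file path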
Y = [None] * (batch_size*BATCH_COUNT)
for _ in range(cpu_count()):
@ -532,7 +533,7 @@ if __name__ == "__main__":
def load_retinanet(val):
from extra.datasets.openimages import BASEDIR, download_dataset
from pycocotools.coco import COCO
dataset = COCO(download_dataset(base_dir:=getenv("BASE_DIR", BASEDIR), "validation" if val else "train"))
dataset = COCO(download_dataset(base_dir:=getenv("BASEDIR", BASEDIR), "validation" if val else "train"))
with tqdm(total=len(dataset.imgs.keys())) as pbar:
for x in batch_load_retinanet(dataset, val, base_dir):
pbar.update(x[0].shape[0])
