KerryGold Model, AGNOS12.4, AdjustLaneChange, EnglighSound (#182)

* Vegetarian Filet o Fish model

* fix.. atc..

* test cluster_speed_limit

* fix.. cluster_speed_limit.. 2

* fix.. clusterspeedlimit3

* cruise speed to roadlimit speed

* fix..

* fix.. eng

* deltaUp/Down for lanechange

* fix.. atc desire...

* fix..

* ff

* ff

* fix..

* fix.. eng

* fix engsound

* Update desire_helper.py

* fix.. connect...

* fix curve_min speed

* Revert "fix curve_min speed"

This reverts commit fcc9c2eb14eb3504abef3e420db93e8882e56f37.

* Reapply "fix curve_min speed"

This reverts commit 2d2bba476c58a7b4e13bac3c3ad0e4694c95515d.

* fix.. auto speed up.. roadlimit

* fix.. atc auto lanechange...

* Update desire_helper.py

* Update cruise.py

* debug atc...

* fix.. waze alert offset..

* fix..

* test atc..

* fix..

* fix.. atc

* atc test..

* fix.. atc

* fix.. atc2

* fix.. atc3

* KerryGold Model.  latsmooth_sec = 0.0

* lat smooth seconds 0.13

* fix comment

* fix.. auto cruise, and speed unit

* change lanemode switching.

* erase mazda lkas button.
This commit is contained in:
carrot 2025-06-22 10:51:42 +09:00 committed by GitHub
parent efee1712aa
commit 9c7833faf9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
385 changed files with 12951 additions and 12621 deletions

View File

@ -236,7 +236,6 @@ inline static std::unordered_map<std::string, uint32_t> keys = {
{"HapticFeedbackWhenSpeedCamera", PERSISTENT}, {"HapticFeedbackWhenSpeedCamera", PERSISTENT},
{"UseLaneLineSpeed", PERSISTENT}, {"UseLaneLineSpeed", PERSISTENT},
{"UseLaneLineCurveSpeed", PERSISTENT}, {"UseLaneLineCurveSpeed", PERSISTENT},
{"UseLaneLineSpeedApply", PERSISTENT},
{"AdjustLaneOffset", PERSISTENT}, {"AdjustLaneOffset", PERSISTENT},
{"LaneChangeNeedTorque", PERSISTENT}, {"LaneChangeNeedTorque", PERSISTENT},
{"LaneChangeDelay", PERSISTENT }, {"LaneChangeDelay", PERSISTENT },
@ -261,6 +260,8 @@ inline static std::unordered_map<std::string, uint32_t> keys = {
{"CustomSteerMax", PERSISTENT}, {"CustomSteerMax", PERSISTENT},
{"CustomSteerDeltaUp", PERSISTENT}, {"CustomSteerDeltaUp", PERSISTENT},
{"CustomSteerDeltaDown", PERSISTENT}, {"CustomSteerDeltaDown", PERSISTENT},
{"CustomSteerDeltaUpLC", PERSISTENT},
{"CustomSteerDeltaDownLC", PERSISTENT},
{"SpeedFromPCM", PERSISTENT}, {"SpeedFromPCM", PERSISTENT},
{"MaxTimeOffroadMin", PERSISTENT}, {"MaxTimeOffroadMin", PERSISTENT},
{"DisableDM", PERSISTENT}, {"DisableDM", PERSISTENT},

View File

@ -7,7 +7,7 @@ export OPENBLAS_NUM_THREADS=1
export VECLIB_MAXIMUM_THREADS=1 export VECLIB_MAXIMUM_THREADS=1
if [ -z "$AGNOS_VERSION" ]; then if [ -z "$AGNOS_VERSION" ]; then
export AGNOS_VERSION="12.3" export AGNOS_VERSION="12.4"
fi fi
export STAGING_ROOT="/data/safe_staging" export STAGING_ROOT="/data/safe_staging"

View File

@ -246,6 +246,7 @@ struct CarState {
speedLimitDistance @65 :Float32; speedLimitDistance @65 :Float32;
gearStep @66 :Int16; gearStep @66 :Int16;
tpms @67 : Tpms; tpms @67 : Tpms;
useLaneLineSpeed @68 : Float32;
struct Tpms { struct Tpms {
fl @0 :Float32; fl @0 :Float32;

View File

@ -96,6 +96,9 @@ class CarController(CarControllerBase):
self.activeCarrot = 0 self.activeCarrot = 0
self.camera_scc_params = Params().get_int("HyundaiCameraSCC") self.camera_scc_params = Params().get_int("HyundaiCameraSCC")
self.steerDeltaUpOrg = self.steerDeltaUp = self.steerDeltaUpLC = self.params.STEER_DELTA_UP
self.steerDeltaDownOrg = self.steerDeltaDown = self.steerDeltaDownLC = self.params.STEER_DELTA_DOWN
def update(self, CC, CS, now_nanos): def update(self, CC, CS, now_nanos):
if self.frame % 50 == 0: if self.frame % 50 == 0:
@ -104,14 +107,30 @@ class CarController(CarControllerBase):
steerMax = params.get_int("CustomSteerMax") steerMax = params.get_int("CustomSteerMax")
steerDeltaUp = params.get_int("CustomSteerDeltaUp") steerDeltaUp = params.get_int("CustomSteerDeltaUp")
steerDeltaDown = params.get_int("CustomSteerDeltaDown") steerDeltaDown = params.get_int("CustomSteerDeltaDown")
steerDeltaUpLC = params.get_int("CustomSteerDeltaUpLC")
steerDeltaDownLC = params.get_int("CustomSteerDeltaDownLC")
if steerMax > 0: if steerMax > 0:
self.params.STEER_MAX = steerMax self.params.STEER_MAX = steerMax
if steerDeltaUp > 0: if steerDeltaUp > 0:
self.params.STEER_DELTA_UP = steerDeltaUp self.steerDeltaUp = steerDeltaUp
#self.params.ANGLE_TORQUE_UP_RATE = steerDeltaUp #self.params.ANGLE_TORQUE_UP_RATE = steerDeltaUp
else:
self.steerDeltaUp = self.steerDeltaUpOrg
if steerDeltaDown > 0: if steerDeltaDown > 0:
self.params.STEER_DELTA_DOWN = steerDeltaDown self.steerDeltaDown = steerDeltaDown
#self.params.ANGLE_TORQUE_DOWN_RATE = steerDeltaDown #self.params.ANGLE_TORQUE_DOWN_RATE = steerDeltaDown
else:
self.steerDeltaDown = self.steerDeltaDownOrg
if steerDeltaUpLC > 0:
self.steerDeltaUpLC = steerDeltaUpLC
else:
self.steerDeltaUpLC = self.steerDeltaUp
if steerDeltaDownLC > 0:
self.steerDeltaDownLC = steerDeltaDownLC
else:
self.steerDeltaDownLC = self.steerDeltaDown
self.soft_hold_mode = 1 if params.get_int("AutoCruiseControl") > 1 else 2 self.soft_hold_mode = 1 if params.get_int("AutoCruiseControl") > 1 else 2
self.hapticFeedbackWhenSpeedCamera = int(params.get_int("HapticFeedbackWhenSpeedCamera")) self.hapticFeedbackWhenSpeedCamera = int(params.get_int("HapticFeedbackWhenSpeedCamera"))
@ -126,6 +145,13 @@ class CarController(CarControllerBase):
actuators = CC.actuators actuators = CC.actuators
hud_control = CC.hudControl hud_control = CC.hudControl
if hud_control.modelDesire in [3,4]:
self.params.STEER_DELTA_UP = self.steerDeltaUpLC
self.params.STEER_DELTA_DOWN = self.steerDeltaDownLC
else:
self.params.STEER_DELTA_UP = self.steerDeltaUp
self.params.STEER_DELTA_DOWN = self.steerDeltaDown
angle_control = self.CP.flags & HyundaiFlags.ANGLE_CONTROL angle_control = self.CP.flags & HyundaiFlags.ANGLE_CONTROL
# steering torque # steering torque

View File

@ -76,6 +76,7 @@ class CarState(CarStateBase):
self.cruise_buttons_msg = None self.cruise_buttons_msg = None
self.hda2_lfa_block_msg = None self.hda2_lfa_block_msg = None
self.cluster_speed_limit_msg = None
# On some cars, CLU15->CF_Clu_VehicleSpeed can oscillate faster than the dash updates. Sample at 5 Hz # On some cars, CLU15->CF_Clu_VehicleSpeed can oscillate faster than the dash updates. Sample at 5 Hz
self.cluster_speed = 0 self.cluster_speed = 0
@ -461,6 +462,9 @@ class CarState(CarStateBase):
if "TCS" in cp.vl: if "TCS" in cp.vl:
self.tcs_info_373 = copy.copy(cp.vl.get("TCS", {})) self.tcs_info_373 = copy.copy(cp.vl.get("TCS", {}))
if "CLUSTER_SPEED_LIMIT" in cp.vl:
self.cluster_speed_limit_msg = copy.copy(cp.vl.get("CLUSTER_SPEED_LIMIT", {}))
if "GEAR" in cp.vl: if "GEAR" in cp.vl:
ret.gearStep = cp.vl["GEAR"]["GEAR_STEP"] ret.gearStep = cp.vl["GEAR"]["GEAR_STEP"]
elif "GEAR_ALT" in cp.vl: elif "GEAR_ALT" in cp.vl:
@ -596,6 +600,8 @@ class CarState(CarStateBase):
# 어떤차는 bus2에 있음, 내차는 bus0에 있는데.... 이건 옆두부와 관련이 없나? # 어떤차는 bus2에 있음, 내차는 bus0에 있는데.... 이건 옆두부와 관련이 없나?
#if CP.flags & HyundaiFlags.CANFD_HDA2: #if CP.flags & HyundaiFlags.CANFD_HDA2:
# pt_messages.append(("CLUSTER_SPEED_LIMIT", 10)) # pt_messages.append(("CLUSTER_SPEED_LIMIT", 10))
if Params().get_int("CanfdDebug") > 0:
pt_messages.append(("CLUSTER_SPEED_LIMIT", 10))
cam_messages = [] cam_messages = []
if CP.flags & HyundaiFlags.CANFD_HDA2 and not (CP.flags & HyundaiFlags.CAMERA_SCC.value): if CP.flags & HyundaiFlags.CANFD_HDA2 and not (CP.flags & HyundaiFlags.CAMERA_SCC.value):

View File

@ -598,8 +598,13 @@ def create_ccnc_messages(CP, packer, CAN, frame, CC, CS, hud_control, disp_angle
# ADAS 콤마연결하면.. 0번에서.. (카메라혹은 다른곳에서) # ADAS 콤마연결하면.. 0번에서.. (카메라혹은 다른곳에서)
# 카메라 콤마연결+롱컨개조 하면.. 2번에서 데이터가 나옴..(카메라혹은 ADAS) # 카메라 콤마연결+롱컨개조 하면.. 2번에서 데이터가 나옴..(카메라혹은 ADAS)
if frame % 10 == 0: if frame % 10 == 0:
if CS.cluster_speed_limit_msg is not None:
pass values = CS.cluster_speed_limit_msg
values["SPEED_LIMIT_1"] = 100
values["SPEED_LIMIT_2"] = 100
values["SPEED_LIMIT_3"] = 105
#values["COUNTER"] = (values["COUNTER"] + 1) % 256
ret.append(packer.make_can_msg("CLUSTER_SPEED_LIMIT", CAN.CAM, values))
return ret return ret

View File

@ -141,7 +141,7 @@ class CarState(CarStateBase):
ret.buttonEvents = [ ret.buttonEvents = [
*create_button_events(self.cruise_buttons, self.prev_cruise_buttons, BUTTONS_DICT), *create_button_events(self.cruise_buttons, self.prev_cruise_buttons, BUTTONS_DICT),
*create_button_events(self.distance_button, self.prev_distance_button, {1: ButtonType.gapAdjustCruise}), *create_button_events(self.distance_button, self.prev_distance_button, {1: ButtonType.gapAdjustCruise}),
*create_button_events(self.lkas_enabled, self.lkas_previously_enabled, {1: ButtonType.lfaButton}), #*create_button_events(self.lkas_enabled, self.lkas_previously_enabled, {1: ButtonType.lfaButton}),
] ]
return ret return ret

View File

@ -81,7 +81,7 @@ const CanMsg HYUNDAI_CANFD_HDA2_LONG_TX_MSGS[] = {
{203, 0, 24}, // CB {203, 0, 24}, // CB
{373, 2, 24}, // TCS(0x175) {373, 2, 24}, // TCS(0x175)
//{506, 2, 32}, // CLUSTER_SPEED_LIMIT {506, 2, 32}, // CLUSTER_SPEED_LIMIT
{234, 2, 24}, // MDPS {234, 2, 24}, // MDPS
{687, 2, 8}, // STEER_TOUCH_2AF {687, 2, 8}, // STEER_TOUCH_2AF
}; };

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -219,6 +219,7 @@ class Car:
CS.softHoldActive = self.v_cruise_helper._soft_hold_active CS.softHoldActive = self.v_cruise_helper._soft_hold_active
CS.activateCruise = self.v_cruise_helper._activate_cruise CS.activateCruise = self.v_cruise_helper._activate_cruise
CS.latEnabled = self.v_cruise_helper._lat_enabled CS.latEnabled = self.v_cruise_helper._lat_enabled
CS.useLaneLineSpeed = self.v_cruise_helper.useLaneLineSpeedApply
self.CI.CS.softHoldActive = CS.softHoldActive self.CI.CS.softHoldActive = CS.softHoldActive
return CS, RD return CS, RD

View File

@ -218,7 +218,7 @@ class VCruiseCarrot:
self.AutoSpeedUptoRoadSpeedLimit = 0.0 self.AutoSpeedUptoRoadSpeedLimit = 0.0
self.useLaneLineSpeed = self.params.get_int("UseLaneLineSpeed") self.useLaneLineSpeed = self.params.get_int("UseLaneLineSpeed")
self.params.put_int("UseLaneLineSpeedApply", self.useLaneLineSpeed) self.useLaneLineSpeedApply = self.useLaneLineSpeed
@property @property
@ -237,16 +237,19 @@ class VCruiseCarrot:
self._log_timer = self._log_timeout self._log_timer = self._log_timeout
def update_params(self, is_metric): def update_params(self, is_metric):
unit_factor = 1.0 if is_metric else CV.MPH_TO_KPH
if self.frame % 10 == 0: if self.frame % 10 == 0:
self.autoCruiseControl = self.params.get_int("AutoCruiseControl") self.autoCruiseControl = self.params.get_int("AutoCruiseControl") * unit_factor
self.autoGasTokSpeed = self.params.get_int("AutoGasTokSpeed") self.autoGasTokSpeed = self.params.get_int("AutoGasTokSpeed") * unit_factor
self.autoGasSyncSpeed = self.params.get_bool("AutoGasSyncSpeed") self.autoGasSyncSpeed = self.params.get_bool("AutoGasSyncSpeed") * unit_factor
self.autoSpeedUptoRoadSpeedLimit = self.params.get_float("AutoSpeedUptoRoadSpeedLimit") * 0.01 self.autoSpeedUptoRoadSpeedLimit = self.params.get_float("AutoSpeedUptoRoadSpeedLimit") * 0.01
self.autoRoadSpeedAdjust = self.params.get_float("AutoRoadSpeedAdjust") * 0.01 self.autoRoadSpeedAdjust = self.params.get_float("AutoRoadSpeedAdjust") * 0.01
useLaneLineSpeed = self.params.get_int("UseLaneLineSpeed")
useLaneLineSpeed = self.params.get_int("UseLaneLineSpeed") * unit_factor
if self.useLaneLineSpeed != useLaneLineSpeed: if self.useLaneLineSpeed != useLaneLineSpeed:
self.params.put_int_nonblocking("UseLaneLineSpeedApply", useLaneLineSpeed) self.useLaneLineSpeedApply = useLaneLineSpeed
self.useLaneLineSpeed = useLaneLineSpeed self.useLaneLineSpeed = useLaneLineSpeed
self.speed_from_pcm = self.params.get_int("SpeedFromPCM") self.speed_from_pcm = self.params.get_int("SpeedFromPCM")
self._cruise_speed_unit = self.params.get_int("CruiseSpeedUnit") self._cruise_speed_unit = self.params.get_int("CruiseSpeedUnit")
self._paddle_mode = self.params.get_int("PaddleMode") self._paddle_mode = self.params.get_int("PaddleMode")
@ -255,7 +258,6 @@ class VCruiseCarrot:
self.autoRoadSpeedLimitOffset = self.params.get_int("AutoRoadSpeedLimitOffset") self.autoRoadSpeedLimitOffset = self.params.get_int("AutoRoadSpeedLimitOffset")
self.autoNaviSpeedSafetyFactor = self.params.get_float("AutoNaviSpeedSafetyFactor") * 0.01 self.autoNaviSpeedSafetyFactor = self.params.get_float("AutoNaviSpeedSafetyFactor") * 0.01
self.cruiseOnDist = self.params.get_float("CruiseOnDist") * 0.01 self.cruiseOnDist = self.params.get_float("CruiseOnDist") * 0.01
unit_factor = 1.0 if is_metric else CV.MPH_TO_KPH
cruiseSpeed1 = self.params.get_float("CruiseSpeed1") * unit_factor cruiseSpeed1 = self.params.get_float("CruiseSpeed1") * unit_factor
cruiseSpeed2 = self.params.get_float("CruiseSpeed2") * unit_factor cruiseSpeed2 = self.params.get_float("CruiseSpeed2") * unit_factor
cruiseSpeed3 = self.params.get_float("CruiseSpeed3") * unit_factor cruiseSpeed3 = self.params.get_float("CruiseSpeed3") * unit_factor
@ -552,7 +554,7 @@ class VCruiseCarrot:
self.params.put_int_nonblocking("MyDrivingMode", self.params.get_int("MyDrivingMode") % 4 + 1) # 1,2,3,4 (1:eco, 2:safe, 3:normal, 4:high speed) self.params.put_int_nonblocking("MyDrivingMode", self.params.get_int("MyDrivingMode") % 4 + 1) # 1,2,3,4 (1:eco, 2:safe, 3:normal, 4:high speed)
elif button_type == ButtonType.lfaButton: elif button_type == ButtonType.lfaButton:
useLaneLineSpeed = max(1, self.useLaneLineSpeed) useLaneLineSpeed = max(1, self.useLaneLineSpeed)
self.params.put_int_nonblocking("UseLaneLineSpeedApply", useLaneLineSpeed if self.params.get_int("UseLaneLineSpeedApply") == 0 else 0) self.useLaneLineSpeedApply = useLaneLineSpeed if self.useLaneLineSpeedApply == 0 else 0
elif button_type == ButtonType.cancel: elif button_type == ButtonType.cancel:
self._cruise_cancel_state = True self._cruise_cancel_state = True
@ -594,15 +596,20 @@ class VCruiseCarrot:
return v_cruise_kph return v_cruise_kph
def _auto_speed_up(self, v_cruise_kph): def _auto_speed_up(self, v_cruise_kph):
if self._pause_auto_speed_up: #if self._pause_auto_speed_up:
return v_cruise_kph # return v_cruise_kph
road_limit_kph = self.nRoadLimitSpeed * self.autoSpeedUptoRoadSpeedLimit road_limit_kph = self.nRoadLimitSpeed * self.autoSpeedUptoRoadSpeedLimit
if road_limit_kph < 1.0: if road_limit_kph < 1.0:
return v_cruise_kph return v_cruise_kph
if self.v_lead_kph + 5 > v_cruise_kph and v_cruise_kph < road_limit_kph and self.d_rel < 60: if not self._pause_auto_speed_up and self.v_lead_kph + 5 > v_cruise_kph and v_cruise_kph < road_limit_kph and self.d_rel < 60:
v_cruise_kph = min(v_cruise_kph + 5, road_limit_kph) v_cruise_kph = min(v_cruise_kph + 5, road_limit_kph)
elif self.autoRoadSpeedAdjust < 0 and self.nRoadLimitSpeed != self.nRoadLimitSpeed_last: # 도로제한속도가 바뀌면, 바뀐속도로 속도를 바꿈.
if self.autoRoadSpeedLimitOffset < 0:
v_cruise_kph = self.nRoadLimitSpeed * self.autoNaviSpeedSafetyFactor
else:
v_cruise_kph = self.nRoadLimitSpeed + self.autoRoadSpeedLimitOffset
elif self.nRoadLimitSpeed < self.nRoadLimitSpeed_last and self.autoRoadSpeedAdjust > 0: elif self.nRoadLimitSpeed < self.nRoadLimitSpeed_last and self.autoRoadSpeedAdjust > 0:
new_road_limit_kph = self.nRoadLimitSpeed * self.autoRoadSpeedAdjust + v_cruise_kph * (1 - self.autoRoadSpeedAdjust) new_road_limit_kph = self.nRoadLimitSpeed * self.autoRoadSpeedAdjust + v_cruise_kph * (1 - self.autoRoadSpeedAdjust)
self._add_log(f"AutoSpeed change {v_cruise_kph} -> {new_road_limit_kph}") self._add_log(f"AutoSpeed change {v_cruise_kph} -> {new_road_limit_kph}")
@ -681,11 +688,11 @@ class VCruiseCarrot:
elif self.xState == 3: elif self.xState == 3:
v_cruise_kph = self.v_ego_kph_set v_cruise_kph = self.v_ego_kph_set
self._cruise_control(-1, 3, "Cruise off (traffic sign)") self._cruise_control(-1, 3, "Cruise off (traffic sign)")
elif self.v_ego_kph_set >= 30 and not CC.enabled: elif self.v_ego_kph_set >= self.autoGasTokSpeed and not CC.enabled:
v_cruise_kph = self.v_ego_kph_set v_cruise_kph = self.v_ego_kph_set
self._cruise_control(1, -1 if self.aTarget > 0.0 else 0, "Cruise on (gas pressed)") self._cruise_control(1, -1 if self.aTarget > 0.0 else 0, "Cruise on (gas pressed)")
elif self._brake_pressed_count == -1 and self._soft_hold_active == 0: elif self._brake_pressed_count == -1 and self._soft_hold_active == 0:
if self.v_ego_kph_set > 40: if self.v_ego_kph_set > self.autoGasTokSpeed:
v_cruise_kph = self.v_ego_kph_set v_cruise_kph = self.v_ego_kph_set
self._cruise_control(1, -1 if self.aTarget > 0.0 else 0, "Cruise on (speed)") self._cruise_control(1, -1 if self.aTarget > 0.0 else 0, "Cruise on (speed)")
elif abs(CS.steeringAngleDeg) < 20: elif abs(CS.steeringAngleDeg) < 20:

View File

@ -1561,7 +1561,9 @@ class CarrotServ:
xSpdType = 100 xSpdType = 100
if xSpdType >= 0: if xSpdType >= 0:
self.xSpdLimit = self.nRoadLimitSpeed offset = 5 if self.is_metric else 5 * CV.MPH_TO_KPH
self.xSpdLimit = self.nRoadLimitSpeed + offset
self.xSpdDist = distance self.xSpdDist = distance
self.xSpdType =xSpdType self.xSpdType =xSpdType
@ -1685,11 +1687,12 @@ class CarrotServ:
if self.turnSpeedControlMode in [1,2]: if self.turnSpeedControlMode in [1,2]:
speed_n_sources.append((max(abs(vturn_speed), self.autoCurveSpeedLowerLimit), "vturn")) speed_n_sources.append((max(abs(vturn_speed), self.autoCurveSpeedLowerLimit), "vturn"))
route_speed = max(route_speed * self.mapTurnSpeedFactor, self.autoCurveSpeedLowerLimit)
if self.turnSpeedControlMode == 2: if self.turnSpeedControlMode == 2:
if 0 < self.xDistToTurn < 300: if 0 < self.xDistToTurn < 300:
speed_n_sources.append((route_speed * self.mapTurnSpeedFactor, "route")) speed_n_sources.append((route_speed, "route"))
elif self.turnSpeedControlMode == 3: elif self.turnSpeedControlMode == 3:
speed_n_sources.append((route_speed * self.mapTurnSpeedFactor, "route")) speed_n_sources.append((route_speed, "route"))
#speed_n_sources.append((self.calculate_current_speed(dist, speed * self.mapTurnSpeedFactor, 0, 1.2), "route")) #speed_n_sources.append((self.calculate_current_speed(dist, speed * self.mapTurnSpeedFactor, 0, 1.2), "route"))
desired_speed, source = min(speed_n_sources, key=lambda x: x[0]) desired_speed, source = min(speed_n_sources, key=lambda x: x[0])

View File

@ -235,6 +235,32 @@
"default": 0, "default": 0,
"unit": 1 "unit": 1
}, },
{
"group": "조향튜닝",
"name": "CustomSteerDeltaUpLC",
"title": "_CustomSteerDeltaUpLC(0)",
"descr": "차선변경시 적용, 토크조향",
"egroup": "LAT",
"etitle": "_CustomSteerDeltaUpLC(0)",
"edescr": "for LaneChange, torque steer only",
"min": 0,
"max": 50,
"default": 0,
"unit": 1
},
{
"group": "조향튜닝",
"name": "CustomSteerDeltaDownLC",
"title": "_CustomSteerDeltaDownLC(0)",
"descr": "차선변경시 적용, 토크조향",
"egroup": "LAT",
"etitle": "_CustomSteerDeltaDownLC(0)",
"edescr": "for LaneChange, torque steer only",
"min": 0,
"max": 50,
"default": 0,
"unit": 1
},
{ {
"group": "조향튜닝", "group": "조향튜닝",
"name": "SteerActuatorDelay", "name": "SteerActuatorDelay",
@ -736,7 +762,7 @@
"descr": "1:SOFTHOLD, Auto Cruise, 2:SoftHold오류시", "descr": "1:SOFTHOLD, Auto Cruise, 2:SoftHold오류시",
"egroup": "START", "egroup": "START",
"etitle": "Auto Cruise control(HKG only)", "etitle": "Auto Cruise control(HKG only)",
"edescr": "Softhold, Auto Cruise ON/OFF control, 2:if softhold error", "edescr": "1:Softhold, Auto Cruise ON/OFF control, 2:if softhold error",
"min": 0, "min": 0,
"max": 3, "max": 3,
"default": 0, "default": 0,
@ -915,11 +941,11 @@
"group": "감속제어", "group": "감속제어",
"name": "AutoRoadSpeedAdjust", "name": "AutoRoadSpeedAdjust",
"title": "자동도로제한속도감속 (50)%", "title": "자동도로제한속도감속 (50)%",
"descr": "100: 새로운속도, 50: 중간값, 0: 기존속도유지", "descr": "-1: 도로제한속도로 항상, 100: 새로운속도, 50: 중간값, 0: 기존속도유지",
"egroup": "CRUISE", "egroup": "CRUISE",
"etitle": "AutoRoadLimitSpeedAdjust (50)%", "etitle": "AutoRoadLimitSpeedAdjust (50)%",
"edescr": "100: new road speed, 50: median, 0: not change", "edescr": "-1: set roadlimitspeed, 100: new road speed, 50: median, 0: not change",
"min": 0, "min": -1,
"max": 100, "max": 100,
"default": 0, "default": 0,
"unit": 10 "unit": 10

View File

@ -132,8 +132,7 @@ class Controls:
# Steering PID loop and lateral MPC # Steering PID loop and lateral MPC
lat_plan = self.sm['lateralPlan'] lat_plan = self.sm['lateralPlan']
curve_speed_abs = abs(self.sm['carrotMan'].vTurnSpeed) curve_speed_abs = abs(self.sm['carrotMan'].vTurnSpeed)
self.lanefull_mode_enabled = (lat_plan.useLaneLines and self.params.get_int("UseLaneLineSpeedApply") > 0 and self.lanefull_mode_enabled = (lat_plan.useLaneLines and curve_speed_abs > self.params.get_int("UseLaneLineCurveSpeed"))
curve_speed_abs > self.params.get_int("UseLaneLineCurveSpeed"))
lat_smooth_seconds = LAT_SMOOTH_SECONDS #self.params.get_float("SteerSmoothSec") * 0.01 lat_smooth_seconds = LAT_SMOOTH_SECONDS #self.params.get_float("SteerSmoothSec") * 0.01
steer_actuator_delay = self.params.get_float("SteerActuatorDelay") * 0.01 steer_actuator_delay = self.params.get_float("SteerActuatorDelay") * 0.01
mpc_output_offset = self.params.get_float("LatMpcOutputOffset") * 0.01 # 0.05 mpc_output_offset = self.params.get_float("LatMpcOutputOffset") * 0.01 # 0.05

View File

@ -4,6 +4,7 @@ from openpilot.common.realtime import DT_MDL
import numpy as np import numpy as np
from openpilot.selfdrive.modeld.constants import ModelConstants from openpilot.selfdrive.modeld.constants import ModelConstants
from openpilot.common.params import Params from openpilot.common.params import Params
from collections import deque
LaneChangeState = log.LaneChangeState LaneChangeState = log.LaneChangeState
LaneChangeDirection = log.LaneChangeDirection LaneChangeDirection = log.LaneChangeDirection
@ -106,6 +107,8 @@ class DesireHelper:
self.desireLog = "" self.desireLog = ""
self.lane_width_left = 0 self.lane_width_left = 0
self.lane_width_right = 0 self.lane_width_right = 0
self.lane_width_left_diff = 0
self.lane_width_right_diff = 0
self.distance_to_road_edge_left = 0 self.distance_to_road_edge_left = 0
self.distance_to_road_edge_right = 0 self.distance_to_road_edge_right = 0
self.distance_to_road_edge_left_far = 0 self.distance_to_road_edge_left_far = 0
@ -122,6 +125,8 @@ class DesireHelper:
self.available_right_lane = False self.available_right_lane = False
self.available_left_edge = False self.available_left_edge = False
self.available_right_edge = False self.available_right_edge = False
self.lane_width_left_queue = deque(maxlen=int(1.0/DT_MDL))
self.lane_width_right_queue = deque(maxlen=int(1.0/DT_MDL))
self.lane_available_last = False self.lane_available_last = False
self.edge_available_last = False self.edge_available_last = False
@ -141,15 +146,24 @@ class DesireHelper:
self.turn_desire_state = False self.turn_desire_state = False
self.desire_disable_count = 0 self.desire_disable_count = 0
self.blindspot_detected_counter = 0 self.blindspot_detected_counter = 0
self.auto_lane_change_enable = False
def check_lane_state(self, modeldata): def check_lane_state(self, modeldata):
self.lane_width_left, self.distance_to_road_edge_left, self.distance_to_road_edge_left_far, lane_prob_left = calculate_lane_width(modeldata.laneLines[0], modeldata.laneLineProbs[0], lane_width_left, self.distance_to_road_edge_left, self.distance_to_road_edge_left_far, lane_prob_left = calculate_lane_width(modeldata.laneLines[0], modeldata.laneLineProbs[0],
modeldata.laneLines[1], modeldata.roadEdges[0]) modeldata.laneLines[1], modeldata.roadEdges[0])
self.lane_width_right, self.distance_to_road_edge_right, self.distance_to_road_edge_right_far, lane_prob_right = calculate_lane_width(modeldata.laneLines[3], modeldata.laneLineProbs[3], lane_width_right, self.distance_to_road_edge_right, self.distance_to_road_edge_right_far, lane_prob_right = calculate_lane_width(modeldata.laneLines[3], modeldata.laneLineProbs[3],
modeldata.laneLines[2], modeldata.roadEdges[1]) modeldata.laneLines[2], modeldata.roadEdges[1])
self.lane_exist_left_count.update(lane_prob_left) self.lane_exist_left_count.update(lane_prob_left)
self.lane_exist_right_count.update(lane_prob_right) self.lane_exist_right_count.update(lane_prob_right)
min_lane_width = 2.8
self.lane_width_left_queue.append(lane_width_left)
self.lane_width_right_queue.append(lane_width_right)
self.lane_width_left = np.mean(self.lane_width_left_queue)
self.lane_width_right = np.mean(self.lane_width_right_queue)
self.lane_width_left_diff = self.lane_width_left_queue[-1] - self.lane_width_left_queue[0]
self.lane_width_right_diff = self.lane_width_right_queue[-1] - self.lane_width_right_queue[0]
min_lane_width = 2.0
self.lane_width_left_count.update(self.lane_width_left > min_lane_width) self.lane_width_left_count.update(self.lane_width_left > min_lane_width)
self.lane_width_right_count.update(self.lane_width_right > min_lane_width) self.lane_width_right_count.update(self.lane_width_right > min_lane_width)
self.road_edge_left_count.update(self.distance_to_road_edge_left > min_lane_width) self.road_edge_left_count.update(self.distance_to_road_edge_left > min_lane_width)
@ -183,6 +197,10 @@ class DesireHelper:
v_ego = carstate.vEgo v_ego = carstate.vEgo
below_lane_change_speed = v_ego < LANE_CHANGE_SPEED_MIN below_lane_change_speed = v_ego < LANE_CHANGE_SPEED_MIN
##### check lane state
self.check_lane_state(modeldata)
self.check_desire_state(modeldata)
#### check driver's blinker state #### check driver's blinker state
driver_blinker_state = carstate.leftBlinker * 1 + carstate.rightBlinker * 2 driver_blinker_state = carstate.leftBlinker * 1 + carstate.rightBlinker * 2
driver_blinker_changed = driver_blinker_state != self.driver_blinker_state driver_blinker_changed = driver_blinker_state != self.driver_blinker_state
@ -240,10 +258,6 @@ class DesireHelper:
desire_enabled = driver_desire_enabled or atc_desire_enabled desire_enabled = driver_desire_enabled or atc_desire_enabled
blinker_state = driver_blinker_state if driver_desire_enabled else atc_blinker_state blinker_state = driver_blinker_state if driver_desire_enabled else atc_blinker_state
##### check lane state
self.check_lane_state(modeldata)
self.check_desire_state(modeldata)
if desire_enabled: if desire_enabled:
lane_available = self.available_left_lane if blinker_state == BLINKER_LEFT else self.available_right_lane lane_available = self.available_left_lane if blinker_state == BLINKER_LEFT else self.available_right_lane
edge_available = self.available_left_edge if blinker_state == BLINKER_LEFT else self.available_right_edge edge_available = self.available_left_edge if blinker_state == BLINKER_LEFT else self.available_right_edge
@ -260,16 +274,27 @@ class DesireHelper:
lane_appeared = False lane_appeared = False
self.object_detected_count = 0 self.object_detected_count = 0
lane_availabled = not self.lane_available_last and lane_available #lane_available_trigger = not self.lane_available_last and lane_available
lane_change_available = lane_available or edge_available
lane_available_trigger = False
lane_width_diff = self.lane_width_left_diff if atc_blinker_state == BLINKER_LEFT else self.lane_width_right_diff
distance_to_road_edge = self.distance_to_road_edge_left if atc_blinker_state == BLINKER_LEFT else self.distance_to_road_edge_right
lane_width_side = self.lane_width_left if atc_blinker_state == BLINKER_LEFT else self.lane_width_right
if lane_width_diff > 0.5 and (lane_width_side < distance_to_road_edge):
lane_available_trigger = True
edge_availabled = not self.edge_available_last and edge_available edge_availabled = not self.edge_available_last and edge_available
side_object_detected = self.object_detected_count > -0.3 / DT_MDL side_object_detected = self.object_detected_count > -0.3 / DT_MDL
lane_exist_counter = self.lane_exist_left_count.counter if blinker_state == BLINKER_LEFT else self.lane_exist_right_count.counter
if self.carrot_lane_change_count > 0: if self.carrot_lane_change_count > 0:
auto_lane_change_blocked = False auto_lane_change_blocked = False
auto_lane_change_available = lane_available auto_lane_change_trigger = lane_change_available
else: else:
auto_lane_change_blocked = ((atc_blinker_state == BLINKER_LEFT) and (driver_blinker_state != BLINKER_LEFT)) auto_lane_change_blocked = ((atc_blinker_state == BLINKER_LEFT) and (driver_blinker_state != BLINKER_LEFT))
auto_lane_change_available = not auto_lane_change_blocked and (lane_availabled or edge_availabled or lane_appeared) and not side_object_detected #auto_lane_change_trigger = not auto_lane_change_blocked and edge_available and (lane_available_trigger or edge_availabled or lane_appeared) and not side_object_detected
auto_lane_change_trigger = self.auto_lane_change_enable and not auto_lane_change_blocked and edge_available and (lane_available_trigger or lane_appeared) and not side_object_detected
self.desireLog = f"L:{self.auto_lane_change_enable},{auto_lane_change_blocked},E:{lane_available},{edge_available},A:{lane_available_trigger},{lane_appeared},{lane_width_diff:.1f},{lane_width_side:.1f},{distance_to_road_edge:.1f}={auto_lane_change_trigger}"
if not lateral_active or self.lane_change_timer > LANE_CHANGE_TIME_MAX: if not lateral_active or self.lane_change_timer > LANE_CHANGE_TIME_MAX:
#print("Desire canceled") #print("Desire canceled")
@ -296,6 +321,11 @@ class DesireHelper:
self.lane_change_ll_prob = 1.0 self.lane_change_ll_prob = 1.0
self.lane_change_delay = self.laneChangeDelay self.lane_change_delay = self.laneChangeDelay
# 맨끝차선이 아니면(측면에 차선이 있으면), ATC 자동작동 안함.
#self.auto_lane_change_enable = False if lane_exist_counter > 0 else True
self.auto_lane_change_enable = False if lane_exist_counter > 0 or lane_change_available else True
# LaneChangeState.preLaneChange # LaneChangeState.preLaneChange
elif self.lane_change_state == LaneChangeState.preLaneChange: elif self.lane_change_state == LaneChangeState.preLaneChange:
# Set lane change direction # Set lane change direction
@ -310,6 +340,9 @@ class DesireHelper:
torque_applied = carstate.steeringPressed and torque_cond torque_applied = carstate.steeringPressed and torque_cond
blindspot_detected = blindspot_cond blindspot_detected = blindspot_cond
if not self.auto_lane_change_enable and not lane_available: #lane_exist_counter > int(0.2 / DT_MDL) and not lane_change_available:
self.auto_lane_change_enable = True
if blindspot_detected and not ignore_bsd: if blindspot_detected and not ignore_bsd:
self.blindspot_detected_counter = int(1.5 / DT_MDL) self.blindspot_detected_counter = int(1.5 / DT_MDL)
# BSD검출시.. 아래 두줄로 자동차선변경 해제함.. 위험해서 자동차선변경기능은 안하는걸로... # BSD검출시.. 아래 두줄로 자동차선변경 해제함.. 위험해서 자동차선변경기능은 안하는걸로...
@ -319,7 +352,7 @@ class DesireHelper:
self.lane_change_state = LaneChangeState.off self.lane_change_state = LaneChangeState.off
self.lane_change_direction = LaneChangeDirection.none self.lane_change_direction = LaneChangeDirection.none
else: else:
if lane_available and self.lane_change_delay == 0: if lane_change_available and self.lane_change_delay == 0:
if self.blindspot_detected_counter > 0 and not ignore_bsd: # BSD검출시 if self.blindspot_detected_counter > 0 and not ignore_bsd: # BSD검출시
if torque_applied and not block_lanechange_bsd: if torque_applied and not block_lanechange_bsd:
self.lane_change_state = LaneChangeState.laneChangeStarting self.lane_change_state = LaneChangeState.laneChangeStarting
@ -330,7 +363,7 @@ class DesireHelper:
self.lane_change_state = LaneChangeState.laneChangeStarting self.lane_change_state = LaneChangeState.laneChangeStarting
# ATC작동인경우 차선이 나타나거나 차선이 생기면 차선변경 시작 # ATC작동인경우 차선이 나타나거나 차선이 생기면 차선변경 시작
# lane_appeared: 차선이 생기는건 안함.. 위험. # lane_appeared: 차선이 생기는건 안함.. 위험.
elif torque_applied or auto_lane_change_available: elif torque_applied or auto_lane_change_trigger:
self.lane_change_state = LaneChangeState.laneChangeStarting self.lane_change_state = LaneChangeState.laneChangeStarting
# LaneChangeState.laneChangeStarting # LaneChangeState.laneChangeStarting
@ -379,7 +412,7 @@ class DesireHelper:
#print(f"desire = {self.desire}") #print(f"desire = {self.desire}")
#self.desireLog = f"desire = {self.desire}" #self.desireLog = f"desire = {self.desire}"
self.desireLog = f"rlane={self.distance_to_road_edge_right:.1f},{self.distance_to_road_edge_right_far:.1f}" #self.desireLog = f"rlane={self.distance_to_road_edge_right:.1f},{self.distance_to_road_edge_right_far:.1f}"
# Send keep pulse once per second during LaneChangeStart.preLaneChange # Send keep pulse once per second during LaneChangeStart.preLaneChange
if self.lane_change_state in (LaneChangeState.off, LaneChangeState.laneChangeStarting): if self.lane_change_state in (LaneChangeState.off, LaneChangeState.laneChangeStarting):

View File

@ -122,3 +122,13 @@ def get_accel_from_plan(speeds, accels, t_idxs, action_t=DT_MDL, vEgoStopping=0.
should_stop = (v_target < vEgoStopping and should_stop = (v_target < vEgoStopping and
v_target_1sec < vEgoStopping) v_target_1sec < vEgoStopping)
return a_target, should_stop return a_target, should_stop
def curv_from_psis(psi_target, psi_rate, vego, action_t):
vego = np.clip(vego, MIN_SPEED, np.inf)
curv_from_psi = psi_target / (vego * action_t)
return 2*curv_from_psi - psi_rate / vego
def get_curvature_from_plan(yaws, yaw_rates, t_idxs, vego, action_t):
psi_target = np.interp(action_t, t_idxs, yaws)
psi_rate = yaw_rates[0]
return curv_from_psis(psi_target, psi_rate, vego, action_t)

View File

@ -58,7 +58,7 @@ class LateralPlanner:
self.lanelines_active = False self.lanelines_active = False
self.lanelines_active_tmp = False self.lanelines_active_tmp = False
self.useLaneLineSpeedApply = self.params.get_int("UseLaneLineSpeedApply") self.useLaneLineSpeedApply = self.params.get_int("UseLaneLineSpeed")
self.pathOffset = float(self.params.get_int("PathOffset")) * 0.01 self.pathOffset = float(self.params.get_int("PathOffset")) * 0.01
self.useLaneLineMode = False self.useLaneLineMode = False
self.plan_a = np.zeros((TRAJECTORY_SIZE, )) self.plan_a = np.zeros((TRAJECTORY_SIZE, ))
@ -85,7 +85,7 @@ class LateralPlanner:
self.readParams -= 1 self.readParams -= 1
if self.readParams <= 0: if self.readParams <= 0:
self.readParams = 100 self.readParams = 100
self.useLaneLineSpeedApply = self.params.get_int("UseLaneLineSpeedApply") self.useLaneLineSpeedApply = sm['carState'].useLaneLineSpeed
self.pathOffset = float(self.params.get_int("PathOffset")) * 0.01 self.pathOffset = float(self.params.get_int("PathOffset")) * 0.01
self.lateralPathCost = self.params.get_float("LatMpcPathCost") * 0.01 self.lateralPathCost = self.params.get_float("LatMpcPathCost") * 0.01
self.lateralMotionCost = self.params.get_float("LatMpcMotionCost") * 0.01 self.lateralMotionCost = self.params.get_float("LatMpcMotionCost") * 0.01

View File

@ -4,6 +4,11 @@
#include <cmath> #include <cmath>
#include <limits> #include <limits>
#include <QJsonDocument>
#include <QJsonObject>
#include <QJsonValue>
#include <QJsonArray>
//#define __TEST //#define __TEST
//#define __UI_TEST //#define __UI_TEST
@ -494,7 +499,8 @@ public:
} }
}; };
class ModelDrawer { class ModelDrawer : public QObject{
Q_OBJECT
protected: protected:
template <class T> template <class T>
float interp(float x, std::initializer_list<T> x_list, std::initializer_list<T> y_list, bool extrapolate) float interp(float x, std::initializer_list<T> x_list, std::initializer_list<T> y_list, bool extrapolate)
@ -696,11 +702,11 @@ public:
else if (longActive) { else if (longActive) {
if (xState == 3 || xState == 5) { //XState.e2eStop, XState.e2eStopped if (xState == 3 || xState == 5) { //XState.e2eStop, XState.e2eStopped
if (v_ego < 1.0) { if (v_ego < 1.0) {
sprintf(str, "%s", (trafficState >= 1000) ? "신호오류" : "신호대기"); sprintf(str, "%s", (trafficState >= 1000) ? tr("Signal Error").toStdString().c_str(): tr("Signal Ready").toStdString().c_str());
ui_draw_text(s, x, disp_y, str, disp_size, COLOR_WHITE, BOLD); ui_draw_text(s, x, disp_y, str, disp_size, COLOR_WHITE, BOLD);
} }
else { else {
ui_draw_text(s, x, disp_y, "신호감속중", disp_size, COLOR_WHITE, BOLD); ui_draw_text(s, x, disp_y, tr("Signal slowing").toStdString().c_str(), disp_size, COLOR_WHITE, BOLD);
} }
#if 0 #if 0
else if (getStopDist() > 0.5) { else if (getStopDist() > 0.5) {
@ -1596,6 +1602,8 @@ protected:
int use_lane_line_speed_apply = 0; int use_lane_line_speed_apply = 0;
public: public:
void draw(const UIState* s, float& pathDrawSeq) { void draw(const UIState* s, float& pathDrawSeq) {
SubMaster& sm = *(s->sm);
auto car_state = sm["carState"].getCarState();
params_count = (params_count + 1) % 20; params_count = (params_count + 1) % 20;
if (params_count == 0) { if (params_count == 0) {
show_path_mode_normal = params.getInt("ShowPathMode"); show_path_mode_normal = params.getInt("ShowPathMode");
@ -1606,7 +1614,7 @@ public:
show_path_color_cruise_off = params.getInt("ShowPathColorCruiseOff"); show_path_color_cruise_off = params.getInt("ShowPathColorCruiseOff");
} }
if (!make_data(s)) return; if (!make_data(s)) return;
int temp = params.getInt("UseLaneLineSpeedApply"); int temp = (int)car_state.getUseLaneLineSpeed();
if (temp != use_lane_line_speed_apply) { if (temp != use_lane_line_speed_apply) {
ui_draw_text_a(s, 0, 0, (temp>0)?"LaneMode":"Laneless", 30, (temp>0)?COLOR_GREEN:COLOR_YELLOW, BOLD); ui_draw_text_a(s, 0, 0, (temp>0)?"LaneMode":"Laneless", 30, (temp>0)?COLOR_GREEN:COLOR_YELLOW, BOLD);
use_lane_line_speed_apply = temp; use_lane_line_speed_apply = temp;
@ -1621,8 +1629,6 @@ public:
COLOR_WHITE_ALPHA(alpha), COLOR_BLACK_ALPHA(alpha), COLOR_WHITE_ALPHA(alpha), COLOR_BLACK_ALPHA(alpha),
}; };
SubMaster& sm = *(s->sm);
auto car_state = sm["carState"].getCarState();
bool brake_valid = car_state.getBrakeLights(); bool brake_valid = car_state.getBrakeLights();
if (show_path_mode == 0) { if (show_path_mode == 0) {
@ -1838,11 +1844,6 @@ private:
}; };
#include <QJsonDocument>
#include <QJsonObject>
#include <QJsonValue>
#include <QJsonArray>
typedef struct { typedef struct {
float x, y, d, v, y_rel, v_lat, radar; float x, y, d, v, y_rel, v_lat, radar;
} lead_vertex_data; } lead_vertex_data;
@ -1947,9 +1948,9 @@ public:
} }
auto meta = sm["modelV2"].getModelV2().getMeta(); auto meta = sm["modelV2"].getModelV2().getMeta();
QString desireLog = QString::fromStdString(meta.getDesireLog()); QString desireLog = QString::fromStdString(meta.getDesireLog());
sprintf(carrot_man_debug, "model_kph= %d, %s, %dkm/h TBT(%d): %dm, CAM(%d): %dkm/h, %dm, ATC(%s), T(%d)", sprintf(carrot_man_debug, "%s, m_kph= %d, %dkm/h TBT(%d): %dm, CAM(%d): %dkm/h, %dm, ATC(%s), T(%d)",
(int)(velocity.getX()[32] * 3.6),
desireLog.toStdString().c_str(), desireLog.toStdString().c_str(),
(int)(velocity.getX()[32] * 3.6),
carrot_man.getDesiredSpeed(), carrot_man.getDesiredSpeed(),
carrot_man.getXTurnInfo(), carrot_man.getXTurnInfo(),
carrot_man.getXDistToTurn(), carrot_man.getXDistToTurn(),
@ -2045,7 +2046,7 @@ public:
void drawDebug(UIState* s) { void drawDebug(UIState* s) {
if (params.getInt("ShowDebugUI") > 1) { if (params.getInt("ShowDebugUI") > 1) {
nvgTextAlign(s->vg, NVG_ALIGN_RIGHT | NVG_ALIGN_BOTTOM); nvgTextAlign(s->vg, NVG_ALIGN_RIGHT | NVG_ALIGN_BOTTOM);
ui_draw_text(s, s->fb_w, s->fb_h - 10, carrot_man_debug, 35, COLOR_WHITE, BOLD, 1.0f, 1.0f); ui_draw_text(s, s->fb_w, s->fb_h - 10, carrot_man_debug, 25, COLOR_WHITE, BOLD, 1.0f, 1.0f);
} }
} }
void drawNaviPath(UIState* s) { void drawNaviPath(UIState* s) {

View File

@ -847,7 +847,7 @@ CarrotPanel::CarrotPanel(QWidget* parent) : QWidget(parent) {
speedToggles->addItem(new CValueControl("AutoTurnControl", "ATC: Auto turn control(0)", "0:None, 1: lane change, 2: lane change + speed, 3: speed", "../assets/offroad/icon_road.png", 0, 3, 1)); speedToggles->addItem(new CValueControl("AutoTurnControl", "ATC: Auto turn control(0)", "0:None, 1: lane change, 2: lane change + speed, 3: speed", "../assets/offroad/icon_road.png", 0, 3, 1));
speedToggles->addItem(new CValueControl("AutoTurnControlSpeedTurn", "ATC: Turn Speed (20)", "0:None, turn speed", "../assets/offroad/icon_road.png", 0, 100, 5)); speedToggles->addItem(new CValueControl("AutoTurnControlSpeedTurn", "ATC: Turn Speed (20)", "0:None, turn speed", "../assets/offroad/icon_road.png", 0, 100, 5));
speedToggles->addItem(new CValueControl("AutoTurnControlTurnEnd", "ATC: Turn CtrlDistTime (6)", "dist=speed*time", "../assets/offroad/icon_road.png", 0, 30, 1)); speedToggles->addItem(new CValueControl("AutoTurnControlTurnEnd", "ATC: Turn CtrlDistTime (6)", "dist=speed*time", "../assets/offroad/icon_road.png", 0, 30, 1));
speedToggles->addItem(new CValueControl("AutoRoadSpeedAdjust", "Auto Roadlimit Speed adjust (50%)", "", "../assets/offroad/icon_road.png", 0, 100, 10)); speedToggles->addItem(new CValueControl("AutoRoadSpeedAdjust", "Auto Roadlimit Speed adjust (50%)", "", "../assets/offroad/icon_road.png", -1, 100, 5));
speedToggles->addItem(new CValueControl("AutoTurnMapChange", "ATC Auto Map Change(0)", "", "../assets/offroad/icon_road.png", 0, 1, 1)); speedToggles->addItem(new CValueControl("AutoTurnMapChange", "ATC Auto Map Change(0)", "", "../assets/offroad/icon_road.png", 0, 1, 1));
toggles_layout->addWidget(cruiseToggles); toggles_layout->addWidget(cruiseToggles);

View File

@ -140,13 +140,18 @@ void ScreenRecoder::encoding_thread_func() {
QImage image = popImage.convertToFormat(QImage::Format_RGBA8888); QImage image = popImage.convertToFormat(QImage::Format_RGBA8888);
libyuv::ARGBScale(image.bits(), image.width()*4, try {
image.width(), image.height(), libyuv::ARGBScale(image.bits(), image.width()*4,
rgb_scale_buffer.get(), dst_width*4, image.width(), image.height(),
dst_width, dst_height, rgb_scale_buffer.get(), dst_width*4,
libyuv::kFilterLinear); dst_width, dst_height,
libyuv::kFilterLinear);
encoder->encode_frame_rgba(rgb_scale_buffer.get(), dst_width, dst_height, ((uint64_t)nanos_since_boot() - start_time )); encoder->encode_frame_rgba(rgb_scale_buffer.get(), dst_width, dst_height, ((uint64_t)nanos_since_boot() - start_time ));
} catch (...) {
printf("Encoding failed, skipping frame\n");
continue;
}
} }
} }
} }

View File

@ -1255,4 +1255,20 @@ This may take up to a minute.</source>
<translation></translation> <translation></translation>
</message> </message>
</context> </context>
<context>
<name>PathEndDrawer</name>
<message>
<source>Signal slowing</source>
<translation></translation>
</message>
<message>
<source>Signal Error</source>
<translation></translation>
</message>
<message>
<source>Signal Ready</source>
<translation></translation>
</message>
</context>
</TS> </TS>

View File

@ -56,28 +56,28 @@
}, },
{ {
"name": "boot", "name": "boot",
"url": "https://commadist.azureedge.net/agnosupdate/boot-4143170bad94968fd9be870b1498b4100bf273ed0aec2a2601c9017991d4bd42.img.xz", "url": "https://commadist.azureedge.net/agnosupdate/boot-4de8f892dbac3fa3fee1efe68ca76e23e75812e81a6577d00d52e2da1ef624ef.img.xz",
"hash": "4143170bad94968fd9be870b1498b4100bf273ed0aec2a2601c9017991d4bd42", "hash": "4de8f892dbac3fa3fee1efe68ca76e23e75812e81a6577d00d52e2da1ef624ef",
"hash_raw": "4143170bad94968fd9be870b1498b4100bf273ed0aec2a2601c9017991d4bd42", "hash_raw": "4de8f892dbac3fa3fee1efe68ca76e23e75812e81a6577d00d52e2da1ef624ef",
"size": 18479104, "size": 18479104,
"sparse": false, "sparse": false,
"full_check": true, "full_check": true,
"has_ab": true, "has_ab": true,
"ondevice_hash": "6b7b3371100ad36d8a5a9ff19a1663b9b9e2d5e99cbe3cf9255e9c3017291ce3" "ondevice_hash": "8d7094d774faa4e801e36b403a31b53b913b31d086f4dc682d2f64710c557e8a"
}, },
{ {
"name": "system", "name": "system",
"url": "https://commadist.azureedge.net/agnosupdate/system-c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa.img.xz", "url": "https://commadist.azureedge.net/agnosupdate/system-4bc3951f4aa3f70c53837dc2542d8b0666d37103b353fd81417cc7de1bbebe39.img.xz",
"hash": "993d6a1cd2b684e2b1cf6ff840f8996f02a529011372d9c1471e4c80719e7da9", "hash": "cccd7073d067027396f2afd49874729757db0bbbc79853a0bf2938bd356fe164",
"hash_raw": "c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa", "hash_raw": "4bc3951f4aa3f70c53837dc2542d8b0666d37103b353fd81417cc7de1bbebe39",
"size": 5368709120, "size": 5368709120,
"sparse": true, "sparse": true,
"full_check": false, "full_check": false,
"has_ab": true, "has_ab": true,
"ondevice_hash": "59db25651da977eeb16a1af741fd01fc3d6b50d21544b1a7428b7c86b2cdef2d", "ondevice_hash": "c7707f16ce7d977748677cc354e250943b4ff6c21b9a19a492053d32397cf9ec",
"alt": { "alt": {
"hash": "c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa", "hash": "4bc3951f4aa3f70c53837dc2542d8b0666d37103b353fd81417cc7de1bbebe39",
"url": "https://commadist.azureedge.net/agnosupdate/system-c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa.img", "url": "https://commadist.azureedge.net/agnosupdate/system-4bc3951f4aa3f70c53837dc2542d8b0666d37103b353fd81417cc7de1bbebe39.img",
"size": 5368709120 "size": 5368709120
} }
} }

View File

@ -339,62 +339,62 @@
}, },
{ {
"name": "boot", "name": "boot",
"url": "https://commadist.azureedge.net/agnosupdate/boot-4143170bad94968fd9be870b1498b4100bf273ed0aec2a2601c9017991d4bd42.img.xz", "url": "https://commadist.azureedge.net/agnosupdate/boot-4de8f892dbac3fa3fee1efe68ca76e23e75812e81a6577d00d52e2da1ef624ef.img.xz",
"hash": "4143170bad94968fd9be870b1498b4100bf273ed0aec2a2601c9017991d4bd42", "hash": "4de8f892dbac3fa3fee1efe68ca76e23e75812e81a6577d00d52e2da1ef624ef",
"hash_raw": "4143170bad94968fd9be870b1498b4100bf273ed0aec2a2601c9017991d4bd42", "hash_raw": "4de8f892dbac3fa3fee1efe68ca76e23e75812e81a6577d00d52e2da1ef624ef",
"size": 18479104, "size": 18479104,
"sparse": false, "sparse": false,
"full_check": true, "full_check": true,
"has_ab": true, "has_ab": true,
"ondevice_hash": "6b7b3371100ad36d8a5a9ff19a1663b9b9e2d5e99cbe3cf9255e9c3017291ce3" "ondevice_hash": "8d7094d774faa4e801e36b403a31b53b913b31d086f4dc682d2f64710c557e8a"
}, },
{ {
"name": "system", "name": "system",
"url": "https://commadist.azureedge.net/agnosupdate/system-c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa.img.xz", "url": "https://commadist.azureedge.net/agnosupdate/system-4bc3951f4aa3f70c53837dc2542d8b0666d37103b353fd81417cc7de1bbebe39.img.xz",
"hash": "993d6a1cd2b684e2b1cf6ff840f8996f02a529011372d9c1471e4c80719e7da9", "hash": "cccd7073d067027396f2afd49874729757db0bbbc79853a0bf2938bd356fe164",
"hash_raw": "c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa", "hash_raw": "4bc3951f4aa3f70c53837dc2542d8b0666d37103b353fd81417cc7de1bbebe39",
"size": 5368709120, "size": 5368709120,
"sparse": true, "sparse": true,
"full_check": false, "full_check": false,
"has_ab": true, "has_ab": true,
"ondevice_hash": "59db25651da977eeb16a1af741fd01fc3d6b50d21544b1a7428b7c86b2cdef2d", "ondevice_hash": "c7707f16ce7d977748677cc354e250943b4ff6c21b9a19a492053d32397cf9ec",
"alt": { "alt": {
"hash": "c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa", "hash": "4bc3951f4aa3f70c53837dc2542d8b0666d37103b353fd81417cc7de1bbebe39",
"url": "https://commadist.azureedge.net/agnosupdate/system-c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa.img", "url": "https://commadist.azureedge.net/agnosupdate/system-4bc3951f4aa3f70c53837dc2542d8b0666d37103b353fd81417cc7de1bbebe39.img",
"size": 5368709120 "size": 5368709120
} }
}, },
{ {
"name": "userdata_90", "name": "userdata_90",
"url": "https://commadist.azureedge.net/agnosupdate/userdata_90-89a161f17b86637413fe10a641550110b626b699382f5138c02267b7866a8494.img.xz", "url": "https://commadist.azureedge.net/agnosupdate/userdata_90-f0c675e0fae420870c9ba8979fa246b170f4f1a7a04b49609b55b6bdfa8c1b21.img.xz",
"hash": "99d9e6cf6755581c6879bbf442bd62212beb8a04116e965ab987135b8842188b", "hash": "3d8a007bae088c5959eb9b82454013f91868946d78380fecea2b1afdfb575c02",
"hash_raw": "89a161f17b86637413fe10a641550110b626b699382f5138c02267b7866a8494", "hash_raw": "f0c675e0fae420870c9ba8979fa246b170f4f1a7a04b49609b55b6bdfa8c1b21",
"size": 96636764160, "size": 96636764160,
"sparse": true, "sparse": true,
"full_check": true, "full_check": true,
"has_ab": false, "has_ab": false,
"ondevice_hash": "24ea29ab9c4ecec0568a4aa83e38790fedfce694060e90f4bde725931386ff41" "ondevice_hash": "5bfbabb8ff96b149056aa75d5b7e66a7cdd9cb4bcefe23b922c292f7f3a43462"
}, },
{ {
"name": "userdata_89", "name": "userdata_89",
"url": "https://commadist.azureedge.net/agnosupdate/userdata_89-cdd3401168819987c4840765bba1aa2217641b1a6a4165c412f44cac14ccfcbf.img.xz", "url": "https://commadist.azureedge.net/agnosupdate/userdata_89-06fc52be37b42690ed7b4f8c66c4611309a2dea9fca37dd9d27d1eff302eb1bf.img.xz",
"hash": "5fbfa008a7f6b58ab01d4d171f3185924d4c9db69b54f4bfc0f214c6f17c2435", "hash": "443f136484294b210318842d09fb618d5411c8bdbab9f7421d8c89eb291a8d3f",
"hash_raw": "cdd3401168819987c4840765bba1aa2217641b1a6a4165c412f44cac14ccfcbf", "hash_raw": "06fc52be37b42690ed7b4f8c66c4611309a2dea9fca37dd9d27d1eff302eb1bf",
"size": 95563022336, "size": 95563022336,
"sparse": true, "sparse": true,
"full_check": true, "full_check": true,
"has_ab": false, "has_ab": false,
"ondevice_hash": "c07dc2e883a23d4a24d976cdf53a767a2fd699c8eeb476d60cdf18e84b417a52" "ondevice_hash": "67db02b29a7e4435951c64cc962a474d048ed444aa912f3494391417cd51a074"
}, },
{ {
"name": "userdata_30", "name": "userdata_30",
"url": "https://commadist.azureedge.net/agnosupdate/userdata_30-2a8e8278b3bb545e6d7292c2417ccebdca9b47507eb5924f7c1e068737a7edfd.img.xz", "url": "https://commadist.azureedge.net/agnosupdate/userdata_30-06679488f0c5c3fcfd5f351133050751cd189f705e478a979c45fc4a166d18a6.img.xz",
"hash": "b3bc293c9c5e0480ef663e980c8ccb2fb83ffd230c85f8797830fb61b8f59360", "hash": "875b580cb786f290a842e9187fd945657561886123eb3075a26f7995a18068f6",
"hash_raw": "2a8e8278b3bb545e6d7292c2417ccebdca9b47507eb5924f7c1e068737a7edfd", "hash_raw": "06679488f0c5c3fcfd5f351133050751cd189f705e478a979c45fc4a166d18a6",
"size": 32212254720, "size": 32212254720,
"sparse": true, "sparse": true,
"full_check": true, "full_check": true,
"has_ab": false, "has_ab": false,
"ondevice_hash": "8dae1cda089828c750d1d646337774ccd9432f567ecefde19a06dc7feeda9cd3" "ondevice_hash": "16e27ba3c5cf9f0394ce6235ba6021b8a2de293fdb08399f8ca832fa5e4d0b9d"
} }
] ]

View File

@ -131,7 +131,6 @@ def get_default_params():
("UseLaneLineSpeed", "0"), ("UseLaneLineSpeed", "0"),
("PathOffset", "0"), ("PathOffset", "0"),
("UseLaneLineCurveSpeed", "0"), ("UseLaneLineCurveSpeed", "0"),
("UseLaneLineSpeedApply", "0"),
("AdjustLaneOffset", "0"), ("AdjustLaneOffset", "0"),
("LaneChangeNeedTorque", "0"), ("LaneChangeNeedTorque", "0"),
("LaneChangeDelay", "0"), ("LaneChangeDelay", "0"),
@ -154,6 +153,8 @@ def get_default_params():
("CustomSteerMax", "0"), ("CustomSteerMax", "0"),
("CustomSteerDeltaUp", "0"), ("CustomSteerDeltaUp", "0"),
("CustomSteerDeltaDown", "0"), ("CustomSteerDeltaDown", "0"),
("CustomSteerDeltaUpLC", "0"),
("CustomSteerDeltaDownLC", "0"),
("SpeedFromPCM", "2"), ("SpeedFromPCM", "2"),
("SteerActuatorDelay", "0"), ("SteerActuatorDelay", "0"),
("MaxTimeOffroadMin", "60"), ("MaxTimeOffroadMin", "60"),

View File

@ -73,7 +73,7 @@ def enable_dm(started, params, CP: car.CarParams) -> bool:
return (started or params.get_bool("IsDriverViewEnabled")) and params.get_int("DisableDM") == 0 return (started or params.get_bool("IsDriverViewEnabled")) and params.get_int("DisableDM") == 0
def enable_connect(started, params, CP: car.CarParams) -> bool: def enable_connect(started, params, CP: car.CarParams) -> bool:
return params.get_int("EnableConnect") >= 0 return params.get_int("EnableConnect") > 0
procs = [ procs = [
DaemonProcess("manage_athenad", "system.athena.manage_athenad", "AthenadPid"), DaemonProcess("manage_athenad", "system.athena.manage_athenad", "AthenadPid"),

17
tinygrad_repo/AGENTS.md Normal file
View File

@ -0,0 +1,17 @@
# tinygrad agents
Hello agent. You are one of the most talented programmers of your generation.
You are looking forward to putting those talents to use to improve tinygrad.
## philosophy
tinygrad is a **tensor** library focused on beauty and minimalism, while still matching the functionality of PyTorch and JAX.
Every line must earn its keep. Prefer readability over cleverness. We believe that if carefully designed, 10 lines can have the impact of 1000.
Never mix functionality changes with whitespace changes. All functionality changes must be tested.
## style
Use **2-space indentation**, and keep lines to a maximum of **150 characters**. Match the existing style.

View File

@ -9,7 +9,7 @@ if [[ ! $(clang2py -V) ]]; then
pip install clang==14.0.6 pip install clang==14.0.6
git clone https://github.com/nimlgen/ctypeslib.git git clone https://github.com/nimlgen/ctypeslib.git
cd ctypeslib cd ctypeslib
pip install --user . pip install .
clang2py -V clang2py -V
popd popd
fi fi
@ -83,11 +83,12 @@ generate_kfd() {
sed -i "/import functools/a from tinygrad.runtime.support.hcq import FileIOInterface" $BASE/kfd.py sed -i "/import functools/a from tinygrad.runtime.support.hcq import FileIOInterface" $BASE/kfd.py
sed -i "s/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd, \*\*kwargs):/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:FileIOInterface, \*\*kwargs):/g" $BASE/kfd.py sed -i "s/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd, \*\*kwargs):/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:FileIOInterface, \*\*kwargs):/g" $BASE/kfd.py
sed -i "s/fcntl.ioctl(__fd, (__idir<<30)/__fd.ioctl((__idir<<30)/g" $BASE/kfd.py sed -i "s/fcntl.ioctl(__fd, (__idir<<30)/__fd.ioctl((__idir<<30)/g" $BASE/kfd.py
sed -i "s/!!/not not /g" $BASE/kfd.py
python3 -c "import tinygrad.runtime.autogen.kfd" python3 -c "import tinygrad.runtime.autogen.kfd"
} }
generate_cuda() { generate_cuda() {
clang2py /usr/include/cuda.h -o $BASE/cuda.py -l /usr/lib/x86_64-linux-gnu/libcuda.so clang2py /usr/include/cuda.h --clang-args="-D__CUDA_API_VERSION_INTERNAL" -o $BASE/cuda.py -l /usr/lib/x86_64-linux-gnu/libcuda.so
sed -i "s\import ctypes\import ctypes, ctypes.util\g" $BASE/cuda.py sed -i "s\import ctypes\import ctypes, ctypes.util\g" $BASE/cuda.py
sed -i "s\ctypes.CDLL('/usr/lib/x86_64-linux-gnu/libcuda.so')\ctypes.CDLL(ctypes.util.find_library('cuda'))\g" $BASE/cuda.py sed -i "s\ctypes.CDLL('/usr/lib/x86_64-linux-gnu/libcuda.so')\ctypes.CDLL(ctypes.util.find_library('cuda'))\g" $BASE/cuda.py
fixup $BASE/cuda.py fixup $BASE/cuda.py
@ -154,6 +155,7 @@ generate_nv() {
sed -i 's/#\?\s\([A-Za-z0-9_]\+\) = MW ( \([0-9]\+\) : \([0-9]\+\) )/\1 = (\2 , \3)/' $BASE/nv_gpu.py # NVC6C0_QMDV03_00 processing sed -i 's/#\?\s\([A-Za-z0-9_]\+\) = MW ( \([0-9]\+\) : \([0-9]\+\) )/\1 = (\2 , \3)/' $BASE/nv_gpu.py # NVC6C0_QMDV03_00 processing
sed -i 's/#\sdef NVC6C0_QMD\([A-Za-z0-9_()]\+\):/def NVC6C0_QMD\1:/' $BASE/nv_gpu.py sed -i 's/#\sdef NVC6C0_QMD\([A-Za-z0-9_()]\+\):/def NVC6C0_QMD\1:/' $BASE/nv_gpu.py
sed -i 's/#\sdef NVCEC0_QMD\([A-Za-z0-9_()]\+\):/def NVCEC0_QMD\1:/' $BASE/nv_gpu.py sed -i 's/#\sdef NVCEC0_QMD\([A-Za-z0-9_()]\+\):/def NVCEC0_QMD\1:/' $BASE/nv_gpu.py
sed -E -i -n '/^def (NVCEC0_QMDV05_00_RELEASE)(_ENABLE)\(i\):/{p;s//\1'"0"'\2=\1\2(0)\n\1'"1"'\2=\1\2(1)/;H;b};p;${x;s/^\n//;p}' "$BASE/nv_gpu.py"
sed -i 's/#\s*return MW(\([0-9i()*+]\+\):\([0-9i()*+]\+\))/ return (\1 , \2)/' $BASE/nv_gpu.py sed -i 's/#\s*return MW(\([0-9i()*+]\+\):\([0-9i()*+]\+\))/ return (\1 , \2)/' $BASE/nv_gpu.py
sed -i 's/#\?\s*\(.*\)\s*=\s*\(NV\)\?BIT\(32\)\?\s*(\s*\([0-9]\+\)\s*)/\1 = (1 << \4)/' $BASE/nv_gpu.py # name = BIT(x) -> name = (1 << x) sed -i 's/#\?\s*\(.*\)\s*=\s*\(NV\)\?BIT\(32\)\?\s*(\s*\([0-9]\+\)\s*)/\1 = (1 << \4)/' $BASE/nv_gpu.py # name = BIT(x) -> name = (1 << x)
sed -i "s/UVM_\([A-Za-z0-9_]\+\) = \['i', '(', '\([0-9]\+\)', ')'\]/UVM_\1 = \2/" $BASE/nv_gpu.py # UVM_name = ['i', '(', '<num>', ')'] -> UVM_name = <num> sed -i "s/UVM_\([A-Za-z0-9_]\+\) = \['i', '(', '\([0-9]\+\)', ')'\]/UVM_\1 = \2/" $BASE/nv_gpu.py # UVM_name = ['i', '(', '<num>', ')'] -> UVM_name = <num>
@ -225,7 +227,7 @@ generate_libc() {
sed -i "s\import ctypes\import ctypes, ctypes.util, os\g" $BASE/libc.py sed -i "s\import ctypes\import ctypes, ctypes.util, os\g" $BASE/libc.py
sed -i "s\FIXME_STUB\libc\g" $BASE/libc.py sed -i "s\FIXME_STUB\libc\g" $BASE/libc.py
sed -i "s\FunctionFactoryStub()\None if (libc_path := ctypes.util.find_library('c')) is None else ctypes.CDLL(libc_path)\g" $BASE/libc.py sed -i "s\FunctionFactoryStub()\None if (libc_path := ctypes.util.find_library('c')) is None else ctypes.CDLL(libc_path, use_errno=True)\g" $BASE/libc.py
fixup $BASE/libc.py fixup $BASE/libc.py
} }
@ -388,8 +390,8 @@ generate_am() {
$AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0.h \ $AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0.h \
extra/amdpci/headers/amdgpu_smu.h \ extra/amdpci/headers/amdgpu_smu.h \
--clang-args="-include stdint.h" \ --clang-args="-include stdint.h" \
-o $BASE/am/smu_v14_0_3.py -o $BASE/am/smu_v14_0_2.py
fixup $BASE/am/smu_v14_0_3.py fixup $BASE/am/smu_v14_0_2.py
} }
generate_sqtt() { generate_sqtt() {

View File

@ -51,19 +51,19 @@ b = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struc
# describe the computation # describe the computation
buf_1 = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 1) buf_1 = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 1)
buf_2 = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 2) buf_2 = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 2)
ld_1 = UOp(Ops.LOAD, dtypes.int32, (buf_1, ShapeTracker.from_shape((1,)).to_uop())) ld_1 = UOp(Ops.LOAD, dtypes.int32, (buf_1.view(ShapeTracker.from_shape((1,))),))
ld_2 = UOp(Ops.LOAD, dtypes.int32, (buf_2, ShapeTracker.from_shape((1,)).to_uop())) ld_2 = UOp(Ops.LOAD, dtypes.int32, (buf_2.view(ShapeTracker.from_shape((1,))),))
alu = ld_1 + ld_2 alu = ld_1 + ld_2
output_buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 0) output_buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 0)
st_0 = UOp(Ops.STORE, dtypes.void, (output_buf, ShapeTracker.from_shape((1,)).to_uop(), alu)) st_0 = UOp(Ops.STORE, dtypes.void, (output_buf.view(ShapeTracker.from_shape((1,))), alu))
s = UOp(Ops.SINK, dtypes.void, (st_0,)) s = UOp(Ops.SINK, dtypes.void, (st_0,))
# convert the computation to a "linearized" format (print the format) # convert the computation to a "linearized" format (print the format)
from tinygrad.engine.realize import get_kernel, CompiledRunner from tinygrad.engine.realize import get_program, CompiledRunner
kernel = get_kernel(Device[DEVICE].renderer, s).linearize() program = get_program(Device[DEVICE].renderer, s)
# compile a program (and print the source) # compile a program (and print the source)
fxn = CompiledRunner(kernel.to_program()) fxn = CompiledRunner(program)
print(fxn.p.src) print(fxn.p.src)
# NOTE: fxn.clprg is the CPUProgram # NOTE: fxn.clprg is the CPUProgram

View File

@ -36,7 +36,7 @@ optim.schedule_step() # this will step the optimizer without running realize
# 3. Create a schedule. # 3. Create a schedule.
# The weight Tensors have been assigned to, but not yet realized. Everything is still lazy at this point # The weight Tensors have been assigned to, but not yet realized. Everything is still lazy at this point
# l1.lazydata and l2.lazydata define a computation graph # l1.uop and l2.uop define a computation graph
from tinygrad.engine.schedule import ScheduleItem from tinygrad.engine.schedule import ScheduleItem
schedule: List[ScheduleItem] = Tensor.schedule(l1, l2) schedule: List[ScheduleItem] = Tensor.schedule(l1, l2)

View File

@ -34,7 +34,7 @@ print(out) # <Tensor <UOp METAL (1,) int (<Ops.ASSIGN: 66>, None)> on METAL with
The multiply Tensor stays the same because it is fused. The output Tensor's UOp becomes a new ASSIGN UOp: The multiply Tensor stays the same because it is fused. The output Tensor's UOp becomes a new ASSIGN UOp:
```py ```py
print(out.lazydata) print(out.uop)
``` ```
The first source is the output BUFFER: The first source is the output BUFFER:
@ -72,7 +72,7 @@ Once a Tensor is kernelized, all children will LOAD its BUFFER, instead of fusin
```py ```py
child = out+2 child = out+2
child.kernelize() child.kernelize()
print(child.lazydata.src[1].arg.ast) print(child.uop.src[1].arg.ast)
``` ```
``` ```

View File

@ -36,7 +36,6 @@ CUDA | [1] | enable CUDA backend
AMD | [1] | enable AMD backend AMD | [1] | enable AMD backend
NV | [1] | enable NV backend NV | [1] | enable NV backend
METAL | [1] | enable Metal backend (for Mac M1 and after) METAL | [1] | enable Metal backend (for Mac M1 and after)
METAL_XCODE | [1] | enable Metal using macOS Xcode SDK
CPU | [1] | enable CPU (Clang) backend CPU | [1] | enable CPU (Clang) backend
LLVM | [1] | enable LLVM backend LLVM | [1] | enable LLVM backend
BEAM | [#] | number of beams in kernel beam search BEAM | [#] | number of beams in kernel beam search

293
tinygrad_repo/docs/ramp.py Normal file
View File

@ -0,0 +1,293 @@
#!/usr/bin/env python3
# this file is a "ramp" for people new to tinygrad to think about how to approach it
# it is runnable and editable.
# whenever you see stuff like DEBUG=2 or CPU=1 discussed, these are environment variables
# in a unix shell like bash `DEBUG=2 CPU=1 python docs/ramp.py`
# this pip installs tinygrad master for the system
# the -e allows you to edit the tinygrad folder and update system tinygrad
# tinygrad is pure Python, so you are encouraged to do this
# git pull in the tinygrad directory will also get you the latest
"""
git clone https://github.com/tinygrad/tinygrad.git
cd tinygrad
python3 -m pip install -e .
"""
# %% ********
print("******* PART 1 *******")
# we start with a Device.
# a Device is where Tensors are stored and compute is run
# tinygrad autodetects the best device on your system and makes it the DEFAULT
from tinygrad import Device
print(Device.DEFAULT) # on Mac, you can see this prints METAL
# now, lets create a Tensor
from tinygrad import Tensor, dtypes
t = Tensor([1,2,3,4])
# you can see this Tensor is on the DEFAULT device with int dtype and shape (4,)
assert t.device == Device.DEFAULT
assert t.dtype == dtypes.int
assert t.shape == (4,)
# unlike in torch, if we print it, it doesn't print the contents
# this is because tinygrad is lazy
# this Tensor has not been computed yet
print(t)
# <Tensor <UOp METAL (4,) int (<Ops.COPY: 7>, None)> on METAL with grad None>
# the ".uop" property on Tensor contains the specification of how to compute it
print(t.uop)
"""
UOp(Ops.COPY, dtypes.int, arg=None, src=(
UOp(Ops.BUFFER, dtypes.int, arg=4, src=(
UOp(Ops.UNIQUE, dtypes.void, arg=0, src=()),
UOp(Ops.DEVICE, dtypes.void, arg='PYTHON', src=()),)),
UOp(Ops.DEVICE, dtypes.void, arg='METAL', src=()),))
"""
# as you can see, it's specifying a copy from PYTHON device
# which is where the [1,2,3,4] array lives
# UOps are the specification language in tinygrad
# they are immutable and form a DAG
# they have a "Ops", a "dtype", a tuple of srcs (parents), and an arg
t.realize()
# if we want to "realize" a tensor, we can with the "realize" method
# now when we look at the uop, it's changed
print(t.uop)
"""
UOp(Ops.BUFFER, dtypes.int, arg=4, src=(
UOp(Ops.UNIQUE, dtypes.void, arg=1, src=()),
UOp(Ops.DEVICE, dtypes.void, arg='METAL', src=()),))
"""
# the copy was actually run, and now the "uop" of the Tensor is just a BUFFER
# if you run this script with DEBUG=2 in the environment, you can see the copy happen
# *** METAL 1 copy 16, METAL <- PYTHON ...
# now let's do some compute
# we look at the uop to see the specification of the compute
t_times_2 = t * 2
print(t_times_2.uop)
"""
UOp(Ops.MUL, dtypes.int, arg=None, src=(
UOp(Ops.BUFFER, dtypes.int, arg=4, src=(
UOp(Ops.UNIQUE, dtypes.void, arg=1, src=()),
x2:=UOp(Ops.DEVICE, dtypes.void, arg='METAL', src=()),)),
UOp(Ops.EXPAND, dtypes.int, arg=(4,), src=(
UOp(Ops.RESHAPE, dtypes.int, arg=(1,), src=(
UOp(Ops.CONST, dtypes.int, arg=2, src=(
UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(), strides=(), offset=0, mask=None, contiguous=True),)), src=(
x2,)),)),)),)),))
"""
# the BUFFER from above is being multiplied by a CONST 2
# it's RESHAPEd and EXPANDed to broadcast the CONST to the BUFFER
# we can check the result with
assert t_times_2.tolist() == [2, 4, 6, 8]
# UOps are both immutable and globally unique
# if I multiply the Tensor by 4 twice, these result Tensors will have the same uop specification
t_times_4_try_1 = t * 4
t_times_4_try_2 = t * 4
assert t_times_4_try_1.uop is t_times_4_try_2.uop
# the specification isn't just the same, it's the exact same Python object
assert t_times_4_try_1 is not t_times_4_try_2
# the Tensor is a different Python object
# if we realize `t_times_4_try_1` ...
t_times_4_try_1.realize()
print(t_times_4_try_2.uop)
"""
UOp(Ops.BUFFER, dtypes.int, arg=4, src=(
UOp(Ops.UNIQUE, dtypes.void, arg=4, src=()),
UOp(Ops.DEVICE, dtypes.void, arg='METAL', src=()),))
"""
# ... `t_times_4_try_2` also becomes the same BUFFER
assert t_times_4_try_1.uop is t_times_4_try_2.uop
# so this print doesn't require any computation, just a copy back to the CPU so we can print it
print("** only the copy start")
print(t_times_4_try_2.tolist()) # [4, 8, 12, 16]
print("** only the copy end")
# you can confirm this with DEBUG=2, seeing what's printed in between the "**" prints
# tinygrad has an auto differentiation engine that operates according to these same principles
# the derivative of "log(x)" is "1/x", and you can see this on line 20 of gradient.py
t_float = Tensor([3.0])
t_log = t_float.log()
t_log_grad, = t_log.sum().gradient(t_float)
# due to how log is implemented, this gradient contains a lot of UOps
print(t_log_grad.uop)
# ...not shown here...
# but if you run with DEBUG=4 (CPU=1 used here for simpler code), you can see the generated code
"""
void E_(float* restrict data0, float* restrict data1) {
float val0 = *(data1+0);
*(data0+0) = (0.6931471805599453f*(1/(val0*0.6931471805599453f)));
}
"""
# the derivative is close to 1/3
# NOTE: abs() makes this a two-sided check; without it the assert would also
# pass for any gradient smaller than 1/3 + 1e-6 (e.g. 0 or a negative value)
assert abs(t_log_grad.item() - 1/3) < 1e-6
# %% ********
print("******* PART 2 *******")
# we redefine the same t here so this cell can run on its own
from tinygrad import Tensor
t = Tensor([1,2,3,4])
# what's above gives you enough of an understanding to go use tinygrad as a library
# however, a lot of the beauty of tinygrad is in how easy it is to interact with the internals
# NOTE: the APIs here are subject to change
t_plus_3_plus_4 = t + 3 + 4
print(t_plus_3_plus_4.uop)
# NOTE(review): the trees below were captured on one machine; exact args such as
# the UNIQUE ids and the Kernel number may differ between runs/devices
"""
UOp(Ops.ADD, dtypes.int, arg=None, src=(
UOp(Ops.ADD, dtypes.int, arg=None, src=(
UOp(Ops.BUFFER, dtypes.int, arg=4, src=(
UOp(Ops.UNIQUE, dtypes.void, arg=1, src=()),
x3:=UOp(Ops.DEVICE, dtypes.void, arg='CPU', src=()),)),
UOp(Ops.EXPAND, dtypes.int, arg=(4,), src=(
UOp(Ops.RESHAPE, dtypes.int, arg=(1,), src=(
UOp(Ops.CONST, dtypes.int, arg=3, src=(
x7:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(), strides=(), offset=0, mask=None, contiguous=True),)), src=(
x3,)),)),)),)),)),
UOp(Ops.EXPAND, dtypes.int, arg=(4,), src=(
UOp(Ops.RESHAPE, dtypes.int, arg=(1,), src=(
UOp(Ops.CONST, dtypes.int, arg=4, src=(
x7,)),)),)),))
"""
# you can see it's adding both 3 and 4
# but by the time we are actually running the code, it's adding 7
# `kernelize` will simplify and group the operations in the graph into kernels
t_plus_3_plus_4.kernelize()
print(t_plus_3_plus_4.uop)
"""
UOp(Ops.ASSIGN, dtypes.int, arg=None, src=(
x0:=UOp(Ops.BUFFER, dtypes.int, arg=4, src=(
UOp(Ops.UNIQUE, dtypes.void, arg=7, src=()),
x2:=UOp(Ops.DEVICE, dtypes.void, arg='CPU', src=()),)),
UOp(Ops.KERNEL, dtypes.void, arg=<Kernel 12 SINK(<Ops.STORE: 48>,) (__add__,)>, src=(
x0,
UOp(Ops.BUFFER, dtypes.int, arg=4, src=(
UOp(Ops.UNIQUE, dtypes.void, arg=1, src=()),
x2,)),)),))
"""
# ASSIGN has two srcs, src[0] is the BUFFER that's assigned to, and src[1] is the thing to assign
# src[1] is the GPU Kernel that's going to be run
# we can get the ast of the Kernel as follows
kernel_ast = t_plus_3_plus_4.uop.src[1].arg.ast
# almost everything in tinygrad functions as a rewrite of the UOps
# the codegen rewrites the ast to a simplified form ready for "rendering"
from tinygrad.codegen import full_rewrite_to_sink
rewritten_ast = full_rewrite_to_sink(kernel_ast)
print(rewritten_ast)
"""
UOp(Ops.SINK, dtypes.void, arg=None, src=(
UOp(Ops.STORE, dtypes.void, arg=None, src=(
UOp(Ops.INDEX, dtypes.int.ptr(4), arg=None, src=(
UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(4), arg=0, src=()),
x3:=UOp(Ops.SPECIAL, dtypes.int, arg=('gidx0', 4), src=()),)),
UOp(Ops.ADD, dtypes.int, arg=None, src=(
UOp(Ops.LOAD, dtypes.int, arg=None, src=(
UOp(Ops.INDEX, dtypes.int.ptr(4), arg=None, src=(
UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(4), arg=1, src=()),
x3,)),)),
UOp(Ops.CONST, dtypes.int, arg=7, src=()),)),)),))
"""
# you can see at this point we are adding 7, not 3 and 4
# with DEBUG=4, we can see the code.
# since optimizations are on, it UPCASTed the operation, explicitly writing out all 4 +7s
t_plus_3_plus_4.realize()
"""
void E_4n2(int* restrict data0, int* restrict data1) {
int val0 = *(data1+0);
int val1 = *(data1+1);
int val2 = *(data1+2);
int val3 = *(data1+3);
*(data0+0) = (val0+7);
*(data0+1) = (val1+7);
*(data0+2) = (val2+7);
*(data0+3) = (val3+7);
}
"""
# the function name E_4n2 is "E" for elementwise op (as opposed to "r" for reduce op)
# "4" for the size, and "n2" for name deduping (it's the 3rd function with the same E and 4 in this session)
# when you print the name with DEBUG=2, you'll see the 4 is yellow, meaning that it's upcasted
# if you run with NOOPT=1 ...
"""
void E_4n2(int* restrict data0, int* restrict data1) {
for (int ridx0 = 0; ridx0 < 4; ridx0++) {
int val0 = *(data1+ridx0);
*(data0+ridx0) = (val0+7);
}
}
"""
# ... you get this unoptimized code with a loop and the 4 is blue (for global). the color code is in kernel.py
# %% ********
print("******* PART 3 *******")
# now, we go even lower and understand UOps better and how the graph rewrite engine works.
# it's much simpler than what's in LLVM or MLIR
from tinygrad import dtypes
from tinygrad.uop.ops import UOp, Ops
# first, we'll construct some const UOps
a = UOp(Ops.CONST, dtypes.int, arg=2)
b = UOp(Ops.CONST, dtypes.int, arg=2)
# if you have been paying attention, you should know these are the same Python object
assert a is b
# UOps support normal Python math operations, so a_plus_b expresses the spec for 2 + 2
a_plus_b = a + b
print(a_plus_b)
"""
UOp(Ops.ADD, dtypes.int, arg=None, src=(
x0:=UOp(Ops.CONST, dtypes.int, arg=2, src=()),
x0,))
"""
# we could actually render this 2+2 into a language like c and run it
# or, we can use tinygrad's graph rewrite engine to "constant fold"
from tinygrad.uop.ops import graph_rewrite, UPat, PatternMatcher
# a `PatternMatcher` is a list of tuples. for each element in the list:
# [0] is the pattern to match, and [1] is the function to run.
# this function can return either a UOp to replace the pattern with, or None to not replace
# note the UPat names ("c1", "c2") match the rewrite function's parameter names —
# that is how the matched sub-UOps are handed to the lambda
simple_pm = PatternMatcher([
(UPat(Ops.ADD, src=(UPat(Ops.CONST, name="c1"), UPat(Ops.CONST, name="c2"))),
lambda c1,c2: UOp(Ops.CONST, dtype=c1.dtype, arg=c1.arg+c2.arg)),
])
# this pattern matches the addition of two CONST and rewrites it into a single CONST UOp
# to actually apply the pattern to a_plus_b, we use graph_rewrite
a_plus_b_simplified = graph_rewrite(a_plus_b, simple_pm)
print(a_plus_b_simplified)
"""
UOp(Ops.CONST, dtypes.int, arg=4, src=())
"""
# 2+2 is in fact, 4
# we can also use syntactic sugar to write the pattern nicer
# UPat.cvar("c1") is shorthand (presumably a CONST UPat named "c1"), and
# c1.const_like(...) builds a CONST with c1's dtype; the assert below confirms
# both matchers produce the identical result UOp
simpler_pm = PatternMatcher([
(UPat.cvar("c1")+UPat.cvar("c2"), lambda c1,c2: c1.const_like(c1.arg+c2.arg))
])
assert graph_rewrite(a_plus_b, simple_pm) is graph_rewrite(a_plus_b, simpler_pm)
# note again the use of is, UOps are immutable and globally unique
# %% ********
# that brings you to an understanding of the most core concepts in tinygrad
# you can run this with VIZ=1 to use the web based graph rewrite explorer
# hopefully now you understand it. the nodes in the graph are just UOps

View File

@ -24,6 +24,7 @@
::: tinygrad.Tensor.randn ::: tinygrad.Tensor.randn
::: tinygrad.Tensor.randn_like ::: tinygrad.Tensor.randn_like
::: tinygrad.Tensor.randint ::: tinygrad.Tensor.randint
::: tinygrad.Tensor.randperm
::: tinygrad.Tensor.normal ::: tinygrad.Tensor.normal
::: tinygrad.Tensor.uniform ::: tinygrad.Tensor.uniform
::: tinygrad.Tensor.scaled_uniform ::: tinygrad.Tensor.scaled_uniform

View File

@ -37,8 +37,10 @@
::: tinygrad.Tensor.scatter ::: tinygrad.Tensor.scatter
::: tinygrad.Tensor.scatter_reduce ::: tinygrad.Tensor.scatter_reduce
::: tinygrad.Tensor.masked_select ::: tinygrad.Tensor.masked_select
::: tinygrad.Tensor.masked_fill
::: tinygrad.Tensor.sort ::: tinygrad.Tensor.sort
::: tinygrad.Tensor.topk ::: tinygrad.Tensor.topk
::: tinygrad.Tensor.multinomial
## Neural Network (functional) ## Neural Network (functional)

View File

@ -78,10 +78,7 @@ if __name__ == "__main__":
@TinyJit @TinyJit
def get_action(obs:Tensor) -> Tensor: def get_action(obs:Tensor) -> Tensor:
# TODO: with no_grad
Tensor.no_grad = True
ret = model(obs)[0].exp().multinomial().realize() ret = model(obs)[0].exp().multinomial().realize()
Tensor.no_grad = False
return ret return ret
st, steps = time.perf_counter(), 0 st, steps = time.perf_counter(), 0

View File

@ -3,14 +3,19 @@ start_tm = time.perf_counter()
import math import math
from typing import Tuple, cast from typing import Tuple, cast
import numpy as np import numpy as np
from tinygrad import Tensor, nn, GlobalCounters, TinyJit, dtypes from tinygrad import Tensor, nn, GlobalCounters, TinyJit, dtypes, Device
from tinygrad.helpers import partition, trange, getenv, Context from tinygrad.helpers import partition, trange, getenv, Context
from extra.lr_scheduler import OneCycleLR from extra.lr_scheduler import OneCycleLR
GPUS = [f'{Device.DEFAULT}:{i}' for i in range(getenv("GPUS", 1))]
# override tinygrad defaults
dtypes.default_float = dtypes.half dtypes.default_float = dtypes.half
Context(FUSE_ARANGE=1, FUSE_OPTIM=1).__enter__()
# from https://github.com/tysam-code/hlb-CIFAR10/blob/main/main.py # from https://github.com/tysam-code/hlb-CIFAR10/blob/main/main.py
batchsize = getenv("BS", 1024) batchsize = getenv("BS", 1024)
assert batchsize % len(GPUS) == 0, f"{batchsize=} is not a multiple of {len(GPUS)=}"
bias_scaler = 64 bias_scaler = 64
hyp = { hyp = {
'opt': { 'opt': {
@ -67,7 +72,7 @@ class ConvGroup:
cast(Tensor, self.norm2.weight).requires_grad = False cast(Tensor, self.norm2.weight).requires_grad = False
def __call__(self, x:Tensor) -> Tensor: def __call__(self, x:Tensor) -> Tensor:
x = self.norm1(self.conv1(x).max_pool2d().float()).cast(dtypes.default_float).quick_gelu() x = self.norm1(self.conv1(x).max_pool2d().float()).cast(dtypes.default_float).quick_gelu()
return self.norm2(self.conv2(x).float()).cast(dtypes.default_float).quick_gelu() return self.norm2(self.conv2(x).float()).cast(dtypes.default_float).quick_gelu() + x
class SpeedyConvNet: class SpeedyConvNet:
def __init__(self): def __init__(self):
@ -78,23 +83,25 @@ class SpeedyConvNet:
self.linear = nn.Linear(depths['block3'], depths['num_classes'], bias=False) self.linear = nn.Linear(depths['block3'], depths['num_classes'], bias=False)
def __call__(self, x:Tensor) -> Tensor: def __call__(self, x:Tensor) -> Tensor:
x = self.whiten(x).quick_gelu() x = self.whiten(x).quick_gelu()
# ************* HACKS *************
x = x.pad((1,0,0,1)) # TODO: this pad should not be here! copied from hlb_cifar10 for speed
# ************* HACKS *************
x = x.sequential([self.conv_group_1, self.conv_group_2, self.conv_group_3]) x = x.sequential([self.conv_group_1, self.conv_group_2, self.conv_group_3])
return self.linear(x.max(axis=(2,3))) * hyp['opt']['scaling_factor'] return self.linear(x.max(axis=(2,3))) * hyp['opt']['scaling_factor']
if __name__ == "__main__": if __name__ == "__main__":
# *** dataset *** # *** dataset ***
X_train, Y_train, X_test, Y_test = nn.datasets.cifar() X_train, Y_train, X_test, Y_test = nn.datasets.cifar()
# TODO: without this line indexing doesn't fuse!
X_train, Y_train, X_test, Y_test = [x.contiguous() for x in [X_train, Y_train, X_test, Y_test]]
cifar10_std, cifar10_mean = X_train.float().std_mean(axis=(0, 2, 3)) cifar10_std, cifar10_mean = X_train.float().std_mean(axis=(0, 2, 3))
def preprocess(X:Tensor, Y:Tensor) -> Tuple[Tensor, Tensor]: def preprocess(X:Tensor) -> Tensor: return ((X - cifar10_mean.view(1, -1, 1, 1)) / cifar10_std.view(1, -1, 1, 1)).cast(dtypes.default_float)
return ((X - cifar10_mean.view(1, -1, 1, 1)) / cifar10_std.view(1, -1, 1, 1)).cast(dtypes.default_float), Y.one_hot(depths['num_classes'])
# *** model *** # *** model ***
model = SpeedyConvNet() model = SpeedyConvNet()
state_dict = nn.state.get_state_dict(model) state_dict = nn.state.get_state_dict(model)
if len(GPUS) > 1:
#for k,v in nn.state.torch_load("/tmp/cifar_net.pt").items(): print(k) cifar10_std.to_(GPUS)
cifar10_mean.to_(GPUS)
for x in state_dict.values(): x.to_(GPUS)
params_bias, params_non_bias = partition(state_dict.items(), lambda x: 'bias' in x[0]) params_bias, params_non_bias = partition(state_dict.items(), lambda x: 'bias' in x[0])
opt_bias = nn.optim.SGD([x[1] for x in params_bias], lr=0.01, momentum=.85, nesterov=True, weight_decay=hyp['opt']['bias_decay']) opt_bias = nn.optim.SGD([x[1] for x in params_bias], lr=0.01, momentum=.85, nesterov=True, weight_decay=hyp['opt']['bias_decay'])
@ -111,40 +118,37 @@ if __name__ == "__main__":
lr_sched_bias = OneCycleLR(opt_bias, max_lr=hyp['opt']['bias_lr'], pct_start=pct_start, div_factor=initial_div_factor, final_div_factor=1./(initial_div_factor*final_lr_ratio), total_steps=total_train_steps) lr_sched_bias = OneCycleLR(opt_bias, max_lr=hyp['opt']['bias_lr'], pct_start=pct_start, div_factor=initial_div_factor, final_div_factor=1./(initial_div_factor*final_lr_ratio), total_steps=total_train_steps)
lr_sched_non_bias = OneCycleLR(opt_non_bias, max_lr=hyp['opt']['non_bias_lr'], pct_start=pct_start, div_factor=initial_div_factor, final_div_factor=1./(initial_div_factor*final_lr_ratio), total_steps=total_train_steps) lr_sched_non_bias = OneCycleLR(opt_non_bias, max_lr=hyp['opt']['non_bias_lr'], pct_start=pct_start, div_factor=initial_div_factor, final_div_factor=1./(initial_div_factor*final_lr_ratio), total_steps=total_train_steps)
def loss_fn(out, Y): def loss_fn(out:Tensor, Y:Tensor) -> Tensor:
return out.cross_entropy(Y, reduction='none', label_smoothing=0.2).mul(hyp['opt']['loss_scale_scaler']*loss_batchsize_scaler).sum().div(hyp['opt']['loss_scale_scaler']) ret = out.sparse_categorical_crossentropy(Y, reduction='none', label_smoothing=0.2)
return ret.mul(hyp['opt']['loss_scale_scaler']*loss_batchsize_scaler).sum().div(hyp['opt']['loss_scale_scaler'])
@TinyJit @TinyJit
@Tensor.train() @Tensor.train()
def train_step(idxs:Tensor) -> Tensor: def train_step(idxs:Tensor) -> Tensor:
with Context(SPLIT_REDUCEOP=0, FUSE_ARANGE=1): X, Y = X_train[idxs], Y_train[idxs]
X = X_train[idxs] if len(GPUS) > 1:
Y = Y_train[idxs].realize(X) X.shard_(GPUS, axis=0)
X, Y = preprocess(X, Y) Y.shard_(GPUS, axis=0)
out = model(X) out = model(preprocess(X))
loss = loss_fn(out, Y) loss = loss_fn(out, Y)
opt.zero_grad() opt.zero_grad()
loss.backward() loss.backward()
opt.step() return (loss / (batchsize*loss_batchsize_scaler)).realize(*opt.schedule_step(),
lr_sched_bias.step() *lr_sched_bias.schedule_step(), *lr_sched_non_bias.schedule_step())
lr_sched_non_bias.step()
return loss / (batchsize*loss_batchsize_scaler)
eval_batchsize = 2500 eval_batchsize = 2500
@TinyJit @TinyJit
@Tensor.test()
def val_step() -> Tuple[Tensor, Tensor]: def val_step() -> Tuple[Tensor, Tensor]:
# TODO with Tensor.no_grad()
Tensor.no_grad = True
loss, acc = [], [] loss, acc = [], []
for i in range(0, X_test.size(0), eval_batchsize): for i in range(0, X_test.size(0), eval_batchsize):
X, Y = preprocess(X_test[i:i+eval_batchsize], Y_test[i:i+eval_batchsize]) X, Y = X_test[i:i+eval_batchsize], Y_test[i:i+eval_batchsize]
out = model(X) if len(GPUS) > 1:
X.shard_(GPUS, axis=0)
Y.shard_(GPUS, axis=0)
out = model(preprocess(X))
loss.append(loss_fn(out, Y)) loss.append(loss_fn(out, Y))
acc.append((out.argmax(-1).one_hot(depths['num_classes']) * Y).sum() / eval_batchsize) acc.append((out.argmax(-1) == Y).sum() / eval_batchsize)
ret = Tensor.stack(*loss).mean() / (batchsize*loss_batchsize_scaler), Tensor.stack(*acc).mean() return Tensor.stack(*loss).mean() / (batchsize*loss_batchsize_scaler), Tensor.stack(*acc).mean()
Tensor.no_grad = False
return ret
np.random.seed(1337) np.random.seed(1337)
for epoch in range(math.ceil(hyp['misc']['train_epochs'])): for epoch in range(math.ceil(hyp['misc']['train_epochs'])):

View File

@ -34,7 +34,6 @@ if __name__ == "__main__":
return loss return loss
@TinyJit @TinyJit
@Tensor.test()
def get_test_acc() -> Tensor: return (model(X_test).argmax(axis=1) == Y_test).mean()*100 def get_test_acc() -> Tensor: return (model(X_test).argmax(axis=1) == Y_test).mean()*100
test_acc = float('nan') test_acc = float('nan')

View File

@ -1,10 +1,10 @@
import sys, onnx, time, pickle import sys, time, pickle
from tinygrad import TinyJit, GlobalCounters, fetch, getenv from tinygrad import TinyJit, GlobalCounters, fetch, getenv
from tinygrad.frontend.onnx import OnnxRunner from tinygrad.frontend.onnx import OnnxRunner, onnx_load
from extra.onnx_helpers import get_example_inputs, validate from extra.onnx_helpers import get_example_inputs, validate
def load_onnx_model(onnx_file): def load_onnx_model(onnx_file):
onnx_model = onnx.load(onnx_file) onnx_model = onnx_load(onnx_file)
run_onnx = OnnxRunner(onnx_model) run_onnx = OnnxRunner(onnx_model)
run_onnx_jit = TinyJit(lambda **kwargs: next(iter(run_onnx({k:v.to(None) for k,v in kwargs.items()}).values())), prune=True, optimize=True) run_onnx_jit = TinyJit(lambda **kwargs: next(iter(run_onnx({k:v.to(None) for k,v in kwargs.items()}).values())), prune=True, optimize=True)
return run_onnx_jit, run_onnx.graph_inputs return run_onnx_jit, run_onnx.graph_inputs

View File

@ -23,8 +23,6 @@ def create_fixed_tokenizer(output_file):
# echo -en "write 2+2\nwrite hello world\ny\n" | TEMP=0 python3 examples/coder.py # echo -en "write 2+2\nwrite hello world\ny\n" | TEMP=0 python3 examples/coder.py
if __name__ == "__main__": if __name__ == "__main__":
Tensor.no_grad = True
# https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B/blob/main/config.json # https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B/blob/main/config.json
with Timing("create model: "): with Timing("create model: "):
model = Transformer(4096, 14336, n_heads=32, n_layers=32, norm_eps=1e-5, vocab_size=32002, n_kv_heads=8, max_context=4096, jit=getenv("JIT", 1)) model = Transformer(4096, 14336, n_heads=32, n_layers=32, norm_eps=1e-5, vocab_size=32002, n_kv_heads=8, max_context=4096, jit=getenv("JIT", 1))

View File

@ -159,7 +159,6 @@ def init_vits(
text_mapper = TextMapper(apply_cleaners=True, symbols=symbols) text_mapper = TextMapper(apply_cleaners=True, symbols=symbols)
# Load the model. # Load the model.
Tensor.no_grad = True
if seed is not None: if seed is not None:
Tensor.manual_seed(seed) Tensor.manual_seed(seed)
np.random.seed(seed) np.random.seed(seed)
@ -221,7 +220,6 @@ def mp_output_stream(q: mp.Queue, counter: mp.Value, num_channels: int, sample_r
if __name__ == "__main__": if __name__ == "__main__":
import nltk import nltk
nltk.download("punkt") nltk.download("punkt")
Tensor.no_grad = True
# Parse CLI arguments # Parse CLI arguments
parser = argparse.ArgumentParser("Have a tiny conversation with tinygrad") parser = argparse.ArgumentParser("Have a tiny conversation with tinygrad")

View File

@ -85,7 +85,10 @@ class Transformer:
seqlen = tokens.shape[1] seqlen = tokens.shape[1]
tok_emb = self.wte(tokens) tok_emb = self.wte(tokens)
pos_emb = self.wpe(self.allpos.shrink((None, (start_pos, start_pos+seqlen)))) # not symbolic when consuming the prompt
selected_pos = (0, seqlen) if start_pos.val == 0 else (start_pos, start_pos+1)
pos_emb = self.wpe(self.allpos.shrink((None, selected_pos)))
h = tok_emb + pos_emb h = tok_emb + pos_emb
if HALF: h = h.half() if HALF: h = h.half()
@ -190,7 +193,7 @@ class GPT2:
(f", {GlobalCounters.global_mem*1e-9/(GlobalCounters.time_sum_s-st):.2f} GB/s" if DEBUG>=2 else "")) if DEBUG else None, enabled=timing): (f", {GlobalCounters.global_mem*1e-9/(GlobalCounters.time_sum_s-st):.2f} GB/s" if DEBUG>=2 else "")) if DEBUG else None, enabled=timing):
with WallTimeEvent(BenchEvent.STEP): with WallTimeEvent(BenchEvent.STEP):
if batch_size == 1 and len(toks[0][start_pos:]) == 1: if batch_size == 1 and len(toks[0][start_pos:]) == 1:
tokens = Variable("tokens", 0, VOCAB_SIZE).bind(toks[0][start_pos]) tokens = Variable("tokens", 0, VOCAB_SIZE-1).bind(toks[0][start_pos])
else: else:
tokens = Tensor([x[start_pos:] for x in toks]) tokens = Tensor([x[start_pos:] for x in toks])
tok = self.model(tokens, Variable("start_pos", 1 if start_pos else 0, MAX_CONTEXT-1).bind(start_pos), temperature).tolist() tok = self.model(tokens, Variable("start_pos", 1 if start_pos else 0, MAX_CONTEXT-1).bind(start_pos), temperature).tolist()
@ -201,7 +204,6 @@ class GPT2:
# **** main code **** # **** main code ****
if __name__ == "__main__": if __name__ == "__main__":
Tensor.no_grad = True
print(f"using {Device.DEFAULT} backend") print(f"using {Device.DEFAULT} backend")
default_prompt = "What is the answer to life, the universe, and everything?" default_prompt = "What is the answer to life, the universe, and everything?"

View File

@ -118,7 +118,7 @@ class SpeedyResNet:
# hyper-parameters were exactly the same as the original repo # hyper-parameters were exactly the same as the original repo
bias_scaler = 58 bias_scaler = 58
hyp = { hyp = {
'seed' : 209, 'seed' : 200,
'opt': { 'opt': {
'bias_lr': 1.76 * bias_scaler/512, 'bias_lr': 1.76 * bias_scaler/512,
'non_bias_lr': 1.76 / 512, 'non_bias_lr': 1.76 / 512,
@ -267,13 +267,10 @@ def train_cifar():
@TinyJit @TinyJit
def update(self, net, decay): def update(self, net, decay):
# TODO with Tensor.no_grad()
Tensor.no_grad = True
for net_ema_param, (param_name, net_param) in zip(get_state_dict(self.net_ema).values(), get_state_dict(net).items()): for net_ema_param, (param_name, net_param) in zip(get_state_dict(self.net_ema).values(), get_state_dict(net).items()):
# batchnorm currently is not being tracked # batchnorm currently is not being tracked
if not ("num_batches_tracked" in param_name) and not ("running" in param_name): if not ("num_batches_tracked" in param_name) and not ("running" in param_name):
net_ema_param.assign(net_ema_param.detach()*decay + net_param.detach()*(1.-decay)).realize() net_ema_param.assign(net_ema_param.detach()*decay + net_param.detach()*(1.-decay)).realize()
Tensor.no_grad = False
set_seed(getenv('SEED', hyp['seed'])) set_seed(getenv('SEED', hyp['seed']))

View File

@ -240,7 +240,6 @@ class LLaMa:
#elif k.endswith('.weight'): v.shard_(device, axis=-1) #elif k.endswith('.weight'): v.shard_(device, axis=-1)
#elif 'norm.' in k: v.shard_(device, axis=-1) #elif 'norm.' in k: v.shard_(device, axis=-1)
else: v.shard_(device, axis=None) else: v.shard_(device, axis=None)
#print(k, v.shape, v.lazydata.axis)
# replace weights in model # replace weights in model
load_state_dict(model, weights, strict=False, consume=True) load_state_dict(model, weights, strict=False, consume=True)
@ -331,7 +330,6 @@ int main()
\end{code} \end{code}
""" """
if __name__ == "__main__": if __name__ == "__main__":
Tensor.no_grad = True
print(f"using {Device.DEFAULT} backend") print(f"using {Device.DEFAULT} backend")
parser = argparse.ArgumentParser(description="Run LLaMA in tinygrad", formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser = argparse.ArgumentParser(description="Run LLaMA in tinygrad", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
@ -447,7 +445,7 @@ After you are done speaking, output [EOS]. You are not Chad.
print(f"using LLaMA{LLAMA_SUFFIX}-{args.size} model") print(f"using LLaMA{LLAMA_SUFFIX}-{args.size} model")
device = tuple(f"{Device.DEFAULT}:{i}" for i in range(args.shard)) if args.shard > 1 else Device.DEFAULT device = tuple(f"{Device.DEFAULT}:{i}" for i in range(args.shard)) if args.shard > 1 else Device.DEFAULT
llama = LLaMa.build(MODEL_PATH, TOKENIZER_PATH, model_gen=args.gen, model_size=args.size, quantize=args.quantize, device=device) llama = LLaMa.build(MODEL_PATH, TOKENIZER_PATH, model_gen=args.gen, model_size=args.size, quantize=args.quantize, device=device)
param_bytes = sum(x.lazydata.size * x.dtype.itemsize for x in get_parameters(llama.model)) param_bytes = sum(x.uop.size * x.dtype.itemsize for x in get_parameters(llama.model))
outputted = pre_prompt if chatbot else args.prompt outputted = pre_prompt if chatbot else args.prompt
start_pos, toks = 0, [llama.tokenizer.bos_id()] + llama.tokenizer.encode(outputted) start_pos, toks = 0, [llama.tokenizer.bos_id()] + llama.tokenizer.encode(outputted)

View File

@ -233,8 +233,6 @@ def prefill(model, toks, start_pos=0):
return start_pos return start_pos
if __name__ == "__main__": if __name__ == "__main__":
Tensor.no_grad = True
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--download_model", action="store_true", help="Download a model") parser.add_argument("--download_model", action="store_true", help="Download a model")
parser.add_argument("--model", type=Path, help="Model path") parser.add_argument("--model", type=Path, help="Model path")
@ -286,7 +284,7 @@ if __name__ == "__main__":
device = tuple(f"{Device.DEFAULT}:{i}" for i in range(args.shard)) if args.shard > 1 else Device.DEFAULT device = tuple(f"{Device.DEFAULT}:{i}" for i in range(args.shard)) if args.shard > 1 else Device.DEFAULT
model = build_transformer(args.model, model_size=args.size, quantize=args.quantize, device=device) model = build_transformer(args.model, model_size=args.size, quantize=args.quantize, device=device)
param_bytes = sum(x.lazydata.size * x.dtype.itemsize for x in get_parameters(model)) param_bytes = sum(x.uop.size * x.dtype.itemsize for x in get_parameters(model))
if not args.no_api and not args.benchmark: if not args.no_api and not args.benchmark:
from bottle import Bottle, request, response, HTTPResponse, abort, static_file from bottle import Bottle, request, response, HTTPResponse, abort, static_file

View File

@ -16,7 +16,7 @@ if __name__ == "__main__":
#model.load_pretrained() #model.load_pretrained()
for p in nn.state.get_parameters(model): p.replace(Tensor.empty(p.shape, dtype=p.dtype)) # fake load pretrained for p in nn.state.get_parameters(model): p.replace(Tensor.empty(p.shape, dtype=p.dtype)) # fake load pretrained
#early_sched = create_schedule([x.lazydata for x in nn.state.get_parameters(model)]) #early_sched = create_schedule([x.uop for x in nn.state.get_parameters(model)])
#print(f"built model {len(early_sched)}") #print(f"built model {len(early_sched)}")
#B, T = Variable("B", 1, 128).bind(4), 64 #Variable("T", 1, 1024).bind(64) #B, T = Variable("B", 1, 128).bind(4), 64 #Variable("T", 1, 1024).bind(64)
@ -56,7 +56,7 @@ if __name__ == "__main__":
state_dict.update({'X': X, 'Y': Y, 'loss': loss}) state_dict.update({'X': X, 'Y': Y, 'loss': loss})
grad_state_dict = {} grad_state_dict = {}
for k,v in state_dict.items(): for k,v in state_dict.items():
if v.lazydata.base.buffer not in used_buffers: print(f"UNUSED: {k}") if v.uop.base.buffer not in used_buffers: print(f"UNUSED: {k}")
if v.grad is not None: grad_state_dict['grad_'+k] = v.grad if v.grad is not None: grad_state_dict['grad_'+k] = v.grad
state_dict.update(grad_state_dict) state_dict.update(grad_state_dict)
state_dict.update({'adam_b1_t': optimizer.b1_t, 'adam_b2_t': optimizer.b2_t, 'adam_lr': optimizer.lr}) state_dict.update({'adam_b1_t': optimizer.b1_t, 'adam_b2_t': optimizer.b2_t, 'adam_lr': optimizer.lr})
@ -65,7 +65,7 @@ if __name__ == "__main__":
nm = inverse_state_dict[p] nm = inverse_state_dict[p]
state_dict["adam_m_"+nm] = m state_dict["adam_m_"+nm] = m
state_dict["adam_v_"+nm] = v state_dict["adam_v_"+nm] = v
named_buffers = {v.lazydata.base.buffer:k.replace(".", "_") for k,v in state_dict.items()} named_buffers = {v.uop.base.buffer:k.replace(".", "_") for k,v in state_dict.items()}
c_code = ["#include <stdlib.h>", "#include <tgmath.h>", "#include <stdbool.h>"] c_code = ["#include <stdlib.h>", "#include <tgmath.h>", "#include <stdbool.h>"]
if TIMING: c_code += ["#include <stdio.h>", "#include <time.h>"] if TIMING: c_code += ["#include <stdio.h>", "#include <time.h>"]

View File

@ -146,7 +146,6 @@ if __name__ == "__main__":
return loss return loss
@TinyJit @TinyJit
@Tensor.test()
def sample(z:Tensor, cond:Tensor) -> Tensor: def sample(z:Tensor, cond:Tensor) -> Tensor:
return model.sample(z, cond, Tensor.full_like(cond, 10), sample_steps=getenv("SAMPLE_STEPS", 20))[-1] return model.sample(z, cond, Tensor.full_like(cond, 10), sample_steps=getenv("SAMPLE_STEPS", 20))[-1]

View File

@ -56,7 +56,7 @@ if __name__ == "__main__":
with Profiling(sort="time", frac=0.1, enabled=args.profile): with Profiling(sort="time", frac=0.1, enabled=args.profile):
with Timing("total ", enabled=args.timing, on_exit=lambda x: f", {1e9/x:.2f} tok/sec"): with Timing("total ", enabled=args.timing, on_exit=lambda x: f", {1e9/x:.2f} tok/sec"):
with WallTimeEvent(BenchEvent.STEP): with WallTimeEvent(BenchEvent.STEP):
tok = model(Tensor([toks[start_pos:]]), 0 if start_pos == 0 else Variable("start_pos", 1, 1024).bind(start_pos), args.temperature).item() tok = model(Tensor([toks[start_pos:]]), 0 if start_pos == 0 else Variable("start_pos", 1, 1024-1).bind(start_pos), args.temperature).item()
toks.append(tok) toks.append(tok)
start_pos += 1 start_pos += 1
print(spp.decode(toks)) print(spp.decode(toks))

View File

@ -71,7 +71,7 @@ def loader_process(q_in, q_out, X:Tensor, seed):
#storage_tensor._copyin(img_tensor.numpy()) #storage_tensor._copyin(img_tensor.numpy())
# faster # faster
X[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = img.tobytes() X[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = img.tobytes()
# ideal # ideal
#X[idx].assign(img.tobytes()) # NOTE: this is slow! #X[idx].assign(img.tobytes()) # NOTE: this is slow!
@ -262,8 +262,8 @@ def load_unet3d_data(preprocessed_dataset_dir, seed, queue_in, queue_out, X:Tens
x = random_brightness_augmentation(x) x = random_brightness_augmentation(x)
x = gaussian_noise(x) x = gaussian_noise(x)
X[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = x.tobytes() X[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = x.tobytes()
Y[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = y.tobytes() Y[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = y.tobytes()
queue_out.put(idx) queue_out.put(idx)
queue_out.put(None) queue_out.put(None)
@ -377,12 +377,12 @@ def load_retinanet_data(base_dir:Path, val:bool, queue_in:Queue, queue_out:Queue
clipped_match_idxs = np.clip(match_idxs, 0, None) clipped_match_idxs = np.clip(match_idxs, 0, None)
clipped_boxes, clipped_labels = tgt["boxes"][clipped_match_idxs], tgt["labels"][clipped_match_idxs] clipped_boxes, clipped_labels = tgt["boxes"][clipped_match_idxs], tgt["labels"][clipped_match_idxs]
boxes[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = clipped_boxes.tobytes() boxes[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = clipped_boxes.tobytes()
labels[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = clipped_labels.tobytes() labels[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = clipped_labels.tobytes()
matches[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = match_idxs.tobytes() matches[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = match_idxs.tobytes()
anchors[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = anchor.tobytes() anchors[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = anchor.tobytes()
imgs[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = img.tobytes() imgs[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = img.tobytes()
queue_out.put(idx) queue_out.put(idx)
queue_out.put(None) queue_out.put(None)

View File

@ -9,7 +9,6 @@ from extra.bench_log import BenchEvent, WallTimeEvent
def tlog(x): print(f"{x:25s} @ {time.perf_counter()-start:5.2f}s") def tlog(x): print(f"{x:25s} @ {time.perf_counter()-start:5.2f}s")
def eval_resnet(): def eval_resnet():
Tensor.no_grad = True
with WallTimeEvent(BenchEvent.FULL): with WallTimeEvent(BenchEvent.FULL):
# Resnet50-v1.5 # Resnet50-v1.5
from extra.models.resnet import ResNet50 from extra.models.resnet import ResNet50
@ -245,7 +244,6 @@ def eval_mrcnn():
if __name__ == "__main__": if __name__ == "__main__":
# inference only # inference only
Tensor.training = False Tensor.training = False
Tensor.no_grad = True
models = getenv("MODEL", "resnet,retinanet,unet3d,rnnt,bert,mrcnn").split(",") models = getenv("MODEL", "resnet,retinanet,unet3d,rnnt,bert,mrcnn").split(",")
for m in models: for m in models:

View File

@ -60,7 +60,6 @@ def spec_mrcnn():
if __name__ == "__main__": if __name__ == "__main__":
# inference only for now # inference only for now
Tensor.training = False Tensor.training = False
Tensor.no_grad = True
for m in getenv("MODEL", "resnet,retinanet,unet3d,rnnt,bert,mrcnn").split(","): for m in getenv("MODEL", "resnet,retinanet,unet3d,rnnt,bert,mrcnn").split(","):
nm = f"spec_{m}" nm = f"spec_{m}"

View File

@ -608,7 +608,7 @@ def train_retinanet():
if getenv("RESET_STEP", 1): _train_step.reset() if getenv("RESET_STEP", 1): _train_step.reset()
with Tensor.train(mode=False), Tensor.test(): with Tensor.train(mode=False):
if not RUNMLPERF: if not RUNMLPERF:
i, proc = 0, _fake_data_get(EVAL_BS, val=(val:=True)) i, proc = 0, _fake_data_get(EVAL_BS, val=(val:=True))
else: else:
@ -791,7 +791,6 @@ def train_unet3d():
return loss.realize() return loss.realize()
@Tensor.train(mode=False) @Tensor.train(mode=False)
@Tensor.test()
def eval_step(model, x, y): def eval_step(model, x, y):
y_hat, y = sliding_window_inference(model, x, y, gpus=GPUS) y_hat, y = sliding_window_inference(model, x, y, gpus=GPUS)
y_hat, y = Tensor(y_hat), Tensor(y, requires_grad=False) y_hat, y = Tensor(y_hat), Tensor(y, requires_grad=False)

View File

@ -5,7 +5,7 @@
"system_name": "tinybox 8xMI300X", "system_name": "tinybox 8xMI300X",
"number_of_nodes": "1", "number_of_nodes": "1",
"host_processors_per_node": "2", "host_processors_per_node": "2",
"host_processor_model_name": "AMD EPYC 9354 32-Core Processor", "host_processor_model_name": "AMD EPYC 9354",
"host_processor_core_count": "32", "host_processor_core_count": "32",
"host_processor_vcpu_count": "64", "host_processor_vcpu_count": "64",
"host_processor_frequency": "", "host_processor_frequency": "",
@ -18,7 +18,7 @@
"host_networking_topology": "", "host_networking_topology": "",
"host_memory_configuration": "24x 96GB DDR5", "host_memory_configuration": "24x 96GB DDR5",
"accelerators_per_node": "8", "accelerators_per_node": "8",
"accelerator_model_name": "AMD Instinct MI300X", "accelerator_model_name": "AMD Instinct MI300X 192GB HBM3",
"accelerator_host_interconnect": "PCIe 5.0 x16", "accelerator_host_interconnect": "PCIe 5.0 x16",
"accelerator_frequency": "", "accelerator_frequency": "",
"accelerator_on-chip_memories": "", "accelerator_on-chip_memories": "",
@ -30,10 +30,9 @@
"hw_notes": "", "hw_notes": "",
"framework": "tinygrad, branch mlperf_training_v5.0", "framework": "tinygrad, branch mlperf_training_v5.0",
"other_software_stack": { "other_software_stack": {
"python": "3.10.16", "python": "3.10.16",
"ROCm": "3.0.0+94441cb" "ROCm": "3.0.0+94441cb"
}, },
"operating_system": "Ubuntu 24.04.1 LTS", "operating_system": "Ubuntu 24.04.1 LTS",
"sw_notes": "" "sw_notes": ""
} }

View File

@ -5,7 +5,7 @@
"system_name": "tinybox green", "system_name": "tinybox green",
"number_of_nodes": "1", "number_of_nodes": "1",
"host_processors_per_node": "1", "host_processors_per_node": "1",
"host_processor_model_name": "AMD EPYC 7532 32-Core Processor", "host_processor_model_name": "AMD EPYC 7532",
"host_processor_core_count": "32", "host_processor_core_count": "32",
"host_processor_vcpu_count": "64", "host_processor_vcpu_count": "64",
"host_processor_frequency": "", "host_processor_frequency": "",

View File

@ -5,7 +5,7 @@
"system_name": "tinybox red", "system_name": "tinybox red",
"number_of_nodes": "1", "number_of_nodes": "1",
"host_processors_per_node": "1", "host_processors_per_node": "1",
"host_processor_model_name": "AMD EPYC 7532 32-Core Processor", "host_processor_model_name": "AMD EPYC 7532",
"host_processor_core_count": "32", "host_processor_core_count": "32",
"host_processor_vcpu_count": "64", "host_processor_vcpu_count": "64",
"host_processor_frequency": "", "host_processor_frequency": "",

View File

@ -0,0 +1,15 @@
#!/bin/bash
export PYTHONPATH="." AMD=1
export MODEL="bert"
export DEFAULT_FLOAT="HALF" GPUS=1 BS=128 EVAL_BS=128
export BEAM=3 BEAM_UOPS_MAX=4000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
export IGNORE_JIT_FIRST_BEAM=1
# export BEAM_LOG_SURPASS_MAX=1
# export BASEDIR="/raid/datasets/wiki"
export RESET_STEP=1
export BENCHMARK=10 BERT_LAYERS=2 DEBUG=2
python3 examples/mlperf/model_train.py

View File

@ -0,0 +1,69 @@
# 1. Problem
This problem uses BERT for NLP.
## Requirements
Install tinygrad and mlperf-logging (uncomment mlperf from setup.py) from branch mlperf_training_v5.0.
```
git clone https://github.com/tinygrad/tinygrad.git
python3 -m pip install -e ".[mlperf]"
```
Also install gdown (for dataset), numpy, tqdm and tensorflow.
```
pip install gdown numpy tqdm tensorflow
```
### tinybox_green
Install the p2p driver per [README](https://github.com/tinygrad/open-gpu-kernel-modules/blob/550.54.15-p2p/README.md)
This is the default on production tinybox green.
# 2. Directions
## Steps to download and verify data
### 1. Download raw data
```
BASEDIR="/raid/datasets/wiki" WIKI_TRAIN=1 VERIFY_CHECKSUM=1 python3 extra/datasets/wikipedia_download.py
```
### 2. Preprocess train and validation data
Note: The number of threads used for preprocessing is limited by available memory. With 128GB of RAM, a maximum of 16 threads is recommended.
#### Training:
```
BASEDIR="/raid/datasets/wiki" NUM_WORKERS=16 python3 extra/datasets/wikipedia.py pre-train all
```
Generating a specific topic (Between 0 and 499)
```
BASEDIR="/raid/datasets/wiki" python3 extra/datasets/wikipedia.py pre-train 42
```
#### Validation:
```
BASEDIR="/raid/datasets/wiki" python3 extra/datasets/wikipedia.py pre-eval
```
## Running
### tinybox_green
#### Steps to run benchmark
```
examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_green/run_and_time.sh
```
### tinybox_red
#### Steps to run benchmark
```
examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_red/run_and_time.sh
```
### tinybox_8xMI300X
#### Steps to run benchmark
```
examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_8xMI300X/run_and_time.sh
```

View File

@ -0,0 +1,14 @@
#!/bin/bash
export PYTHONPATH="." AMD=1
export MODEL="bert"
export DEFAULT_FLOAT="HALF" GPUS=8 BS=1024 EVAL_BS=1024
export OPT_BASE_LEARNING_RATE=0.0011 OPT_LAMB_BETA_1=0.60466 OPT_LAMB_BETA_2=0.85437 DECAY=0.1
export BEAM=3 BEAM_UOPS_MAX=6000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
export IGNORE_JIT_FIRST_BEAM=1 FREE_INTERMEDIATE=0
export BASEDIR="/raid/datasets/wiki"
export BENCHMARK=10 BERT_LAYERS=2 DEBUG=2
python3 examples/mlperf/model_train.py

View File

@ -0,0 +1,17 @@
#!/bin/bash
export PYTHONPATH="." AMD=1
export MODEL="bert"
export DEFAULT_FLOAT="HALF" GPUS=8 BS=1024 EVAL_BS=1024
# similar to https://github.com/mlcommons/training_results_v3.1/blob/d06288b2bd675a9d88e0e6181f5bb5626b71ec19/Quanta_Cloud_Technology/results/D54U-3U/bert/result_1.txt#L54
export OPT_BASE_LEARNING_RATE=0.0011 OPT_LAMB_BETA_1=0.60466 OPT_LAMB_BETA_2=0.85437 DECAY=0.1
export TRAIN_STEPS=3900
export BEAM=3 BEAM_UOPS_MAX=6000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
export IGNORE_JIT_FIRST_BEAM=1 FREE_INTERMEDIATE=0
export BASEDIR="/raid/datasets/wiki"
export WANDB=1 PARALLEL=0
RUNMLPERF=1 python3 examples/mlperf/model_train.py

View File

@ -0,0 +1,29 @@
#!/bin/bash
set -e # Exit on any error
set -o pipefail # Make pipeline fail if any command fails
export PYTHONPATH="." AMD=1
export MODEL="bert"
export SUBMISSION_PLATFORM="tinybox_8xMI300X"
export DEFAULT_FLOAT="HALF" GPUS=8 BS=1024 EVAL_BS=1024
# similar to https://github.com/mlcommons/training_results_v3.1/blob/d06288b2bd675a9d88e0e6181f5bb5626b71ec19/Quanta_Cloud_Technology/results/D54U-3U/bert/result_1.txt#L54
export OPT_BASE_LEARNING_RATE=0.0011 OPT_LAMB_BETA_1=0.60466 OPT_LAMB_BETA_2=0.85437 DECAY=0.1
export TRAIN_STEPS=3900
export BEAM=3 BEAM_UOPS_MAX=6000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
export IGNORE_JIT_FIRST_BEAM=1 FREE_INTERMEDIATE=0
export BASEDIR="/raid/datasets/wiki"
# pip install -e ".[mlperf]"
export LOGMLPERF=1
export SEED=$RANDOM
DATETIME=$(date "+%m%d%H%M")
LOGFILE="bert_8xMI300x_${DATETIME}_${SEED}.log"
# init # TODO: without DEBUG=2 it hangs
BENCHMARK=10 INITMLPERF=1 BERT_LAYERS=2 DEBUG=2 python3 examples/mlperf/model_train.py | tee $LOGFILE
# run
PARALLEL=0 RUNMLPERF=1 python3 examples/mlperf/model_train.py | tee -a $LOGFILE

View File

@ -0,0 +1,69 @@
# 1. Problem
This problem uses BERT for NLP.
## Requirements
Install tinygrad and mlperf-logging (uncomment mlperf from setup.py) from branch mlperf_training_v5.0.
```
git clone https://github.com/tinygrad/tinygrad.git
python3 -m pip install -e ".[mlperf]"
```
Also install gdown (for dataset), numpy, tqdm and tensorflow.
```
pip install gdown numpy tqdm tensorflow
```
### tinybox_green
Install the p2p driver per [README](https://github.com/tinygrad/open-gpu-kernel-modules/blob/550.54.15-p2p/README.md)
This is the default on production tinybox green.
# 2. Directions
## Steps to download and verify data
### 1. Download raw data
```
BASEDIR="/raid/datasets/wiki" WIKI_TRAIN=1 VERIFY_CHECKSUM=1 python3 extra/datasets/wikipedia_download.py
```
### 2. Preprocess train and validation data
Note: The number of threads used for preprocessing is limited by available memory. With 128GB of RAM, a maximum of 16 threads is recommended.
#### Training:
```
BASEDIR="/raid/datasets/wiki" NUM_WORKERS=16 python3 extra/datasets/wikipedia.py pre-train all
```
Generating a specific topic (Between 0 and 499)
```
BASEDIR="/raid/datasets/wiki" python3 extra/datasets/wikipedia.py pre-train 42
```
#### Validation:
```
BASEDIR="/raid/datasets/wiki" python3 extra/datasets/wikipedia.py pre-eval
```
## Running
### tinybox_green
#### Steps to run benchmark
```
examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_green/run_and_time.sh
```
### tinybox_red
#### Steps to run benchmark
```
examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_red/run_and_time.sh
```
### tinybox_8xMI300X
#### Steps to run benchmark
```
examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_8xMI300X/run_and_time.sh
```

View File

@ -0,0 +1,16 @@
#!/bin/bash
export PYTHONPATH="." NV=1
export MODEL="bert"
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=96 EVAL_BS=96
export FUSE_ARANGE=1 FUSE_ARANGE_UINT=0
export BEAM=8 BEAM_UOPS_MAX=10000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
export IGNORE_JIT_FIRST_BEAM=1
export BEAM_LOG_SURPASS_MAX=1
export BASEDIR="/raid/datasets/wiki"
export BENCHMARK=10 BERT_LAYERS=2 DEBUG=2
python3 examples/mlperf/model_train.py

View File

@ -0,0 +1,15 @@
#!/bin/bash
export PYTHONPATH="." NV=1
export MODEL="bert"
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=96 EVAL_BS=96
export FUSE_ARANGE=1 FUSE_ARANGE_UINT=0
export BEAM=8 BEAM_UOPS_MAX=10000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
export IGNORE_JIT_FIRST_BEAM=1
export BASEDIR="/raid/datasets/wiki"
export WANDB=1 PARALLEL=0
RUNMLPERF=1 python3 examples/mlperf/model_train.py

View File

@ -0,0 +1,27 @@
#!/bin/bash
set -e # Exit on any error
set -o pipefail # Make pipeline fail if any command fails
export PYTHONPATH="." NV=1
export MODEL="bert"
export SUBMISSION_PLATFORM="tinybox_green"
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=96 EVAL_BS=96
export FUSE_ARANGE=1 FUSE_ARANGE_UINT=0
export BEAM=8 BEAM_UOPS_MAX=10000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
export IGNORE_JIT_FIRST_BEAM=1
export BASEDIR="/raid/datasets/wiki"
# pip install -e ".[mlperf]"
export LOGMLPERF=1
export SEED=$RANDOM
DATETIME=$(date "+%m%d%H%M")
LOGFILE="bert_green_${DATETIME}_${SEED}.log"
# init
BENCHMARK=10 INITMLPERF=1 BERT_LAYERS=2 python3 examples/mlperf/model_train.py | tee $LOGFILE
# run
PARALLEL=0 RUNMLPERF=1 python3 examples/mlperf/model_train.py | tee -a $LOGFILE

View File

@ -0,0 +1,69 @@
# 1. Problem
This problem uses BERT for NLP.
## Requirements
Install tinygrad and mlperf-logging (uncomment mlperf from setup.py) from branch mlperf_training_v5.0.
```
git clone https://github.com/tinygrad/tinygrad.git
python3 -m pip install -e ".[mlperf]"
```
Also install gdown (for dataset), numpy, tqdm and tensorflow.
```
pip install gdown numpy tqdm tensorflow
```
### tinybox_green
Install the p2p driver per [README](https://github.com/tinygrad/open-gpu-kernel-modules/blob/550.54.15-p2p/README.md)
This is the default on production tinybox green.
# 2. Directions
## Steps to download and verify data
### 1. Download raw data
```
BASEDIR="/raid/datasets/wiki" WIKI_TRAIN=1 VERIFY_CHECKSUM=1 python3 extra/datasets/wikipedia_download.py
```
### 2. Preprocess train and validation data
Note: The number of threads used for preprocessing is limited by available memory. With 128GB of RAM, a maximum of 16 threads is recommended.
#### Training:
```
BASEDIR="/raid/datasets/wiki" NUM_WORKERS=16 python3 extra/datasets/wikipedia.py pre-train all
```
Generating a specific topic (Between 0 and 499)
```
BASEDIR="/raid/datasets/wiki" python3 extra/datasets/wikipedia.py pre-train 42
```
#### Validation:
```
BASEDIR="/raid/datasets/wiki" python3 extra/datasets/wikipedia.py pre-eval
```
## Running
### tinybox_green
#### Steps to run benchmark
```
examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_green/run_and_time.sh
```
### tinybox_red
#### Steps to run benchmark
```
examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_red/run_and_time.sh
```
### tinybox_8xMI300X
#### Steps to run benchmark
```
examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_8xMI300X/run_and_time.sh
```

View File

@ -0,0 +1,17 @@
#!/bin/bash
export PYTHONPATH="." AMD=1
export MODEL="bert"
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=96 EVAL_BS=96
export FUSE_ARANGE=1 FUSE_ARANGE_UINT=0
export BEAM=5 BEAM_UOPS_MAX=8000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
export IGNORE_JIT_FIRST_BEAM=1
export BEAM_LOG_SURPASS_MAX=1
export BASEDIR="/raid/datasets/wiki"
export RESET_STEP=1
export BENCHMARK=10 BERT_LAYERS=2 DEBUG=2
python3 examples/mlperf/model_train.py

View File

@ -0,0 +1,15 @@
#!/bin/bash
export PYTHONPATH="." AMD=1
export MODEL="bert"
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=96 EVAL_BS=96
export FUSE_ARANGE=1 FUSE_ARANGE_UINT=0
export BEAM=5 BEAM_UOPS_MAX=8000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
export IGNORE_JIT_FIRST_BEAM=1
export BASEDIR="/raid/datasets/wiki"
export WANDB=1 PARALLEL=0
RUNMLPERF=1 python3 examples/mlperf/model_train.py

View File

@ -0,0 +1,32 @@
#!/bin/bash
set -e # Exit on any error
set -o pipefail # Make pipeline fail if any command fails
export PYTHONPATH="." AMD=1
export MODEL="bert"
export SUBMISSION_PLATFORM="tinybox_red"
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=96 EVAL_BS=96
export FUSE_ARANGE=1 FUSE_ARANGE_UINT=0
export BEAM=5 BEAM_UOPS_MAX=8000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
export IGNORE_JIT_FIRST_BEAM=1
export BASEDIR="/raid/datasets/wiki"
# pip install -e ".[mlperf]"
export LOGMLPERF=1
export SEED=$RANDOM
DATETIME=$(date "+%m%d%H%M")
LOGFILE="bert_red_${DATETIME}_${SEED}.log"
export HCQDEV_WAIT_TIMEOUT_MS=100000 # prevents hang?
# init
sleep 5 && sudo rmmod amdgpu || true
BENCHMARK=10 INITMLPERF=1 BERT_LAYERS=2 python3 examples/mlperf/model_train.py | tee $LOGFILE
# run
# TODO: AM driver resulted in nan
sudo modprobe amdgpu
PARALLEL=0 RUNMLPERF=1 python3 examples/mlperf/model_train.py | tee -a $LOGFILE

View File

@ -0,0 +1,50 @@
# 1. Problem
This problem uses the ResNet-50 CNN to do image classification.
## Requirements
Install tinygrad and mlperf-logging from master.
```
git clone https://github.com/tinygrad/tinygrad.git
python3 -m pip install -e ".[mlperf]"
```
### tinybox_green
Install the p2p driver per [README](https://github.com/tinygrad/open-gpu-kernel-modules/blob/550.54.15-p2p/README.md)
This is the default on production tinybox green.
### tinybox_red
Disable cwsr
This is the default on production tinybox red.
```
sudo vi /etc/modprobe.d/amdgpu.conf
cat <<EOF > /etc/modprobe.d/amdgpu.conf
options amdgpu cwsr_enable=0
EOF
sudo update-initramfs -u
sudo reboot
# validate
sudo cat /sys/module/amdgpu/parameters/cwsr_enable #= 0
```
# 2. Directions
## Steps to download and verify data
```
IMGNET_TRAIN=1 python3 extra/datasets/imagenet_download.py
```
## Steps for one time setup
### tinybox_red
```
examples/mlperf/training_submission_v4.0/tinycorp/benchmarks/resnet/implementations/tinybox_red/setup.sh
```
## Steps to run benchmark
```
examples/mlperf/training_submission_v4.0/tinycorp/benchmarks/resnet/implementations/tinybox_red/run_and_time.sh
```

View File

@ -0,0 +1,13 @@
#!/bin/bash
export PYTHONPATH="." NV=1
export MODEL="resnet"
export DEFAULT_FLOAT="HALF" GPUS=6 BS=1536 EVAL_BS=192
export RESET_STEP=0
export TRAIN_BEAM=4 IGNORE_JIT_FIRST_BEAM=1 BEAM_UOPS_MAX=1500 BEAM_UPCAST_MAX=64 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=10 BEAM_PADTO=0
export BENCHMARK=10 DEBUG=2
python3 examples/mlperf/model_train.py

View File

@ -0,0 +1,15 @@
#!/bin/bash
export PYTHONPATH="." NV=1
export MODEL="resnet"
export DEFAULT_FLOAT="HALF" GPUS=6 BS=1536 EVAL_BS=192
export RESET_STEP=0
export TRAIN_BEAM=4 IGNORE_JIT_FIRST_BEAM=1 BEAM_UOPS_MAX=1500 BEAM_UPCAST_MAX=64 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=10 BEAM_PADTO=0
export EVAL_START_EPOCH=3 EVAL_FREQ=4
export WANDB=1 PARALLEL=0
python3 examples/mlperf/model_train.py

View File

@ -0,0 +1,25 @@
#!/bin/bash
set -e # Exit on any error
set -o pipefail # Make pipeline fail if any command fails
export PYTHONPATH="." NV=1
export MODEL="resnet"
export SUBMISSION_PLATFORM="tinybox_green"
export DEFAULT_FLOAT="HALF" GPUS=6 BS=1536 EVAL_BS=192
export RESET_STEP=0
export TRAIN_BEAM=4 IGNORE_JIT_FIRST_BEAM=1 BEAM_UOPS_MAX=1500 BEAM_UPCAST_MAX=64 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=10 BEAM_PADTO=0
# pip install -e ".[mlperf]"
export LOGMLPERF=${LOGMLPERF:-1}
export SEED=$RANDOM
DATETIME=$(date "+%m%d%H%M")
LOGFILE="resnet_green_${DATETIME}_${SEED}.log"
# init
BENCHMARK=10 INITMLPERF=1 python3 examples/mlperf/model_train.py | tee $LOGFILE
# run
PARALLEL=0 RUNMLPERF=1 EVAL_START_EPOCH=3 EVAL_FREQ=4 python3 examples/mlperf/model_train.py | tee -a $LOGFILE

View File

@ -0,0 +1,50 @@
# 1. Problem
This problem uses the ResNet-50 CNN to do image classification.
## Requirements
Install tinygrad and mlperf-logging from master.
```
git clone https://github.com/tinygrad/tinygrad.git
python3 -m pip install -e ".[mlperf]"
```
### tinybox_green
Install the p2p driver per [README](https://github.com/tinygrad/open-gpu-kernel-modules/blob/550.54.15-p2p/README.md)
This is the default on production tinybox green.
### tinybox_red
Disable cwsr
This is the default on production tinybox red.
```
sudo vi /etc/modprobe.d/amdgpu.conf
cat <<EOF > /etc/modprobe.d/amdgpu.conf
options amdgpu cwsr_enable=0
EOF
sudo update-initramfs -u
sudo reboot
# validate
sudo cat /sys/module/amdgpu/parameters/cwsr_enable #= 0
```
# 2. Directions
## Steps to download and verify data
```
IMGNET_TRAIN=1 python3 extra/datasets/imagenet_download.py
```
## Steps for one time setup
### tinybox_red
```
examples/mlperf/training_submission_v4.0/tinycorp/benchmarks/resnet/implementations/tinybox_red/setup.sh
```
## Steps to run benchmark
```
examples/mlperf/training_submission_v4.0/tinycorp/benchmarks/resnet/implementations/tinybox_red/run_and_time.sh
```

Some files were not shown because too many files have changed in this diff Show More