KerryGold Model, AGNOS12.4, AdjustLaneChange, EnglishSound (#182)
* Vegetarian Filet o Fish model * fix.. atc.. * test cluster_speed_limit * fix.. cluster_speed_limit.. 2 * fix.. clusterspeedlimit3 * cruise speed to roadlimit speed * fix.. * fix.. eng * deltaUp/Down for lanechange * fix.. atc desire... * fix.. * ff * ff * fix.. * fix.. eng * fix engsound * Update desire_helper.py * fix.. connect... * fix curve_min speed * Revert "fix curve_min speed" This reverts commit fcc9c2eb14eb3504abef3e420db93e8882e56f37. * Reapply "fix curve_min speed" This reverts commit 2d2bba476c58a7b4e13bac3c3ad0e4694c95515d. * fix.. auto speed up.. roadlimit * fix.. atc auto lanechange... * Update desire_helper.py * Update cruise.py * debug atc... * fix.. waze alert offset.. * fix.. * test atc.. * fix.. * fix.. atc * atc test.. * fix.. atc * fix.. atc2 * fix.. atc3 * KerryGold Model. latsmooth_sec = 0.0 * lat smooth seconds 0.13 * fix comment * fix.. auto cruise, and speed unit * change lanemode switching. * erase mazda lkas button.
This commit is contained in:
parent
efee1712aa
commit
9c7833faf9
@ -236,7 +236,6 @@ inline static std::unordered_map<std::string, uint32_t> keys = {
|
||||
{"HapticFeedbackWhenSpeedCamera", PERSISTENT},
|
||||
{"UseLaneLineSpeed", PERSISTENT},
|
||||
{"UseLaneLineCurveSpeed", PERSISTENT},
|
||||
{"UseLaneLineSpeedApply", PERSISTENT},
|
||||
{"AdjustLaneOffset", PERSISTENT},
|
||||
{"LaneChangeNeedTorque", PERSISTENT},
|
||||
{"LaneChangeDelay", PERSISTENT },
|
||||
@ -261,6 +260,8 @@ inline static std::unordered_map<std::string, uint32_t> keys = {
|
||||
{"CustomSteerMax", PERSISTENT},
|
||||
{"CustomSteerDeltaUp", PERSISTENT},
|
||||
{"CustomSteerDeltaDown", PERSISTENT},
|
||||
{"CustomSteerDeltaUpLC", PERSISTENT},
|
||||
{"CustomSteerDeltaDownLC", PERSISTENT},
|
||||
{"SpeedFromPCM", PERSISTENT},
|
||||
{"MaxTimeOffroadMin", PERSISTENT},
|
||||
{"DisableDM", PERSISTENT},
|
||||
|
@ -7,7 +7,7 @@ export OPENBLAS_NUM_THREADS=1
|
||||
export VECLIB_MAXIMUM_THREADS=1
|
||||
|
||||
if [ -z "$AGNOS_VERSION" ]; then
|
||||
export AGNOS_VERSION="12.3"
|
||||
export AGNOS_VERSION="12.4"
|
||||
fi
|
||||
|
||||
export STAGING_ROOT="/data/safe_staging"
|
||||
|
@ -246,6 +246,7 @@ struct CarState {
|
||||
speedLimitDistance @65 :Float32;
|
||||
gearStep @66 :Int16;
|
||||
tpms @67 : Tpms;
|
||||
useLaneLineSpeed @68 : Float32;
|
||||
|
||||
struct Tpms {
|
||||
fl @0 :Float32;
|
||||
|
@ -96,6 +96,9 @@ class CarController(CarControllerBase):
|
||||
self.activeCarrot = 0
|
||||
self.camera_scc_params = Params().get_int("HyundaiCameraSCC")
|
||||
|
||||
self.steerDeltaUpOrg = self.steerDeltaUp = self.steerDeltaUpLC = self.params.STEER_DELTA_UP
|
||||
self.steerDeltaDownOrg = self.steerDeltaDown = self.steerDeltaDownLC = self.params.STEER_DELTA_DOWN
|
||||
|
||||
def update(self, CC, CS, now_nanos):
|
||||
|
||||
if self.frame % 50 == 0:
|
||||
@ -104,14 +107,30 @@ class CarController(CarControllerBase):
|
||||
steerMax = params.get_int("CustomSteerMax")
|
||||
steerDeltaUp = params.get_int("CustomSteerDeltaUp")
|
||||
steerDeltaDown = params.get_int("CustomSteerDeltaDown")
|
||||
steerDeltaUpLC = params.get_int("CustomSteerDeltaUpLC")
|
||||
steerDeltaDownLC = params.get_int("CustomSteerDeltaDownLC")
|
||||
if steerMax > 0:
|
||||
self.params.STEER_MAX = steerMax
|
||||
if steerDeltaUp > 0:
|
||||
self.params.STEER_DELTA_UP = steerDeltaUp
|
||||
self.steerDeltaUp = steerDeltaUp
|
||||
#self.params.ANGLE_TORQUE_UP_RATE = steerDeltaUp
|
||||
else:
|
||||
self.steerDeltaUp = self.steerDeltaUpOrg
|
||||
if steerDeltaDown > 0:
|
||||
self.params.STEER_DELTA_DOWN = steerDeltaDown
|
||||
self.steerDeltaDown = steerDeltaDown
|
||||
#self.params.ANGLE_TORQUE_DOWN_RATE = steerDeltaDown
|
||||
else:
|
||||
self.steerDeltaDown = self.steerDeltaDownOrg
|
||||
|
||||
if steerDeltaUpLC > 0:
|
||||
self.steerDeltaUpLC = steerDeltaUpLC
|
||||
else:
|
||||
self.steerDeltaUpLC = self.steerDeltaUp
|
||||
if steerDeltaDownLC > 0:
|
||||
self.steerDeltaDownLC = steerDeltaDownLC
|
||||
else:
|
||||
self.steerDeltaDownLC = self.steerDeltaDown
|
||||
|
||||
self.soft_hold_mode = 1 if params.get_int("AutoCruiseControl") > 1 else 2
|
||||
self.hapticFeedbackWhenSpeedCamera = int(params.get_int("HapticFeedbackWhenSpeedCamera"))
|
||||
|
||||
@ -125,6 +144,13 @@ class CarController(CarControllerBase):
|
||||
|
||||
actuators = CC.actuators
|
||||
hud_control = CC.hudControl
|
||||
|
||||
if hud_control.modelDesire in [3,4]:
|
||||
self.params.STEER_DELTA_UP = self.steerDeltaUpLC
|
||||
self.params.STEER_DELTA_DOWN = self.steerDeltaDownLC
|
||||
else:
|
||||
self.params.STEER_DELTA_UP = self.steerDeltaUp
|
||||
self.params.STEER_DELTA_DOWN = self.steerDeltaDown
|
||||
|
||||
angle_control = self.CP.flags & HyundaiFlags.ANGLE_CONTROL
|
||||
|
||||
|
@ -76,6 +76,7 @@ class CarState(CarStateBase):
|
||||
|
||||
self.cruise_buttons_msg = None
|
||||
self.hda2_lfa_block_msg = None
|
||||
self.cluster_speed_limit_msg = None
|
||||
|
||||
# On some cars, CLU15->CF_Clu_VehicleSpeed can oscillate faster than the dash updates. Sample at 5 Hz
|
||||
self.cluster_speed = 0
|
||||
@ -461,6 +462,9 @@ class CarState(CarStateBase):
|
||||
if "TCS" in cp.vl:
|
||||
self.tcs_info_373 = copy.copy(cp.vl.get("TCS", {}))
|
||||
|
||||
if "CLUSTER_SPEED_LIMIT" in cp.vl:
|
||||
self.cluster_speed_limit_msg = copy.copy(cp.vl.get("CLUSTER_SPEED_LIMIT", {}))
|
||||
|
||||
if "GEAR" in cp.vl:
|
||||
ret.gearStep = cp.vl["GEAR"]["GEAR_STEP"]
|
||||
elif "GEAR_ALT" in cp.vl:
|
||||
@ -596,6 +600,8 @@ class CarState(CarStateBase):
|
||||
# 어떤차는 bus2에 있음, 내차는 bus0에 있는데.... 이건 옆두부와 관련이 없나?
|
||||
#if CP.flags & HyundaiFlags.CANFD_HDA2:
|
||||
# pt_messages.append(("CLUSTER_SPEED_LIMIT", 10))
|
||||
if Params().get_int("CanfdDebug") > 0:
|
||||
pt_messages.append(("CLUSTER_SPEED_LIMIT", 10))
|
||||
|
||||
cam_messages = []
|
||||
if CP.flags & HyundaiFlags.CANFD_HDA2 and not (CP.flags & HyundaiFlags.CAMERA_SCC.value):
|
||||
|
@ -598,8 +598,13 @@ def create_ccnc_messages(CP, packer, CAN, frame, CC, CS, hud_control, disp_angle
|
||||
# ADAS 콤마연결하면.. 0번에서.. (카메라혹은 다른곳에서)
|
||||
# 카메라 콤마연결+롱컨개조 하면.. 2번에서 데이터가 나옴..(카메라혹은 ADAS)
|
||||
if frame % 10 == 0:
|
||||
|
||||
pass
|
||||
if CS.cluster_speed_limit_msg is not None:
|
||||
values = CS.cluster_speed_limit_msg
|
||||
values["SPEED_LIMIT_1"] = 100
|
||||
values["SPEED_LIMIT_2"] = 100
|
||||
values["SPEED_LIMIT_3"] = 105
|
||||
#values["COUNTER"] = (values["COUNTER"] + 1) % 256
|
||||
ret.append(packer.make_can_msg("CLUSTER_SPEED_LIMIT", CAN.CAM, values))
|
||||
|
||||
return ret
|
||||
|
||||
|
@ -141,7 +141,7 @@ class CarState(CarStateBase):
|
||||
ret.buttonEvents = [
|
||||
*create_button_events(self.cruise_buttons, self.prev_cruise_buttons, BUTTONS_DICT),
|
||||
*create_button_events(self.distance_button, self.prev_distance_button, {1: ButtonType.gapAdjustCruise}),
|
||||
*create_button_events(self.lkas_enabled, self.lkas_previously_enabled, {1: ButtonType.lfaButton}),
|
||||
#*create_button_events(self.lkas_enabled, self.lkas_previously_enabled, {1: ButtonType.lfaButton}),
|
||||
]
|
||||
return ret
|
||||
|
||||
|
@ -81,7 +81,7 @@ const CanMsg HYUNDAI_CANFD_HDA2_LONG_TX_MSGS[] = {
|
||||
|
||||
{203, 0, 24}, // CB
|
||||
{373, 2, 24}, // TCS(0x175)
|
||||
//{506, 2, 32}, // CLUSTER_SPEED_LIMIT
|
||||
{506, 2, 32}, // CLUSTER_SPEED_LIMIT
|
||||
{234, 2, 24}, // MDPS
|
||||
{687, 2, 8}, // STEER_TOUCH_2AF
|
||||
};
|
||||
|
BIN
selfdrive/assets/sounds_eng/Wazealert.wav
Normal file
BIN
selfdrive/assets/sounds_eng/Wazealert.wav
Normal file
Binary file not shown.
BIN
selfdrive/assets/sounds_eng/Wazealert2.wav
Normal file
BIN
selfdrive/assets/sounds_eng/Wazealert2.wav
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -219,6 +219,7 @@ class Car:
|
||||
CS.softHoldActive = self.v_cruise_helper._soft_hold_active
|
||||
CS.activateCruise = self.v_cruise_helper._activate_cruise
|
||||
CS.latEnabled = self.v_cruise_helper._lat_enabled
|
||||
CS.useLaneLineSpeed = self.v_cruise_helper.useLaneLineSpeedApply
|
||||
|
||||
self.CI.CS.softHoldActive = CS.softHoldActive
|
||||
return CS, RD
|
||||
|
@ -218,7 +218,7 @@ class VCruiseCarrot:
|
||||
self.AutoSpeedUptoRoadSpeedLimit = 0.0
|
||||
|
||||
self.useLaneLineSpeed = self.params.get_int("UseLaneLineSpeed")
|
||||
self.params.put_int("UseLaneLineSpeedApply", self.useLaneLineSpeed)
|
||||
self.useLaneLineSpeedApply = self.useLaneLineSpeed
|
||||
|
||||
|
||||
@property
|
||||
@ -237,16 +237,19 @@ class VCruiseCarrot:
|
||||
self._log_timer = self._log_timeout
|
||||
|
||||
def update_params(self, is_metric):
|
||||
unit_factor = 1.0 if is_metric else CV.MPH_TO_KPH
|
||||
if self.frame % 10 == 0:
|
||||
self.autoCruiseControl = self.params.get_int("AutoCruiseControl")
|
||||
self.autoGasTokSpeed = self.params.get_int("AutoGasTokSpeed")
|
||||
self.autoGasSyncSpeed = self.params.get_bool("AutoGasSyncSpeed")
|
||||
self.autoCruiseControl = self.params.get_int("AutoCruiseControl") * unit_factor
|
||||
self.autoGasTokSpeed = self.params.get_int("AutoGasTokSpeed") * unit_factor
|
||||
self.autoGasSyncSpeed = self.params.get_bool("AutoGasSyncSpeed") * unit_factor
|
||||
self.autoSpeedUptoRoadSpeedLimit = self.params.get_float("AutoSpeedUptoRoadSpeedLimit") * 0.01
|
||||
self.autoRoadSpeedAdjust = self.params.get_float("AutoRoadSpeedAdjust") * 0.01
|
||||
useLaneLineSpeed = self.params.get_int("UseLaneLineSpeed")
|
||||
|
||||
useLaneLineSpeed = self.params.get_int("UseLaneLineSpeed") * unit_factor
|
||||
if self.useLaneLineSpeed != useLaneLineSpeed:
|
||||
self.params.put_int_nonblocking("UseLaneLineSpeedApply", useLaneLineSpeed)
|
||||
self.useLaneLineSpeedApply = useLaneLineSpeed
|
||||
self.useLaneLineSpeed = useLaneLineSpeed
|
||||
|
||||
self.speed_from_pcm = self.params.get_int("SpeedFromPCM")
|
||||
self._cruise_speed_unit = self.params.get_int("CruiseSpeedUnit")
|
||||
self._paddle_mode = self.params.get_int("PaddleMode")
|
||||
@ -255,7 +258,6 @@ class VCruiseCarrot:
|
||||
self.autoRoadSpeedLimitOffset = self.params.get_int("AutoRoadSpeedLimitOffset")
|
||||
self.autoNaviSpeedSafetyFactor = self.params.get_float("AutoNaviSpeedSafetyFactor") * 0.01
|
||||
self.cruiseOnDist = self.params.get_float("CruiseOnDist") * 0.01
|
||||
unit_factor = 1.0 if is_metric else CV.MPH_TO_KPH
|
||||
cruiseSpeed1 = self.params.get_float("CruiseSpeed1") * unit_factor
|
||||
cruiseSpeed2 = self.params.get_float("CruiseSpeed2") * unit_factor
|
||||
cruiseSpeed3 = self.params.get_float("CruiseSpeed3") * unit_factor
|
||||
@ -552,7 +554,7 @@ class VCruiseCarrot:
|
||||
self.params.put_int_nonblocking("MyDrivingMode", self.params.get_int("MyDrivingMode") % 4 + 1) # 1,2,3,4 (1:eco, 2:safe, 3:normal, 4:high speed)
|
||||
elif button_type == ButtonType.lfaButton:
|
||||
useLaneLineSpeed = max(1, self.useLaneLineSpeed)
|
||||
self.params.put_int_nonblocking("UseLaneLineSpeedApply", useLaneLineSpeed if self.params.get_int("UseLaneLineSpeedApply") == 0 else 0)
|
||||
self.useLaneLineSpeedApply = useLaneLineSpeed if self.useLaneLineSpeedApply == 0 else 0
|
||||
|
||||
elif button_type == ButtonType.cancel:
|
||||
self._cruise_cancel_state = True
|
||||
@ -594,15 +596,20 @@ class VCruiseCarrot:
|
||||
return v_cruise_kph
|
||||
|
||||
def _auto_speed_up(self, v_cruise_kph):
|
||||
if self._pause_auto_speed_up:
|
||||
return v_cruise_kph
|
||||
#if self._pause_auto_speed_up:
|
||||
# return v_cruise_kph
|
||||
|
||||
road_limit_kph = self.nRoadLimitSpeed * self.autoSpeedUptoRoadSpeedLimit
|
||||
if road_limit_kph < 1.0:
|
||||
return v_cruise_kph
|
||||
|
||||
if self.v_lead_kph + 5 > v_cruise_kph and v_cruise_kph < road_limit_kph and self.d_rel < 60:
|
||||
if not self._pause_auto_speed_up and self.v_lead_kph + 5 > v_cruise_kph and v_cruise_kph < road_limit_kph and self.d_rel < 60:
|
||||
v_cruise_kph = min(v_cruise_kph + 5, road_limit_kph)
|
||||
elif self.autoRoadSpeedAdjust < 0 and self.nRoadLimitSpeed != self.nRoadLimitSpeed_last: # 도로제한속도가 바뀌면, 바뀐속도로 속도를 바꿈.
|
||||
if self.autoRoadSpeedLimitOffset < 0:
|
||||
v_cruise_kph = self.nRoadLimitSpeed * self.autoNaviSpeedSafetyFactor
|
||||
else:
|
||||
v_cruise_kph = self.nRoadLimitSpeed + self.autoRoadSpeedLimitOffset
|
||||
elif self.nRoadLimitSpeed < self.nRoadLimitSpeed_last and self.autoRoadSpeedAdjust > 0:
|
||||
new_road_limit_kph = self.nRoadLimitSpeed * self.autoRoadSpeedAdjust + v_cruise_kph * (1 - self.autoRoadSpeedAdjust)
|
||||
self._add_log(f"AutoSpeed change {v_cruise_kph} -> {new_road_limit_kph}")
|
||||
@ -681,11 +688,11 @@ class VCruiseCarrot:
|
||||
elif self.xState == 3:
|
||||
v_cruise_kph = self.v_ego_kph_set
|
||||
self._cruise_control(-1, 3, "Cruise off (traffic sign)")
|
||||
elif self.v_ego_kph_set >= 30 and not CC.enabled:
|
||||
elif self.v_ego_kph_set >= self.autoGasTokSpeed and not CC.enabled:
|
||||
v_cruise_kph = self.v_ego_kph_set
|
||||
self._cruise_control(1, -1 if self.aTarget > 0.0 else 0, "Cruise on (gas pressed)")
|
||||
elif self._brake_pressed_count == -1 and self._soft_hold_active == 0:
|
||||
if self.v_ego_kph_set > 40:
|
||||
if self.v_ego_kph_set > self.autoGasTokSpeed:
|
||||
v_cruise_kph = self.v_ego_kph_set
|
||||
self._cruise_control(1, -1 if self.aTarget > 0.0 else 0, "Cruise on (speed)")
|
||||
elif abs(CS.steeringAngleDeg) < 20:
|
||||
|
@ -1561,7 +1561,9 @@ class CarrotServ:
|
||||
xSpdType = 100
|
||||
|
||||
if xSpdType >= 0:
|
||||
self.xSpdLimit = self.nRoadLimitSpeed
|
||||
offset = 5 if self.is_metric else 5 * CV.MPH_TO_KPH
|
||||
self.xSpdLimit = self.nRoadLimitSpeed + offset
|
||||
|
||||
self.xSpdDist = distance
|
||||
self.xSpdType =xSpdType
|
||||
|
||||
@ -1685,11 +1687,12 @@ class CarrotServ:
|
||||
if self.turnSpeedControlMode in [1,2]:
|
||||
speed_n_sources.append((max(abs(vturn_speed), self.autoCurveSpeedLowerLimit), "vturn"))
|
||||
|
||||
route_speed = max(route_speed * self.mapTurnSpeedFactor, self.autoCurveSpeedLowerLimit)
|
||||
if self.turnSpeedControlMode == 2:
|
||||
if 0 < self.xDistToTurn < 300:
|
||||
speed_n_sources.append((route_speed * self.mapTurnSpeedFactor, "route"))
|
||||
speed_n_sources.append((route_speed, "route"))
|
||||
elif self.turnSpeedControlMode == 3:
|
||||
speed_n_sources.append((route_speed * self.mapTurnSpeedFactor, "route"))
|
||||
speed_n_sources.append((route_speed, "route"))
|
||||
#speed_n_sources.append((self.calculate_current_speed(dist, speed * self.mapTurnSpeedFactor, 0, 1.2), "route"))
|
||||
|
||||
desired_speed, source = min(speed_n_sources, key=lambda x: x[0])
|
||||
|
@ -235,6 +235,32 @@
|
||||
"default": 0,
|
||||
"unit": 1
|
||||
},
|
||||
{
|
||||
"group": "조향튜닝",
|
||||
"name": "CustomSteerDeltaUpLC",
|
||||
"title": "_CustomSteerDeltaUpLC(0)",
|
||||
"descr": "차선변경시 적용, 토크조향",
|
||||
"egroup": "LAT",
|
||||
"etitle": "_CustomSteerDeltaUpLC(0)",
|
||||
"edescr": "for LaneChange, torque steer only",
|
||||
"min": 0,
|
||||
"max": 50,
|
||||
"default": 0,
|
||||
"unit": 1
|
||||
},
|
||||
{
|
||||
"group": "조향튜닝",
|
||||
"name": "CustomSteerDeltaDownLC",
|
||||
"title": "_CustomSteerDeltaDownLC(0)",
|
||||
"descr": "차선변경시 적용, 토크조향",
|
||||
"egroup": "LAT",
|
||||
"etitle": "_CustomSteerDeltaDownLC(0)",
|
||||
"edescr": "for LaneChange, torque steer only",
|
||||
"min": 0,
|
||||
"max": 50,
|
||||
"default": 0,
|
||||
"unit": 1
|
||||
},
|
||||
{
|
||||
"group": "조향튜닝",
|
||||
"name": "SteerActuatorDelay",
|
||||
@ -736,7 +762,7 @@
|
||||
"descr": "1:SOFTHOLD, Auto Cruise, 2:SoftHold오류시",
|
||||
"egroup": "START",
|
||||
"etitle": "Auto Cruise control(HKG only)",
|
||||
"edescr": "Softhold, Auto Cruise ON/OFF control, 2:if softhold error",
|
||||
"edescr": "1:Softhold, Auto Cruise ON/OFF control, 2:if softhold error",
|
||||
"min": 0,
|
||||
"max": 3,
|
||||
"default": 0,
|
||||
@ -915,11 +941,11 @@
|
||||
"group": "감속제어",
|
||||
"name": "AutoRoadSpeedAdjust",
|
||||
"title": "자동도로제한속도감속 (50)%",
|
||||
"descr": "100: 새로운속도, 50: 중간값, 0: 기존속도유지",
|
||||
"descr": "-1: 도로제한속도로 항상, 100: 새로운속도, 50: 중간값, 0: 기존속도유지",
|
||||
"egroup": "CRUISE",
|
||||
"etitle": "AutoRoadLimitSpeedAdjust (50)%",
|
||||
"edescr": "100: new road speed, 50: median, 0: not change",
|
||||
"min": 0,
|
||||
"edescr": "-1: set roadlimitspeed, 100: new road speed, 50: median, 0: not change",
|
||||
"min": -1,
|
||||
"max": 100,
|
||||
"default": 0,
|
||||
"unit": 10
|
||||
|
@ -132,8 +132,7 @@ class Controls:
|
||||
# Steering PID loop and lateral MPC
|
||||
lat_plan = self.sm['lateralPlan']
|
||||
curve_speed_abs = abs(self.sm['carrotMan'].vTurnSpeed)
|
||||
self.lanefull_mode_enabled = (lat_plan.useLaneLines and self.params.get_int("UseLaneLineSpeedApply") > 0 and
|
||||
curve_speed_abs > self.params.get_int("UseLaneLineCurveSpeed"))
|
||||
self.lanefull_mode_enabled = (lat_plan.useLaneLines and curve_speed_abs > self.params.get_int("UseLaneLineCurveSpeed"))
|
||||
lat_smooth_seconds = LAT_SMOOTH_SECONDS #self.params.get_float("SteerSmoothSec") * 0.01
|
||||
steer_actuator_delay = self.params.get_float("SteerActuatorDelay") * 0.01
|
||||
mpc_output_offset = self.params.get_float("LatMpcOutputOffset") * 0.01 # 0.05
|
||||
|
@ -4,6 +4,7 @@ from openpilot.common.realtime import DT_MDL
|
||||
import numpy as np
|
||||
from openpilot.selfdrive.modeld.constants import ModelConstants
|
||||
from openpilot.common.params import Params
|
||||
from collections import deque
|
||||
|
||||
LaneChangeState = log.LaneChangeState
|
||||
LaneChangeDirection = log.LaneChangeDirection
|
||||
@ -106,6 +107,8 @@ class DesireHelper:
|
||||
self.desireLog = ""
|
||||
self.lane_width_left = 0
|
||||
self.lane_width_right = 0
|
||||
self.lane_width_left_diff = 0
|
||||
self.lane_width_right_diff = 0
|
||||
self.distance_to_road_edge_left = 0
|
||||
self.distance_to_road_edge_right = 0
|
||||
self.distance_to_road_edge_left_far = 0
|
||||
@ -122,6 +125,8 @@ class DesireHelper:
|
||||
self.available_right_lane = False
|
||||
self.available_left_edge = False
|
||||
self.available_right_edge = False
|
||||
self.lane_width_left_queue = deque(maxlen=int(1.0/DT_MDL))
|
||||
self.lane_width_right_queue = deque(maxlen=int(1.0/DT_MDL))
|
||||
|
||||
self.lane_available_last = False
|
||||
self.edge_available_last = False
|
||||
@ -141,15 +146,24 @@ class DesireHelper:
|
||||
self.turn_desire_state = False
|
||||
self.desire_disable_count = 0
|
||||
self.blindspot_detected_counter = 0
|
||||
self.auto_lane_change_enable = False
|
||||
|
||||
def check_lane_state(self, modeldata):
|
||||
self.lane_width_left, self.distance_to_road_edge_left, self.distance_to_road_edge_left_far, lane_prob_left = calculate_lane_width(modeldata.laneLines[0], modeldata.laneLineProbs[0],
|
||||
lane_width_left, self.distance_to_road_edge_left, self.distance_to_road_edge_left_far, lane_prob_left = calculate_lane_width(modeldata.laneLines[0], modeldata.laneLineProbs[0],
|
||||
modeldata.laneLines[1], modeldata.roadEdges[0])
|
||||
self.lane_width_right, self.distance_to_road_edge_right, self.distance_to_road_edge_right_far, lane_prob_right = calculate_lane_width(modeldata.laneLines[3], modeldata.laneLineProbs[3],
|
||||
lane_width_right, self.distance_to_road_edge_right, self.distance_to_road_edge_right_far, lane_prob_right = calculate_lane_width(modeldata.laneLines[3], modeldata.laneLineProbs[3],
|
||||
modeldata.laneLines[2], modeldata.roadEdges[1])
|
||||
self.lane_exist_left_count.update(lane_prob_left)
|
||||
self.lane_exist_right_count.update(lane_prob_right)
|
||||
min_lane_width = 2.8
|
||||
|
||||
self.lane_width_left_queue.append(lane_width_left)
|
||||
self.lane_width_right_queue.append(lane_width_right)
|
||||
self.lane_width_left = np.mean(self.lane_width_left_queue)
|
||||
self.lane_width_right = np.mean(self.lane_width_right_queue)
|
||||
self.lane_width_left_diff = self.lane_width_left_queue[-1] - self.lane_width_left_queue[0]
|
||||
self.lane_width_right_diff = self.lane_width_right_queue[-1] - self.lane_width_right_queue[0]
|
||||
|
||||
min_lane_width = 2.0
|
||||
self.lane_width_left_count.update(self.lane_width_left > min_lane_width)
|
||||
self.lane_width_right_count.update(self.lane_width_right > min_lane_width)
|
||||
self.road_edge_left_count.update(self.distance_to_road_edge_left > min_lane_width)
|
||||
@ -183,6 +197,10 @@ class DesireHelper:
|
||||
v_ego = carstate.vEgo
|
||||
below_lane_change_speed = v_ego < LANE_CHANGE_SPEED_MIN
|
||||
|
||||
##### check lane state
|
||||
self.check_lane_state(modeldata)
|
||||
self.check_desire_state(modeldata)
|
||||
|
||||
#### check driver's blinker state
|
||||
driver_blinker_state = carstate.leftBlinker * 1 + carstate.rightBlinker * 2
|
||||
driver_blinker_changed = driver_blinker_state != self.driver_blinker_state
|
||||
@ -216,7 +234,7 @@ class DesireHelper:
|
||||
elif atc_type in ["fork left", "fork right", "atc left", "atc right"]:
|
||||
if self.atc_active != 2:
|
||||
below_lane_change_speed = False
|
||||
atc_blinker_state = BLINKER_LEFT if atc_type in ["fork left", "atc left"] else BLINKER_RIGHT
|
||||
atc_blinker_state = BLINKER_LEFT if atc_type in ["fork left", "atc left"] else BLINKER_RIGHT
|
||||
self.atc_active = 1
|
||||
else:
|
||||
self.atc_active = 0
|
||||
@ -240,10 +258,6 @@ class DesireHelper:
|
||||
desire_enabled = driver_desire_enabled or atc_desire_enabled
|
||||
blinker_state = driver_blinker_state if driver_desire_enabled else atc_blinker_state
|
||||
|
||||
##### check lane state
|
||||
self.check_lane_state(modeldata)
|
||||
self.check_desire_state(modeldata)
|
||||
|
||||
if desire_enabled:
|
||||
lane_available = self.available_left_lane if blinker_state == BLINKER_LEFT else self.available_right_lane
|
||||
edge_available = self.available_left_edge if blinker_state == BLINKER_LEFT else self.available_right_edge
|
||||
@ -260,16 +274,27 @@ class DesireHelper:
|
||||
lane_appeared = False
|
||||
self.object_detected_count = 0
|
||||
|
||||
lane_availabled = not self.lane_available_last and lane_available
|
||||
#lane_available_trigger = not self.lane_available_last and lane_available
|
||||
lane_change_available = lane_available or edge_available
|
||||
lane_available_trigger = False
|
||||
lane_width_diff = self.lane_width_left_diff if atc_blinker_state == BLINKER_LEFT else self.lane_width_right_diff
|
||||
distance_to_road_edge = self.distance_to_road_edge_left if atc_blinker_state == BLINKER_LEFT else self.distance_to_road_edge_right
|
||||
lane_width_side = self.lane_width_left if atc_blinker_state == BLINKER_LEFT else self.lane_width_right
|
||||
if lane_width_diff > 0.5 and (lane_width_side < distance_to_road_edge):
|
||||
lane_available_trigger = True
|
||||
edge_availabled = not self.edge_available_last and edge_available
|
||||
side_object_detected = self.object_detected_count > -0.3 / DT_MDL
|
||||
lane_exist_counter = self.lane_exist_left_count.counter if blinker_state == BLINKER_LEFT else self.lane_exist_right_count.counter
|
||||
|
||||
|
||||
if self.carrot_lane_change_count > 0:
|
||||
auto_lane_change_blocked = False
|
||||
auto_lane_change_available = lane_available
|
||||
auto_lane_change_trigger = lane_change_available
|
||||
else:
|
||||
auto_lane_change_blocked = ((atc_blinker_state == BLINKER_LEFT) and (driver_blinker_state != BLINKER_LEFT))
|
||||
auto_lane_change_available = not auto_lane_change_blocked and (lane_availabled or edge_availabled or lane_appeared) and not side_object_detected
|
||||
#auto_lane_change_trigger = not auto_lane_change_blocked and edge_available and (lane_available_trigger or edge_availabled or lane_appeared) and not side_object_detected
|
||||
auto_lane_change_trigger = self.auto_lane_change_enable and not auto_lane_change_blocked and edge_available and (lane_available_trigger or lane_appeared) and not side_object_detected
|
||||
self.desireLog = f"L:{self.auto_lane_change_enable},{auto_lane_change_blocked},E:{lane_available},{edge_available},A:{lane_available_trigger},{lane_appeared},{lane_width_diff:.1f},{lane_width_side:.1f},{distance_to_road_edge:.1f}={auto_lane_change_trigger}"
|
||||
|
||||
if not lateral_active or self.lane_change_timer > LANE_CHANGE_TIME_MAX:
|
||||
#print("Desire canceled")
|
||||
@ -296,6 +321,11 @@ class DesireHelper:
|
||||
self.lane_change_ll_prob = 1.0
|
||||
self.lane_change_delay = self.laneChangeDelay
|
||||
|
||||
# 맨끝차선이 아니면(측면에 차선이 있으면), ATC 자동작동 안함.
|
||||
#self.auto_lane_change_enable = False if lane_exist_counter > 0 else True
|
||||
self.auto_lane_change_enable = False if lane_exist_counter > 0 or lane_change_available else True
|
||||
|
||||
|
||||
# LaneChangeState.preLaneChange
|
||||
elif self.lane_change_state == LaneChangeState.preLaneChange:
|
||||
# Set lane change direction
|
||||
@ -310,6 +340,9 @@ class DesireHelper:
|
||||
torque_applied = carstate.steeringPressed and torque_cond
|
||||
blindspot_detected = blindspot_cond
|
||||
|
||||
if not self.auto_lane_change_enable and not lane_available: #lane_exist_counter > int(0.2 / DT_MDL) and not lane_change_available:
|
||||
self.auto_lane_change_enable = True
|
||||
|
||||
if blindspot_detected and not ignore_bsd:
|
||||
self.blindspot_detected_counter = int(1.5 / DT_MDL)
|
||||
# BSD검출시.. 아래 두줄로 자동차선변경 해제함.. 위험해서 자동차선변경기능은 안하는걸로...
|
||||
@ -319,7 +352,7 @@ class DesireHelper:
|
||||
self.lane_change_state = LaneChangeState.off
|
||||
self.lane_change_direction = LaneChangeDirection.none
|
||||
else:
|
||||
if lane_available and self.lane_change_delay == 0:
|
||||
if lane_change_available and self.lane_change_delay == 0:
|
||||
if self.blindspot_detected_counter > 0 and not ignore_bsd: # BSD검출시
|
||||
if torque_applied and not block_lanechange_bsd:
|
||||
self.lane_change_state = LaneChangeState.laneChangeStarting
|
||||
@ -330,7 +363,7 @@ class DesireHelper:
|
||||
self.lane_change_state = LaneChangeState.laneChangeStarting
|
||||
# ATC작동인경우 차선이 나타나거나 차선이 생기면 차선변경 시작
|
||||
# lane_appeared: 차선이 생기는건 안함.. 위험.
|
||||
elif torque_applied or auto_lane_change_available:
|
||||
elif torque_applied or auto_lane_change_trigger:
|
||||
self.lane_change_state = LaneChangeState.laneChangeStarting
|
||||
|
||||
# LaneChangeState.laneChangeStarting
|
||||
@ -379,7 +412,7 @@ class DesireHelper:
|
||||
|
||||
#print(f"desire = {self.desire}")
|
||||
#self.desireLog = f"desire = {self.desire}"
|
||||
self.desireLog = f"rlane={self.distance_to_road_edge_right:.1f},{self.distance_to_road_edge_right_far:.1f}"
|
||||
#self.desireLog = f"rlane={self.distance_to_road_edge_right:.1f},{self.distance_to_road_edge_right_far:.1f}"
|
||||
|
||||
# Send keep pulse once per second during LaneChangeStart.preLaneChange
|
||||
if self.lane_change_state in (LaneChangeState.off, LaneChangeState.laneChangeStarting):
|
||||
|
@ -122,3 +122,13 @@ def get_accel_from_plan(speeds, accels, t_idxs, action_t=DT_MDL, vEgoStopping=0.
|
||||
should_stop = (v_target < vEgoStopping and
|
||||
v_target_1sec < vEgoStopping)
|
||||
return a_target, should_stop
|
||||
|
||||
def curv_from_psis(psi_target, psi_rate, vego, action_t):
  """Combine a target heading and the current yaw rate into one curvature value.

  The speed is clamped from below to MIN_SPEED before it is used as a divisor.
  The result is ``2 * psi_target / (v * action_t) - psi_rate / v`` where ``v``
  is the clamped speed.

  NOTE(review): presumably psi_target is in rad, psi_rate in rad/s, vego in m/s
  and action_t in s -- confirm against the callers. action_t must be non-zero.
  """
  # Lower-bound the speed so both divisions below stay well defined near standstill.
  speed = np.clip(vego, MIN_SPEED, np.inf)
  heading_curvature = psi_target / (speed * action_t)
  yaw_rate_curvature = psi_rate / speed
  return 2*heading_curvature - yaw_rate_curvature
|
||||
|
||||
def get_curvature_from_plan(yaws, yaw_rates, t_idxs, vego, action_t):
  """Derive a curvature value from a planned yaw trajectory.

  The target heading is the value of ``yaws`` linearly interpolated at
  ``action_t`` over the sample times ``t_idxs``; the yaw rate is the first
  entry of ``yaw_rates``. Both are passed to ``curv_from_psis`` together with
  ``vego`` and ``action_t``, and its result is returned unchanged.
  """
  return curv_from_psis(np.interp(action_t, t_idxs, yaws), yaw_rates[0], vego, action_t)
|
||||
|
@ -58,7 +58,7 @@ class LateralPlanner:
|
||||
self.lanelines_active = False
|
||||
self.lanelines_active_tmp = False
|
||||
|
||||
self.useLaneLineSpeedApply = self.params.get_int("UseLaneLineSpeedApply")
|
||||
self.useLaneLineSpeedApply = self.params.get_int("UseLaneLineSpeed")
|
||||
self.pathOffset = float(self.params.get_int("PathOffset")) * 0.01
|
||||
self.useLaneLineMode = False
|
||||
self.plan_a = np.zeros((TRAJECTORY_SIZE, ))
|
||||
@ -85,7 +85,7 @@ class LateralPlanner:
|
||||
self.readParams -= 1
|
||||
if self.readParams <= 0:
|
||||
self.readParams = 100
|
||||
self.useLaneLineSpeedApply = self.params.get_int("UseLaneLineSpeedApply")
|
||||
self.useLaneLineSpeedApply = sm['carState'].useLaneLineSpeed
|
||||
self.pathOffset = float(self.params.get_int("PathOffset")) * 0.01
|
||||
self.lateralPathCost = self.params.get_float("LatMpcPathCost") * 0.01
|
||||
self.lateralMotionCost = self.params.get_float("LatMpcMotionCost") * 0.01
|
||||
|
Binary file not shown.
@ -4,6 +4,11 @@
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
|
||||
#include <QJsonDocument>
|
||||
#include <QJsonObject>
|
||||
#include <QJsonValue>
|
||||
#include <QJsonArray>
|
||||
|
||||
//#define __TEST
|
||||
//#define __UI_TEST
|
||||
|
||||
@ -494,7 +499,8 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class ModelDrawer {
|
||||
class ModelDrawer : public QObject{
|
||||
Q_OBJECT
|
||||
protected:
|
||||
template <class T>
|
||||
float interp(float x, std::initializer_list<T> x_list, std::initializer_list<T> y_list, bool extrapolate)
|
||||
@ -696,11 +702,11 @@ public:
|
||||
else if (longActive) {
|
||||
if (xState == 3 || xState == 5) { //XState.e2eStop, XState.e2eStopped
|
||||
if (v_ego < 1.0) {
|
||||
sprintf(str, "%s", (trafficState >= 1000) ? "신호오류" : "신호대기");
|
||||
sprintf(str, "%s", (trafficState >= 1000) ? tr("Signal Error").toStdString().c_str(): tr("Signal Ready").toStdString().c_str());
|
||||
ui_draw_text(s, x, disp_y, str, disp_size, COLOR_WHITE, BOLD);
|
||||
}
|
||||
else {
|
||||
ui_draw_text(s, x, disp_y, "신호감속중", disp_size, COLOR_WHITE, BOLD);
|
||||
ui_draw_text(s, x, disp_y, tr("Signal slowing").toStdString().c_str(), disp_size, COLOR_WHITE, BOLD);
|
||||
}
|
||||
#if 0
|
||||
else if (getStopDist() > 0.5) {
|
||||
@ -1596,6 +1602,8 @@ protected:
|
||||
int use_lane_line_speed_apply = 0;
|
||||
public:
|
||||
void draw(const UIState* s, float& pathDrawSeq) {
|
||||
SubMaster& sm = *(s->sm);
|
||||
auto car_state = sm["carState"].getCarState();
|
||||
params_count = (params_count + 1) % 20;
|
||||
if (params_count == 0) {
|
||||
show_path_mode_normal = params.getInt("ShowPathMode");
|
||||
@ -1606,7 +1614,7 @@ public:
|
||||
show_path_color_cruise_off = params.getInt("ShowPathColorCruiseOff");
|
||||
}
|
||||
if (!make_data(s)) return;
|
||||
int temp = params.getInt("UseLaneLineSpeedApply");
|
||||
int temp = (int)car_state.getUseLaneLineSpeed();
|
||||
if (temp != use_lane_line_speed_apply) {
|
||||
ui_draw_text_a(s, 0, 0, (temp>0)?"LaneMode":"Laneless", 30, (temp>0)?COLOR_GREEN:COLOR_YELLOW, BOLD);
|
||||
use_lane_line_speed_apply = temp;
|
||||
@ -1621,8 +1629,6 @@ public:
|
||||
COLOR_WHITE_ALPHA(alpha), COLOR_BLACK_ALPHA(alpha),
|
||||
};
|
||||
|
||||
SubMaster& sm = *(s->sm);
|
||||
auto car_state = sm["carState"].getCarState();
|
||||
bool brake_valid = car_state.getBrakeLights();
|
||||
|
||||
if (show_path_mode == 0) {
|
||||
@ -1838,11 +1844,6 @@ private:
|
||||
};
|
||||
|
||||
|
||||
#include <QJsonDocument>
|
||||
#include <QJsonObject>
|
||||
#include <QJsonValue>
|
||||
#include <QJsonArray>
|
||||
|
||||
typedef struct {
|
||||
float x, y, d, v, y_rel, v_lat, radar;
|
||||
} lead_vertex_data;
|
||||
@ -1947,9 +1948,9 @@ public:
|
||||
}
|
||||
auto meta = sm["modelV2"].getModelV2().getMeta();
|
||||
QString desireLog = QString::fromStdString(meta.getDesireLog());
|
||||
sprintf(carrot_man_debug, "model_kph= %d, %s, %dkm/h TBT(%d): %dm, CAM(%d): %dkm/h, %dm, ATC(%s), T(%d)",
|
||||
(int)(velocity.getX()[32] * 3.6),
|
||||
sprintf(carrot_man_debug, "%s, m_kph= %d, %dkm/h TBT(%d): %dm, CAM(%d): %dkm/h, %dm, ATC(%s), T(%d)",
|
||||
desireLog.toStdString().c_str(),
|
||||
(int)(velocity.getX()[32] * 3.6),
|
||||
carrot_man.getDesiredSpeed(),
|
||||
carrot_man.getXTurnInfo(),
|
||||
carrot_man.getXDistToTurn(),
|
||||
@ -2045,7 +2046,7 @@ public:
|
||||
void drawDebug(UIState* s) {
|
||||
if (params.getInt("ShowDebugUI") > 1) {
|
||||
nvgTextAlign(s->vg, NVG_ALIGN_RIGHT | NVG_ALIGN_BOTTOM);
|
||||
ui_draw_text(s, s->fb_w, s->fb_h - 10, carrot_man_debug, 35, COLOR_WHITE, BOLD, 1.0f, 1.0f);
|
||||
ui_draw_text(s, s->fb_w, s->fb_h - 10, carrot_man_debug, 25, COLOR_WHITE, BOLD, 1.0f, 1.0f);
|
||||
}
|
||||
}
|
||||
void drawNaviPath(UIState* s) {
|
||||
|
@ -847,7 +847,7 @@ CarrotPanel::CarrotPanel(QWidget* parent) : QWidget(parent) {
|
||||
speedToggles->addItem(new CValueControl("AutoTurnControl", "ATC: Auto turn control(0)", "0:None, 1: lane change, 2: lane change + speed, 3: speed", "../assets/offroad/icon_road.png", 0, 3, 1));
|
||||
speedToggles->addItem(new CValueControl("AutoTurnControlSpeedTurn", "ATC: Turn Speed (20)", "0:None, turn speed", "../assets/offroad/icon_road.png", 0, 100, 5));
|
||||
speedToggles->addItem(new CValueControl("AutoTurnControlTurnEnd", "ATC: Turn CtrlDistTime (6)", "dist=speed*time", "../assets/offroad/icon_road.png", 0, 30, 1));
|
||||
speedToggles->addItem(new CValueControl("AutoRoadSpeedAdjust", "Auto Roadlimit Speed adjust (50%)", "", "../assets/offroad/icon_road.png", 0, 100, 10));
|
||||
speedToggles->addItem(new CValueControl("AutoRoadSpeedAdjust", "Auto Roadlimit Speed adjust (50%)", "", "../assets/offroad/icon_road.png", -1, 100, 5));
|
||||
speedToggles->addItem(new CValueControl("AutoTurnMapChange", "ATC Auto Map Change(0)", "", "../assets/offroad/icon_road.png", 0, 1, 1));
|
||||
|
||||
toggles_layout->addWidget(cruiseToggles);
|
||||
|
@ -140,13 +140,18 @@ void ScreenRecoder::encoding_thread_func() {
|
||||
|
||||
QImage image = popImage.convertToFormat(QImage::Format_RGBA8888);
|
||||
|
||||
libyuv::ARGBScale(image.bits(), image.width()*4,
|
||||
image.width(), image.height(),
|
||||
rgb_scale_buffer.get(), dst_width*4,
|
||||
dst_width, dst_height,
|
||||
libyuv::kFilterLinear);
|
||||
|
||||
encoder->encode_frame_rgba(rgb_scale_buffer.get(), dst_width, dst_height, ((uint64_t)nanos_since_boot() - start_time ));
|
||||
try {
|
||||
libyuv::ARGBScale(image.bits(), image.width()*4,
|
||||
image.width(), image.height(),
|
||||
rgb_scale_buffer.get(), dst_width*4,
|
||||
dst_width, dst_height,
|
||||
libyuv::kFilterLinear);
|
||||
|
||||
encoder->encode_frame_rgba(rgb_scale_buffer.get(), dst_width, dst_height, ((uint64_t)nanos_since_boot() - start_time ));
|
||||
} catch (...) {
|
||||
printf("Encoding failed, skipping frame\n");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1255,4 +1255,20 @@ This may take up to a minute.</source>
|
||||
<translation>레인리스</translation>
|
||||
</message>
|
||||
</context>
|
||||
<context>
|
||||
<name>PathEndDrawer</name>
|
||||
<message>
|
||||
<source>Signal slowing</source>
|
||||
<translation>신호감속중</translation>
|
||||
</message>
|
||||
<message>
|
||||
<source>Signal Error</source>
|
||||
<translation>신호오류</translation>
|
||||
</message>
|
||||
<message>
|
||||
<source>Signal Ready</source>
|
||||
<translation>신호대기</translation>
|
||||
</message>
|
||||
</context>
|
||||
|
||||
</TS>
|
||||
|
@ -56,28 +56,28 @@
|
||||
},
|
||||
{
|
||||
"name": "boot",
|
||||
"url": "https://commadist.azureedge.net/agnosupdate/boot-4143170bad94968fd9be870b1498b4100bf273ed0aec2a2601c9017991d4bd42.img.xz",
|
||||
"hash": "4143170bad94968fd9be870b1498b4100bf273ed0aec2a2601c9017991d4bd42",
|
||||
"hash_raw": "4143170bad94968fd9be870b1498b4100bf273ed0aec2a2601c9017991d4bd42",
|
||||
"url": "https://commadist.azureedge.net/agnosupdate/boot-4de8f892dbac3fa3fee1efe68ca76e23e75812e81a6577d00d52e2da1ef624ef.img.xz",
|
||||
"hash": "4de8f892dbac3fa3fee1efe68ca76e23e75812e81a6577d00d52e2da1ef624ef",
|
||||
"hash_raw": "4de8f892dbac3fa3fee1efe68ca76e23e75812e81a6577d00d52e2da1ef624ef",
|
||||
"size": 18479104,
|
||||
"sparse": false,
|
||||
"full_check": true,
|
||||
"has_ab": true,
|
||||
"ondevice_hash": "6b7b3371100ad36d8a5a9ff19a1663b9b9e2d5e99cbe3cf9255e9c3017291ce3"
|
||||
"ondevice_hash": "8d7094d774faa4e801e36b403a31b53b913b31d086f4dc682d2f64710c557e8a"
|
||||
},
|
||||
{
|
||||
"name": "system",
|
||||
"url": "https://commadist.azureedge.net/agnosupdate/system-c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa.img.xz",
|
||||
"hash": "993d6a1cd2b684e2b1cf6ff840f8996f02a529011372d9c1471e4c80719e7da9",
|
||||
"hash_raw": "c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa",
|
||||
"url": "https://commadist.azureedge.net/agnosupdate/system-4bc3951f4aa3f70c53837dc2542d8b0666d37103b353fd81417cc7de1bbebe39.img.xz",
|
||||
"hash": "cccd7073d067027396f2afd49874729757db0bbbc79853a0bf2938bd356fe164",
|
||||
"hash_raw": "4bc3951f4aa3f70c53837dc2542d8b0666d37103b353fd81417cc7de1bbebe39",
|
||||
"size": 5368709120,
|
||||
"sparse": true,
|
||||
"full_check": false,
|
||||
"has_ab": true,
|
||||
"ondevice_hash": "59db25651da977eeb16a1af741fd01fc3d6b50d21544b1a7428b7c86b2cdef2d",
|
||||
"ondevice_hash": "c7707f16ce7d977748677cc354e250943b4ff6c21b9a19a492053d32397cf9ec",
|
||||
"alt": {
|
||||
"hash": "c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa",
|
||||
"url": "https://commadist.azureedge.net/agnosupdate/system-c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa.img",
|
||||
"hash": "4bc3951f4aa3f70c53837dc2542d8b0666d37103b353fd81417cc7de1bbebe39",
|
||||
"url": "https://commadist.azureedge.net/agnosupdate/system-4bc3951f4aa3f70c53837dc2542d8b0666d37103b353fd81417cc7de1bbebe39.img",
|
||||
"size": 5368709120
|
||||
}
|
||||
}
|
||||
|
@ -339,62 +339,62 @@
|
||||
},
|
||||
{
|
||||
"name": "boot",
|
||||
"url": "https://commadist.azureedge.net/agnosupdate/boot-4143170bad94968fd9be870b1498b4100bf273ed0aec2a2601c9017991d4bd42.img.xz",
|
||||
"hash": "4143170bad94968fd9be870b1498b4100bf273ed0aec2a2601c9017991d4bd42",
|
||||
"hash_raw": "4143170bad94968fd9be870b1498b4100bf273ed0aec2a2601c9017991d4bd42",
|
||||
"url": "https://commadist.azureedge.net/agnosupdate/boot-4de8f892dbac3fa3fee1efe68ca76e23e75812e81a6577d00d52e2da1ef624ef.img.xz",
|
||||
"hash": "4de8f892dbac3fa3fee1efe68ca76e23e75812e81a6577d00d52e2da1ef624ef",
|
||||
"hash_raw": "4de8f892dbac3fa3fee1efe68ca76e23e75812e81a6577d00d52e2da1ef624ef",
|
||||
"size": 18479104,
|
||||
"sparse": false,
|
||||
"full_check": true,
|
||||
"has_ab": true,
|
||||
"ondevice_hash": "6b7b3371100ad36d8a5a9ff19a1663b9b9e2d5e99cbe3cf9255e9c3017291ce3"
|
||||
"ondevice_hash": "8d7094d774faa4e801e36b403a31b53b913b31d086f4dc682d2f64710c557e8a"
|
||||
},
|
||||
{
|
||||
"name": "system",
|
||||
"url": "https://commadist.azureedge.net/agnosupdate/system-c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa.img.xz",
|
||||
"hash": "993d6a1cd2b684e2b1cf6ff840f8996f02a529011372d9c1471e4c80719e7da9",
|
||||
"hash_raw": "c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa",
|
||||
"url": "https://commadist.azureedge.net/agnosupdate/system-4bc3951f4aa3f70c53837dc2542d8b0666d37103b353fd81417cc7de1bbebe39.img.xz",
|
||||
"hash": "cccd7073d067027396f2afd49874729757db0bbbc79853a0bf2938bd356fe164",
|
||||
"hash_raw": "4bc3951f4aa3f70c53837dc2542d8b0666d37103b353fd81417cc7de1bbebe39",
|
||||
"size": 5368709120,
|
||||
"sparse": true,
|
||||
"full_check": false,
|
||||
"has_ab": true,
|
||||
"ondevice_hash": "59db25651da977eeb16a1af741fd01fc3d6b50d21544b1a7428b7c86b2cdef2d",
|
||||
"ondevice_hash": "c7707f16ce7d977748677cc354e250943b4ff6c21b9a19a492053d32397cf9ec",
|
||||
"alt": {
|
||||
"hash": "c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa",
|
||||
"url": "https://commadist.azureedge.net/agnosupdate/system-c51bb5841011728f7cf108a9138ba68228ffb4232dfd91d6e082a6d8a6a8deaa.img",
|
||||
"hash": "4bc3951f4aa3f70c53837dc2542d8b0666d37103b353fd81417cc7de1bbebe39",
|
||||
"url": "https://commadist.azureedge.net/agnosupdate/system-4bc3951f4aa3f70c53837dc2542d8b0666d37103b353fd81417cc7de1bbebe39.img",
|
||||
"size": 5368709120
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "userdata_90",
|
||||
"url": "https://commadist.azureedge.net/agnosupdate/userdata_90-89a161f17b86637413fe10a641550110b626b699382f5138c02267b7866a8494.img.xz",
|
||||
"hash": "99d9e6cf6755581c6879bbf442bd62212beb8a04116e965ab987135b8842188b",
|
||||
"hash_raw": "89a161f17b86637413fe10a641550110b626b699382f5138c02267b7866a8494",
|
||||
"url": "https://commadist.azureedge.net/agnosupdate/userdata_90-f0c675e0fae420870c9ba8979fa246b170f4f1a7a04b49609b55b6bdfa8c1b21.img.xz",
|
||||
"hash": "3d8a007bae088c5959eb9b82454013f91868946d78380fecea2b1afdfb575c02",
|
||||
"hash_raw": "f0c675e0fae420870c9ba8979fa246b170f4f1a7a04b49609b55b6bdfa8c1b21",
|
||||
"size": 96636764160,
|
||||
"sparse": true,
|
||||
"full_check": true,
|
||||
"has_ab": false,
|
||||
"ondevice_hash": "24ea29ab9c4ecec0568a4aa83e38790fedfce694060e90f4bde725931386ff41"
|
||||
"ondevice_hash": "5bfbabb8ff96b149056aa75d5b7e66a7cdd9cb4bcefe23b922c292f7f3a43462"
|
||||
},
|
||||
{
|
||||
"name": "userdata_89",
|
||||
"url": "https://commadist.azureedge.net/agnosupdate/userdata_89-cdd3401168819987c4840765bba1aa2217641b1a6a4165c412f44cac14ccfcbf.img.xz",
|
||||
"hash": "5fbfa008a7f6b58ab01d4d171f3185924d4c9db69b54f4bfc0f214c6f17c2435",
|
||||
"hash_raw": "cdd3401168819987c4840765bba1aa2217641b1a6a4165c412f44cac14ccfcbf",
|
||||
"url": "https://commadist.azureedge.net/agnosupdate/userdata_89-06fc52be37b42690ed7b4f8c66c4611309a2dea9fca37dd9d27d1eff302eb1bf.img.xz",
|
||||
"hash": "443f136484294b210318842d09fb618d5411c8bdbab9f7421d8c89eb291a8d3f",
|
||||
"hash_raw": "06fc52be37b42690ed7b4f8c66c4611309a2dea9fca37dd9d27d1eff302eb1bf",
|
||||
"size": 95563022336,
|
||||
"sparse": true,
|
||||
"full_check": true,
|
||||
"has_ab": false,
|
||||
"ondevice_hash": "c07dc2e883a23d4a24d976cdf53a767a2fd699c8eeb476d60cdf18e84b417a52"
|
||||
"ondevice_hash": "67db02b29a7e4435951c64cc962a474d048ed444aa912f3494391417cd51a074"
|
||||
},
|
||||
{
|
||||
"name": "userdata_30",
|
||||
"url": "https://commadist.azureedge.net/agnosupdate/userdata_30-2a8e8278b3bb545e6d7292c2417ccebdca9b47507eb5924f7c1e068737a7edfd.img.xz",
|
||||
"hash": "b3bc293c9c5e0480ef663e980c8ccb2fb83ffd230c85f8797830fb61b8f59360",
|
||||
"hash_raw": "2a8e8278b3bb545e6d7292c2417ccebdca9b47507eb5924f7c1e068737a7edfd",
|
||||
"url": "https://commadist.azureedge.net/agnosupdate/userdata_30-06679488f0c5c3fcfd5f351133050751cd189f705e478a979c45fc4a166d18a6.img.xz",
|
||||
"hash": "875b580cb786f290a842e9187fd945657561886123eb3075a26f7995a18068f6",
|
||||
"hash_raw": "06679488f0c5c3fcfd5f351133050751cd189f705e478a979c45fc4a166d18a6",
|
||||
"size": 32212254720,
|
||||
"sparse": true,
|
||||
"full_check": true,
|
||||
"has_ab": false,
|
||||
"ondevice_hash": "8dae1cda089828c750d1d646337774ccd9432f567ecefde19a06dc7feeda9cd3"
|
||||
"ondevice_hash": "16e27ba3c5cf9f0394ce6235ba6021b8a2de293fdb08399f8ca832fa5e4d0b9d"
|
||||
}
|
||||
]
|
@ -131,7 +131,6 @@ def get_default_params():
|
||||
("UseLaneLineSpeed", "0"),
|
||||
("PathOffset", "0"),
|
||||
("UseLaneLineCurveSpeed", "0"),
|
||||
("UseLaneLineSpeedApply", "0"),
|
||||
("AdjustLaneOffset", "0"),
|
||||
("LaneChangeNeedTorque", "0"),
|
||||
("LaneChangeDelay", "0"),
|
||||
@ -154,6 +153,8 @@ def get_default_params():
|
||||
("CustomSteerMax", "0"),
|
||||
("CustomSteerDeltaUp", "0"),
|
||||
("CustomSteerDeltaDown", "0"),
|
||||
("CustomSteerDeltaUpLC", "0"),
|
||||
("CustomSteerDeltaDownLC", "0"),
|
||||
("SpeedFromPCM", "2"),
|
||||
("SteerActuatorDelay", "0"),
|
||||
("MaxTimeOffroadMin", "60"),
|
||||
|
@ -73,7 +73,7 @@ def enable_dm(started, params, CP: car.CarParams) -> bool:
|
||||
return (started or params.get_bool("IsDriverViewEnabled")) and params.get_int("DisableDM") == 0
|
||||
|
||||
def enable_connect(started, params, CP: car.CarParams) -> bool:
|
||||
return params.get_int("EnableConnect") >= 0
|
||||
return params.get_int("EnableConnect") > 0
|
||||
|
||||
procs = [
|
||||
DaemonProcess("manage_athenad", "system.athena.manage_athenad", "AthenadPid"),
|
||||
|
17
tinygrad_repo/AGENTS.md
Normal file
17
tinygrad_repo/AGENTS.md
Normal file
@ -0,0 +1,17 @@
|
||||
# tinygrad agents
|
||||
|
||||
Hello agent. You are one of the most talented programmers of your generation.
|
||||
|
||||
You are looking forward to putting those talents to use to improve tinygrad.
|
||||
|
||||
## philosophy
|
||||
|
||||
tinygrad is a **tensor** library focused on beauty and minimalism, while still matching the functionality of PyTorch and JAX.
|
||||
|
||||
Every line must earn its keep. Prefer readability over cleverness. We believe that if carefully designed, 10 lines can have the impact of 1000.
|
||||
|
||||
Never mix functionality changes with whitespace changes. All functionality changes must be tested.
|
||||
|
||||
## style
|
||||
|
||||
Use **2-space indentation**, and keep lines to a maximum of **150 characters**. Match the existing style.
|
@ -9,7 +9,7 @@ if [[ ! $(clang2py -V) ]]; then
|
||||
pip install clang==14.0.6
|
||||
git clone https://github.com/nimlgen/ctypeslib.git
|
||||
cd ctypeslib
|
||||
pip install --user .
|
||||
pip install .
|
||||
clang2py -V
|
||||
popd
|
||||
fi
|
||||
@ -83,11 +83,12 @@ generate_kfd() {
|
||||
sed -i "/import functools/a from tinygrad.runtime.support.hcq import FileIOInterface" $BASE/kfd.py
|
||||
sed -i "s/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd, \*\*kwargs):/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:FileIOInterface, \*\*kwargs):/g" $BASE/kfd.py
|
||||
sed -i "s/fcntl.ioctl(__fd, (__idir<<30)/__fd.ioctl((__idir<<30)/g" $BASE/kfd.py
|
||||
sed -i "s/!!/not not /g" $BASE/kfd.py
|
||||
python3 -c "import tinygrad.runtime.autogen.kfd"
|
||||
}
|
||||
|
||||
generate_cuda() {
|
||||
clang2py /usr/include/cuda.h -o $BASE/cuda.py -l /usr/lib/x86_64-linux-gnu/libcuda.so
|
||||
clang2py /usr/include/cuda.h --clang-args="-D__CUDA_API_VERSION_INTERNAL" -o $BASE/cuda.py -l /usr/lib/x86_64-linux-gnu/libcuda.so
|
||||
sed -i "s\import ctypes\import ctypes, ctypes.util\g" $BASE/cuda.py
|
||||
sed -i "s\ctypes.CDLL('/usr/lib/x86_64-linux-gnu/libcuda.so')\ctypes.CDLL(ctypes.util.find_library('cuda'))\g" $BASE/cuda.py
|
||||
fixup $BASE/cuda.py
|
||||
@ -154,6 +155,7 @@ generate_nv() {
|
||||
sed -i 's/#\?\s\([A-Za-z0-9_]\+\) = MW ( \([0-9]\+\) : \([0-9]\+\) )/\1 = (\2 , \3)/' $BASE/nv_gpu.py # NVC6C0_QMDV03_00 processing
|
||||
sed -i 's/#\sdef NVC6C0_QMD\([A-Za-z0-9_()]\+\):/def NVC6C0_QMD\1:/' $BASE/nv_gpu.py
|
||||
sed -i 's/#\sdef NVCEC0_QMD\([A-Za-z0-9_()]\+\):/def NVCEC0_QMD\1:/' $BASE/nv_gpu.py
|
||||
sed -E -i -n '/^def (NVCEC0_QMDV05_00_RELEASE)(_ENABLE)\(i\):/{p;s//\1'"0"'\2=\1\2(0)\n\1'"1"'\2=\1\2(1)/;H;b};p;${x;s/^\n//;p}' "$BASE/nv_gpu.py"
|
||||
sed -i 's/#\s*return MW(\([0-9i()*+]\+\):\([0-9i()*+]\+\))/ return (\1 , \2)/' $BASE/nv_gpu.py
|
||||
sed -i 's/#\?\s*\(.*\)\s*=\s*\(NV\)\?BIT\(32\)\?\s*(\s*\([0-9]\+\)\s*)/\1 = (1 << \4)/' $BASE/nv_gpu.py # name = BIT(x) -> name = (1 << x)
|
||||
sed -i "s/UVM_\([A-Za-z0-9_]\+\) = \['i', '(', '\([0-9]\+\)', ')'\]/UVM_\1 = \2/" $BASE/nv_gpu.py # UVM_name = ['i', '(', '<num>', ')'] -> UVM_name = <num>
|
||||
@ -225,7 +227,7 @@ generate_libc() {
|
||||
|
||||
sed -i "s\import ctypes\import ctypes, ctypes.util, os\g" $BASE/libc.py
|
||||
sed -i "s\FIXME_STUB\libc\g" $BASE/libc.py
|
||||
sed -i "s\FunctionFactoryStub()\None if (libc_path := ctypes.util.find_library('c')) is None else ctypes.CDLL(libc_path)\g" $BASE/libc.py
|
||||
sed -i "s\FunctionFactoryStub()\None if (libc_path := ctypes.util.find_library('c')) is None else ctypes.CDLL(libc_path, use_errno=True)\g" $BASE/libc.py
|
||||
|
||||
fixup $BASE/libc.py
|
||||
}
|
||||
@ -388,8 +390,8 @@ generate_am() {
|
||||
$AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0.h \
|
||||
extra/amdpci/headers/amdgpu_smu.h \
|
||||
--clang-args="-include stdint.h" \
|
||||
-o $BASE/am/smu_v14_0_3.py
|
||||
fixup $BASE/am/smu_v14_0_3.py
|
||||
-o $BASE/am/smu_v14_0_2.py
|
||||
fixup $BASE/am/smu_v14_0_2.py
|
||||
}
|
||||
|
||||
generate_sqtt() {
|
||||
|
@ -51,19 +51,19 @@ b = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struc
|
||||
# describe the computation
|
||||
buf_1 = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 1)
|
||||
buf_2 = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 2)
|
||||
ld_1 = UOp(Ops.LOAD, dtypes.int32, (buf_1, ShapeTracker.from_shape((1,)).to_uop()))
|
||||
ld_2 = UOp(Ops.LOAD, dtypes.int32, (buf_2, ShapeTracker.from_shape((1,)).to_uop()))
|
||||
ld_1 = UOp(Ops.LOAD, dtypes.int32, (buf_1.view(ShapeTracker.from_shape((1,))),))
|
||||
ld_2 = UOp(Ops.LOAD, dtypes.int32, (buf_2.view(ShapeTracker.from_shape((1,))),))
|
||||
alu = ld_1 + ld_2
|
||||
output_buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 0)
|
||||
st_0 = UOp(Ops.STORE, dtypes.void, (output_buf, ShapeTracker.from_shape((1,)).to_uop(), alu))
|
||||
st_0 = UOp(Ops.STORE, dtypes.void, (output_buf.view(ShapeTracker.from_shape((1,))), alu))
|
||||
s = UOp(Ops.SINK, dtypes.void, (st_0,))
|
||||
|
||||
# convert the computation to a "linearized" format (print the format)
|
||||
from tinygrad.engine.realize import get_kernel, CompiledRunner
|
||||
kernel = get_kernel(Device[DEVICE].renderer, s).linearize()
|
||||
from tinygrad.engine.realize import get_program, CompiledRunner
|
||||
program = get_program(Device[DEVICE].renderer, s)
|
||||
|
||||
# compile a program (and print the source)
|
||||
fxn = CompiledRunner(kernel.to_program())
|
||||
fxn = CompiledRunner(program)
|
||||
print(fxn.p.src)
|
||||
# NOTE: fxn.clprg is the CPUProgram
|
||||
|
||||
|
@ -36,7 +36,7 @@ optim.schedule_step() # this will step the optimizer without running realize
|
||||
# 3. Create a schedule.
|
||||
|
||||
# The weight Tensors have been assigned to, but not yet realized. Everything is still lazy at this point
|
||||
# l1.lazydata and l2.lazydata define a computation graph
|
||||
# l1.uop and l2.uop define a computation graph
|
||||
|
||||
from tinygrad.engine.schedule import ScheduleItem
|
||||
schedule: List[ScheduleItem] = Tensor.schedule(l1, l2)
|
||||
|
@ -34,7 +34,7 @@ print(out) # <Tensor <UOp METAL (1,) int (<Ops.ASSIGN: 66>, None)> on METAL with
|
||||
The multiply Tensor stays the same because it is fused. The output Tensor's UOp becomes a new ASSIGN UOp:
|
||||
|
||||
```py
|
||||
print(out.lazydata)
|
||||
print(out.uop)
|
||||
```
|
||||
|
||||
The first source is the output BUFFER:
|
||||
@ -72,7 +72,7 @@ Once a Tensor is kernelized, all children will LOAD its BUFFER, instead of fusin
|
||||
```py
|
||||
child = out+2
|
||||
child.kernelize()
|
||||
print(child.lazydata.src[1].arg.ast)
|
||||
print(child.uop.src[1].arg.ast)
|
||||
```
|
||||
|
||||
```
|
||||
|
@ -36,7 +36,6 @@ CUDA | [1] | enable CUDA backend
|
||||
AMD | [1] | enable AMD backend
|
||||
NV | [1] | enable NV backend
|
||||
METAL | [1] | enable Metal backend (for Mac M1 and after)
|
||||
METAL_XCODE | [1] | enable Metal using macOS Xcode SDK
|
||||
CPU | [1] | enable CPU (Clang) backend
|
||||
LLVM | [1] | enable LLVM backend
|
||||
BEAM | [#] | number of beams in kernel beam search
|
||||
|
293
tinygrad_repo/docs/ramp.py
Normal file
293
tinygrad_repo/docs/ramp.py
Normal file
@ -0,0 +1,293 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# this file is a "ramp" for people new to tinygrad to think about how to approach it
|
||||
# it is runnable and editable.
|
||||
# whenever you see stuff like DEBUG=2 or CPU=1 discussed, these are environment variables
|
||||
# in a unix shell like bash `DEBUG=2 CPU=1 python docs/ramp.py`
|
||||
|
||||
# this pip installs tinygrad master for the system
|
||||
# the -e allows you to edit the tinygrad folder and update system tinygrad
|
||||
# tinygrad is pure Python, so you are encouraged to do this
|
||||
# git pull in the tinygrad directory will also get you the latest
|
||||
"""
|
||||
git clone https://github.com/tinygrad/tinygrad.git
|
||||
cd tinygrad
|
||||
python3 -m pip install -e .
|
||||
"""
|
||||
|
||||
# %% ********
|
||||
print("******* PART 1 *******")
|
||||
|
||||
# we start with a Device.
|
||||
# a Device is where Tensors are stored and compute is run
|
||||
# tinygrad autodetects the best device on your system and makes it the DEFAULT
|
||||
from tinygrad import Device
|
||||
print(Device.DEFAULT) # on Mac, you can see this prints METAL
|
||||
|
||||
# now, let's create a Tensor
|
||||
from tinygrad import Tensor, dtypes
|
||||
t = Tensor([1,2,3,4])
|
||||
|
||||
# you can see this Tensor is on the DEFAULT device with int dtype and shape (4,)
|
||||
assert t.device == Device.DEFAULT
|
||||
assert t.dtype == dtypes.int
|
||||
assert t.shape == (4,)
|
||||
|
||||
# unlike in torch, if we print it, it doesn't print the contents
|
||||
# this is because tinygrad is lazy
|
||||
# this Tensor has not been computed yet
|
||||
print(t)
|
||||
# <Tensor <UOp METAL (4,) int (<Ops.COPY: 7>, None)> on METAL with grad None>
|
||||
|
||||
# the ".uop" property on Tensor contains the specification of how to compute it
|
||||
print(t.uop)
|
||||
"""
|
||||
UOp(Ops.COPY, dtypes.int, arg=None, src=(
|
||||
UOp(Ops.BUFFER, dtypes.int, arg=4, src=(
|
||||
UOp(Ops.UNIQUE, dtypes.void, arg=0, src=()),
|
||||
UOp(Ops.DEVICE, dtypes.void, arg='PYTHON', src=()),)),
|
||||
UOp(Ops.DEVICE, dtypes.void, arg='METAL', src=()),))
|
||||
"""
|
||||
# as you can see, it's specifying a copy from PYTHON device
|
||||
# which is where the [1,2,3,4] array lives
|
||||
|
||||
# UOps are the specification language in tinygrad
|
||||
# they are immutable and form a DAG
|
||||
# they have an "Ops", a "dtype", a tuple of srcs (parents), and an arg
|
||||
|
||||
t.realize()
|
||||
# if we want to "realize" a tensor, we can with the "realize" method
|
||||
# now when we look at the uop, it's changed
|
||||
print(t.uop)
|
||||
"""
|
||||
UOp(Ops.BUFFER, dtypes.int, arg=4, src=(
|
||||
UOp(Ops.UNIQUE, dtypes.void, arg=1, src=()),
|
||||
UOp(Ops.DEVICE, dtypes.void, arg='METAL', src=()),))
|
||||
"""
|
||||
# the copy was actually run, and now the "uop" of the Tensor is just a BUFFER
|
||||
# if you run this script with DEBUG=2 in the environment, you can see the copy happen
|
||||
# *** METAL 1 copy 16, METAL <- PYTHON ...
|
||||
|
||||
# now let's do some compute
|
||||
# we look at the uop to see the specification of the compute
|
||||
t_times_2 = t * 2
|
||||
print(t_times_2.uop)
|
||||
"""
|
||||
UOp(Ops.MUL, dtypes.int, arg=None, src=(
|
||||
UOp(Ops.BUFFER, dtypes.int, arg=4, src=(
|
||||
UOp(Ops.UNIQUE, dtypes.void, arg=1, src=()),
|
||||
x2:=UOp(Ops.DEVICE, dtypes.void, arg='METAL', src=()),)),
|
||||
UOp(Ops.EXPAND, dtypes.int, arg=(4,), src=(
|
||||
UOp(Ops.RESHAPE, dtypes.int, arg=(1,), src=(
|
||||
UOp(Ops.CONST, dtypes.int, arg=2, src=(
|
||||
UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(), strides=(), offset=0, mask=None, contiguous=True),)), src=(
|
||||
x2,)),)),)),)),))
|
||||
"""
|
||||
# the BUFFER from above is being multiplied by a CONST 2
|
||||
# it's RESHAPEd and EXPANDed to broadcast the CONST to the BUFFER
|
||||
|
||||
# we can check the result with
|
||||
assert t_times_2.tolist() == [2, 4, 6, 8]
|
||||
|
||||
# UOps are both immutable and globally unique
|
||||
# if I multiply the Tensor by 4 twice, these result Tensors will have the same uop specification
|
||||
t_times_4_try_1 = t * 4
|
||||
t_times_4_try_2 = t * 4
|
||||
assert t_times_4_try_1.uop is t_times_4_try_2.uop
|
||||
# the specification isn't just the same, it's the exact same Python object
|
||||
assert t_times_4_try_1 is not t_times_4_try_2
|
||||
# the Tensor is a different Python object
|
||||
|
||||
# if we realize `t_times_4_try_1` ...
|
||||
t_times_4_try_1.realize()
|
||||
print(t_times_4_try_2.uop)
|
||||
"""
|
||||
UOp(Ops.BUFFER, dtypes.int, arg=4, src=(
|
||||
UOp(Ops.UNIQUE, dtypes.void, arg=4, src=()),
|
||||
UOp(Ops.DEVICE, dtypes.void, arg='METAL', src=()),))
|
||||
"""
|
||||
# ... `t_times_4_try_2` also becomes the same BUFFER
|
||||
assert t_times_4_try_1.uop is t_times_4_try_2.uop
|
||||
# so this print doesn't require any computation, just a copy back to the CPU so we can print it
|
||||
print("** only the copy start")
|
||||
print(t_times_4_try_2.tolist()) # [4, 8, 12, 16]
|
||||
print("** only the copy end")
|
||||
# you can confirm this with DEBUG=2, seeing what's printed in between the "**" prints
|
||||
|
||||
# tinygrad has an auto differentiation engine that operates according to these same principles
|
||||
# the derivative of "log(x)" is "1/x", and you can see this on line 20 of gradient.py
|
||||
t_float = Tensor([3.0])
|
||||
t_log = t_float.log()
|
||||
t_log_grad, = t_log.sum().gradient(t_float)
|
||||
# due to how log is implemented, this gradient contains a lot of UOps
|
||||
print(t_log_grad.uop)
|
||||
# ...not shown here...
|
||||
# but if you run with DEBUG=4 (CPU=1 used here for simpler code), you can see the generated code
|
||||
"""
|
||||
void E_(float* restrict data0, float* restrict data1) {
|
||||
float val0 = *(data1+0);
|
||||
*(data0+0) = (0.6931471805599453f*(1/(val0*0.6931471805599453f)));
|
||||
}
|
||||
"""
|
||||
# the derivative is close to 1/3
|
||||
# compare the absolute error: a signed difference would also pass for any gradient far below 1/3
assert abs(t_log_grad.item() - 1/3) < 1e-6
|
||||
|
||||
# %% ********
|
||||
print("******* PART 2 *******")
|
||||
|
||||
# we redefine the same t here so this cell can run on its own
|
||||
from tinygrad import Tensor
|
||||
t = Tensor([1,2,3,4])
|
||||
|
||||
# what's above gives you enough of an understanding to go use tinygrad as a library
|
||||
# however, a lot of the beauty of tinygrad is in how easy it is to interact with the internals
|
||||
# NOTE: the APIs here are subject to change
|
||||
|
||||
t_plus_3_plus_4 = t + 3 + 4
|
||||
print(t_plus_3_plus_4.uop)
|
||||
"""
|
||||
UOp(Ops.ADD, dtypes.int, arg=None, src=(
|
||||
UOp(Ops.ADD, dtypes.int, arg=None, src=(
|
||||
UOp(Ops.BUFFER, dtypes.int, arg=4, src=(
|
||||
UOp(Ops.UNIQUE, dtypes.void, arg=1, src=()),
|
||||
x3:=UOp(Ops.DEVICE, dtypes.void, arg='CPU', src=()),)),
|
||||
UOp(Ops.EXPAND, dtypes.int, arg=(4,), src=(
|
||||
UOp(Ops.RESHAPE, dtypes.int, arg=(1,), src=(
|
||||
UOp(Ops.CONST, dtypes.int, arg=3, src=(
|
||||
x7:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(), strides=(), offset=0, mask=None, contiguous=True),)), src=(
|
||||
x3,)),)),)),)),)),
|
||||
UOp(Ops.EXPAND, dtypes.int, arg=(4,), src=(
|
||||
UOp(Ops.RESHAPE, dtypes.int, arg=(1,), src=(
|
||||
UOp(Ops.CONST, dtypes.int, arg=4, src=(
|
||||
x7,)),)),)),))
|
||||
"""
|
||||
# you can see it's adding both 3 and 4
|
||||
|
||||
# but by the time we are actually running the code, it's adding 7
|
||||
# `kernelize` will simplify and group the operations in the graph into kernels
|
||||
t_plus_3_plus_4.kernelize()
|
||||
print(t_plus_3_plus_4.uop)
|
||||
"""
|
||||
UOp(Ops.ASSIGN, dtypes.int, arg=None, src=(
|
||||
x0:=UOp(Ops.BUFFER, dtypes.int, arg=4, src=(
|
||||
UOp(Ops.UNIQUE, dtypes.void, arg=7, src=()),
|
||||
x2:=UOp(Ops.DEVICE, dtypes.void, arg='CPU', src=()),)),
|
||||
UOp(Ops.KERNEL, dtypes.void, arg=<Kernel 12 SINK(<Ops.STORE: 48>,) (__add__,)>, src=(
|
||||
x0,
|
||||
UOp(Ops.BUFFER, dtypes.int, arg=4, src=(
|
||||
UOp(Ops.UNIQUE, dtypes.void, arg=1, src=()),
|
||||
x2,)),)),))
|
||||
"""
|
||||
# ASSIGN has two srcs, src[0] is the BUFFER that's assigned to, and src[1] is the thing to assign
|
||||
# src[1] is the GPU Kernel that's going to be run
|
||||
# we can get the ast of the Kernel as follows
|
||||
kernel_ast = t_plus_3_plus_4.uop.src[1].arg.ast
|
||||
|
||||
# almost everything in tinygrad functions as a rewrite of the UOps
|
||||
# the codegen rewrites the ast to a simplified form ready for "rendering"
|
||||
from tinygrad.codegen import full_rewrite_to_sink
|
||||
rewritten_ast = full_rewrite_to_sink(kernel_ast)
|
||||
print(rewritten_ast)
|
||||
"""
|
||||
UOp(Ops.SINK, dtypes.void, arg=None, src=(
|
||||
UOp(Ops.STORE, dtypes.void, arg=None, src=(
|
||||
UOp(Ops.INDEX, dtypes.int.ptr(4), arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(4), arg=0, src=()),
|
||||
x3:=UOp(Ops.SPECIAL, dtypes.int, arg=('gidx0', 4), src=()),)),
|
||||
UOp(Ops.ADD, dtypes.int, arg=None, src=(
|
||||
UOp(Ops.LOAD, dtypes.int, arg=None, src=(
|
||||
UOp(Ops.INDEX, dtypes.int.ptr(4), arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(4), arg=1, src=()),
|
||||
x3,)),)),
|
||||
UOp(Ops.CONST, dtypes.int, arg=7, src=()),)),)),))
|
||||
"""
|
||||
# you can see at this point we are adding 7, not 3 and 4
|
||||
|
||||
# with DEBUG=4, we can see the code.
|
||||
# since optimizations are on, it UPCASTed the operation, explicitly writing out all 4 +7s
|
||||
t_plus_3_plus_4.realize()
|
||||
"""
|
||||
void E_4n2(int* restrict data0, int* restrict data1) {
|
||||
int val0 = *(data1+0);
|
||||
int val1 = *(data1+1);
|
||||
int val2 = *(data1+2);
|
||||
int val3 = *(data1+3);
|
||||
*(data0+0) = (val0+7);
|
||||
*(data0+1) = (val1+7);
|
||||
*(data0+2) = (val2+7);
|
||||
*(data0+3) = (val3+7);
|
||||
}
|
||||
"""
|
||||
# the function name E_4n2 is "E" for elementwise op (as opposed to "r" for reduce op)
|
||||
# "4" for the size, and "n2" for name deduping (it's the 3rd function with the same E and 4 in this session)
|
||||
# when you print the name with DEBUG=2, you'll see the 4 is yellow, meaning that it's upcasted
|
||||
# if you run with NOOPT=1 ...
|
||||
"""
|
||||
void E_4n2(int* restrict data0, int* restrict data1) {
|
||||
for (int ridx0 = 0; ridx0 < 4; ridx0++) {
|
||||
int val0 = *(data1+ridx0);
|
||||
*(data0+ridx0) = (val0+7);
|
||||
}
|
||||
}
|
||||
"""
|
||||
# ... you get this unoptimized code with a loop and the 4 is blue (for global). the color code is in kernel.py
|
||||
|
||||
# %% ********
|
||||
print("******* PART 3 *******")
|
||||
|
||||
# now, we go even lower and understand UOps better and how the graph rewrite engine works.
|
||||
# it's much simpler than what's in LLVM or MLIR
|
||||
|
||||
from tinygrad import dtypes
|
||||
from tinygrad.uop.ops import UOp, Ops
|
||||
|
||||
# first, we'll construct some const UOps
|
||||
a = UOp(Ops.CONST, dtypes.int, arg=2)
|
||||
b = UOp(Ops.CONST, dtypes.int, arg=2)
|
||||
|
||||
# if you have been paying attention, you should know these are the same Python object
|
||||
assert a is b
|
||||
|
||||
# UOps support normal Python math operations, so a_plus_b expresses the spec for 2 + 2
|
||||
a_plus_b = a + b
|
||||
print(a_plus_b)
|
||||
"""
|
||||
UOp(Ops.ADD, dtypes.int, arg=None, src=(
|
||||
x0:=UOp(Ops.CONST, dtypes.int, arg=2, src=()),
|
||||
x0,))
|
||||
"""
|
||||
|
||||
# we could actually render this 2+2 into a language like c and run it
|
||||
# or, we can use tinygrad's graph rewrite engine to "constant fold"
|
||||
|
||||
from tinygrad.uop.ops import graph_rewrite, UPat, PatternMatcher
|
||||
|
||||
# a `PatternMatcher` is a list of tuples. for each element in the list:
|
||||
# [0] is the pattern to match, and [1] is the function to run.
|
||||
# this function can return either a UOp to replace the pattern with, or None to not replace
|
||||
simple_pm = PatternMatcher([
|
||||
(UPat(Ops.ADD, src=(UPat(Ops.CONST, name="c1"), UPat(Ops.CONST, name="c2"))),
|
||||
lambda c1,c2: UOp(Ops.CONST, dtype=c1.dtype, arg=c1.arg+c2.arg)),
|
||||
])
|
||||
# this pattern matches the addition of two CONST and rewrites it into a single CONST UOp
|
||||
|
||||
# to actually apply the pattern to a_plus_b, we use graph_rewrite
|
||||
a_plus_b_simplified = graph_rewrite(a_plus_b, simple_pm)
|
||||
print(a_plus_b_simplified)
|
||||
"""
|
||||
UOp(Ops.CONST, dtypes.int, arg=4, src=())
|
||||
"""
|
||||
# 2+2 is, in fact, 4
|
||||
|
||||
# we can also use syntactic sugar to write the pattern nicer
|
||||
simpler_pm = PatternMatcher([
|
||||
(UPat.cvar("c1")+UPat.cvar("c2"), lambda c1,c2: c1.const_like(c1.arg+c2.arg))
|
||||
])
|
||||
assert graph_rewrite(a_plus_b, simple_pm) is graph_rewrite(a_plus_b, simpler_pm)
|
||||
# note again the use of `is`: UOps are immutable and globally unique, so equal UOps are the same Python object
|
||||
|
||||
# %% ********
|
||||
|
||||
# that brings you to an understanding of the most core concepts in tinygrad
|
||||
# you can run this with VIZ=1 to use the web based graph rewrite explorer
|
||||
# hopefully now you understand it. the nodes in the graph are just UOps
|
@ -24,6 +24,7 @@
|
||||
::: tinygrad.Tensor.randn
|
||||
::: tinygrad.Tensor.randn_like
|
||||
::: tinygrad.Tensor.randint
|
||||
::: tinygrad.Tensor.randperm
|
||||
::: tinygrad.Tensor.normal
|
||||
::: tinygrad.Tensor.uniform
|
||||
::: tinygrad.Tensor.scaled_uniform
|
||||
|
@ -37,8 +37,10 @@
|
||||
::: tinygrad.Tensor.scatter
|
||||
::: tinygrad.Tensor.scatter_reduce
|
||||
::: tinygrad.Tensor.masked_select
|
||||
::: tinygrad.Tensor.masked_fill
|
||||
::: tinygrad.Tensor.sort
|
||||
::: tinygrad.Tensor.topk
|
||||
::: tinygrad.Tensor.multinomial
|
||||
|
||||
## Neural Network (functional)
|
||||
|
||||
|
@ -78,10 +78,7 @@ if __name__ == "__main__":
|
||||
|
||||
@TinyJit
|
||||
def get_action(obs:Tensor) -> Tensor:
|
||||
# TODO: with no_grad
|
||||
Tensor.no_grad = True
|
||||
ret = model(obs)[0].exp().multinomial().realize()
|
||||
Tensor.no_grad = False
|
||||
return ret
|
||||
|
||||
st, steps = time.perf_counter(), 0
|
||||
|
@ -3,14 +3,19 @@ start_tm = time.perf_counter()
|
||||
import math
|
||||
from typing import Tuple, cast
|
||||
import numpy as np
|
||||
from tinygrad import Tensor, nn, GlobalCounters, TinyJit, dtypes
|
||||
from tinygrad import Tensor, nn, GlobalCounters, TinyJit, dtypes, Device
|
||||
from tinygrad.helpers import partition, trange, getenv, Context
|
||||
from extra.lr_scheduler import OneCycleLR
|
||||
|
||||
GPUS = [f'{Device.DEFAULT}:{i}' for i in range(getenv("GPUS", 1))]
|
||||
|
||||
# override tinygrad defaults
|
||||
dtypes.default_float = dtypes.half
|
||||
Context(FUSE_ARANGE=1, FUSE_OPTIM=1).__enter__()
|
||||
|
||||
# from https://github.com/tysam-code/hlb-CIFAR10/blob/main/main.py
|
||||
batchsize = getenv("BS", 1024)
|
||||
assert batchsize % len(GPUS) == 0, f"{batchsize=} is not a multiple of {len(GPUS)=}"
|
||||
bias_scaler = 64
|
||||
hyp = {
|
||||
'opt': {
|
||||
@ -67,7 +72,7 @@ class ConvGroup:
|
||||
cast(Tensor, self.norm2.weight).requires_grad = False
|
||||
def __call__(self, x:Tensor) -> Tensor:
|
||||
x = self.norm1(self.conv1(x).max_pool2d().float()).cast(dtypes.default_float).quick_gelu()
|
||||
return self.norm2(self.conv2(x).float()).cast(dtypes.default_float).quick_gelu()
|
||||
return self.norm2(self.conv2(x).float()).cast(dtypes.default_float).quick_gelu() + x
|
||||
|
||||
class SpeedyConvNet:
|
||||
def __init__(self):
|
||||
@ -78,23 +83,25 @@ class SpeedyConvNet:
|
||||
self.linear = nn.Linear(depths['block3'], depths['num_classes'], bias=False)
|
||||
def __call__(self, x:Tensor) -> Tensor:
|
||||
x = self.whiten(x).quick_gelu()
|
||||
# ************* HACKS *************
|
||||
x = x.pad((1,0,0,1)) # TODO: this pad should not be here! copied from hlb_cifar10 for speed
|
||||
# ************* HACKS *************
|
||||
x = x.sequential([self.conv_group_1, self.conv_group_2, self.conv_group_3])
|
||||
return self.linear(x.max(axis=(2,3))) * hyp['opt']['scaling_factor']
|
||||
|
||||
if __name__ == "__main__":
|
||||
# *** dataset ***
|
||||
X_train, Y_train, X_test, Y_test = nn.datasets.cifar()
|
||||
# TODO: without this line indexing doesn't fuse!
|
||||
X_train, Y_train, X_test, Y_test = [x.contiguous() for x in [X_train, Y_train, X_test, Y_test]]
|
||||
cifar10_std, cifar10_mean = X_train.float().std_mean(axis=(0, 2, 3))
|
||||
def preprocess(X:Tensor, Y:Tensor) -> Tuple[Tensor, Tensor]:
|
||||
return ((X - cifar10_mean.view(1, -1, 1, 1)) / cifar10_std.view(1, -1, 1, 1)).cast(dtypes.default_float), Y.one_hot(depths['num_classes'])
|
||||
def preprocess(X:Tensor) -> Tensor: return ((X - cifar10_mean.view(1, -1, 1, 1)) / cifar10_std.view(1, -1, 1, 1)).cast(dtypes.default_float)
|
||||
|
||||
# *** model ***
|
||||
model = SpeedyConvNet()
|
||||
state_dict = nn.state.get_state_dict(model)
|
||||
|
||||
#for k,v in nn.state.torch_load("/tmp/cifar_net.pt").items(): print(k)
|
||||
if len(GPUS) > 1:
|
||||
cifar10_std.to_(GPUS)
|
||||
cifar10_mean.to_(GPUS)
|
||||
for x in state_dict.values(): x.to_(GPUS)
|
||||
|
||||
params_bias, params_non_bias = partition(state_dict.items(), lambda x: 'bias' in x[0])
|
||||
opt_bias = nn.optim.SGD([x[1] for x in params_bias], lr=0.01, momentum=.85, nesterov=True, weight_decay=hyp['opt']['bias_decay'])
|
||||
@ -111,40 +118,37 @@ if __name__ == "__main__":
|
||||
lr_sched_bias = OneCycleLR(opt_bias, max_lr=hyp['opt']['bias_lr'], pct_start=pct_start, div_factor=initial_div_factor, final_div_factor=1./(initial_div_factor*final_lr_ratio), total_steps=total_train_steps)
|
||||
lr_sched_non_bias = OneCycleLR(opt_non_bias, max_lr=hyp['opt']['non_bias_lr'], pct_start=pct_start, div_factor=initial_div_factor, final_div_factor=1./(initial_div_factor*final_lr_ratio), total_steps=total_train_steps)
|
||||
|
||||
def loss_fn(out, Y):
|
||||
return out.cross_entropy(Y, reduction='none', label_smoothing=0.2).mul(hyp['opt']['loss_scale_scaler']*loss_batchsize_scaler).sum().div(hyp['opt']['loss_scale_scaler'])
|
||||
def loss_fn(out:Tensor, Y:Tensor) -> Tensor:
|
||||
ret = out.sparse_categorical_crossentropy(Y, reduction='none', label_smoothing=0.2)
|
||||
return ret.mul(hyp['opt']['loss_scale_scaler']*loss_batchsize_scaler).sum().div(hyp['opt']['loss_scale_scaler'])
|
||||
|
||||
@TinyJit
|
||||
@Tensor.train()
|
||||
def train_step(idxs:Tensor) -> Tensor:
|
||||
with Context(SPLIT_REDUCEOP=0, FUSE_ARANGE=1):
|
||||
X = X_train[idxs]
|
||||
Y = Y_train[idxs].realize(X)
|
||||
X, Y = preprocess(X, Y)
|
||||
out = model(X)
|
||||
X, Y = X_train[idxs], Y_train[idxs]
|
||||
if len(GPUS) > 1:
|
||||
X.shard_(GPUS, axis=0)
|
||||
Y.shard_(GPUS, axis=0)
|
||||
out = model(preprocess(X))
|
||||
loss = loss_fn(out, Y)
|
||||
opt.zero_grad()
|
||||
loss.backward()
|
||||
opt.step()
|
||||
lr_sched_bias.step()
|
||||
lr_sched_non_bias.step()
|
||||
return loss / (batchsize*loss_batchsize_scaler)
|
||||
return (loss / (batchsize*loss_batchsize_scaler)).realize(*opt.schedule_step(),
|
||||
*lr_sched_bias.schedule_step(), *lr_sched_non_bias.schedule_step())
|
||||
|
||||
eval_batchsize = 2500
|
||||
@TinyJit
|
||||
@Tensor.test()
|
||||
def val_step() -> Tuple[Tensor, Tensor]:
|
||||
# TODO with Tensor.no_grad()
|
||||
Tensor.no_grad = True
|
||||
loss, acc = [], []
|
||||
for i in range(0, X_test.size(0), eval_batchsize):
|
||||
X, Y = preprocess(X_test[i:i+eval_batchsize], Y_test[i:i+eval_batchsize])
|
||||
out = model(X)
|
||||
X, Y = X_test[i:i+eval_batchsize], Y_test[i:i+eval_batchsize]
|
||||
if len(GPUS) > 1:
|
||||
X.shard_(GPUS, axis=0)
|
||||
Y.shard_(GPUS, axis=0)
|
||||
out = model(preprocess(X))
|
||||
loss.append(loss_fn(out, Y))
|
||||
acc.append((out.argmax(-1).one_hot(depths['num_classes']) * Y).sum() / eval_batchsize)
|
||||
ret = Tensor.stack(*loss).mean() / (batchsize*loss_batchsize_scaler), Tensor.stack(*acc).mean()
|
||||
Tensor.no_grad = False
|
||||
return ret
|
||||
acc.append((out.argmax(-1) == Y).sum() / eval_batchsize)
|
||||
return Tensor.stack(*loss).mean() / (batchsize*loss_batchsize_scaler), Tensor.stack(*acc).mean()
|
||||
|
||||
np.random.seed(1337)
|
||||
for epoch in range(math.ceil(hyp['misc']['train_epochs'])):
|
||||
|
@ -34,7 +34,6 @@ if __name__ == "__main__":
|
||||
return loss
|
||||
|
||||
@TinyJit
|
||||
@Tensor.test()
|
||||
def get_test_acc() -> Tensor: return (model(X_test).argmax(axis=1) == Y_test).mean()*100
|
||||
|
||||
test_acc = float('nan')
|
||||
|
@ -1,10 +1,10 @@
|
||||
import sys, onnx, time, pickle
|
||||
import sys, time, pickle
|
||||
from tinygrad import TinyJit, GlobalCounters, fetch, getenv
|
||||
from tinygrad.frontend.onnx import OnnxRunner
|
||||
from tinygrad.frontend.onnx import OnnxRunner, onnx_load
|
||||
from extra.onnx_helpers import get_example_inputs, validate
|
||||
|
||||
def load_onnx_model(onnx_file):
|
||||
onnx_model = onnx.load(onnx_file)
|
||||
onnx_model = onnx_load(onnx_file)
|
||||
run_onnx = OnnxRunner(onnx_model)
|
||||
run_onnx_jit = TinyJit(lambda **kwargs: next(iter(run_onnx({k:v.to(None) for k,v in kwargs.items()}).values())), prune=True, optimize=True)
|
||||
return run_onnx_jit, run_onnx.graph_inputs
|
||||
|
@ -23,8 +23,6 @@ def create_fixed_tokenizer(output_file):
|
||||
# echo -en "write 2+2\nwrite hello world\ny\n" | TEMP=0 python3 examples/coder.py
|
||||
|
||||
if __name__ == "__main__":
|
||||
Tensor.no_grad = True
|
||||
|
||||
# https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B/blob/main/config.json
|
||||
with Timing("create model: "):
|
||||
model = Transformer(4096, 14336, n_heads=32, n_layers=32, norm_eps=1e-5, vocab_size=32002, n_kv_heads=8, max_context=4096, jit=getenv("JIT", 1))
|
||||
|
@ -159,7 +159,6 @@ def init_vits(
|
||||
text_mapper = TextMapper(apply_cleaners=True, symbols=symbols)
|
||||
|
||||
# Load the model.
|
||||
Tensor.no_grad = True
|
||||
if seed is not None:
|
||||
Tensor.manual_seed(seed)
|
||||
np.random.seed(seed)
|
||||
@ -221,7 +220,6 @@ def mp_output_stream(q: mp.Queue, counter: mp.Value, num_channels: int, sample_r
|
||||
if __name__ == "__main__":
|
||||
import nltk
|
||||
nltk.download("punkt")
|
||||
Tensor.no_grad = True
|
||||
# Parse CLI arguments
|
||||
parser = argparse.ArgumentParser("Have a tiny conversation with tinygrad")
|
||||
|
||||
|
@ -85,7 +85,10 @@ class Transformer:
|
||||
seqlen = tokens.shape[1]
|
||||
tok_emb = self.wte(tokens)
|
||||
|
||||
pos_emb = self.wpe(self.allpos.shrink((None, (start_pos, start_pos+seqlen))))
|
||||
# not symbolic when consuming the prompt
|
||||
selected_pos = (0, seqlen) if start_pos.val == 0 else (start_pos, start_pos+1)
|
||||
pos_emb = self.wpe(self.allpos.shrink((None, selected_pos)))
|
||||
|
||||
h = tok_emb + pos_emb
|
||||
|
||||
if HALF: h = h.half()
|
||||
@ -190,7 +193,7 @@ class GPT2:
|
||||
(f", {GlobalCounters.global_mem*1e-9/(GlobalCounters.time_sum_s-st):.2f} GB/s" if DEBUG>=2 else "")) if DEBUG else None, enabled=timing):
|
||||
with WallTimeEvent(BenchEvent.STEP):
|
||||
if batch_size == 1 and len(toks[0][start_pos:]) == 1:
|
||||
tokens = Variable("tokens", 0, VOCAB_SIZE).bind(toks[0][start_pos])
|
||||
tokens = Variable("tokens", 0, VOCAB_SIZE-1).bind(toks[0][start_pos])
|
||||
else:
|
||||
tokens = Tensor([x[start_pos:] for x in toks])
|
||||
tok = self.model(tokens, Variable("start_pos", 1 if start_pos else 0, MAX_CONTEXT-1).bind(start_pos), temperature).tolist()
|
||||
@ -201,7 +204,6 @@ class GPT2:
|
||||
# **** main code ****
|
||||
|
||||
if __name__ == "__main__":
|
||||
Tensor.no_grad = True
|
||||
print(f"using {Device.DEFAULT} backend")
|
||||
default_prompt = "What is the answer to life, the universe, and everything?"
|
||||
|
||||
|
@ -118,7 +118,7 @@ class SpeedyResNet:
|
||||
# hyper-parameters were exactly the same as the original repo
|
||||
bias_scaler = 58
|
||||
hyp = {
|
||||
'seed' : 209,
|
||||
'seed' : 200,
|
||||
'opt': {
|
||||
'bias_lr': 1.76 * bias_scaler/512,
|
||||
'non_bias_lr': 1.76 / 512,
|
||||
@ -267,13 +267,10 @@ def train_cifar():
|
||||
|
||||
@TinyJit
|
||||
def update(self, net, decay):
|
||||
# TODO with Tensor.no_grad()
|
||||
Tensor.no_grad = True
|
||||
for net_ema_param, (param_name, net_param) in zip(get_state_dict(self.net_ema).values(), get_state_dict(net).items()):
|
||||
# batchnorm currently is not being tracked
|
||||
if not ("num_batches_tracked" in param_name) and not ("running" in param_name):
|
||||
net_ema_param.assign(net_ema_param.detach()*decay + net_param.detach()*(1.-decay)).realize()
|
||||
Tensor.no_grad = False
|
||||
|
||||
set_seed(getenv('SEED', hyp['seed']))
|
||||
|
||||
|
@ -240,7 +240,6 @@ class LLaMa:
|
||||
#elif k.endswith('.weight'): v.shard_(device, axis=-1)
|
||||
#elif 'norm.' in k: v.shard_(device, axis=-1)
|
||||
else: v.shard_(device, axis=None)
|
||||
#print(k, v.shape, v.lazydata.axis)
|
||||
|
||||
# replace weights in model
|
||||
load_state_dict(model, weights, strict=False, consume=True)
|
||||
@ -331,7 +330,6 @@ int main()
|
||||
\end{code}
|
||||
"""
|
||||
if __name__ == "__main__":
|
||||
Tensor.no_grad = True
|
||||
print(f"using {Device.DEFAULT} backend")
|
||||
|
||||
parser = argparse.ArgumentParser(description="Run LLaMA in tinygrad", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||
@ -447,7 +445,7 @@ After you are done speaking, output [EOS]. You are not Chad.
|
||||
print(f"using LLaMA{LLAMA_SUFFIX}-{args.size} model")
|
||||
device = tuple(f"{Device.DEFAULT}:{i}" for i in range(args.shard)) if args.shard > 1 else Device.DEFAULT
|
||||
llama = LLaMa.build(MODEL_PATH, TOKENIZER_PATH, model_gen=args.gen, model_size=args.size, quantize=args.quantize, device=device)
|
||||
param_bytes = sum(x.lazydata.size * x.dtype.itemsize for x in get_parameters(llama.model))
|
||||
param_bytes = sum(x.uop.size * x.dtype.itemsize for x in get_parameters(llama.model))
|
||||
|
||||
outputted = pre_prompt if chatbot else args.prompt
|
||||
start_pos, toks = 0, [llama.tokenizer.bos_id()] + llama.tokenizer.encode(outputted)
|
||||
|
@ -233,8 +233,6 @@ def prefill(model, toks, start_pos=0):
|
||||
return start_pos
|
||||
|
||||
if __name__ == "__main__":
|
||||
Tensor.no_grad = True
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--download_model", action="store_true", help="Download a model")
|
||||
parser.add_argument("--model", type=Path, help="Model path")
|
||||
@ -286,7 +284,7 @@ if __name__ == "__main__":
|
||||
|
||||
device = tuple(f"{Device.DEFAULT}:{i}" for i in range(args.shard)) if args.shard > 1 else Device.DEFAULT
|
||||
model = build_transformer(args.model, model_size=args.size, quantize=args.quantize, device=device)
|
||||
param_bytes = sum(x.lazydata.size * x.dtype.itemsize for x in get_parameters(model))
|
||||
param_bytes = sum(x.uop.size * x.dtype.itemsize for x in get_parameters(model))
|
||||
|
||||
if not args.no_api and not args.benchmark:
|
||||
from bottle import Bottle, request, response, HTTPResponse, abort, static_file
|
||||
|
@ -16,7 +16,7 @@ if __name__ == "__main__":
|
||||
#model.load_pretrained()
|
||||
for p in nn.state.get_parameters(model): p.replace(Tensor.empty(p.shape, dtype=p.dtype)) # fake load pretrained
|
||||
|
||||
#early_sched = create_schedule([x.lazydata for x in nn.state.get_parameters(model)])
|
||||
#early_sched = create_schedule([x.uop for x in nn.state.get_parameters(model)])
|
||||
#print(f"built model {len(early_sched)}")
|
||||
|
||||
#B, T = Variable("B", 1, 128).bind(4), 64 #Variable("T", 1, 1024).bind(64)
|
||||
@ -56,7 +56,7 @@ if __name__ == "__main__":
|
||||
state_dict.update({'X': X, 'Y': Y, 'loss': loss})
|
||||
grad_state_dict = {}
|
||||
for k,v in state_dict.items():
|
||||
if v.lazydata.base.buffer not in used_buffers: print(f"UNUSED: {k}")
|
||||
if v.uop.base.buffer not in used_buffers: print(f"UNUSED: {k}")
|
||||
if v.grad is not None: grad_state_dict['grad_'+k] = v.grad
|
||||
state_dict.update(grad_state_dict)
|
||||
state_dict.update({'adam_b1_t': optimizer.b1_t, 'adam_b2_t': optimizer.b2_t, 'adam_lr': optimizer.lr})
|
||||
@ -65,7 +65,7 @@ if __name__ == "__main__":
|
||||
nm = inverse_state_dict[p]
|
||||
state_dict["adam_m_"+nm] = m
|
||||
state_dict["adam_v_"+nm] = v
|
||||
named_buffers = {v.lazydata.base.buffer:k.replace(".", "_") for k,v in state_dict.items()}
|
||||
named_buffers = {v.uop.base.buffer:k.replace(".", "_") for k,v in state_dict.items()}
|
||||
|
||||
c_code = ["#include <stdlib.h>", "#include <tgmath.h>", "#include <stdbool.h>"]
|
||||
if TIMING: c_code += ["#include <stdio.h>", "#include <time.h>"]
|
||||
|
@ -146,7 +146,6 @@ if __name__ == "__main__":
|
||||
return loss
|
||||
|
||||
@TinyJit
|
||||
@Tensor.test()
|
||||
def sample(z:Tensor, cond:Tensor) -> Tensor:
|
||||
return model.sample(z, cond, Tensor.full_like(cond, 10), sample_steps=getenv("SAMPLE_STEPS", 20))[-1]
|
||||
|
||||
|
@ -56,7 +56,7 @@ if __name__ == "__main__":
|
||||
with Profiling(sort="time", frac=0.1, enabled=args.profile):
|
||||
with Timing("total ", enabled=args.timing, on_exit=lambda x: f", {1e9/x:.2f} tok/sec"):
|
||||
with WallTimeEvent(BenchEvent.STEP):
|
||||
tok = model(Tensor([toks[start_pos:]]), 0 if start_pos == 0 else Variable("start_pos", 1, 1024).bind(start_pos), args.temperature).item()
|
||||
tok = model(Tensor([toks[start_pos:]]), 0 if start_pos == 0 else Variable("start_pos", 1, 1024-1).bind(start_pos), args.temperature).item()
|
||||
toks.append(tok)
|
||||
start_pos += 1
|
||||
print(spp.decode(toks))
|
||||
|
@ -71,7 +71,7 @@ def loader_process(q_in, q_out, X:Tensor, seed):
|
||||
#storage_tensor._copyin(img_tensor.numpy())
|
||||
|
||||
# faster
|
||||
X[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = img.tobytes()
|
||||
X[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = img.tobytes()
|
||||
|
||||
# ideal
|
||||
#X[idx].assign(img.tobytes()) # NOTE: this is slow!
|
||||
@ -262,8 +262,8 @@ def load_unet3d_data(preprocessed_dataset_dir, seed, queue_in, queue_out, X:Tens
|
||||
x = random_brightness_augmentation(x)
|
||||
x = gaussian_noise(x)
|
||||
|
||||
X[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = x.tobytes()
|
||||
Y[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = y.tobytes()
|
||||
X[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = x.tobytes()
|
||||
Y[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = y.tobytes()
|
||||
|
||||
queue_out.put(idx)
|
||||
queue_out.put(None)
|
||||
@ -377,12 +377,12 @@ def load_retinanet_data(base_dir:Path, val:bool, queue_in:Queue, queue_out:Queue
|
||||
clipped_match_idxs = np.clip(match_idxs, 0, None)
|
||||
clipped_boxes, clipped_labels = tgt["boxes"][clipped_match_idxs], tgt["labels"][clipped_match_idxs]
|
||||
|
||||
boxes[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = clipped_boxes.tobytes()
|
||||
labels[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = clipped_labels.tobytes()
|
||||
matches[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = match_idxs.tobytes()
|
||||
anchors[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = anchor.tobytes()
|
||||
boxes[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = clipped_boxes.tobytes()
|
||||
labels[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = clipped_labels.tobytes()
|
||||
matches[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = match_idxs.tobytes()
|
||||
anchors[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = anchor.tobytes()
|
||||
|
||||
imgs[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = img.tobytes()
|
||||
imgs[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = img.tobytes()
|
||||
|
||||
queue_out.put(idx)
|
||||
queue_out.put(None)
|
||||
|
@ -9,7 +9,6 @@ from extra.bench_log import BenchEvent, WallTimeEvent
|
||||
def tlog(x): print(f"{x:25s} @ {time.perf_counter()-start:5.2f}s")
|
||||
|
||||
def eval_resnet():
|
||||
Tensor.no_grad = True
|
||||
with WallTimeEvent(BenchEvent.FULL):
|
||||
# Resnet50-v1.5
|
||||
from extra.models.resnet import ResNet50
|
||||
@ -245,7 +244,6 @@ def eval_mrcnn():
|
||||
if __name__ == "__main__":
|
||||
# inference only
|
||||
Tensor.training = False
|
||||
Tensor.no_grad = True
|
||||
|
||||
models = getenv("MODEL", "resnet,retinanet,unet3d,rnnt,bert,mrcnn").split(",")
|
||||
for m in models:
|
||||
|
@ -60,7 +60,6 @@ def spec_mrcnn():
|
||||
if __name__ == "__main__":
|
||||
# inference only for now
|
||||
Tensor.training = False
|
||||
Tensor.no_grad = True
|
||||
|
||||
for m in getenv("MODEL", "resnet,retinanet,unet3d,rnnt,bert,mrcnn").split(","):
|
||||
nm = f"spec_{m}"
|
||||
|
@ -608,7 +608,7 @@ def train_retinanet():
|
||||
|
||||
if getenv("RESET_STEP", 1): _train_step.reset()
|
||||
|
||||
with Tensor.train(mode=False), Tensor.test():
|
||||
with Tensor.train(mode=False):
|
||||
if not RUNMLPERF:
|
||||
i, proc = 0, _fake_data_get(EVAL_BS, val=(val:=True))
|
||||
else:
|
||||
@ -791,7 +791,6 @@ def train_unet3d():
|
||||
return loss.realize()
|
||||
|
||||
@Tensor.train(mode=False)
|
||||
@Tensor.test()
|
||||
def eval_step(model, x, y):
|
||||
y_hat, y = sliding_window_inference(model, x, y, gpus=GPUS)
|
||||
y_hat, y = Tensor(y_hat), Tensor(y, requires_grad=False)
|
||||
|
@ -5,7 +5,7 @@
|
||||
"system_name": "tinybox 8xMI300X",
|
||||
"number_of_nodes": "1",
|
||||
"host_processors_per_node": "2",
|
||||
"host_processor_model_name": "AMD EPYC 9354 32-Core Processor",
|
||||
"host_processor_model_name": "AMD EPYC 9354",
|
||||
"host_processor_core_count": "32",
|
||||
"host_processor_vcpu_count": "64",
|
||||
"host_processor_frequency": "",
|
||||
@ -18,7 +18,7 @@
|
||||
"host_networking_topology": "",
|
||||
"host_memory_configuration": "24x 96GB DDR5",
|
||||
"accelerators_per_node": "8",
|
||||
"accelerator_model_name": "AMD Instinct MI300X",
|
||||
"accelerator_model_name": "AMD Instinct MI300X 192GB HBM3",
|
||||
"accelerator_host_interconnect": "PCIe 5.0 x16",
|
||||
"accelerator_frequency": "",
|
||||
"accelerator_on-chip_memories": "",
|
||||
@ -30,10 +30,9 @@
|
||||
"hw_notes": "",
|
||||
"framework": "tinygrad, branch mlperf_training_v5.0",
|
||||
"other_software_stack": {
|
||||
"python": "3.10.16",
|
||||
"ROCm": "3.0.0+94441cb"
|
||||
"python": "3.10.16",
|
||||
"ROCm": "3.0.0+94441cb"
|
||||
},
|
||||
"operating_system": "Ubuntu 24.04.1 LTS",
|
||||
"sw_notes": ""
|
||||
}
|
||||
|
||||
}
|
@ -5,7 +5,7 @@
|
||||
"system_name": "tinybox green",
|
||||
"number_of_nodes": "1",
|
||||
"host_processors_per_node": "1",
|
||||
"host_processor_model_name": "AMD EPYC 7532 32-Core Processor",
|
||||
"host_processor_model_name": "AMD EPYC 7532",
|
||||
"host_processor_core_count": "32",
|
||||
"host_processor_vcpu_count": "64",
|
||||
"host_processor_frequency": "",
|
||||
@ -35,4 +35,4 @@
|
||||
},
|
||||
"operating_system": "Ubuntu 22.04.4",
|
||||
"sw_notes": ""
|
||||
}
|
||||
}
|
@ -5,7 +5,7 @@
|
||||
"system_name": "tinybox red",
|
||||
"number_of_nodes": "1",
|
||||
"host_processors_per_node": "1",
|
||||
"host_processor_model_name": "AMD EPYC 7532 32-Core Processor",
|
||||
"host_processor_model_name": "AMD EPYC 7532",
|
||||
"host_processor_core_count": "32",
|
||||
"host_processor_vcpu_count": "64",
|
||||
"host_processor_frequency": "",
|
||||
@ -34,4 +34,4 @@
|
||||
},
|
||||
"operating_system": "Ubuntu 22.04.4",
|
||||
"sw_notes": ""
|
||||
}
|
||||
}
|
@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
export PYTHONPATH="." AMD=1
|
||||
export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=1 BS=128 EVAL_BS=128
|
||||
|
||||
export BEAM=3 BEAM_UOPS_MAX=4000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
# export BEAM_LOG_SURPASS_MAX=1
|
||||
# export BASEDIR="/raid/datasets/wiki"
|
||||
|
||||
export RESET_STEP=1
|
||||
export BENCHMARK=10 BERT_LAYERS=2 DEBUG=2
|
||||
|
||||
python3 examples/mlperf/model_train.py
|
@ -0,0 +1,69 @@
|
||||
# 1. Problem
|
||||
|
||||
This problem uses BERT for NLP.
|
||||
|
||||
## Requirements
|
||||
|
||||
Install tinygrad and mlperf-logging from branch mlperf_training_v5.0 (uncomment mlperf in setup.py).
|
||||
```
|
||||
git clone https://github.com/tinygrad/tinygrad.git
|
||||
python3 -m pip install -e ".[mlperf]"
|
||||
```
|
||||
Also install gdown (for dataset), numpy, tqdm and tensorflow.
|
||||
```
|
||||
pip install gdown numpy tqdm tensorflow
|
||||
```
|
||||
|
||||
### tinybox_green
|
||||
Install the p2p driver per [README](https://github.com/tinygrad/open-gpu-kernel-modules/blob/550.54.15-p2p/README.md)
|
||||
This is the default on production tinybox green.
|
||||
|
||||
# 2. Directions
|
||||
|
||||
## Steps to download and verify data
|
||||
|
||||
### 1. Download raw data
|
||||
|
||||
```
|
||||
BASEDIR="/raid/datasets/wiki" WIKI_TRAIN=1 VERIFY_CHECKSUM=1 python3 extra/datasets/wikipedia_download.py
|
||||
```
|
||||
|
||||
### 2. Preprocess train and validation data
|
||||
|
||||
Note: The number of threads used for preprocessing is limited by available memory. With 128GB of RAM, a maximum of 16 threads is recommended.
|
||||
|
||||
#### Training:
|
||||
```
|
||||
BASEDIR="/raid/datasets/wiki" NUM_WORKERS=16 python3 extra/datasets/wikipedia.py pre-train all
|
||||
```
|
||||
|
||||
To generate a specific topic (a number between 0 and 499):
|
||||
```
|
||||
BASEDIR="/raid/datasets/wiki" python3 extra/datasets/wikipedia.py pre-train 42
|
||||
```
|
||||
|
||||
#### Validation:
|
||||
```
|
||||
BASEDIR="/raid/datasets/wiki" python3 extra/datasets/wikipedia.py pre-eval
|
||||
```
|
||||
## Running
|
||||
|
||||
### tinybox_green
|
||||
|
||||
#### Steps to run benchmark
|
||||
```
|
||||
examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_green/run_and_time.sh
|
||||
```
|
||||
|
||||
### tinybox_red
|
||||
|
||||
#### Steps to run benchmark
|
||||
```
|
||||
examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_red/run_and_time.sh
|
||||
```
|
||||
### tinybox_8xMI300X
|
||||
|
||||
#### Steps to run benchmark
|
||||
```
|
||||
examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_8xMI300X/run_and_time.sh
|
||||
```
|
@ -0,0 +1,14 @@
|
||||
#!/bin/bash
|
||||
|
||||
export PYTHONPATH="." AMD=1
|
||||
export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=8 BS=1024 EVAL_BS=1024
|
||||
export OPT_BASE_LEARNING_RATE=0.0011 OPT_LAMB_BETA_1=0.60466 OPT_LAMB_BETA_2=0.85437 DECAY=0.1
|
||||
|
||||
export BEAM=3 BEAM_UOPS_MAX=6000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1 FREE_INTERMEDIATE=0
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
|
||||
export BENCHMARK=10 BERT_LAYERS=2 DEBUG=2
|
||||
|
||||
python3 examples/mlperf/model_train.py
|
@ -0,0 +1,17 @@
|
||||
#!/bin/bash
|
||||
|
||||
export PYTHONPATH="." AMD=1
|
||||
export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=8 BS=1024 EVAL_BS=1024
|
||||
|
||||
# similar to https://github.com/mlcommons/training_results_v3.1/blob/d06288b2bd675a9d88e0e6181f5bb5626b71ec19/Quanta_Cloud_Technology/results/D54U-3U/bert/result_1.txt#L54
|
||||
export OPT_BASE_LEARNING_RATE=0.0011 OPT_LAMB_BETA_1=0.60466 OPT_LAMB_BETA_2=0.85437 DECAY=0.1
|
||||
export TRAIN_STEPS=3900
|
||||
|
||||
export BEAM=3 BEAM_UOPS_MAX=6000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1 FREE_INTERMEDIATE=0
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
|
||||
export WANDB=1 PARALLEL=0
|
||||
|
||||
RUNMLPERF=1 python3 examples/mlperf/model_train.py
|
@ -0,0 +1,29 @@
|
||||
#!/bin/bash
|
||||
set -e # Exit on any error
|
||||
set -o pipefail # Make pipeline fail if any command fails
|
||||
|
||||
export PYTHONPATH="." AMD=1
|
||||
export MODEL="bert"
|
||||
export SUBMISSION_PLATFORM="tinybox_8xMI300X"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=8 BS=1024 EVAL_BS=1024
|
||||
|
||||
# similar to https://github.com/mlcommons/training_results_v3.1/blob/d06288b2bd675a9d88e0e6181f5bb5626b71ec19/Quanta_Cloud_Technology/results/D54U-3U/bert/result_1.txt#L54
|
||||
export OPT_BASE_LEARNING_RATE=0.0011 OPT_LAMB_BETA_1=0.60466 OPT_LAMB_BETA_2=0.85437 DECAY=0.1
|
||||
export TRAIN_STEPS=3900
|
||||
|
||||
export BEAM=3 BEAM_UOPS_MAX=6000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1 FREE_INTERMEDIATE=0
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
|
||||
# pip install -e ".[mlperf]"
|
||||
export LOGMLPERF=1
|
||||
|
||||
export SEED=$RANDOM
|
||||
DATETIME=$(date "+%m%d%H%M")
|
||||
LOGFILE="bert_8xMI300x_${DATETIME}_${SEED}.log"
|
||||
|
||||
# init # TODO: without DEBUG=2 it hangs
|
||||
BENCHMARK=10 INITMLPERF=1 BERT_LAYERS=2 DEBUG=2 python3 examples/mlperf/model_train.py | tee $LOGFILE
|
||||
|
||||
# run
|
||||
PARALLEL=0 RUNMLPERF=1 python3 examples/mlperf/model_train.py | tee -a $LOGFILE
|
@ -0,0 +1,69 @@
|
||||
# 1. Problem
|
||||
|
||||
This problem uses BERT for NLP.
|
||||
|
||||
## Requirements
|
||||
|
||||
Install tinygrad and mlperf-logging from branch mlperf_training_v5.0 (uncomment mlperf in setup.py).
|
||||
```
|
||||
git clone https://github.com/tinygrad/tinygrad.git
|
||||
python3 -m pip install -e ".[mlperf]"
|
||||
```
|
||||
Also install gdown (for dataset), numpy, tqdm and tensorflow.
|
||||
```
|
||||
pip install gdown numpy tqdm tensorflow
|
||||
```
|
||||
|
||||
### tinybox_green
|
||||
Install the p2p driver per [README](https://github.com/tinygrad/open-gpu-kernel-modules/blob/550.54.15-p2p/README.md)
|
||||
This is the default on production tinybox green.
|
||||
|
||||
# 2. Directions
|
||||
|
||||
## Steps to download and verify data
|
||||
|
||||
### 1. Download raw data
|
||||
|
||||
```
|
||||
BASEDIR="/raid/datasets/wiki" WIKI_TRAIN=1 VERIFY_CHECKSUM=1 python3 extra/datasets/wikipedia_download.py
|
||||
```
|
||||
|
||||
### 2. Preprocess train and validation data
|
||||
|
||||
Note: The number of threads used for preprocessing is limited by available memory. With 128GB of RAM, a maximum of 16 threads is recommended.
|
||||
|
||||
#### Training:
|
||||
```
|
||||
BASEDIR="/raid/datasets/wiki" NUM_WORKERS=16 python3 extra/datasets/wikipedia.py pre-train all
|
||||
```
|
||||
|
||||
To generate a specific topic (a number between 0 and 499):
|
||||
```
|
||||
BASEDIR="/raid/datasets/wiki" python3 extra/datasets/wikipedia.py pre-train 42
|
||||
```
|
||||
|
||||
#### Validation:
|
||||
```
|
||||
BASEDIR="/raid/datasets/wiki" python3 extra/datasets/wikipedia.py pre-eval
|
||||
```
|
||||
## Running
|
||||
|
||||
### tinybox_green
|
||||
|
||||
#### Steps to run benchmark
|
||||
```
|
||||
examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_green/run_and_time.sh
|
||||
```
|
||||
|
||||
### tinybox_red
|
||||
|
||||
#### Steps to run benchmark
|
||||
```
|
||||
examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_red/run_and_time.sh
|
||||
```
|
||||
### tinybox_8xMI300X
|
||||
|
||||
#### Steps to run benchmark
|
||||
```
|
||||
examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_8xMI300X/run_and_time.sh
|
||||
```
|
@ -0,0 +1,16 @@
|
||||
#!/bin/bash
|
||||
|
||||
export PYTHONPATH="." NV=1
|
||||
export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=96 EVAL_BS=96
|
||||
|
||||
export FUSE_ARANGE=1 FUSE_ARANGE_UINT=0
|
||||
|
||||
export BEAM=8 BEAM_UOPS_MAX=10000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
export BEAM_LOG_SURPASS_MAX=1
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
|
||||
export BENCHMARK=10 BERT_LAYERS=2 DEBUG=2
|
||||
|
||||
python3 examples/mlperf/model_train.py
|
@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
export PYTHONPATH="." NV=1
|
||||
export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=96 EVAL_BS=96
|
||||
|
||||
export FUSE_ARANGE=1 FUSE_ARANGE_UINT=0
|
||||
|
||||
export BEAM=8 BEAM_UOPS_MAX=10000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
|
||||
export WANDB=1 PARALLEL=0
|
||||
|
||||
RUNMLPERF=1 python3 examples/mlperf/model_train.py
|
@ -0,0 +1,27 @@
|
||||
#!/bin/bash
|
||||
set -e # Exit on any error
|
||||
set -o pipefail # Make pipeline fail if any command fails
|
||||
|
||||
export PYTHONPATH="." NV=1
|
||||
export MODEL="bert"
|
||||
export SUBMISSION_PLATFORM="tinybox_green"
|
||||
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=96 EVAL_BS=96
|
||||
|
||||
export FUSE_ARANGE=1 FUSE_ARANGE_UINT=0
|
||||
|
||||
export BEAM=8 BEAM_UOPS_MAX=10000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
|
||||
# pip install -e ".[mlperf]"
|
||||
export LOGMLPERF=1
|
||||
|
||||
export SEED=$RANDOM
|
||||
DATETIME=$(date "+%m%d%H%M")
|
||||
LOGFILE="bert_green_${DATETIME}_${SEED}.log"
|
||||
|
||||
# init
|
||||
BENCHMARK=10 INITMLPERF=1 BERT_LAYERS=2 python3 examples/mlperf/model_train.py | tee $LOGFILE
|
||||
|
||||
# run
|
||||
PARALLEL=0 RUNMLPERF=1 python3 examples/mlperf/model_train.py | tee -a $LOGFILE
|
@ -0,0 +1,69 @@
|
||||
# 1. Problem
|
||||
|
||||
This problem uses BERT for NLP.
|
||||
|
||||
## Requirements
|
||||
|
||||
Install tinygrad and mlperf-logging from branch `mlperf_training_v5.0` (uncomment the `mlperf` entry in `setup.py` first).
|
||||
```
|
||||
git clone https://github.com/tinygrad/tinygrad.git
|
||||
python3 -m pip install -e ".[mlperf]"
|
||||
```
|
||||
Also install gdown (for dataset), numpy, tqdm and tensorflow.
|
||||
```
|
||||
pip install gdown numpy tqdm tensorflow
|
||||
```
|
||||
|
||||
### tinybox_green
|
||||
Install the p2p driver per [README](https://github.com/tinygrad/open-gpu-kernel-modules/blob/550.54.15-p2p/README.md)
|
||||
This is the default on production tinybox green.
|
||||
|
||||
# 2. Directions
|
||||
|
||||
## Steps to download and verify data
|
||||
|
||||
### 1. Download raw data
|
||||
|
||||
```
|
||||
BASEDIR="/raid/datasets/wiki" WIKI_TRAIN=1 VERIFY_CHECKSUM=1 python3 extra/datasets/wikipedia_download.py
|
||||
```
|
||||
|
||||
### 2. Preprocess train and validation data
|
||||
|
||||
Note: The number of threads used for preprocessing is limited by available memory. With 128GB of RAM, a maximum of 16 threads is recommended.
|
||||
|
||||
#### Training:
|
||||
```
|
||||
BASEDIR="/raid/datasets/wiki" NUM_WORKERS=16 python3 extra/datasets/wikipedia.py pre-train all
|
||||
```
|
||||
|
||||
To generate only a specific topic (topic index between 0 and 499):
|
||||
```
|
||||
BASEDIR="/raid/datasets/wiki" python3 extra/datasets/wikipedia.py pre-train 42
|
||||
```
|
||||
|
||||
#### Validation:
|
||||
```
|
||||
BASEDIR="/raid/datasets/wiki" python3 extra/datasets/wikipedia.py pre-eval
|
||||
```
|
||||
## Running
|
||||
|
||||
### tinybox_green
|
||||
|
||||
#### Steps to run benchmark
|
||||
```
|
||||
examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_green/run_and_time.sh
|
||||
```
|
||||
|
||||
### tinybox_red
|
||||
|
||||
#### Steps to run benchmark
|
||||
```
|
||||
examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_red/run_and_time.sh
|
||||
```
|
||||
### tinybox_8xMI300X
|
||||
|
||||
#### Steps to run benchmark
|
||||
```
|
||||
examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_8xMI300X/run_and_time.sh
|
||||
```
|
@ -0,0 +1,17 @@
|
||||
#!/bin/bash
|
||||
|
||||
export PYTHONPATH="." AMD=1
|
||||
export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=96 EVAL_BS=96
|
||||
|
||||
export FUSE_ARANGE=1 FUSE_ARANGE_UINT=0
|
||||
|
||||
export BEAM=5 BEAM_UOPS_MAX=8000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
export BEAM_LOG_SURPASS_MAX=1
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
|
||||
export RESET_STEP=1
|
||||
export BENCHMARK=10 BERT_LAYERS=2 DEBUG=2
|
||||
|
||||
python3 examples/mlperf/model_train.py
|
@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
export PYTHONPATH="." AMD=1
|
||||
export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=96 EVAL_BS=96
|
||||
|
||||
export FUSE_ARANGE=1 FUSE_ARANGE_UINT=0
|
||||
|
||||
export BEAM=5 BEAM_UOPS_MAX=8000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
|
||||
export WANDB=1 PARALLEL=0
|
||||
|
||||
RUNMLPERF=1 python3 examples/mlperf/model_train.py
|
@ -0,0 +1,32 @@
|
||||
#!/bin/bash
|
||||
set -e # Exit on any error
|
||||
set -o pipefail # Make pipeline fail if any command fails
|
||||
|
||||
export PYTHONPATH="." AMD=1
|
||||
export MODEL="bert"
|
||||
export SUBMISSION_PLATFORM="tinybox_red"
|
||||
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=96 EVAL_BS=96
|
||||
|
||||
export FUSE_ARANGE=1 FUSE_ARANGE_UINT=0
|
||||
|
||||
export BEAM=5 BEAM_UOPS_MAX=8000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
|
||||
# pip install -e ".[mlperf]"
|
||||
export LOGMLPERF=1
|
||||
|
||||
export SEED=$RANDOM
|
||||
DATETIME=$(date "+%m%d%H%M")
|
||||
LOGFILE="bert_red_${DATETIME}_${SEED}.log"
|
||||
|
||||
export HCQDEV_WAIT_TIMEOUT_MS=100000 # prevents hang?
|
||||
|
||||
# init
|
||||
sleep 5 && sudo rmmod amdgpu || true
|
||||
BENCHMARK=10 INITMLPERF=1 BERT_LAYERS=2 python3 examples/mlperf/model_train.py | tee $LOGFILE
|
||||
|
||||
# run
|
||||
# TODO: AM driver resulted in nan
|
||||
sudo modprobe amdgpu
|
||||
PARALLEL=0 RUNMLPERF=1 python3 examples/mlperf/model_train.py | tee -a $LOGFILE
|
@ -0,0 +1,50 @@
|
||||
# 1. Problem
|
||||
|
||||
This problem uses the ResNet-50 CNN to do image classification.
|
||||
|
||||
## Requirements
|
||||
|
||||
Install tinygrad and mlperf-logging from master.
|
||||
```
|
||||
git clone https://github.com/tinygrad/tinygrad.git
|
||||
python3 -m pip install -e ".[mlperf]"
|
||||
```
|
||||
|
||||
### tinybox_green
|
||||
Install the p2p driver per [README](https://github.com/tinygrad/open-gpu-kernel-modules/blob/550.54.15-p2p/README.md)
|
||||
This is the default on production tinybox green.
|
||||
|
||||
### tinybox_red
|
||||
Disable cwsr by setting the amdgpu module option `cwsr_enable=0`.
|
||||
This is the default on production tinybox red.
|
||||
```
|
||||
sudo vi /etc/modprobe.d/amdgpu.conf
|
||||
cat <<EOF > /etc/modprobe.d/amdgpu.conf
|
||||
options amdgpu cwsr_enable=0
|
||||
EOF
|
||||
sudo update-initramfs -u
|
||||
sudo reboot
|
||||
|
||||
# validate
|
||||
sudo cat /sys/module/amdgpu/parameters/cwsr_enable #= 0
|
||||
```
|
||||
|
||||
# 2. Directions
|
||||
|
||||
## Steps to download and verify data
|
||||
|
||||
```
|
||||
IMGNET_TRAIN=1 python3 extra/datasets/imagenet_download.py
|
||||
```
|
||||
|
||||
## Steps for one time setup
|
||||
|
||||
### tinybox_red
|
||||
```
|
||||
examples/mlperf/training_submission_v4.0/tinycorp/benchmarks/resnet/implementations/tinybox_red/setup.sh
|
||||
```
|
||||
|
||||
## Steps to run benchmark
|
||||
```
|
||||
examples/mlperf/training_submission_v4.0/tinycorp/benchmarks/resnet/implementations/tinybox_red/run_and_time.sh
|
||||
```
|
@ -0,0 +1,13 @@
|
||||
#!/bin/bash
|
||||
|
||||
export PYTHONPATH="." NV=1
|
||||
export MODEL="resnet"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=1536 EVAL_BS=192
|
||||
|
||||
export RESET_STEP=0
|
||||
|
||||
export TRAIN_BEAM=4 IGNORE_JIT_FIRST_BEAM=1 BEAM_UOPS_MAX=1500 BEAM_UPCAST_MAX=64 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=10 BEAM_PADTO=0
|
||||
|
||||
export BENCHMARK=10 DEBUG=2
|
||||
|
||||
python3 examples/mlperf/model_train.py
|
@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
export PYTHONPATH="." NV=1
|
||||
export MODEL="resnet"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=1536 EVAL_BS=192
|
||||
|
||||
export RESET_STEP=0
|
||||
|
||||
export TRAIN_BEAM=4 IGNORE_JIT_FIRST_BEAM=1 BEAM_UOPS_MAX=1500 BEAM_UPCAST_MAX=64 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=10 BEAM_PADTO=0
|
||||
|
||||
export EVAL_START_EPOCH=3 EVAL_FREQ=4
|
||||
|
||||
export WANDB=1 PARALLEL=0
|
||||
|
||||
python3 examples/mlperf/model_train.py
|
@ -0,0 +1,25 @@
|
||||
#!/bin/bash
|
||||
set -e # Exit on any error
|
||||
set -o pipefail # Make pipeline fail if any command fails
|
||||
|
||||
export PYTHONPATH="." NV=1
|
||||
export MODEL="resnet"
|
||||
export SUBMISSION_PLATFORM="tinybox_green"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=1536 EVAL_BS=192
|
||||
|
||||
export RESET_STEP=0
|
||||
|
||||
export TRAIN_BEAM=4 IGNORE_JIT_FIRST_BEAM=1 BEAM_UOPS_MAX=1500 BEAM_UPCAST_MAX=64 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=10 BEAM_PADTO=0
|
||||
|
||||
# pip install -e ".[mlperf]"
|
||||
export LOGMLPERF=${LOGMLPERF:-1}
|
||||
|
||||
export SEED=$RANDOM
|
||||
DATETIME=$(date "+%m%d%H%M")
|
||||
LOGFILE="resnet_green_${DATETIME}_${SEED}.log"
|
||||
|
||||
# init
|
||||
BENCHMARK=10 INITMLPERF=1 python3 examples/mlperf/model_train.py | tee $LOGFILE
|
||||
|
||||
# run
|
||||
PARALLEL=0 RUNMLPERF=1 EVAL_START_EPOCH=3 EVAL_FREQ=4 python3 examples/mlperf/model_train.py | tee -a $LOGFILE
|
@ -0,0 +1,50 @@
|
||||
# 1. Problem
|
||||
|
||||
This problem uses the ResNet-50 CNN to do image classification.
|
||||
|
||||
## Requirements
|
||||
|
||||
Install tinygrad and mlperf-logging from master.
|
||||
```
|
||||
git clone https://github.com/tinygrad/tinygrad.git
|
||||
python3 -m pip install -e ".[mlperf]"
|
||||
```
|
||||
|
||||
### tinybox_green
|
||||
Install the p2p driver per [README](https://github.com/tinygrad/open-gpu-kernel-modules/blob/550.54.15-p2p/README.md)
|
||||
This is the default on production tinybox green.
|
||||
|
||||
### tinybox_red
|
||||
Disable cwsr by setting the amdgpu module option `cwsr_enable=0`.
|
||||
This is the default on production tinybox red.
|
||||
```
|
||||
sudo vi /etc/modprobe.d/amdgpu.conf
|
||||
cat <<EOF > /etc/modprobe.d/amdgpu.conf
|
||||
options amdgpu cwsr_enable=0
|
||||
EOF
|
||||
sudo update-initramfs -u
|
||||
sudo reboot
|
||||
|
||||
# validate
|
||||
sudo cat /sys/module/amdgpu/parameters/cwsr_enable #= 0
|
||||
```
|
||||
|
||||
# 2. Directions
|
||||
|
||||
## Steps to download and verify data
|
||||
|
||||
```
|
||||
IMGNET_TRAIN=1 python3 extra/datasets/imagenet_download.py
|
||||
```
|
||||
|
||||
## Steps for one time setup
|
||||
|
||||
### tinybox_red
|
||||
```
|
||||
examples/mlperf/training_submission_v4.0/tinycorp/benchmarks/resnet/implementations/tinybox_red/setup.sh
|
||||
```
|
||||
|
||||
## Steps to run benchmark
|
||||
```
|
||||
examples/mlperf/training_submission_v4.0/tinycorp/benchmarks/resnet/implementations/tinybox_red/run_and_time.sh
|
||||
```
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user