carrot/common/file_helpers.py

import io
import os
import tempfile
import contextlib

import zstandard as zstd

LOG_COMPRESSION_LEVEL = 10  # little benefit up to level 15. level ~17 is a small step change


class CallbackReader:
  """Wraps a file, but overrides the read method to also
  call a callback function with the number of bytes read so far."""

  def __init__(self, f, callback, *args):
    self.f = f
    self.callback = callback
    self.cb_args = args
    self.total_read = 0

  def __getattr__(self, attr):
    return getattr(self.f, attr)

  def read(self, *args, **kwargs):
    chunk = self.f.read(*args, **kwargs)
    self.total_read += len(chunk)
    self.callback(*self.cb_args, self.total_read)
    return chunk
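
# Usage sketch (illustrative, not part of the original module): wrap a file object so a
# progress callback fires after every read. The callback receives any extra positional
# args given to the constructor, followed by the running total of bytes read.
# `print_progress` and "example.bin" are hypothetical names used only for this example:
#
#   def print_progress(name, bytes_read):
#     print(f"{name}: {bytes_read} bytes read so far")
#
#   with open("example.bin", "rb") as f:
#     reader = CallbackReader(f, print_progress, "example.bin")
#     while reader.read(8192):
#       pass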


@contextlib.contextmanager
def atomic_write_in_dir(path: str, mode: str = 'w', buffering: int = -1, encoding: str = None, newline: str = None,
                        overwrite: bool = False):
  """Write to a file atomically using a temporary file in the same directory as the destination file."""
  dir_name = os.path.dirname(path)

  if not overwrite and os.path.exists(path):
    raise FileExistsError(f"File '{path}' already exists. To overwrite it, set 'overwrite' to True.")

  with tempfile.NamedTemporaryFile(mode=mode, buffering=buffering, encoding=encoding, newline=newline, dir=dir_name, delete=False) as tmp_file:
    yield tmp_file
    tmp_file_name = tmp_file.name
  os.replace(tmp_file_name, path)
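
# Usage sketch (illustrative, not part of the original module): write a file atomically
# so readers never see a partially written destination. Because the temporary file is
# created in the same directory, os.replace() is an atomic rename on the same filesystem.
# "/tmp/settings.json" is a hypothetical path used only for this example:
#
#   with atomic_write_in_dir("/tmp/settings.json", overwrite=True) as f:
#     f.write('{"key": "value"}')
#   # the destination appears (or is replaced) only after the block exits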


def get_upload_stream(filepath: str, should_compress: bool) -> tuple[io.BufferedIOBase, int]:
  """Return a readable binary stream for the file and the number of bytes it will yield.

  If should_compress is False, the file is opened directly and its on-disk size is returned.
  Otherwise the file is compressed with zstandard into an in-memory buffer, and that buffer
  is returned along with the compressed size."""
  if not should_compress:
    file_size = os.path.getsize(filepath)
    file_stream = open(filepath, "rb")
    return file_stream, file_size

  # Compress the whole file into an in-memory buffer
  compressed_stream = io.BytesIO()
  compressor = zstd.ZstdCompressor(level=LOG_COMPRESSION_LEVEL)
  with open(filepath, "rb") as f:
    compressor.copy_stream(f, compressed_stream)
  compressed_size = compressed_stream.tell()
  compressed_stream.seek(0)
  return compressed_stream, compressed_size
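

# Minimal end-to-end sketch (illustrative, not part of the original module): round-trip a
# throwaway temp file through the helpers above. The compression ratio printed depends
# entirely on the file contents; highly repetitive data compresses very well.
if __name__ == "__main__":
  demo_path = os.path.join(tempfile.gettempdir(), "file_helpers_demo.txt")
  with open(demo_path, "w") as f:
    f.write("hello world\n" * 1000)

  raw_stream, raw_size = get_upload_stream(demo_path, should_compress=False)
  compressed_stream, compressed_size = get_upload_stream(demo_path, should_compress=True)
  try:
    print(f"raw: {raw_size} bytes, zstd level {LOG_COMPRESSION_LEVEL}: {compressed_size} bytes")
  finally:
    raw_stream.close()
    compressed_stream.close()
    os.remove(demo_path)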