2025-04-18 20:38:55 +09:00
|
|
|
import unittest
|
2025-06-13 15:59:36 +09:00
|
|
|
from tinygrad import Tensor
|
2025-04-18 20:38:55 +09:00
|
|
|
from tinygrad.helpers import Context
|
2025-06-13 15:59:36 +09:00
|
|
|
from tinygrad.uop.ops import Ops
|
2025-04-18 20:38:55 +09:00
|
|
|
|
|
|
|
class TestRingAllReduce(unittest.TestCase):
  """Tests for summing a tensor sharded over several CPU devices with Context(RING=2) set."""

  # Inspects the COPY items scheduled for a sharded sum: how many there are and which
  # (destination-buffer device, source-buffer device) pairs they connect.
  # NOTE(review): RING=2 presumably forces the ring allreduce path -- confirm against tinygrad's allreduce code.
  @unittest.skip("still broken")
  def test_schedule_ring(self):
    with Context(RING=2):
      N = 4
      ds = tuple(f"CPU:{i}" for i in range(N))
      t = Tensor.empty(N, N*100).shard(ds, axis=0).realize()
      schedules = t.sum(0).schedule_with_vars()[0]
      copies = [si for si in schedules if si.ast.op is Ops.COPY]
      pairs = [(c.bufs[0].device, c.bufs[1].device) for c in copies]
      # N*(N-1) scatter reduce, and N*(N-1) allgather
      self.assertEqual(len(pairs), N*(N-1)*2)
      # copy topology forms a ring
      self.assertEqual(len(set(pairs)), N)

  # Checks the numeric result of summing an all-ones (N, N*100) tensor, sharded on axis 0, over axis 0.
  def test_correct_ring(self):
    with Context(RING=2):
      N = 4
      ds = tuple(f"CPU:{i}" for i in range(N))
      t = Tensor.ones(N, N*100).contiguous().shard(ds, axis=0).realize()
      out = t.sum(0)
      # each of the N*100 columns holds N ones, so every entry of the sum is N;
      # deriving the expectation from N keeps the check valid if N is changed
      self.assertListEqual(out.tolist(), [N]*(N*100))
|
|
|
|
|
2025-04-18 20:38:55 +09:00
|
|
|
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
  unittest.main()
|