Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/zeroband/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class DataConfig(BaseConfig):
num_workers: int = 1
max_train_samples: int | None = None
max_eval_samples: int | None = None
- dataset_ratio: str | None = None
+ dataset_ratio: str = "100"
data_rank: int | None = None
data_world_size: int | None = None
reverse_data_files: bool = False
Expand Down
1 change: 1 addition & 0 deletions src/zeroband/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import zlib
from dataclasses import asdict
from logging import Logger
from typing import TYPE_CHECKING, Optional, Iterator, List, Dict, Tuple

Check failure on line 7 in src/zeroband/train.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (F401)

src/zeroband/train.py:7:67: F401 `typing.Tuple` imported but unused

import torch
import torch.distributed as dist
Expand Down Expand Up @@ -632,6 +632,7 @@

num_inner_steps = config.diloco.inner_steps if config.diloco is not None else 1

+ logger.info(f"Attempting to connect PCCL to {config.pccl.ccoip_host}")
# initialize PCCL
communicator = Communicator(config.pccl.ccoip_host, mpi_config.mpi_rank if mpi_config is not None else 0)
communicator.connect(n_attempts=15)
Expand Down
Loading