Skip to content

Commit d872c0a

Browse files
committed
Ty checks passed on examples/client_selection; migrated AFL to new server API.
1 parent 2178c82 commit d872c0a

File tree

11 files changed

+117
-176
lines changed

11 files changed

+117
-176
lines changed

docs/mkdocs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ theme:
88
repo: fontawesome/brands/github
99
font:
1010
text: Inter
11-
code: IBM Plex Mono
11+
code: Google Sans Code
1212
palette:
1313
# Light mode
1414
- scheme: astral-light

examples/client_selection/afl/afl_callbacks.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
from __future__ import annotations
66

77
import logging
8-
from collections.abc import Iterable
8+
from collections.abc import Iterable, Sized
9+
from typing import Any
910

1011
import torch
1112

@@ -86,12 +87,11 @@ def on_train_epoch_start(self, trainer, config, **kwargs):
8687
self._recorded = True
8788

8889
@staticmethod
89-
def _has_batches(loader: Iterable) -> bool:
90+
def _has_batches(loader: Iterable[Any] | Sized) -> bool:
9091
"""Best-effort check that the data loader yields at least one batch."""
91-
length = None
92-
if hasattr(loader, "__len__"):
92+
if isinstance(loader, Sized):
9393
try:
94-
length = len(loader)
94+
return len(loader) > 0
9595
except TypeError:
96-
length = None
97-
return bool(length) if length is not None else True
96+
return True
97+
return True
Lines changed: 17 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
"""
2-
A federated learning server using Active Federated Learning, where in each round
3-
clients are selected not uniformly at random, but with a probability conditioned
4-
on the current model, as well as the data on the client, to maximize efficiency.
2+
A federated learning server using Active Federated Learning with the
3+
strategy-based server API.
4+
5+
Clients are sampled according to valuation metrics computed on the client.
56
67
Reference:
78
@@ -10,110 +11,33 @@
1011
https://arxiv.org/pdf/1909.12641.pdf
1112
"""
1213

13-
import logging
14-
import math
15-
import random
16-
17-
import numpy as np
14+
from __future__ import annotations
1815

1916
from plato.config import Config
2017
from plato.servers import fedavg
2118

19+
from afl_selection_strategy import AFLSelectionStrategy
20+
2221

2322
class Server(fedavg.Server):
24-
"""A federated learning server using the AFL algorithm."""
23+
"""An AFL server configured with the strategy-based client selection API."""
2524

2625
def __init__(
2726
self, model=None, datasource=None, algorithm=None, trainer=None, callbacks=None
2827
):
28+
algo_cfg = getattr(Config(), "algorithm", None)
29+
30+
selection_strategy = AFLSelectionStrategy(
31+
alpha1=getattr(algo_cfg, "alpha1", 0.75) if algo_cfg else 0.75,
32+
alpha2=getattr(algo_cfg, "alpha2", 0.01) if algo_cfg else 0.01,
33+
alpha3=getattr(algo_cfg, "alpha3", 0.1) if algo_cfg else 0.1,
34+
)
35+
2936
super().__init__(
3037
model=model,
3138
datasource=datasource,
3239
algorithm=algorithm,
3340
trainer=trainer,
3441
callbacks=callbacks,
42+
client_selection_strategy=selection_strategy,
3543
)
36-
37-
self.local_values = {}
38-
39-
def weights_aggregated(self, updates):
40-
"""Extract required information from client reports after aggregating weights."""
41-
for update in updates:
42-
self.local_values[update.client_id]["valuation"] = update.report.valuation
43-
44-
def calc_sample_distribution(self, clients_pool):
45-
"""Calculate the sampling probability of each client for the next round."""
46-
# First, initialize valuations and probabilities when new clients are connected
47-
for client_id in clients_pool:
48-
if client_id not in self.local_values:
49-
self.local_values[client_id] = {}
50-
self.local_values[client_id]["valuation"] = -float("inf")
51-
self.local_values[client_id]["prob"] = 0.0
52-
53-
# For a proportion of clients with the smallest valuations, reset these valuations
54-
# to negative infinities
55-
num_smallest = int(Config().algorithm.alpha1 * len(clients_pool))
56-
smallest_valuations = dict(
57-
sorted(self.local_values.items(), key=lambda item: item[1]["valuation"])[
58-
:num_smallest
59-
]
60-
)
61-
62-
for client_id in smallest_valuations.keys():
63-
self.local_values[client_id]["valuation"] = -float("inf")
64-
65-
for client_id in clients_pool:
66-
self.local_values[client_id]["prob"] = math.exp(
67-
Config().algorithm.alpha2 * self.local_values[client_id]["valuation"]
68-
)
69-
70-
def choose_clients(self, clients_pool, clients_count):
71-
"""Choose a subset of the clients to participate in each round."""
72-
assert clients_count <= len(clients_pool)
73-
random.setstate(self.prng_state)
74-
# Update the clients sampling distribution
75-
self.calc_sample_distribution(clients_pool)
76-
77-
# 1. Sample a subset of the clients according to the sampling distribution
78-
num1 = int(math.floor((1 - Config().algorithm.alpha3) * clients_count))
79-
weighted_candidates = [
80-
client_id
81-
for client_id in clients_pool
82-
if self.local_values[client_id]["prob"] > 0.0
83-
]
84-
num1 = min(num1, len(weighted_candidates))
85-
86-
subset1 = []
87-
if num1 > 0:
88-
probs = np.array(
89-
[
90-
self.local_values[client_id]["prob"]
91-
for client_id in weighted_candidates
92-
]
93-
)
94-
total_prob = probs.sum()
95-
if total_prob <= 0:
96-
probs = np.ones(len(weighted_candidates), dtype=float) / len(
97-
weighted_candidates
98-
)
99-
else:
100-
probs = probs / total_prob
101-
subset1 = np.random.choice(
102-
weighted_candidates, num1, p=probs, replace=False
103-
).tolist()
104-
105-
# 2. Sample a subset of the remaining clients uniformly at random
106-
num2 = clients_count - len(subset1)
107-
remaining = clients_pool.copy()
108-
109-
for client_id in subset1:
110-
remaining.remove(client_id)
111-
112-
subset2 = random.sample(remaining, num2) if num2 > 0 else []
113-
114-
# 3. Selected clients are the union of these two subsets
115-
selected_clients = subset1 + subset2
116-
117-
self.prng_state = random.getstate()
118-
logging.info("[%s] Selected clients: %s", self, selected_clients)
119-
return selected_clients
Lines changed: 5 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,10 @@
11
"""
2-
A federated learning server using Active Federated Learning with strategy pattern.
2+
Legacy entry point for the AFL server using the strategy-based API.
33
4-
This is the updated version using the strategy-based API instead of inheritance.
5-
6-
Reference:
7-
8-
Goetz et al., "Active Federated Learning", 2019.
9-
10-
https://arxiv.org/pdf/1909.12641.pdf
4+
This module simply re-exports the server defined in ``afl_server`` to avoid
5+
breaking older entry points.
116
"""
127

13-
from plato.config import Config
14-
from plato.servers import fedavg
15-
from plato.servers.strategies import AFLSelectionStrategy
16-
17-
18-
class Server(fedavg.Server):
19-
"""A federated learning server using the AFL client selection strategy."""
20-
21-
def __init__(
22-
self, model=None, datasource=None, algorithm=None, trainer=None, callbacks=None
23-
):
24-
# Load AFL parameters from config
25-
alpha1 = Config().algorithm.alpha1
26-
alpha2 = Config().algorithm.alpha2
27-
alpha3 = Config().algorithm.alpha3
8+
from afl_server import Server
289

29-
super().__init__(
30-
model=model,
31-
datasource=datasource,
32-
algorithm=algorithm,
33-
trainer=trainer,
34-
callbacks=callbacks,
35-
client_selection_strategy=AFLSelectionStrategy(
36-
alpha1=alpha1,
37-
alpha2=alpha2,
38-
alpha3=alpha3,
39-
),
40-
)
10+
__all__ = ["Server"]

examples/client_selection/afl/afl_strategy.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@
1111
"""
1212

1313
import afl_client
14-
import afl_server_strategy
14+
import afl_server
1515

1616

1717
def main():
1818
"""A Plato federated learning training session using AFL strategy."""
1919
client = afl_client.create_client()
20-
server = afl_server_strategy.Server()
20+
server = afl_server.Server()
2121
server.run(client)
2222

2323

examples/client_selection/oort/oort_trainer.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,26 +8,29 @@
88
(OSDI 2021), July 2021.
99
"""
1010

11+
from __future__ import annotations
12+
1113
import numpy as np
1214
import torch
1315
from torch import nn
1416

1517
from plato.trainers.composable import ComposableTrainer
1618
from plato.trainers.strategies.base import LossCriterionStrategy
19+
from plato.trainers.tracking import RunHistory
1720

1821

1922
class OortLossStrategy(LossCriterionStrategy):
2023
"""Loss strategy for Oort that tracks sum of squared per-sample losses."""
2124

2225
def __init__(self):
23-
self._criterion = None
24-
self._run_history = None
26+
self._criterion: nn.CrossEntropyLoss | None = None
27+
self._run_history: RunHistory | None = None
2528

2629
def setup(self, context):
2730
"""Initialize the loss criterion."""
2831
self._criterion = nn.CrossEntropyLoss(reduction="none")
2932

30-
def attach_run_history(self, run_history):
33+
def attach_run_history(self, run_history: RunHistory) -> None:
3134
"""Attach run history for metric tracking."""
3235
self._run_history = run_history
3336

@@ -38,6 +41,11 @@ def compute_loss(self, outputs, labels, context):
3841
This computes per-sample losses, tracks the sum of squares
3942
(used by Oort for client selection), and returns the mean loss.
4043
"""
44+
if self._criterion is None:
45+
raise RuntimeError(
46+
"OortLossStrategy has not been initialised. Did you call setup()?"
47+
)
48+
4149
per_sample_loss = self._criterion(outputs, labels)
4250

4351
if self._run_history is not None:
@@ -71,5 +79,6 @@ def __init__(self, model=None, callbacks=None):
7179
loss_strategy=loss_strategy,
7280
)
7381

74-
if hasattr(self.loss_strategy, "attach_run_history"):
75-
self.loss_strategy.attach_run_history(self.run_history)
82+
attach_run_history = getattr(self.loss_strategy, "attach_run_history", None)
83+
if callable(attach_run_history):
84+
attach_run_history(self.run_history)

examples/client_selection/pisces/pisces_aggregation_strategy.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99
import asyncio
1010
import logging
1111
from types import SimpleNamespace
12-
from typing import Dict, List
13-
1412
import numpy as np
1513

1614
from plato.config import Config
@@ -52,10 +50,22 @@ async def aggregate_deltas(
5250

5351
total_samples = sum(update.report.num_samples for update in updates)
5452

55-
avg_update = {
56-
name: context.trainer.zeros(delta.shape)
57-
for name, delta in deltas_received[0].items()
58-
}
53+
trainer = getattr(context, "trainer", None)
54+
zeros_fn = getattr(trainer, "zeros", None) if trainer is not None else None
55+
56+
avg_update = {}
57+
for name, delta in deltas_received[0].items():
58+
if callable(zeros_fn):
59+
avg_update[name] = zeros_fn(delta.shape)
60+
elif hasattr(delta, "clone") and callable(getattr(delta, "clone")):
61+
cloned = delta.clone()
62+
if hasattr(cloned, "zero_") and callable(getattr(cloned, "zero_")):
63+
cloned.zero_()
64+
avg_update[name] = cloned
65+
else:
66+
avg_update[name] = delta * 0
67+
else:
68+
avg_update[name] = np.zeros_like(delta)
5969

6070
for i, delta in enumerate(deltas_received):
6171
client_id = updates[i].client_id

examples/client_selection/pisces/pisces_trainer.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,31 @@
99
URL: https://arxiv.org/abs/2206.09264
1010
"""
1111

12+
from __future__ import annotations
13+
14+
from collections.abc import Callable
15+
from typing import Any
16+
17+
import torch
18+
1219
from plato.trainers import loss_criterion
1320
from plato.trainers.composable import ComposableTrainer
1421
from plato.trainers.strategies.base import LossCriterionStrategy
22+
from plato.trainers.tracking import RunHistory
1523

1624

1725
class PiscesLossStrategy(LossCriterionStrategy):
1826
"""Loss strategy for Pisces that tracks per-batch loss values."""
1927

2028
def __init__(self):
21-
self._criterion = None
22-
self._run_history = None
29+
self._criterion: Callable[[Any, Any], torch.Tensor] | None = None
30+
self._run_history: RunHistory | None = None
2331

2432
def setup(self, context):
2533
"""Initialize the loss criterion."""
2634
self._criterion = loss_criterion.get()
2735

28-
def attach_run_history(self, run_history):
36+
def attach_run_history(self, run_history: RunHistory) -> None:
2937
"""Attach run history for metric tracking."""
3038
self._run_history = run_history
3139

@@ -36,6 +44,11 @@ def compute_loss(self, outputs, labels, context):
3644
This computes the batch loss and stores it in run_history
3745
for Pisces client selection algorithm.
3846
"""
47+
if self._criterion is None:
48+
raise RuntimeError(
49+
"PiscesLossStrategy has not been initialised. Did you call setup()?"
50+
)
51+
3952
per_batch_loss = self._criterion(outputs, labels)
4053

4154
current_epoch = getattr(context, "current_epoch", 1)
@@ -67,5 +80,6 @@ def __init__(self, model=None, callbacks=None):
6780
loss_strategy=loss_strategy,
6881
)
6982

70-
if hasattr(self.loss_strategy, "attach_run_history"):
71-
self.loss_strategy.attach_run_history(self.run_history)
83+
attach_run_history = getattr(self.loss_strategy, "attach_run_history", None)
84+
if callable(attach_run_history):
85+
attach_run_history(self.run_history)

0 commit comments

Comments (0)