Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
3a0e2e5
Feature/SK-1574 | Change TaskStream to task polling with Unary calls …
carl-andersson May 19, 2025
f8792c1
Feature/SK-1612 | Tracking model_updates with correlation_id (#926)
carl-andersson May 20, 2025
d7a3efe
Merge remote-tracking branch 'origin/master' into feature/SK-1336
carl-andersson May 20, 2025
77779f9
Fixed merge error
carl-andersson May 20, 2025
f847225
Feature/SK-1622 | Rework timeout and canceling on combiner (#929)
carl-andersson Jun 2, 2025
5e2eb07
Merge remote-tracking branch 'origin/master' into feature/SK-1336
carl-andersson Jun 4, 2025
f3954b2
use model_id in FileChunk message instead of metadata
Wrede Jun 5, 2025
d6c4f2b
fix import
Wrede Jun 5, 2025
de712b0
Merge branch 'master' into feature/SK-1336
Wrede Jun 10, 2025
04fc453
Merge branch 'master' into feature/SK-1336
Wrede Jun 12, 2025
23cdf7e
Feature/SK-1648 | Split controller and api-server (#946)
carl-andersson Jun 24, 2025
651badb
Merge remote-tracking branch 'origin/master' into feature/SK-1336
carl-andersson Jun 26, 2025
487f177
Fix
carl-andersson Jun 26, 2025
090e513
Removed test on api since controller is not running
carl-andersson Jun 26, 2025
2e7f0c8
Feature/SK-1666 | Adding control flow to controller and combiner (#959)
carl-andersson Jun 30, 2025
f990612
fix
Wrede Jul 2, 2025
bc5787f
Changed from '_' to '-' in Update metadata
carl-andersson Jul 2, 2025
ec9191c
Merge remote-tracking branch 'origin/master' into feature/SK-1336
carl-andersson Sep 8, 2025
c4bf79b
Feature/SK-1683 | Cleaning model handling within Fedn (#989)
carl-andersson Sep 16, 2025
00f2464
Feature/SK-1659 | Enable combiner to control tasks on client (#991)
carl-andersson Sep 16, 2025
018a9e3
Feature/SK-000 | Add config to create bucket and get region from AWS …
Wrede Sep 15, 2025
a32e151
Fixed some badly configured calls to the client (#995)
carl-andersson Sep 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion .ci/tests/examples/print_logs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,23 @@ if [ "$service" == "api-server" ]; then
fi

if [ "$service" == "combiner" ]; then
echo "Reducer logs"
echo "Combiner logs"
docker logs "$(basename $PWD)-combiner-1"
exit 0
fi

# Print the logs of the controller container and stop.
if [ "$service" == "controller" ]; then
    echo "Controller logs"
    # Quote $PWD inside the command substitution so that a working directory
    # containing whitespace is passed to basename as a single argument.
    docker logs "$(basename "$PWD")-controller-1"
    exit 0
fi

# Print the logs of the hooks container and stop.
# Unlike the other services, the container is addressed by the fixed name
# "hook" rather than a "<directory>-<service>-1" name.
# NOTE(review): presumably this matches `container_name: hook` in the compose
# file -- confirm if the compose service is ever renamed.
if [ "$service" == "hooks" ]; then
echo "Hooks logs"
docker logs "hook"
exit 0
fi

if [ "$service" == "client" ]; then
echo "Client 0 logs"
if [ "$example" == "mnist-keras" ]; then
Expand Down
5 changes: 4 additions & 1 deletion .ci/tests/examples/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ else
docker compose \
-f ../../docker-compose.yaml \
-f docker-compose.override.yaml \
up -d --build combiner api-server mongo minio client1
up -d --build combiner controller api-server hooks mongo minio client1
fi

# add server functions to python path to import server functions code
Expand All @@ -40,6 +40,9 @@ python ../../.ci/tests/examples/wait_for.py reducer
>&2 echo "Wait for combiners to connect"
python ../../.ci/tests/examples/wait_for.py combiners

>&2 echo "Wait for controller to connect"
python ../../.ci/tests/examples/wait_for.py controller

>&2 echo "Upload compute package"
python ../../.ci/tests/examples/api_test.py set_package --path package.tgz --helper "$helper" --name test

Expand Down
17 changes: 17 additions & 0 deletions .ci/tests/examples/wait_for.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,20 @@ def _test_nodes(n_nodes, node_type, reducer_host='localhost', reducer_port='8092
_eprint(f'Request exception enconuntered: {e}.')
return False

def _test_controller(reducer_host='localhost', reducer_port='8092'):
    """Probe the ``/get_controller_status`` HTTP endpoint once.

    :param reducer_host: Host name used to build the request URL.
    :param reducer_port: Port used to build the request URL.
    :return: True if the endpoint answered with HTTP 200 and a JSON body,
        otherwise False.
    """
    url = f'http://{reducer_host}:{reducer_port}/get_controller_status'
    try:
        # Without a timeout a stalled connection would block this probe
        # forever; a timeout raises and is reported like any other failure.
        response = requests.get(url, verify=False, timeout=10)

        if response.status_code == 200:
            data = json.loads(response.content)
            _eprint(f'Controller is running: {data}')
            return True

        _eprint(f'Controller not ready, status code: {response.status_code}.')
    except Exception as e:
        _eprint(f'Request exception encountered: {e}.')

    # Any non-200 answer or exception counts as "not ready yet".
    return False


def rounds(n_rounds=3):
assert (_retry(_test_rounds, n_rounds=n_rounds))
Expand All @@ -79,6 +93,9 @@ def combiners(n_combiners=1):
def reducer():
    """Assert that `_test_nodes`, run through `_retry`, finds one 'reducer' node."""
    succeeded = _retry(_test_nodes, n_nodes=1, node_type='reducer')
    assert succeeded

def controller():
    """Assert that `_test_controller`, run through `_retry`, succeeds."""
    succeeded = _retry(_test_controller)
    assert succeeded


if __name__ == '__main__':
fire.Fire()
2 changes: 2 additions & 0 deletions .github/workflows/code-checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ jobs:
--exclude-dir='flower-client'
--exclude='tests.py'
--exclude='controller_cmd.py'
--exclude='api_server_cmd.py'
--exclude='fedn_pb2_grpc.py'
--exclude='fedn_pb2.pyi'
--exclude='combiner_cmd.py'
--exclude='run_cmd.py'
--exclude='README.rst'
Expand Down
10 changes: 10 additions & 0 deletions .github/workflows/integration-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,15 @@ jobs:
- name: print logs combiner
if: failure()
run: .ci/tests/examples/print_logs.sh combiner ${{ matrix.to_test }}

- name: print logs controller
if: failure()
run: .ci/tests/examples/print_logs.sh controller ${{ matrix.to_test }}

- name: print logs hooks
if: failure()
run: .ci/tests/examples/print_logs.sh hooks ${{ matrix.to_test }}

- name: print logs client
if: failure()
run: .ci/tests/examples/print_logs.sh client ${{ matrix.to_test }}
Expand All @@ -63,3 +71,5 @@ jobs:
- name: print logs minio
if: failure()
run: .ci/tests/examples/print_logs.sh minio ${{ matrix.to_test }}


4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ config/settings-combiner.yaml
config/extra-hosts-client.yaml
config/extra-hosts-reducer.yaml
config/settings-client.yaml
config/settings-reducer.yaml
config/settings-combiner.yaml
config/settings-api-server.yaml
config/settings-controller.yaml
config/settings-hooks.yaml

./tmp/*
Expand Down
3 changes: 2 additions & 1 deletion Dockerfile.dev
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ COPY . /app
COPY config/settings-client.yaml.template /app/config/settings-client.yaml
COPY config/settings-combiner.yaml.template /app/config/settings-combiner.yaml
COPY config/settings-hooks.yaml.template /app/config/settings-hooks.yaml
COPY config/settings-reducer.yaml.template /app/config/settings-reducer.yaml
COPY config/settings-api-server.yaml.template /app/config/settings-api-server.yaml
COPY config/settings-controller.yaml.template /app/config/settings-controller.yaml
COPY $REQUIREMENTS /app/config/requirements.txt

# Install developer tools (needed for psutil)
Expand Down
2 changes: 1 addition & 1 deletion config/reducer-settings.override.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ services:
reducer:
volumes:
- ${HOST_REPO_DIR:-.}:/app
- ${HOST_REPO_DIR:-.}/config/settings-reducer.yaml:/app/config/settings-reducer.yaml
- ${HOST_REPO_DIR:-.}/config/settings-api-server.yaml:/app/config/settings-api-server.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
network_id: fedn-network
controller:
api:
host: api-server
port: 8092
debug: True

controller:
host: controller
port: 12090

statestore:
# Available DB types are MongoDB, PostgreSQL, SQLite
type: MongoDB
Expand Down
2 changes: 1 addition & 1 deletion config/settings-controller.yaml.local.template
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
network_id: fedn-network
controller:
api:
host: localhost
port: 8092
debug: True
Expand Down
35 changes: 35 additions & 0 deletions config/settings-controller.yaml.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
network_id: fedn-network

api:
host: api-server
port: 8092

controller:
host: controller
port: 12090
debug: True

statestore:
# Available DB types are MongoDB, PostgreSQL, SQLite
type: MongoDB
mongo_config:
username: fedn_admin
password: password
host: mongo
port: 6534
postgres_config:
username: fedn_admin
password: password
host: fedn_postgres
port: 5432

storage:
storage_type: BOTO3
storage_config:
storage_endpoint_url: http://minio:9000
storage_access_key: fedn_admin
storage_secret_key: password
storage_bucket: fedn-models
context_bucket: fedn-context
storage_secure_mode: False
storage_verify_ssl: False
36 changes: 33 additions & 3 deletions docker-compose.dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ services:
- USER=test
- PROJECT=project
- FLASK_DEBUG=1
- STATESTORE_CONFIG=/app/config/settings-reducer.yaml.template
- MODELSTORAGE_CONFIG=/app/config/settings-reducer.yaml.template
- STATESTORE_CONFIG=/app/config/settings-api-server.yaml.template
- MODELSTORAGE_CONFIG=/app/config/settings-api-server.yaml.template
- FEDN_COMPUTE_PACKAGE_DIR=/app
- TMPDIR=/app/tmp
build:
Expand All @@ -84,11 +84,41 @@ services:
- mongo
- fedn_postgres
command:
- controller
- api-server
- start
ports:
- 8092:8092

controller:
environment:
- PYTHONUNBUFFERED=0
- GET_HOSTS_FROM=dns
- STATESTORE_CONFIG=/app/config/settings-controller.yaml.template
- MODELSTORAGE_CONFIG=/app/config/settings-controller.yaml.template
- TMPDIR=/app/tmp
build:
context: .
args:
BASE_IMG: ${BASE_IMG:-python:3.12-slim}
working_dir: /app
volumes:
- ${HOST_REPO_DIR:-.}/fedn:/app/fedn
healthcheck:
test: [ "CMD", "/app/grpc_health_probe", "-addr=localhost:12090" ]
interval: 20s
timeout: 10s
retries: 5
depends_on:
- minio
- mongo
command:
- controller
- start
- --init
- config/settings-controller.yaml.template
ports:
- 12090:12090

# Combiner
combiner:
environment:
Expand Down
40 changes: 37 additions & 3 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ services:
- USER=test
- PROJECT=project
- FLASK_DEBUG=1
- STATESTORE_CONFIG=/app/config/settings-reducer.yaml.template
- MODELSTORAGE_CONFIG=/app/config/settings-reducer.yaml.template
- STATESTORE_CONFIG=/app/config/settings-api-server.yaml.template
- MODELSTORAGE_CONFIG=/app/config/settings-api-server.yaml.template
- FEDN_COMPUTE_PACKAGE_DIR=/app
- TMPDIR=/app/tmp
build:
Expand All @@ -73,11 +73,43 @@ services:
- minio
- mongo
command:
- controller
- api-server
- start
ports:
- 8092:8092

controller:
environment:
- PYTHONUNBUFFERED=0
- GET_HOSTS_FROM=dns
- STATESTORE_CONFIG=/app/config/settings-controller.yaml.template
- MODELSTORAGE_CONFIG=/app/config/settings-controller.yaml.template
- TMPDIR=/app/tmp
build:
context: .
args:
BASE_IMG: ${BASE_IMG:-python:3.12-slim}
working_dir: /app
volumes:
- ${HOST_REPO_DIR:-.}/fedn:/app/fedn
healthcheck:
test: [ "CMD", "/app/grpc_health_probe", "-addr=localhost:12090" ]
interval: 20s
timeout: 10s
retries: 5
depends_on:
- minio
- mongo
command:
- controller
- start
- --init
- config/settings-controller.yaml.template
ports:
- 12090:12090



# Combiner
combiner:
environment:
Expand Down Expand Up @@ -110,6 +142,8 @@ services:
depends_on:
- api-server
- hooks
- controller

# Hooks
hooks:
container_name: hook
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
author = "Scaleout Systems AB"

# The full version, including alpha/beta/rc tags
release = "0.32.0"
release = "0.33.0"

# Add any Sphinx extension module names here, as strings
extensions = [
Expand Down
2 changes: 2 additions & 0 deletions examples/pytorch-keyworddetection-api/sc_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ def train(self, model_params, settings):
for epoch in range(n_epochs):
sc_model.train()
for idx, (y_labels, x_spectrograms) in enumerate(dataloader_train):
self.check_task_abort()

optimizer.zero_grad()
_, logits = sc_model(x_spectrograms)

Expand Down
1 change: 1 addition & 0 deletions fedn/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .api_server_cmd import api_server_cmd # noqa: F401
from .client_cmd import client_cmd # noqa: F401
from .combiner_cmd import combiner_cmd # noqa: F401
from .config_cmd import config_cmd # noqa: F401
Expand Down
18 changes: 18 additions & 0 deletions fedn/cli/api_server_cmd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import click

from fedn.cli.main import main


@main.group("api-server")
@click.pass_context
def api_server_cmd(ctx):
    """Commands for the API server."""
    # The docstring above is what click prints as the help text of the
    # "api-server" group, so it describes the group rather than `ctx`.


@api_server_cmd.command("start")
@click.pass_context
def api_server_cmd(ctx):
    """Start the API server."""
    # NOTE(review): this function reuses the name of the "api-server" group it
    # is registered on. The decorator is evaluated against the group before the
    # name is rebound, so registration works, but afterwards the module-level
    # name `api_server_cmd` refers to this "start" command, not the group.

    # The server module is imported only when the command actually runs.
    from fedn.network.api.server import start_api_server  # noqa: PLC0415

    start_api_server()
30 changes: 27 additions & 3 deletions fedn/cli/controller_cmd.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import click

from fedn.cli.main import main
from fedn.cli.shared import apply_config
from fedn.common.config import get_modelstorage_config, get_network_config, get_statestore_config
from fedn.network.controller.control import Control
from fedn.network.storage.dbconnection import DatabaseConnection
from fedn.network.storage.s3.repository import Repository


@main.group("controller")
Expand All @@ -11,8 +16,27 @@ def controller_cmd(ctx):


@controller_cmd.command("start")
@click.option("-h", "--host", required=False, default="controller", help="Set hostname.")
@click.option("-i", "--port", required=False, default=12090, help="Set port.")
@click.option("-s", "--secure", is_flag=True, help="Enable SSL/TLS encrypted gRPC channels.")
@click.option("-in", "--init", required=False, default=None, help="Path to configuration file.")
@click.pass_context
def controller_cmd(ctx):
from fedn.network.api.server import start_server_api # noqa: PLC0415
def controller_cmd(ctx, host, port, secure, init):
config = {
"host": host,
"port": port,
"secure": secure,
}

if init:
apply_config(init, config)
click.echo(f"\nController configuration loaded from file: {init}")

network_id = get_network_config()
modelstorage_config = get_modelstorage_config()
statestore_config = get_statestore_config()

start_server_api()
db = DatabaseConnection(statestore_config, network_id)
repository = Repository(modelstorage_config["storage_config"], storage_type=modelstorage_config["storage_type"])
controller = Control(config, network_id, repository, db)
controller.run()
Loading
Loading