Skip to content

Commit cf4dbf9

Browse files
authored
Merge pull request #100 from adf-python/feature/99
feat: カーネル接続の確認機能を追加し、タイムアウト設定を導入
2 parents 34856e8 + edcaf71 commit cf4dbf9

File tree

4 files changed

+87
-7
lines changed

4 files changed

+87
-7
lines changed

adf_core_python/core/launcher/agent_launcher.py

+29-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import importlib
2+
import socket
23
import threading
34
from typing import Optional
45

@@ -59,13 +60,27 @@ def init_connector(self) -> None:
5960
def launch(self) -> None:
6061
kernel_host: str = self.config.get_value(ConfigKey.KEY_KERNEL_HOST, "localhost")
6162
kernel_port: int = self.config.get_value(ConfigKey.KEY_KERNEL_PORT, 27931)
62-
self.logger.info(
63-
f"Start agent launcher (host: {kernel_host}, port: {kernel_port})"
64-
)
6563

6664
component_launcher: ComponentLauncher = ComponentLauncher(
6765
kernel_host, kernel_port, self.logger
6866
)
67+
timeout: int = self.config.get_value(
68+
ConfigKey.KEY_KERNEL_TIMEOUT,
69+
30,
70+
)
71+
if component_launcher.check_kernel_connection(timeout=timeout):
72+
self.logger.info(
73+
f"Kernel is running (host: {kernel_host}, port: {kernel_port})"
74+
)
75+
else:
76+
self.logger.error(
77+
f"Kernel is not running (host: {kernel_host}, port: {kernel_port})"
78+
)
79+
return
80+
81+
self.logger.info(
82+
f"Start agent launcher (host: {kernel_host}, port: {kernel_port})"
83+
)
6984

7085
gateway_launcher: Optional[GatewayLauncher] = None
7186
gateway_flag: bool = self.config.get_value(ConfigKey.KEY_GATEWAY_FLAG, False)
@@ -104,3 +119,14 @@ def connect() -> None:
104119

105120
for thread in self.agent_thread_list:
106121
thread.join()
122+
123+
def check_kernel_connection(self, host: str, port: int, timeout: int = 5) -> bool:
124+
try:
125+
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
126+
sock.settimeout(timeout)
127+
result = sock.connect_ex((host, port))
128+
sock.close()
129+
return result == 0
130+
except Exception as e:
131+
self.logger.error(f"カーネルへの接続確認中にエラーが発生しました: {e}")
132+
return False

adf_core_python/core/launcher/config_key.py

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ class ConfigKey:
66
KEY_LOADER_CLASS: Final[str] = "adf_core_python.launcher.loader"
77
KEY_KERNEL_HOST: Final[str] = "kernel.host"
88
KEY_KERNEL_PORT: Final[str] = "kernel.port"
9+
KEY_KERNEL_TIMEOUT: Final[str] = "kernel.timeout"
910
KEY_TEAM_NAME: Final[str] = "team.name"
1011
KEY_DEBUG_FLAG: Final[str] = "adf.debug.flag"
1112
KEY_DEVELOP_FLAG: Final[str] = "adf.develop.flag"

adf_core_python/core/launcher/connect/component_launcher.py

+46
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import socket
2+
import time
23

34
from structlog import BoundLogger
45

@@ -53,3 +54,48 @@ def connect(self, agent: Agent, _request_id: int) -> None:
5354
def generate_request_id(self) -> int:
5455
self.request_id += 1
5556
return self.request_id
57+
58+
def check_kernel_connection(
59+
self, timeout: int = 30, retry_interval: float = 5.0
60+
) -> bool:
61+
"""Attempts to connect to the kernel multiple times within the specified timeout period.
62+
63+
Args:
64+
timeout (int): Total timeout duration in seconds
65+
retry_interval (float): Interval between retry attempts in seconds
66+
67+
Returns:
68+
bool: True if connection successful, False otherwise
69+
"""
70+
start_time = time.time()
71+
attempt = 1
72+
73+
while True:
74+
try:
75+
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
76+
sock.settimeout(retry_interval)
77+
result = sock.connect_ex((self.host, self.port))
78+
sock.close()
79+
80+
if result == 0:
81+
self.logger.info(
82+
f"Successfully connected to kernel (attempt: {attempt})"
83+
)
84+
return True
85+
86+
elapsed_time = time.time() - start_time
87+
if elapsed_time >= timeout:
88+
self.logger.error(
89+
f"Timeout: Could not connect to kernel within {timeout} seconds (attempts: {attempt})"
90+
)
91+
return False
92+
93+
self.logger.debug(
94+
f"Connection attempt {attempt} failed - retrying in {retry_interval} seconds"
95+
)
96+
time.sleep(retry_interval)
97+
attempt += 1
98+
99+
except Exception as e:
100+
self.logger.error(f"Error while checking kernel connection: {e}")
101+
return False

adf_core_python/launcher.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,8 @@ def __init__(
1212
self,
1313
launcher_config_file: str,
1414
) -> None:
15-
resource.setrlimit(resource.RLIMIT_NOFILE, (8192, 9223372036854775807))
15+
resource.setrlimit(resource.RLIMIT_NOFILE, (8192, 1048576))
1616

17-
configure_logger()
18-
19-
self.logger = get_logger(__name__)
2017
self.launcher_config = Config(launcher_config_file)
2118

2219
parser = argparse.ArgumentParser(description="Agent Launcher")
@@ -80,6 +77,12 @@ def __init__(
8077
action="store_true",
8178
help="precompute flag",
8279
)
80+
parser.add_argument(
81+
"--timeout",
82+
type=int,
83+
help="timeout in seconds",
84+
metavar="",
85+
)
8386
parser.add_argument("--debug", action="store_true", help="debug flag")
8487
parser.add_argument(
8588
"--java",
@@ -98,6 +101,7 @@ def __init__(
98101
ConfigKey.KEY_FIRE_STATION_COUNT: args.firestation,
99102
ConfigKey.KEY_POLICE_OFFICE_COUNT: args.policeoffice,
100103
ConfigKey.KEY_PRECOMPUTE: args.precompute,
104+
ConfigKey.KEY_KERNEL_TIMEOUT: args.timeout,
101105
ConfigKey.KEY_DEBUG_FLAG: args.debug,
102106
ConfigKey.KEY_GATEWAY_FLAG: args.java,
103107
}
@@ -106,6 +110,9 @@ def __init__(
106110
if value is not None:
107111
self.launcher_config.set_value(key, value)
108112

113+
configure_logger()
114+
self.logger = get_logger(__name__)
115+
109116
self.logger.debug(f"launcher_config: {self.launcher_config}")
110117

111118
def launch(self) -> None:

0 commit comments

Comments
 (0)