
Commit b1bc816

Merge pull request #20 from ringoldsdev/chore/20250811/chunking-should-not-be-in-strategy
chore: strategy now doesn't concern with chunking
2 parents 16755d7 + feef2a6 commit b1bc816

File tree

6 files changed: +168 additions, -201 deletions


laygo/transformers/strategies/http.py

Lines changed: 36 additions & 13 deletions
@@ -1,5 +1,4 @@
 from collections.abc import Callable
-from collections.abc import Iterable
 from collections.abc import Iterator
 from concurrent.futures import FIRST_COMPLETED
 from concurrent.futures import ThreadPoolExecutor
@@ -14,28 +13,51 @@


 class HTTPStrategy[In, Out](ExecutionStrategy[In, Out]):
-  """
-  An execution strategy that sends data chunks to a remote HTTP worker.
-  This is the CLIENT-SIDE implementation.
+  """An execution strategy that sends data chunks to a remote HTTP worker.
+
+  This is the CLIENT-SIDE implementation that sends chunks to a remote
+  HTTP endpoint and receives the transformed results back.
   """

   def __init__(self, worker_url: Callable[[], str], max_workers: int = 8, timeout: int = 300):
+    """Initialize the HTTP strategy.
+
+    Args:
+      worker_url: Function that returns the URL of the remote worker endpoint.
+      max_workers: Maximum number of concurrent HTTP requests.
+      timeout: Request timeout in seconds.
+    """
     self.worker_url = worker_url
     self.max_workers = max_workers
     self.timeout = timeout
     self.session = requests.Session()

   def execute(
     self,
-    transformer_logic: InternalTransformer[In, Out],  # Note: This is ignored
-    chunk_generator: Callable[[Iterable[In]], Iterator[list[In]]],
-    data: Iterable[In],
-    context: IContextManager,  # Note: This is also ignored
-  ) -> Iterator[Out]:
-    """Sends data to the remote worker and yields results."""
+    transformer_logic: InternalTransformer[In, Out],  # Ignored for HTTP strategy
+    chunks: Iterator[list[In]],
+    context: IContextManager,  # Ignored for HTTP strategy
+  ) -> Iterator[list[Out]]:
+    """Send data chunks to the remote worker and yield results.
+
+    Args:
+      transformer_logic: Ignored - the remote worker has the transformation logic.
+      chunks: Iterator of pre-chunked data.
+      context: Ignored - context is handled by the remote worker.
+
+    Returns:
+      Iterator of transformed chunks received from the remote worker.
+    """

     def process_chunk(chunk: list[In]) -> list[Out]:
-      """Sends one chunk to the worker and returns the result."""
+      """Send one chunk to the worker and return the result.
+
+      Args:
+        chunk: Data chunk to send to the remote worker.
+
+      Returns:
+        Transformed chunk received from the remote worker.
+      """
       try:
         response = self.session.post(
           self.worker_url(),
@@ -52,12 +74,13 @@ def process_chunk(chunk: list[In]) -> list[Out]:

     # Use a ThreadPoolExecutor to make concurrent HTTP requests
     with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
-      chunk_iterator = chunk_generator(data)
+      chunk_iterator = iter(chunks)
       futures = {executor.submit(process_chunk, chunk) for chunk in itertools.islice(chunk_iterator, self.max_workers)}
+
       while futures:
         done, futures = wait(futures, return_when=FIRST_COMPLETED)
         for future in done:
-          yield from future.result()
+          yield future.result()
           try:
             new_chunk = next(chunk_iterator)
             futures.add(executor.submit(process_chunk, new_chunk))
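Because the strategy no longer chunks, the caller has to supply Iterator[list[In]] itself. The transformer/pipeline code that does this is not part of this diff; the following is only a hypothetical helper with the shape such a caller would need.

# Hypothetical chunking helper, not part of this commit.
import itertools
from collections.abc import Iterable, Iterator


def chunked[T](data: Iterable[T], size: int) -> Iterator[list[T]]:
  """Yield successive lists of at most `size` items from `data`."""
  it = iter(data)
  while batch := list(itertools.islice(it, size)):
    yield batch


assert list(chunked(range(5), 2)) == [[0, 1], [2, 3], [4]]

On Python 3.12+, itertools.batched offers similar behavior but yields tuples rather than lists.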

laygo/transformers/strategies/process.py

Lines changed: 11 additions & 29 deletions
@@ -7,6 +7,7 @@
 from loky import get_reusable_executor

 from laygo.context.types import IContextHandle
+from laygo.context.types import IContextManager
 from laygo.transformers.strategies.types import ExecutionStrategy
 from laygo.transformers.types import InternalTransformer

@@ -16,37 +17,33 @@ def _worker_process_chunk[In, Out](
   context_handle: IContextHandle,
   chunk: list[In],
 ) -> list[Out]:
-  """
-  Top-level function executed by each worker process.
-  It reconstructs the context proxy from the handle and runs the transformation.
-  """
+  """Top-level function executed by each worker process."""
   context_proxy = context_handle.create_proxy()
   try:
     return transformer_logic(chunk, context_proxy)
   finally:
-    # The proxy's shutdown is a no-op, but it's good practice to call it.
     context_proxy.shutdown()


 class ProcessStrategy[In, Out](ExecutionStrategy[In, Out]):
+  """Execute transformer logic using a process pool."""
+
   def __init__(self, max_workers: int = 4, ordered: bool = True):
     self.max_workers = max_workers
     self.ordered = ordered

-  def execute(self, transformer_logic, chunk_generator, data, context):
+  def execute(
+    self,
+    transformer_logic: InternalTransformer[In, Out],
+    chunks: Iterator[list[In]],
+    context: IContextManager,
+  ) -> Iterator[list[Out]]:
     """Execute the transformer by distributing chunks to a process pool."""
-
-    # Get the picklable handle from the context manager.
     context_handle = context.get_handle()
-
     executor = get_reusable_executor(max_workers=self.max_workers)
-    chunks_to_process = chunk_generator(data)

     gen_func = self._ordered_generator if self.ordered else self._unordered_generator
-
-    processed_chunks_iterator = gen_func(chunks_to_process, transformer_logic, executor, context_handle)
-    for result_chunk in processed_chunks_iterator:
-      yield from result_chunk
+    yield from gen_func(chunks, transformer_logic, executor, context_handle)

   def _ordered_generator(
     self,
@@ -69,22 +66,16 @@ def _ordered_generator(

     try:
       while futures:
-        # Get the result of the oldest task. If it failed or the pool
-        # is broken, .result() will raise an exception.
         result = futures.popleft().result()

-        # If successful, submit a new task.
         try:
           chunk = next(chunks_iter)
           futures.append(executor.submit(_worker_process_chunk, transformer, context_handle, chunk))
         except StopIteration:
-          # No more chunks to process.
           pass

         yield result
     finally:
-      # This cleanup runs if the loop finishes or if an exception occurs.
-      # It prevents orphaned processes by cancelling pending tasks.
       for future in futures:
         future.cancel()
       if futures:
@@ -104,27 +95,18 @@ def _unordered_generator(
     }

     try:
-      # as_completed is ideal for this "process as they finish" pattern
       for future in as_completed(futures):
-        # Get the result. This raises an exception if the task failed,
-        # which immediately stops the loop and proceeds to finally.
         result = future.result()
-
-        # Remove the completed future from our tracking set
         futures.remove(future)

-        # Try to submit a new task to replace the one that just finished
         try:
           chunk = next(chunks_iter)
           futures.add(executor.submit(_worker_process_chunk, transformer, context_handle, chunk))
         except StopIteration:
-          # No more chunks left to submit.
           pass

         yield result
     finally:
-      # Clean up any futures that were still running or pending when
-      # an exception occurred or the input was exhausted.
       for future in futures:
         future.cancel()
       if futures:
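With execute() now yielding Iterator[list[Out]], a caller that wants a flat item stream flattens the chunks itself. The sketch below is a self-contained illustration of that pattern; fake_execute is a stand-in for any strategy's execute(), not a laygo API call.

# Hypothetical illustration of consuming the new chunk-level output.
import itertools
from collections.abc import Iterator


def fake_execute(chunks: Iterator[list[int]]) -> Iterator[list[int]]:
  # One output chunk per input chunk, mirroring the new strategy contract.
  for chunk in chunks:
    yield [x + 1 for x in chunk]


flat = itertools.chain.from_iterable(fake_execute(iter([[1, 2], [3]])))
assert list(flat) == [2, 3, 4]

Within ProcessStrategy itself, ordered=True drains a deque of futures front-to-back so chunks come back in submission order, while ordered=False uses as_completed to yield chunks as soon as any worker finishes; both keep roughly max_workers chunks in flight.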

laygo/transformers/strategies/sequential.py

Lines changed: 3 additions & 3 deletions
@@ -2,7 +2,7 @@


 class SequentialStrategy[In, Out](ExecutionStrategy[In, Out]):
-  def execute(self, transformer_logic, chunk_generator, data, context):
+  def execute(self, transformer_logic, chunks, context):
     # Logic from the original Transformer.__call__
-    for chunk in chunk_generator(data):
-      yield from transformer_logic(chunk, context)
+    for chunk in chunks:
+      yield transformer_logic(chunk, context)
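The SequentialStrategy change shows the new contract most directly: the old code flattened each transformed chunk with yield from, while the new code yields the chunk itself. A minimal, hypothetical illustration of the difference, where doubling stands in for transformer_logic(chunk, context):

def old_style(chunks):
  for chunk in chunks:
    yield from [x * 2 for x in chunk]  # flattened items


def new_style(chunks):
  for chunk in chunks:
    yield [x * 2 for x in chunk]  # whole chunks


assert list(old_style([[1, 2], [3, 4]])) == [2, 4, 6, 8]
assert list(new_style([[1, 2], [3, 4]])) == [[2, 4], [6, 8]]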
