From e7f945e4907c818ddd9181da61672d9f8fca9fb5 Mon Sep 17 00:00:00 2001 From: will9709 Date: Wed, 13 Nov 2024 23:34:42 +0800 Subject: [PATCH 01/52] stash --- cli/__init__.py | 2 +- cli/stream.py | 3 ++ hemera.py | 3 +- indexer/controller/scheduler/job_scheduler.py | 19 ++++++++++-- indexer/controller/stream_controller.py | 2 +- indexer/jobs/base_job.py | 29 ++++++++++++++++--- 6 files changed, 49 insertions(+), 9 deletions(-) diff --git a/cli/__init__.py b/cli/__init__.py index 5d544fb98..591b49b41 100644 --- a/cli/__init__.py +++ b/cli/__init__.py @@ -17,7 +17,7 @@ def get_version(): @click.group() -@click.version_option(version=get_version()) +# @click.version_option(version=get_version()) @click.pass_context def cli(ctx): pass diff --git a/cli/stream.py b/cli/stream.py index 4b04b60c3..bd488b70b 100644 --- a/cli/stream.py +++ b/cli/stream.py @@ -339,6 +339,8 @@ def stream( auto_upgrade_db=True, log_level="INFO", ): + from multiprocessing import Manager + _shared_data_buff = Manager().dict() print_logo() configure_logging(log_level, log_file) configure_signals() @@ -415,6 +417,7 @@ def stream( auto_reorg=auto_reorg, multicall=multicall, force_filter_mode=force_filter_mode, + _shared_data_buff=_shared_data_buff ) controller = StreamController( diff --git a/hemera.py b/hemera.py index 229e34827..560b67691 100644 --- a/hemera.py +++ b/hemera.py @@ -1,3 +1,4 @@ from cli import cli -cli() +if __name__ == '__main__': + cli() diff --git a/indexer/controller/scheduler/job_scheduler.py b/indexer/controller/scheduler/job_scheduler.py index 55f87a89c..503b848e8 100644 --- a/indexer/controller/scheduler/job_scheduler.py +++ b/indexer/controller/scheduler/job_scheduler.py @@ -2,6 +2,7 @@ from collections import defaultdict, deque from typing import List, Set, Type +import mpire from pottery import RedisDict from redis.client import Redis @@ -65,6 +66,7 @@ def __init__( multicall=None, auto_reorg=True, force_filter_mode=False, + _shared_data_buff=None ): self.logger = logging.getLogger(__name__) self.auto_reorg = auto_reorg @@ -85,6 +87,7 @@ def __init__( self.job_map = defaultdict(list) self.dependency_map = defaultdict(list) self.pg_service = config.get("db_service") if "db_service" in config else None + self._shared_data_buff = _shared_data_buff self.discover_and_register_job_classes() self.required_job_classes, self.is_pipeline_filter = self.get_required_job_classes(required_output_types) @@ -220,6 +223,7 @@ def instantiate_jobs(self): config=self.config, is_filter=self.is_pipeline_filter, filters=filters, + _shared_data_buff=self._shared_data_buff ) self.jobs.insert(0, export_blocks_job) else: @@ -253,15 +257,26 @@ def instantiate_jobs(self): ) self.jobs.append(check_job) + def split_blocks(self, start_block, end_block, step): + blocks = [] + for i in range(start_block, end_block + 1, step): + blocks.append([{'start_block': i, 'end_block': min(i + step - 1, end_block)}]) + return blocks + def run_jobs(self, start_block, end_block): self.clear_data_buff() + BaseJob._shared_data_buff = self._shared_data_buff try: + splits = self.split_blocks(start_block, end_block, 10) + for job in self.jobs: - job.run(start_block=start_block, end_block=end_block) + # job.run(start_block=start_block, end_block=end_block) + with mpire.WorkerPool(n_jobs=8, shared_objects=BaseJob._shared_data_buff) as pool: + pool.map(func=job.run, iterable_of_args=splits, task_timeout=20) for output_type in self.required_output_types: key = output_type.type() - message = f"{output_type.type()} : {len(self.get_data_buff().get(output_type.type()))}" + message = f"{output_type.type()} : {len(self.get_data_buff().get(output_type.type())) if self.get_data_buff().get(output_type.type()) else 0}" self.logger.info(f"{message}") exception_recorder.log( block_number=-1, dataclass=key, message_type="item_counter", message=message, level=RecordLevel.INFO diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index 7cc45f6c2..1dce08f0d 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -24,7 +24,7 @@ def __init__( sync_recorder: BaseRecorder, job_scheduler: JobScheduler, limit_reader: LimitReader, - max_retries=5, + max_retries=1, retry_from_record=False, delay=0, ): diff --git a/indexer/jobs/base_job.py b/indexer/jobs/base_job.py index cbb1732f3..456de9698 100644 --- a/indexer/jobs/base_job.py +++ b/indexer/jobs/base_job.py @@ -39,9 +39,12 @@ def get_subclasses(cls): return get_subclasses(bases) +from multiprocessing import Manager + class BaseJob(metaclass=BaseJobMeta): _data_buff = defaultdict(list) locks = defaultdict(threading.Lock) + _shared_data_buff = None tokens = None @@ -82,7 +85,7 @@ def __init__(self, **kwargs): job_name_snake = to_snake_case(self.job_name) self.user_defined_config = kwargs["config"][job_name_snake] if kwargs["config"].get(job_name_snake) else {} - def run(self, **kwargs): + def run(self, _shared_data_buff, kwargs): try: self._start(**kwargs) @@ -136,14 +139,26 @@ def _collect_batch(self, iterator): def _collect_item(self, key, data): with self.locks[key]: self._data_buff[key].append(data) + if key in self._shared_data_buff: + self._shared_data_buff[key].append(data) + else: + self._shared_data_buff[key] = [data] def _collect_items(self, key, data_list): with self.locks[key]: self._data_buff[key].extend(data_list) + if key in self._shared_data_buff: + self._shared_data_buff[key].extend(data_list) + else: + self._shared_data_buff[key] = data_list def _collect_domain(self, domain): with self.locks[domain.type()]: self._data_buff[domain.type()].append(domain) + if domain.type() in self._shared_data_buff: + self._shared_data_buff[domain.type()].append(domain) + else: + self._shared_data_buff[domain.type()] = [domain] def _collect_domains(self, domains): for domain in domains: @@ -176,10 +191,16 @@ def _export(self): if output_type in self._required_output_types: items.extend(self._extract_from_buff([output_type.type()])) + from multiprocessing import Lock + l = Lock() for item_exporter in self._item_exporters: - item_exporter.open() - item_exporter.export_items(items, job_name=self.job_name) - item_exporter.close() + try: + l.acquire() + item_exporter.open() + item_exporter.export_items(items, job_name=self.job_name) + item_exporter.close() + finally: + l.release() def get_buff(self): return self._data_buff From c1ddfbbd762bbd9550de63a073d1c14592553884 Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Thu, 14 Nov 2024 14:03:40 +0800 Subject: [PATCH 02/52] stash --- cli/stream.py | 8 +++-- hemera.py | 2 +- indexer/controller/scheduler/job_scheduler.py | 16 ++++++---- indexer/jobs/base_job.py | 32 +++++++++---------- 4 files changed, 31 insertions(+), 27 deletions(-) diff --git a/cli/stream.py b/cli/stream.py index bd488b70b..d0c753729 100644 --- a/cli/stream.py +++ b/cli/stream.py @@ -339,8 +339,6 @@ def stream( auto_upgrade_db=True, log_level="INFO", ): - from multiprocessing import Manager - _shared_data_buff = Manager().dict() print_logo() configure_logging(log_level, log_file) configure_signals() @@ -403,6 +401,10 @@ def stream( if source_path and source_path.startswith("postgresql://"): source_types = generate_dataclass_type_list_from_parameter(source_types, "source") + from multiprocessing import Manager + + manager = Manager() + # _shared_data_buff = Manager().dict() job_scheduler = JobScheduler( batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)), batch_web3_debug_provider=ThreadLocalProxy(lambda: get_provider_from_uri(debug_provider_uri, batch=True)), @@ -417,7 +419,7 @@ def stream( auto_reorg=auto_reorg, multicall=multicall, force_filter_mode=force_filter_mode, - _shared_data_buff=_shared_data_buff + _manager=manager, ) controller = StreamController( diff --git a/hemera.py b/hemera.py index 560b67691..4046d78bb 100644 --- a/hemera.py +++ b/hemera.py @@ -1,4 +1,4 @@ from cli import cli -if __name__ == '__main__': +if __name__ == "__main__": cli() diff --git a/indexer/controller/scheduler/job_scheduler.py b/indexer/controller/scheduler/job_scheduler.py index 503b848e8..69b6ce926 100644 --- a/indexer/controller/scheduler/job_scheduler.py +++ b/indexer/controller/scheduler/job_scheduler.py @@ -66,7 +66,7 @@ def __init__( multicall=None, auto_reorg=True, force_filter_mode=False, - _shared_data_buff=None + _manager=None, ): self.logger = logging.getLogger(__name__) self.auto_reorg = auto_reorg @@ -87,7 +87,7 @@ def __init__( self.job_map = defaultdict(list) self.dependency_map = defaultdict(list) self.pg_service = config.get("db_service") if "db_service" in config else None - self._shared_data_buff = _shared_data_buff + self._manager = _manager self.discover_and_register_job_classes() self.required_job_classes, self.is_pipeline_filter = self.get_required_job_classes(required_output_types) @@ -223,7 +223,6 @@ def instantiate_jobs(self): config=self.config, is_filter=self.is_pipeline_filter, filters=filters, - _shared_data_buff=self._shared_data_buff ) self.jobs.insert(0, export_blocks_job) else: @@ -260,19 +259,22 @@ def instantiate_jobs(self): def split_blocks(self, start_block, end_block, step): blocks = [] for i in range(start_block, end_block + 1, step): - blocks.append([{'start_block': i, 'end_block': min(i + step - 1, end_block)}]) + blocks.append([{"start_block": i, "end_block": min(i + step - 1, end_block)}]) return blocks def run_jobs(self, start_block, end_block): self.clear_data_buff() - BaseJob._shared_data_buff = self._shared_data_buff + BaseJob._manager = self._manager + BaseJob._shared_data_buff = self._manager.dict() + BaseJob._shared_data_buff_lock = self._manager.Lock() + try: splits = self.split_blocks(start_block, end_block, 10) for job in self.jobs: # job.run(start_block=start_block, end_block=end_block) - with mpire.WorkerPool(n_jobs=8, shared_objects=BaseJob._shared_data_buff) as pool: - pool.map(func=job.run, iterable_of_args=splits, task_timeout=20) + with mpire.WorkerPool(n_jobs=1, shared_objects=BaseJob._shared_data_buff) as pool: + pool.map(func=job.run, iterable_of_args=splits, task_timeout=2000) for output_type in self.required_output_types: key = output_type.type() diff --git a/indexer/jobs/base_job.py b/indexer/jobs/base_job.py index 456de9698..79db8ecaf 100644 --- a/indexer/jobs/base_job.py +++ b/indexer/jobs/base_job.py @@ -41,10 +41,13 @@ def get_subclasses(cls): from multiprocessing import Manager + class BaseJob(metaclass=BaseJobMeta): _data_buff = defaultdict(list) locks = defaultdict(threading.Lock) + _manager = None _shared_data_buff = None + _shared_data_buff_lock = None tokens = None @@ -139,18 +142,18 @@ def _collect_batch(self, iterator): def _collect_item(self, key, data): with self.locks[key]: self._data_buff[key].append(data) - if key in self._shared_data_buff: - self._shared_data_buff[key].append(data) - else: - self._shared_data_buff[key] = [data] + with self._shared_data_buff_lock: + if key not in self._shared_data_buff: + self._shared_data_buff[key] = self._manager.list() + self._shared_data_buff[key].append(data) def _collect_items(self, key, data_list): with self.locks[key]: self._data_buff[key].extend(data_list) - if key in self._shared_data_buff: - self._shared_data_buff[key].extend(data_list) - else: - self._shared_data_buff[key] = data_list + with self._shared_data_buff_lock: + if key not in self._shared_data_buff: + self._shared_data_buff[key] = self._manager.list() + self._shared_data_buff[key].extend(data_list) def _collect_domain(self, domain): with self.locks[domain.type()]: @@ -179,8 +182,10 @@ def _process(self, **kwargs): def _extract_from_buff(self, keys=None): items = [] for key in keys: - with self.locks[key]: - items.extend(self._data_buff[key]) + # with self.locks[key]: + # items.extend(self._data_buff[key]) + with self._shared_data_buff_lock: + items.extend(self._shared_data_buff[key]) return items @@ -191,16 +196,11 @@ def _export(self): if output_type in self._required_output_types: items.extend(self._extract_from_buff([output_type.type()])) - from multiprocessing import Lock - l = Lock() for item_exporter in self._item_exporters: - try: - l.acquire() + with self._shared_data_buff_lock: item_exporter.open() item_exporter.export_items(items, job_name=self.job_name) item_exporter.close() - finally: - l.release() def get_buff(self): return self._data_buff From a544580f9a5fec6fe2f6df9f85cb5e49e685acf1 Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Thu, 14 Nov 2024 16:05:48 +0800 Subject: [PATCH 03/52] stash --- indexer/controller/scheduler/job_scheduler.py | 8 +++--- indexer/jobs/base_job.py | 25 +++++++++---------- indexer/jobs/export_blocks_job.py | 9 ++++++- .../jobs/export_transactions_and_logs_job.py | 8 ++++-- 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/indexer/controller/scheduler/job_scheduler.py b/indexer/controller/scheduler/job_scheduler.py index 69b6ce926..1356ae7c4 100644 --- a/indexer/controller/scheduler/job_scheduler.py +++ b/indexer/controller/scheduler/job_scheduler.py @@ -266,15 +266,15 @@ def run_jobs(self, start_block, end_block): self.clear_data_buff() BaseJob._manager = self._manager BaseJob._shared_data_buff = self._manager.dict() - BaseJob._shared_data_buff_lock = self._manager.Lock() + BaseJob._shared_data_buff_lock = defaultdict(self._manager.Lock()) try: - splits = self.split_blocks(start_block, end_block, 10) + splits = self.split_blocks(start_block, end_block, 300) for job in self.jobs: # job.run(start_block=start_block, end_block=end_block) - with mpire.WorkerPool(n_jobs=1, shared_objects=BaseJob._shared_data_buff) as pool: - pool.map(func=job.run, iterable_of_args=splits, task_timeout=2000) + with mpire.WorkerPool(n_jobs=1, shared_objects=BaseJob._shared_data_buff, use_dill=True) as pool: + pool.map(func=job.run, iterable_of_args=splits, task_timeout=300) for output_type in self.required_output_types: key = output_type.type() diff --git a/indexer/jobs/base_job.py b/indexer/jobs/base_job.py index 79db8ecaf..3b497f8e2 100644 --- a/indexer/jobs/base_job.py +++ b/indexer/jobs/base_job.py @@ -39,12 +39,9 @@ def get_subclasses(cls): return get_subclasses(bases) -from multiprocessing import Manager - - class BaseJob(metaclass=BaseJobMeta): _data_buff = defaultdict(list) - locks = defaultdict(threading.Lock) + _data_buff_lock = defaultdict(threading.Lock) _manager = None _shared_data_buff = None _shared_data_buff_lock = None @@ -140,23 +137,27 @@ def _collect_batch(self, iterator): pass def _collect_item(self, key, data): - with self.locks[key]: + with self._data_buff_lock[key]: self._data_buff[key].append(data) - with self._shared_data_buff_lock: + + def _collect_shared_item(self, key, data): + with self._shared_data_buff_lock[key]: if key not in self._shared_data_buff: self._shared_data_buff[key] = self._manager.list() self._shared_data_buff[key].append(data) def _collect_items(self, key, data_list): - with self.locks[key]: + with self._data_buff_lock[key]: self._data_buff[key].extend(data_list) - with self._shared_data_buff_lock: + + def _collect_shared_items(self, key, data_list): + with self._shared_data_buff_lock[key]: if key not in self._shared_data_buff: self._shared_data_buff[key] = self._manager.list() self._shared_data_buff[key].extend(data_list) def _collect_domain(self, domain): - with self.locks[domain.type()]: + with self._data_buff_lock[domain.type()]: self._data_buff[domain.type()].append(domain) if domain.type() in self._shared_data_buff: self._shared_data_buff[domain.type()].append(domain) @@ -182,10 +183,8 @@ def _process(self, **kwargs): def _extract_from_buff(self, keys=None): items = [] for key in keys: - # with self.locks[key]: - # items.extend(self._data_buff[key]) - with self._shared_data_buff_lock: - items.extend(self._shared_data_buff[key]) + with self._data_buff_lock[key]: + items.extend(self._data_buff[key]) return items diff --git a/indexer/jobs/export_blocks_job.py b/indexer/jobs/export_blocks_job.py index e94218ef1..757181312 100644 --- a/indexer/jobs/export_blocks_job.py +++ b/indexer/jobs/export_blocks_job.py @@ -102,8 +102,15 @@ def _process(self, **kwargs): self._data_buff[Block.type()].sort(key=lambda x: x.number) self._data_buff[Transaction.type()].sort(key=lambda x: (x.block_number, x.transaction_index)) + block_list = list(self._shared_data_buff[Block.type()]) + block_list.sort(key=lambda x: x.number) + self._shared_data_buff[Block.type()] = self._manager.list(block_list) + + tx_list = list(self._shared_data_buff[Transaction.type()]) + tx_list.sort(key=lambda x: (x.block_number, x.transaction_index)) + self._shared_data_buff[Transaction.type()] = self._manager.list(tx_list) ts_dict = {} - for block in self._data_buff[Block.type()]: + for block in self._shared_data_buff[Block.type()]: timestamp = block.timestamp // 3600 * 3600 block_number = block.number diff --git a/indexer/jobs/export_transactions_and_logs_job.py b/indexer/jobs/export_transactions_and_logs_job.py index 4b0fc3ff3..67945c25f 100644 --- a/indexer/jobs/export_transactions_and_logs_job.py +++ b/indexer/jobs/export_transactions_and_logs_job.py @@ -33,7 +33,9 @@ def __init__(self, **kwargs): def _collect(self, **kwargs): - transactions: List[Transaction] = self._data_buff.get(Transaction.type(), []) + # transactions: List[Transaction] = self._data_buff.get(Transaction.type(), []) + transactions: List[Transaction] = self._shared_data_buff.get(Transaction.type(), []) + self._batch_work_executor.execute(transactions, self._collect_batch, total_items=len(transactions)) self._batch_work_executor.wait() @@ -59,7 +61,9 @@ def _collect_batch(self, transactions: List[Transaction]): self._collect_item(Log.type(), log) def _process(self, **kwargs): - self._data_buff[Log.type()].sort(key=lambda x: (x.block_number, x.log_index)) + log_list = list(self._shared_data_buff[Log.type()]) + log_list.sort(key=lambda x: (x.block_number, x.log_index)) + self._shared_data_buff[Log.type()] = self._manager.list(log_list) def receipt_rpc_requests(make_request, transaction_hashes, is_batch): From 35b489e548596b5504643a2c0835e207c26eed5d Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Thu, 14 Nov 2024 16:28:23 +0800 Subject: [PATCH 04/52] stash --- cli/stream.py | 2 +- indexer/controller/scheduler/job_scheduler.py | 23 ++++-------------- indexer/controller/stream_controller.py | 24 ++++++++++++++++++- indexer/jobs/base_job.py | 9 ++++--- indexer/jobs/export_blocks_job.py | 16 ++++++------- 5 files changed, 40 insertions(+), 34 deletions(-) diff --git a/cli/stream.py b/cli/stream.py index d0c753729..9e8997180 100644 --- a/cli/stream.py +++ b/cli/stream.py @@ -419,7 +419,6 @@ def stream( auto_reorg=auto_reorg, multicall=multicall, force_filter_mode=force_filter_mode, - _manager=manager, ) controller = StreamController( @@ -431,6 +430,7 @@ def stream( ), retry_from_record=retry_from_record, delay=delay, + _manager=manager, ) controller.action( diff --git a/indexer/controller/scheduler/job_scheduler.py b/indexer/controller/scheduler/job_scheduler.py index 1356ae7c4..63f9aa96e 100644 --- a/indexer/controller/scheduler/job_scheduler.py +++ b/indexer/controller/scheduler/job_scheduler.py @@ -66,7 +66,6 @@ def __init__( multicall=None, auto_reorg=True, force_filter_mode=False, - _manager=None, ): self.logger = logging.getLogger(__name__) self.auto_reorg = auto_reorg @@ -87,7 +86,6 @@ def __init__( self.job_map = defaultdict(list) self.dependency_map = defaultdict(list) self.pg_service = config.get("db_service") if "db_service" in config else None - self._manager = _manager self.discover_and_register_job_classes() self.required_job_classes, self.is_pipeline_filter = self.get_required_job_classes(required_output_types) @@ -256,26 +254,13 @@ def instantiate_jobs(self): ) self.jobs.append(check_job) - def split_blocks(self, start_block, end_block, step): - blocks = [] - for i in range(start_block, end_block + 1, step): - blocks.append([{"start_block": i, "end_block": min(i + step - 1, end_block)}]) - return blocks - - def run_jobs(self, start_block, end_block): + def run_jobs(self, shared_objects, args): self.clear_data_buff() - BaseJob._manager = self._manager - BaseJob._shared_data_buff = self._manager.dict() - BaseJob._shared_data_buff_lock = defaultdict(self._manager.Lock()) - try: - splits = self.split_blocks(start_block, end_block, 300) - + start_block = args["start_block"] + end_block = args["end_block"] for job in self.jobs: - # job.run(start_block=start_block, end_block=end_block) - with mpire.WorkerPool(n_jobs=1, shared_objects=BaseJob._shared_data_buff, use_dill=True) as pool: - pool.map(func=job.run, iterable_of_args=splits, task_timeout=300) - + job.run(start_block=start_block, end_block=end_block) for output_type in self.required_output_types: key = output_type.type() message = f"{output_type.type()} : {len(self.get_data_buff().get(output_type.type())) if self.get_data_buff().get(output_type.type()) else 0}" diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index 1dce08f0d..be04f9a42 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -1,12 +1,16 @@ import logging import os import time +from collections import defaultdict + +import mpire from common.utils.exception_control import FastShutdownError, HemeraBaseException from common.utils.file_utils import delete_file, write_to_file from common.utils.web3_utils import build_web3 from indexer.controller.base_controller import BaseController from indexer.controller.scheduler.job_scheduler import JobScheduler +from indexer.jobs.base_job import BaseJob from indexer.utils.exception_recorder import ExceptionRecorder from indexer.utils.limit_reader import LimitReader from indexer.utils.sync_recorder import BaseRecorder @@ -27,6 +31,7 @@ def __init__( max_retries=1, retry_from_record=False, delay=0, + _manager=None, ): self.entity_types = 1 self.sync_recorder = sync_recorder @@ -36,6 +41,7 @@ def __init__( self.max_retries = max_retries self.retry_from_record = retry_from_record self.delay = delay + self._manager = _manager def action( self, @@ -61,6 +67,12 @@ def action( def _shutdown(self): pass + def split_blocks(self, start_block, end_block, step): + blocks = [] + for i in range(start_block, end_block + 1, step): + blocks.append([{"start_block": i, "end_block": min(i + step - 1, end_block)}]) + return blocks + def _do_stream(self, start_block, end_block, steps, retry_errors, period_seconds): last_synced_block = self.sync_recorder.get_last_synced_block() if start_block is not None: @@ -95,7 +107,17 @@ def _do_stream(self, start_block, end_block, steps, retry_errors, period_seconds if synced_blocks != 0: # ETL program's main logic - self.job_scheduler.run_jobs(last_synced_block + 1, target_block) + splits = self.split_blocks(last_synced_block + 1, target_block, 100) + BaseJob._manager = self._manager + BaseJob._shared_data_buff = self._manager.dict() + + def shared_lock_factory(): + return self._manager.Lock() + + BaseJob._shared_data_buff_lock = defaultdict(shared_lock_factory) + with mpire.WorkerPool(n_jobs=1, shared_objects=BaseJob._shared_data_buff, use_dill=True) as pool: + pool.map(func=self.job_scheduler.run_jobs, iterable_of_args=splits, task_timeout=300) + # self.job_scheduler.run_jobs(last_synced_block + 1, target_block) logger.info("Writing last synced block {}".format(target_block)) self.sync_recorder.set_last_synced_block(target_block) diff --git a/indexer/jobs/base_job.py b/indexer/jobs/base_job.py index 3b497f8e2..d1e38f709 100644 --- a/indexer/jobs/base_job.py +++ b/indexer/jobs/base_job.py @@ -85,7 +85,7 @@ def __init__(self, **kwargs): job_name_snake = to_snake_case(self.job_name) self.user_defined_config = kwargs["config"][job_name_snake] if kwargs["config"].get(job_name_snake) else {} - def run(self, _shared_data_buff, kwargs): + def run(self, **kwargs): try: self._start(**kwargs) @@ -196,10 +196,9 @@ def _export(self): items.extend(self._extract_from_buff([output_type.type()])) for item_exporter in self._item_exporters: - with self._shared_data_buff_lock: - item_exporter.open() - item_exporter.export_items(items, job_name=self.job_name) - item_exporter.close() + item_exporter.open() + item_exporter.export_items(items, job_name=self.job_name) + item_exporter.close() def get_buff(self): return self._data_buff diff --git a/indexer/jobs/export_blocks_job.py b/indexer/jobs/export_blocks_job.py index 757181312..781f6cb3a 100644 --- a/indexer/jobs/export_blocks_job.py +++ b/indexer/jobs/export_blocks_job.py @@ -102,15 +102,15 @@ def _process(self, **kwargs): self._data_buff[Block.type()].sort(key=lambda x: x.number) self._data_buff[Transaction.type()].sort(key=lambda x: (x.block_number, x.transaction_index)) - block_list = list(self._shared_data_buff[Block.type()]) - block_list.sort(key=lambda x: x.number) - self._shared_data_buff[Block.type()] = self._manager.list(block_list) - - tx_list = list(self._shared_data_buff[Transaction.type()]) - tx_list.sort(key=lambda x: (x.block_number, x.transaction_index)) - self._shared_data_buff[Transaction.type()] = self._manager.list(tx_list) + # block_list = list(self._shared_data_buff[Block.type()]) + # block_list.sort(key=lambda x: x.number) + # self._shared_data_buff[Block.type()] = self._manager.list(block_list) + # + # tx_list = list(self._shared_data_buff[Transaction.type()]) + # tx_list.sort(key=lambda x: (x.block_number, x.transaction_index)) + # self._shared_data_buff[Transaction.type()] = self._manager.list(tx_list) ts_dict = {} - for block in self._shared_data_buff[Block.type()]: + for block in self._data_buff[Block.type()]: timestamp = block.timestamp // 3600 * 3600 block_number = block.number From c9879f309ede5aed8607b894a24a9488fe179f69 Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Thu, 14 Nov 2024 17:47:13 +0800 Subject: [PATCH 05/52] stash --- indexer/controller/scheduler/job_scheduler.py | 2 +- indexer/controller/stream_controller.py | 4 +- indexer/exporters/console_item_exporter.py | 10 +- indexer/exporters/postgres_item_exporter.py | 171 ++++++++++-------- .../jobs/export_transactions_and_logs_job.py | 4 +- 5 files changed, 104 insertions(+), 87 deletions(-) diff --git a/indexer/controller/scheduler/job_scheduler.py b/indexer/controller/scheduler/job_scheduler.py index 63f9aa96e..75fd14965 100644 --- a/indexer/controller/scheduler/job_scheduler.py +++ b/indexer/controller/scheduler/job_scheduler.py @@ -254,7 +254,7 @@ def instantiate_jobs(self): ) self.jobs.append(check_job) - def run_jobs(self, shared_objects, args): + def run_jobs(self, args): self.clear_data_buff() try: start_block = args["start_block"] diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index be04f9a42..479628b46 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -115,8 +115,8 @@ def shared_lock_factory(): return self._manager.Lock() BaseJob._shared_data_buff_lock = defaultdict(shared_lock_factory) - with mpire.WorkerPool(n_jobs=1, shared_objects=BaseJob._shared_data_buff, use_dill=True) as pool: - pool.map(func=self.job_scheduler.run_jobs, iterable_of_args=splits, task_timeout=300) + with mpire.WorkerPool(n_jobs=4, use_dill=True) as pool: + pool.map(func=self.job_scheduler.run_jobs, iterable_of_args=splits, task_timeout=10) # self.job_scheduler.run_jobs(last_synced_block + 1, target_block) logger.info("Writing last synced block {}".format(target_block)) diff --git a/indexer/exporters/console_item_exporter.py b/indexer/exporters/console_item_exporter.py index a1ec45d45..12bc42ef8 100644 --- a/indexer/exporters/console_item_exporter.py +++ b/indexer/exporters/console_item_exporter.py @@ -3,6 +3,8 @@ from indexer.exporters.base_exporter import BaseExporter logger = logging.getLogger(__name__) +from multiprocessing import RLock +lock = RLock() class ConsoleItemExporter(BaseExporter): @@ -12,7 +14,13 @@ def export_items(self, items, **kwargs): self.export_item(item, **kwargs) def export_item(self, item, **kwargs): - print(item) + if lock.acquire(timeout=5): + try: + print(item) + finally: + lock.release() + else: + logger.error('Lock acquired but not released') def batch_finish(self): logging.info("Batch finished") diff --git a/indexer/exporters/postgres_item_exporter.py b/indexer/exporters/postgres_item_exporter.py index 9cc8a60ca..a77f761e2 100644 --- a/indexer/exporters/postgres_item_exporter.py +++ b/indexer/exporters/postgres_item_exporter.py @@ -14,6 +14,9 @@ COMMIT_BATCH_SIZE = 500 +from multiprocessing import RLock +lock = RLock() + class TqdmExtraFormat(tqdm): """Provides both estimated and actual total time format parameters""" @@ -35,88 +38,96 @@ def __init__(self, service): self.sub_progress = None def export_items(self, items, **kwargs): - start_time = datetime.now(tzlocal()) - - # Initialize main progress bar - if kwargs.get("job_name"): - job_name = kwargs.get("job_name") - desc = f"{job_name}(PG)" + if lock.acquire(timeout=3): + try: + + start_time = datetime.now(tzlocal()) + + # Initialize main progress bar + if kwargs.get("job_name"): + job_name = kwargs.get("job_name") + desc = f"{job_name}(PG)" + else: + desc = "Exporting items" + self.main_progress = TqdmExtraFormat( + total=len(items), + desc=desc.ljust(35), + unit="items", + position=0, + ncols=90, + bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}] Est: {total_time}", + ) + + conn = self.service.get_conn() + try: + insert_stmt = "" + items_grouped_by_type = group_by_item_type(items) + tables = [] + + # Process each item type + for item_type in items_grouped_by_type.keys(): + item_group = items_grouped_by_type.get(item_type) + + if item_group: + pg_config = domain_model_mapping[item_type] + table = pg_config["table"] + do_update = pg_config["conflict_do_update"] + update_strategy = pg_config["update_strategy"] + converter = pg_config["converter"] + + # Initialize sub-progress bar for current table + self.sub_progress = TqdmExtraFormat( + total=len(item_group), + desc=f"Processing {table.__tablename__}".ljust(35), + unit="items", + position=1, + leave=False, + ncols=90, + bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]", + ) + + cur = conn.cursor() + data = [] + + # Process items with progress tracking + for item in item_group: + converted_item = converter(table, item, do_update) + data.append(converted_item) + self.sub_progress.update(1) + self.main_progress.update(1) + + if data: + columns = list(data[0].keys()) + values = [tuple(d.values()) for d in data] + + insert_stmt = sql_insert_statement(table, do_update, columns, where_clause=update_strategy) + + # Execute in batches with progress tracking + for i in range(0, len(values), COMMIT_BATCH_SIZE): + batch = values[i : i + COMMIT_BATCH_SIZE] + execute_values(cur, insert_stmt, batch) + conn.commit() + + tables.append(table.__tablename__) + self.sub_progress.close() + + except Exception as e: + logger.error(f"Error exporting items: {e}") + logger.error(f"{insert_stmt}") + raise e + finally: + self.service.release_conn(conn) + if self.main_progress: + self.main_progress.close() + if self.sub_progress: + self.sub_progress.close() + + end_time = datetime.now(tzlocal()) + finally: + lock.release() else: - desc = "Exporting items" - self.main_progress = TqdmExtraFormat( - total=len(items), - desc=desc.ljust(35), - unit="items", - position=0, - ncols=90, - bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}] Est: {total_time}", - ) + logger.error('Lock acquired but not released') - conn = self.service.get_conn() - try: - insert_stmt = "" - items_grouped_by_type = group_by_item_type(items) - tables = [] - - # Process each item type - for item_type in items_grouped_by_type.keys(): - item_group = items_grouped_by_type.get(item_type) - - if item_group: - pg_config = domain_model_mapping[item_type] - table = pg_config["table"] - do_update = pg_config["conflict_do_update"] - update_strategy = pg_config["update_strategy"] - converter = pg_config["converter"] - - # Initialize sub-progress bar for current table - self.sub_progress = TqdmExtraFormat( - total=len(item_group), - desc=f"Processing {table.__tablename__}".ljust(35), - unit="items", - position=1, - leave=False, - ncols=90, - bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]", - ) - - cur = conn.cursor() - data = [] - - # Process items with progress tracking - for item in item_group: - converted_item = converter(table, item, do_update) - data.append(converted_item) - self.sub_progress.update(1) - self.main_progress.update(1) - - if data: - columns = list(data[0].keys()) - values = [tuple(d.values()) for d in data] - - insert_stmt = sql_insert_statement(table, do_update, columns, where_clause=update_strategy) - - # Execute in batches with progress tracking - for i in range(0, len(values), COMMIT_BATCH_SIZE): - batch = values[i : i + COMMIT_BATCH_SIZE] - execute_values(cur, insert_stmt, batch) - conn.commit() - - tables.append(table.__tablename__) - self.sub_progress.close() - - except Exception as e: - logger.error(f"Error exporting items: {e}") - logger.error(f"{insert_stmt}") - raise e - finally: - self.service.release_conn(conn) - if self.main_progress: - self.main_progress.close() - if self.sub_progress: - self.sub_progress.close() - - end_time = datetime.now(tzlocal()) def sql_insert_statement(model: Type[HemeraModel], do_update: bool, columns, where_clause=None): diff --git a/indexer/jobs/export_transactions_and_logs_job.py b/indexer/jobs/export_transactions_and_logs_job.py index 67945c25f..5e7ab4034 100644 --- a/indexer/jobs/export_transactions_and_logs_job.py +++ b/indexer/jobs/export_transactions_and_logs_job.py @@ -61,9 +61,7 @@ def _collect_batch(self, transactions: List[Transaction]): self._collect_item(Log.type(), log) def _process(self, **kwargs): - log_list = list(self._shared_data_buff[Log.type()]) - log_list.sort(key=lambda x: (x.block_number, x.log_index)) - self._shared_data_buff[Log.type()] = self._manager.list(log_list) + self._data_buff[Log.type()].sort(key=lambda x: (x.block_number, x.log_index)) def receipt_rpc_requests(make_request, transaction_hashes, is_batch): From 6a3ee5c3db5a6ab8333ecd88affaace3aaf669ea Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Thu, 14 Nov 2024 18:31:44 +0800 Subject: [PATCH 06/52] stash --- cli/stream.py | 5 ----- indexer/controller/stream_controller.py | 11 +---------- indexer/exporters/console_item_exporter.py | 5 +++-- indexer/exporters/postgres_item_exporter.py | 10 ++++++---- indexer/jobs/base_job.py | 19 ------------------- .../jobs/export_transactions_and_logs_job.py | 3 +-- 6 files changed, 11 insertions(+), 42 deletions(-) diff --git a/cli/stream.py b/cli/stream.py index 9e8997180..4b04b60c3 100644 --- a/cli/stream.py +++ b/cli/stream.py @@ -401,10 +401,6 @@ def stream( if source_path and source_path.startswith("postgresql://"): source_types = generate_dataclass_type_list_from_parameter(source_types, "source") - from multiprocessing import Manager - - manager = Manager() - # _shared_data_buff = Manager().dict() job_scheduler = JobScheduler( batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)), batch_web3_debug_provider=ThreadLocalProxy(lambda: get_provider_from_uri(debug_provider_uri, batch=True)), @@ -430,7 +426,6 @@ def stream( ), retry_from_record=retry_from_record, delay=delay, - _manager=manager, ) controller.action( diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index 479628b46..f6a256a75 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -1,7 +1,6 @@ import logging import os import time -from collections import defaultdict import mpire @@ -10,7 +9,6 @@ from common.utils.web3_utils import build_web3 from indexer.controller.base_controller import BaseController from indexer.controller.scheduler.job_scheduler import JobScheduler -from indexer.jobs.base_job import BaseJob from indexer.utils.exception_recorder import ExceptionRecorder from indexer.utils.limit_reader import LimitReader from indexer.utils.sync_recorder import BaseRecorder @@ -41,7 +39,6 @@ def __init__( self.max_retries = max_retries self.retry_from_record = retry_from_record self.delay = delay - self._manager = _manager def action( self, @@ -108,15 +105,9 @@ def _do_stream(self, start_block, end_block, steps, retry_errors, period_seconds if synced_blocks != 0: # ETL program's main logic splits = self.split_blocks(last_synced_block + 1, target_block, 100) - BaseJob._manager = self._manager - BaseJob._shared_data_buff = self._manager.dict() - def shared_lock_factory(): - return self._manager.Lock() - - BaseJob._shared_data_buff_lock = defaultdict(shared_lock_factory) with mpire.WorkerPool(n_jobs=4, use_dill=True) as pool: - pool.map(func=self.job_scheduler.run_jobs, iterable_of_args=splits, task_timeout=10) + pool.map(func=self.job_scheduler.run_jobs, iterable_of_args=splits, task_timeout=20) # self.job_scheduler.run_jobs(last_synced_block + 1, target_block) logger.info("Writing last synced block {}".format(target_block)) diff --git a/indexer/exporters/console_item_exporter.py b/indexer/exporters/console_item_exporter.py index 12bc42ef8..fc9d12718 100644 --- a/indexer/exporters/console_item_exporter.py +++ b/indexer/exporters/console_item_exporter.py @@ -4,6 +4,7 @@ logger = logging.getLogger(__name__) from multiprocessing import RLock + lock = RLock() @@ -14,13 +15,13 @@ def export_items(self, items, **kwargs): self.export_item(item, **kwargs) def export_item(self, item, **kwargs): - if lock.acquire(timeout=5): + if lock.acquire(timeout=10): try: print(item) finally: lock.release() else: - logger.error('Lock acquired but not released') + logger.error("Lock acquired but not released") def batch_finish(self): logging.info("Batch finished") diff --git a/indexer/exporters/postgres_item_exporter.py b/indexer/exporters/postgres_item_exporter.py index a77f761e2..30ee7c3d8 100644 --- a/indexer/exporters/postgres_item_exporter.py +++ b/indexer/exporters/postgres_item_exporter.py @@ -15,6 +15,7 @@ COMMIT_BATCH_SIZE = 500 from multiprocessing import RLock + lock = RLock() @@ -38,7 +39,7 @@ def __init__(self, service): self.sub_progress = None def export_items(self, items, **kwargs): - if lock.acquire(timeout=3): + if lock.acquire(timeout=10): try: start_time = datetime.now(tzlocal()) @@ -100,7 +101,9 @@ def export_items(self, items, **kwargs): columns = list(data[0].keys()) values = [tuple(d.values()) for d in data] - insert_stmt = sql_insert_statement(table, do_update, columns, where_clause=update_strategy) + insert_stmt = sql_insert_statement( + table, do_update, columns, where_clause=update_strategy + ) # Execute in batches with progress tracking for i in range(0, len(values), COMMIT_BATCH_SIZE): @@ -126,8 +129,7 @@ def export_items(self, items, **kwargs): finally: lock.release() else: - logger.error('Lock acquired but not released') - + logger.error("Lock acquired but not released") def sql_insert_statement(model: Type[HemeraModel], do_update: bool, columns, where_clause=None): diff --git a/indexer/jobs/base_job.py b/indexer/jobs/base_job.py index d1e38f709..a259fc8cc 100644 --- a/indexer/jobs/base_job.py +++ b/indexer/jobs/base_job.py @@ -42,9 +42,6 @@ def get_subclasses(cls): class BaseJob(metaclass=BaseJobMeta): _data_buff = defaultdict(list) _data_buff_lock = defaultdict(threading.Lock) - _manager = None - _shared_data_buff = None - _shared_data_buff_lock = None tokens = None @@ -140,29 +137,13 @@ def _collect_item(self, key, data): with self._data_buff_lock[key]: self._data_buff[key].append(data) - def _collect_shared_item(self, key, data): - with self._shared_data_buff_lock[key]: - if key not in self._shared_data_buff: - self._shared_data_buff[key] = self._manager.list() - self._shared_data_buff[key].append(data) - def _collect_items(self, key, data_list): with self._data_buff_lock[key]: self._data_buff[key].extend(data_list) - def _collect_shared_items(self, key, data_list): - with self._shared_data_buff_lock[key]: - if key not in self._shared_data_buff: - self._shared_data_buff[key] = self._manager.list() - self._shared_data_buff[key].extend(data_list) - def _collect_domain(self, domain): with self._data_buff_lock[domain.type()]: self._data_buff[domain.type()].append(domain) - if domain.type() in self._shared_data_buff: - self._shared_data_buff[domain.type()].append(domain) - else: - self._shared_data_buff[domain.type()] = [domain] def _collect_domains(self, domains): for domain in domains: diff --git a/indexer/jobs/export_transactions_and_logs_job.py b/indexer/jobs/export_transactions_and_logs_job.py index 5e7ab4034..c520c9148 100644 --- a/indexer/jobs/export_transactions_and_logs_job.py +++ b/indexer/jobs/export_transactions_and_logs_job.py @@ -33,8 +33,7 @@ def __init__(self, **kwargs): def _collect(self, **kwargs): - # transactions: List[Transaction] = self._data_buff.get(Transaction.type(), []) - transactions: List[Transaction] = self._shared_data_buff.get(Transaction.type(), []) + transactions: List[Transaction] = self._data_buff.get(Transaction.type(), []) self._batch_work_executor.execute(transactions, self._collect_batch, total_items=len(transactions)) self._batch_work_executor.wait() From 0abcbdb08fc91699a2f71e3dba7f3762c6727f40 Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Fri, 15 Nov 2024 11:42:31 +0800 Subject: [PATCH 07/52] stash --- indexer/controller/stream_controller.py | 10 +++++++--- indexer/exporters/console_item_exporter.py | 3 ++- indexer/exporters/postgres_item_exporter.py | 3 ++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index f6a256a75..080d82fb6 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -17,6 +17,10 @@ logger = logging.getLogger(__name__) +M_JOBS: int = int(os.environ.get("M_JOBS", 4)) +M_TIMEOUT: int = int(os.environ.get("M_TIMEOUT", 30)) +M_SIZE: int = int(os.environ.get("M_SIZE", 1000)) +M_LOCK_TIME: int = int(os.environ.get("M_LOCK_TIME", 20)) class StreamController(BaseController): @@ -104,10 +108,10 @@ def _do_stream(self, start_block, end_block, steps, retry_errors, period_seconds if synced_blocks != 0: # ETL program's main logic - splits = self.split_blocks(last_synced_block + 1, target_block, 100) + splits = self.split_blocks(last_synced_block + 1, target_block, M_SIZE) - with mpire.WorkerPool(n_jobs=4, use_dill=True) as pool: - pool.map(func=self.job_scheduler.run_jobs, iterable_of_args=splits, task_timeout=20) + with mpire.WorkerPool(n_jobs=M_JOBS, use_dill=True) as pool: + pool.map(func=self.job_scheduler.run_jobs, iterable_of_args=splits, task_timeout=M_TIMEOUT) # self.job_scheduler.run_jobs(last_synced_block + 1, target_block) logger.info("Writing last synced block {}".format(target_block)) diff --git a/indexer/exporters/console_item_exporter.py b/indexer/exporters/console_item_exporter.py index fc9d12718..4809a6be8 100644 --- a/indexer/exporters/console_item_exporter.py +++ b/indexer/exporters/console_item_exporter.py @@ -1,5 +1,6 @@ import logging +from indexer.controller.stream_controller import M_LOCK_TIME from indexer.exporters.base_exporter import BaseExporter logger = logging.getLogger(__name__) @@ -15,7 +16,7 @@ def export_items(self, items, **kwargs): self.export_item(item, **kwargs) def export_item(self, item, **kwargs): - if lock.acquire(timeout=10): + if lock.acquire(timeout=M_LOCK_TIME): try: print(item) finally: diff --git a/indexer/exporters/postgres_item_exporter.py b/indexer/exporters/postgres_item_exporter.py index 30ee7c3d8..c2147b05c 100644 --- a/indexer/exporters/postgres_item_exporter.py +++ b/indexer/exporters/postgres_item_exporter.py @@ -8,6 +8,7 @@ from common.converter.pg_converter import domain_model_mapping from common.models import HemeraModel +from indexer.controller.stream_controller import M_LOCK_TIME from indexer.exporters.base_exporter import BaseExporter, group_by_item_type logger = logging.getLogger(__name__) @@ -39,7 +40,7 @@ def __init__(self, service): self.sub_progress = None def export_items(self, items, **kwargs): - if lock.acquire(timeout=10): + if lock.acquire(timeout=M_LOCK_TIME): try: start_time = datetime.now(tzlocal()) From 341001237fe41555f27163aff139c93991ae89eb Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Fri, 15 Nov 2024 11:54:56 +0800 Subject: [PATCH 08/52] stash --- indexer/controller/stream_controller.py | 2 +- indexer/exporters/console_item_exporter.py | 4 +++- indexer/exporters/postgres_item_exporter.py | 5 ++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index 080d82fb6..7c0033815 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -20,7 +20,7 @@ M_JOBS: int = int(os.environ.get("M_JOBS", 4)) M_TIMEOUT: int = int(os.environ.get("M_TIMEOUT", 30)) M_SIZE: int = int(os.environ.get("M_SIZE", 1000)) -M_LOCK_TIME: int = int(os.environ.get("M_LOCK_TIME", 20)) + class StreamController(BaseController): diff --git a/indexer/exporters/console_item_exporter.py b/indexer/exporters/console_item_exporter.py index 4809a6be8..7df212393 100644 --- a/indexer/exporters/console_item_exporter.py +++ b/indexer/exporters/console_item_exporter.py @@ -1,6 +1,6 @@ import logging +import os -from indexer.controller.stream_controller import M_LOCK_TIME from indexer.exporters.base_exporter import BaseExporter logger = logging.getLogger(__name__) @@ -8,6 +8,8 @@ lock = RLock() +M_LOCK_TIME: int = int(os.environ.get("M_LOCK_TIME", 20)) + class ConsoleItemExporter(BaseExporter): diff --git a/indexer/exporters/postgres_item_exporter.py b/indexer/exporters/postgres_item_exporter.py index c2147b05c..7fa79aaad 100644 --- a/indexer/exporters/postgres_item_exporter.py +++ b/indexer/exporters/postgres_item_exporter.py @@ -1,4 +1,5 @@ import logging +import os from datetime import datetime from typing import Type @@ -8,7 +9,6 @@ from common.converter.pg_converter import domain_model_mapping from common.models import HemeraModel -from indexer.controller.stream_controller import M_LOCK_TIME from indexer.exporters.base_exporter import BaseExporter, group_by_item_type logger = logging.getLogger(__name__) @@ -33,6 +33,9 @@ def format_dict(self): return d +M_LOCK_TIME: int = int(os.environ.get("M_LOCK_TIME", 20)) + + class PostgresItemExporter(BaseExporter): def __init__(self, service): self.service = service From 95dfbc3993829b4d73b7298c6b48bc7f7f05264f Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Fri, 15 Nov 2024 13:47:20 +0800 Subject: [PATCH 09/52] stash --- indexer/utils/exception_recorder.py | 33 +++++++++++++++++++---------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/indexer/utils/exception_recorder.py b/indexer/utils/exception_recorder.py index 99b67810f..c5ca47897 100644 --- a/indexer/utils/exception_recorder.py +++ b/indexer/utils/exception_recorder.py @@ -1,3 +1,4 @@ +import os import threading from queue import Queue @@ -8,6 +9,13 @@ LOG_BUFFER_SIZE = 5000 +from multiprocessing import RLock + +lock = RLock() + +M_LOCK_TIME: int = int(os.environ.get("M_LOCK_TIME", 20)) + + class ExceptionRecorder(object): _instance = None @@ -63,14 +71,17 @@ def _check_and_flush(self): self._flush_logs_to_db(logs) def _flush_logs_to_db(self, logs): - session = self._service.get_service_session() - - try: - statement = insert(ExceptionRecords).values(logs) - session.execute(statement) - session.commit() - except Exception as e: - print(e) - raise e - finally: - session.close() + if lock.acquire(timeout=M_LOCK_TIME): + session = self._service.get_service_session() + + try: + statement = insert(ExceptionRecords).values(logs) + session.execute(statement) + session.commit() + except Exception as e: + print(e) + raise e + finally: + session.close() + else: + print("failed to get lock, flush logs to db") \ No newline at end of file From 840aad0be5bd55e40b3e5a1bf472caf79c2486dc Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Fri, 15 Nov 2024 15:43:06 +0800 Subject: [PATCH 10/52] update dependency lock file --- poetry.lock | 444 +++++++++++++++++++++++++++---------------------- pyproject.toml | 2 + 2 files changed, 246 insertions(+), 200 deletions(-) diff --git a/poetry.lock b/poetry.lock index fdf6705e6..1ed69dcb9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -13,108 +13,108 @@ files = [ [[package]] name = "aiohttp" -version = "3.10.10" +version = "3.10.11" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.8" files = [ - {file = "aiohttp-3.10.10-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:be7443669ae9c016b71f402e43208e13ddf00912f47f623ee5994e12fc7d4b3f"}, - {file = "aiohttp-3.10.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7b06b7843929e41a94ea09eb1ce3927865387e3e23ebe108e0d0d09b08d25be9"}, - {file = "aiohttp-3.10.10-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:333cf6cf8e65f6a1e06e9eb3e643a0c515bb850d470902274239fea02033e9a8"}, - {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:274cfa632350225ce3fdeb318c23b4a10ec25c0e2c880eff951a3842cf358ac1"}, - {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d9e5e4a85bdb56d224f412d9c98ae4cbd032cc4f3161818f692cd81766eee65a"}, - {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b606353da03edcc71130b52388d25f9a30a126e04caef1fd637e31683033abd"}, - {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab5a5a0c7a7991d90446a198689c0535be89bbd6b410a1f9a66688f0880ec026"}, - {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:578a4b875af3e0daaf1ac6fa983d93e0bbfec3ead753b6d6f33d467100cdc67b"}, - {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8105fd8a890df77b76dd3054cddf01a879fc13e8af576805d667e0fa0224c35d"}, - {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3bcd391d083f636c06a68715e69467963d1f9600f85ef556ea82e9ef25f043f7"}, - {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fbc6264158392bad9df19537e872d476f7c57adf718944cc1e4495cbabf38e2a"}, - {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e48d5021a84d341bcaf95c8460b152cfbad770d28e5fe14a768988c461b821bc"}, - {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2609e9ab08474702cc67b7702dbb8a80e392c54613ebe80db7e8dbdb79837c68"}, - {file = "aiohttp-3.10.10-cp310-cp310-win32.whl", hash = "sha256:84afcdea18eda514c25bc68b9af2a2b1adea7c08899175a51fe7c4fb6d551257"}, - {file = "aiohttp-3.10.10-cp310-cp310-win_amd64.whl", hash = "sha256:9c72109213eb9d3874f7ac8c0c5fa90e072d678e117d9061c06e30c85b4cf0e6"}, - {file = "aiohttp-3.10.10-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c30a0eafc89d28e7f959281b58198a9fa5e99405f716c0289b7892ca345fe45f"}, - {file = "aiohttp-3.10.10-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:258c5dd01afc10015866114e210fb7365f0d02d9d059c3c3415382ab633fcbcb"}, - {file = "aiohttp-3.10.10-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:15ecd889a709b0080f02721255b3f80bb261c2293d3c748151274dfea93ac871"}, - {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3935f82f6f4a3820270842e90456ebad3af15810cf65932bd24da4463bc0a4c"}, - {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:413251f6fcf552a33c981c4709a6bba37b12710982fec8e558ae944bfb2abd38"}, - {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d1720b4f14c78a3089562b8875b53e36b51c97c51adc53325a69b79b4b48ebcb"}, - {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:679abe5d3858b33c2cf74faec299fda60ea9de62916e8b67e625d65bf069a3b7"}, - {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:79019094f87c9fb44f8d769e41dbb664d6e8fcfd62f665ccce36762deaa0e911"}, - {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:fe2fb38c2ed905a2582948e2de560675e9dfbee94c6d5ccdb1301c6d0a5bf092"}, - {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:a3f00003de6eba42d6e94fabb4125600d6e484846dbf90ea8e48a800430cc142"}, - {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:1bbb122c557a16fafc10354b9d99ebf2f2808a660d78202f10ba9d50786384b9"}, - {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:30ca7c3b94708a9d7ae76ff281b2f47d8eaf2579cd05971b5dc681db8caac6e1"}, - {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:df9270660711670e68803107d55c2b5949c2e0f2e4896da176e1ecfc068b974a"}, - {file = "aiohttp-3.10.10-cp311-cp311-win32.whl", hash = "sha256:aafc8ee9b742ce75044ae9a4d3e60e3d918d15a4c2e08a6c3c3e38fa59b92d94"}, - {file = "aiohttp-3.10.10-cp311-cp311-win_amd64.whl", hash = "sha256:362f641f9071e5f3ee6f8e7d37d5ed0d95aae656adf4ef578313ee585b585959"}, - {file = "aiohttp-3.10.10-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:9294bbb581f92770e6ed5c19559e1e99255e4ca604a22c5c6397b2f9dd3ee42c"}, - {file = "aiohttp-3.10.10-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a8fa23fe62c436ccf23ff930149c047f060c7126eae3ccea005f0483f27b2e28"}, - {file = "aiohttp-3.10.10-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5c6a5b8c7926ba5d8545c7dd22961a107526562da31a7a32fa2456baf040939f"}, - {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:007ec22fbc573e5eb2fb7dec4198ef8f6bf2fe4ce20020798b2eb5d0abda6138"}, - {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9627cc1a10c8c409b5822a92d57a77f383b554463d1884008e051c32ab1b3742"}, - {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:50edbcad60d8f0e3eccc68da67f37268b5144ecc34d59f27a02f9611c1d4eec7"}, - {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a45d85cf20b5e0d0aa5a8dca27cce8eddef3292bc29d72dcad1641f4ed50aa16"}, - {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b00807e2605f16e1e198f33a53ce3c4523114059b0c09c337209ae55e3823a8"}, - {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f2d4324a98062be0525d16f768a03e0bbb3b9fe301ceee99611dc9a7953124e6"}, - {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:438cd072f75bb6612f2aca29f8bd7cdf6e35e8f160bc312e49fbecab77c99e3a"}, - {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:baa42524a82f75303f714108fea528ccacf0386af429b69fff141ffef1c534f9"}, - {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a7d8d14fe962153fc681f6366bdec33d4356f98a3e3567782aac1b6e0e40109a"}, - {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c1277cd707c465cd09572a774559a3cc7c7a28802eb3a2a9472588f062097205"}, - {file = "aiohttp-3.10.10-cp312-cp312-win32.whl", hash = "sha256:59bb3c54aa420521dc4ce3cc2c3fe2ad82adf7b09403fa1f48ae45c0cbde6628"}, - {file = "aiohttp-3.10.10-cp312-cp312-win_amd64.whl", hash = "sha256:0e1b370d8007c4ae31ee6db7f9a2fe801a42b146cec80a86766e7ad5c4a259cf"}, - {file = "aiohttp-3.10.10-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ad7593bb24b2ab09e65e8a1d385606f0f47c65b5a2ae6c551db67d6653e78c28"}, - {file = "aiohttp-3.10.10-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1eb89d3d29adaf533588f209768a9c02e44e4baf832b08118749c5fad191781d"}, - {file = "aiohttp-3.10.10-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3fe407bf93533a6fa82dece0e74dbcaaf5d684e5a51862887f9eaebe6372cd79"}, - {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50aed5155f819873d23520919e16703fc8925e509abbb1a1491b0087d1cd969e"}, - {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4f05e9727ce409358baa615dbeb9b969db94324a79b5a5cea45d39bdb01d82e6"}, - {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dffb610a30d643983aeb185ce134f97f290f8935f0abccdd32c77bed9388b42"}, - {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa6658732517ddabe22c9036479eabce6036655ba87a0224c612e1ae6af2087e"}, - {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:741a46d58677d8c733175d7e5aa618d277cd9d880301a380fd296975a9cdd7bc"}, - {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e00e3505cd80440f6c98c6d69269dcc2a119f86ad0a9fd70bccc59504bebd68a"}, - {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ffe595f10566f8276b76dc3a11ae4bb7eba1aac8ddd75811736a15b0d5311414"}, - {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:bdfcf6443637c148c4e1a20c48c566aa694fa5e288d34b20fcdc58507882fed3"}, - {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d183cf9c797a5291e8301790ed6d053480ed94070637bfaad914dd38b0981f67"}, - {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:77abf6665ae54000b98b3c742bc6ea1d1fb31c394bcabf8b5d2c1ac3ebfe7f3b"}, - {file = "aiohttp-3.10.10-cp313-cp313-win32.whl", hash = "sha256:4470c73c12cd9109db8277287d11f9dd98f77fc54155fc71a7738a83ffcc8ea8"}, - {file = "aiohttp-3.10.10-cp313-cp313-win_amd64.whl", hash = "sha256:486f7aabfa292719a2753c016cc3a8f8172965cabb3ea2e7f7436c7f5a22a151"}, - {file = "aiohttp-3.10.10-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:1b66ccafef7336a1e1f0e389901f60c1d920102315a56df85e49552308fc0486"}, - {file = "aiohttp-3.10.10-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:acd48d5b80ee80f9432a165c0ac8cbf9253eaddb6113269a5e18699b33958dbb"}, - {file = "aiohttp-3.10.10-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3455522392fb15ff549d92fbf4b73b559d5e43dc522588f7eb3e54c3f38beee7"}, - {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45c3b868724137f713a38376fef8120c166d1eadd50da1855c112fe97954aed8"}, - {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:da1dee8948d2137bb51fbb8a53cce6b1bcc86003c6b42565f008438b806cccd8"}, - {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c5ce2ce7c997e1971b7184ee37deb6ea9922ef5163c6ee5aa3c274b05f9e12fa"}, - {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28529e08fde6f12eba8677f5a8608500ed33c086f974de68cc65ab218713a59d"}, - {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f7db54c7914cc99d901d93a34704833568d86c20925b2762f9fa779f9cd2e70f"}, - {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:03a42ac7895406220124c88911ebee31ba8b2d24c98507f4a8bf826b2937c7f2"}, - {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:7e338c0523d024fad378b376a79faff37fafb3c001872a618cde1d322400a572"}, - {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:038f514fe39e235e9fef6717fbf944057bfa24f9b3db9ee551a7ecf584b5b480"}, - {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:64f6c17757251e2b8d885d728b6433d9d970573586a78b78ba8929b0f41d045a"}, - {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:93429602396f3383a797a2a70e5f1de5df8e35535d7806c9f91df06f297e109b"}, - {file = "aiohttp-3.10.10-cp38-cp38-win32.whl", hash = "sha256:c823bc3971c44ab93e611ab1a46b1eafeae474c0c844aff4b7474287b75fe49c"}, - {file = "aiohttp-3.10.10-cp38-cp38-win_amd64.whl", hash = "sha256:54ca74df1be3c7ca1cf7f4c971c79c2daf48d9aa65dea1a662ae18926f5bc8ce"}, - {file = "aiohttp-3.10.10-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:01948b1d570f83ee7bbf5a60ea2375a89dfb09fd419170e7f5af029510033d24"}, - {file = "aiohttp-3.10.10-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9fc1500fd2a952c5c8e3b29aaf7e3cc6e27e9cfc0a8819b3bce48cc1b849e4cc"}, - {file = "aiohttp-3.10.10-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f614ab0c76397661b90b6851a030004dac502e48260ea10f2441abd2207fbcc7"}, - {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00819de9e45d42584bed046314c40ea7e9aea95411b38971082cad449392b08c"}, - {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05646ebe6b94cc93407b3bf34b9eb26c20722384d068eb7339de802154d61bc5"}, - {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:998f3bd3cfc95e9424a6acd7840cbdd39e45bc09ef87533c006f94ac47296090"}, - {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9010c31cd6fa59438da4e58a7f19e4753f7f264300cd152e7f90d4602449762"}, - {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ea7ffc6d6d6f8a11e6f40091a1040995cdff02cfc9ba4c2f30a516cb2633554"}, - {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ef9c33cc5cbca35808f6c74be11eb7f5f6b14d2311be84a15b594bd3e58b5527"}, - {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ce0cdc074d540265bfeb31336e678b4e37316849d13b308607efa527e981f5c2"}, - {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:597a079284b7ee65ee102bc3a6ea226a37d2b96d0418cc9047490f231dc09fe8"}, - {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:7789050d9e5d0c309c706953e5e8876e38662d57d45f936902e176d19f1c58ab"}, - {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e7f8b04d83483577fd9200461b057c9f14ced334dcb053090cea1da9c8321a91"}, - {file = "aiohttp-3.10.10-cp39-cp39-win32.whl", hash = "sha256:c02a30b904282777d872266b87b20ed8cc0d1501855e27f831320f471d54d983"}, - {file = "aiohttp-3.10.10-cp39-cp39-win_amd64.whl", hash = "sha256:edfe3341033a6b53a5c522c802deb2079eee5cbfbb0af032a55064bd65c73a23"}, - {file = "aiohttp-3.10.10.tar.gz", hash = "sha256:0631dd7c9f0822cc61c88586ca76d5b5ada26538097d0f1df510b082bad3411a"}, + {file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5077b1a5f40ffa3ba1f40d537d3bec4383988ee51fbba6b74aa8fb1bc466599e"}, + {file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8d6a14a4d93b5b3c2891fca94fa9d41b2322a68194422bef0dd5ec1e57d7d298"}, + {file = "aiohttp-3.10.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ffbfde2443696345e23a3c597049b1dd43049bb65337837574205e7368472177"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20b3d9e416774d41813bc02fdc0663379c01817b0874b932b81c7f777f67b217"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b943011b45ee6bf74b22245c6faab736363678e910504dd7531a58c76c9015a"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48bc1d924490f0d0b3658fe5c4b081a4d56ebb58af80a6729d4bd13ea569797a"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e12eb3f4b1f72aaaf6acd27d045753b18101524f72ae071ae1c91c1cd44ef115"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f14ebc419a568c2eff3c1ed35f634435c24ead2fe19c07426af41e7adb68713a"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:72b191cdf35a518bfc7ca87d770d30941decc5aaf897ec8b484eb5cc8c7706f3"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5ab2328a61fdc86424ee540d0aeb8b73bbcad7351fb7cf7a6546fc0bcffa0038"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aa93063d4af05c49276cf14e419550a3f45258b6b9d1f16403e777f1addf4519"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:30283f9d0ce420363c24c5c2421e71a738a2155f10adbb1a11a4d4d6d2715cfc"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e5358addc8044ee49143c546d2182c15b4ac3a60be01c3209374ace05af5733d"}, + {file = "aiohttp-3.10.11-cp310-cp310-win32.whl", hash = "sha256:e1ffa713d3ea7cdcd4aea9cddccab41edf6882fa9552940344c44e59652e1120"}, + {file = "aiohttp-3.10.11-cp310-cp310-win_amd64.whl", hash = "sha256:778cbd01f18ff78b5dd23c77eb82987ee4ba23408cbed233009fd570dda7e674"}, + {file = "aiohttp-3.10.11-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:80ff08556c7f59a7972b1e8919f62e9c069c33566a6d28586771711e0eea4f07"}, + {file = "aiohttp-3.10.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c8f96e9ee19f04c4914e4e7a42a60861066d3e1abf05c726f38d9d0a466e695"}, + {file = "aiohttp-3.10.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fb8601394d537da9221947b5d6e62b064c9a43e88a1ecd7414d21a1a6fba9c24"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ea224cf7bc2d8856d6971cea73b1d50c9c51d36971faf1abc169a0d5f85a382"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db9503f79e12d5d80b3efd4d01312853565c05367493379df76d2674af881caa"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0f449a50cc33f0384f633894d8d3cd020e3ccef81879c6e6245c3c375c448625"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82052be3e6d9e0c123499127782a01a2b224b8af8c62ab46b3f6197035ad94e9"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:20063c7acf1eec550c8eb098deb5ed9e1bb0521613b03bb93644b810986027ac"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:489cced07a4c11488f47aab1f00d0c572506883f877af100a38f1fedaa884c3a"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ea9b3bab329aeaa603ed3bf605f1e2a6f36496ad7e0e1aa42025f368ee2dc07b"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:ca117819d8ad113413016cb29774b3f6d99ad23c220069789fc050267b786c16"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2dfb612dcbe70fb7cdcf3499e8d483079b89749c857a8f6e80263b021745c730"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9b615d3da0d60e7d53c62e22b4fd1c70f4ae5993a44687b011ea3a2e49051b8"}, + {file = "aiohttp-3.10.11-cp311-cp311-win32.whl", hash = "sha256:29103f9099b6068bbdf44d6a3d090e0a0b2be6d3c9f16a070dd9d0d910ec08f9"}, + {file = "aiohttp-3.10.11-cp311-cp311-win_amd64.whl", hash = "sha256:236b28ceb79532da85d59aa9b9bf873b364e27a0acb2ceaba475dc61cffb6f3f"}, + {file = "aiohttp-3.10.11-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:7480519f70e32bfb101d71fb9a1f330fbd291655a4c1c922232a48c458c52710"}, + {file = "aiohttp-3.10.11-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f65267266c9aeb2287a6622ee2bb39490292552f9fbf851baabc04c9f84e048d"}, + {file = "aiohttp-3.10.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7400a93d629a0608dc1d6c55f1e3d6e07f7375745aaa8bd7f085571e4d1cee97"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f34b97e4b11b8d4eb2c3a4f975be626cc8af99ff479da7de49ac2c6d02d35725"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e7b825da878464a252ccff2958838f9caa82f32a8dbc334eb9b34a026e2c636"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9f92a344c50b9667827da308473005f34767b6a2a60d9acff56ae94f895f385"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc6f1ab987a27b83c5268a17218463c2ec08dbb754195113867a27b166cd6087"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1dc0f4ca54842173d03322793ebcf2c8cc2d34ae91cc762478e295d8e361e03f"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7ce6a51469bfaacff146e59e7fb61c9c23006495d11cc24c514a455032bcfa03"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:aad3cd91d484d065ede16f3cf15408254e2469e3f613b241a1db552c5eb7ab7d"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f4df4b8ca97f658c880fb4b90b1d1ec528315d4030af1ec763247ebfd33d8b9a"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:2e4e18a0a2d03531edbc06c366954e40a3f8d2a88d2b936bbe78a0c75a3aab3e"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6ce66780fa1a20e45bc753cda2a149daa6dbf1561fc1289fa0c308391c7bc0a4"}, + {file = "aiohttp-3.10.11-cp312-cp312-win32.whl", hash = "sha256:a919c8957695ea4c0e7a3e8d16494e3477b86f33067478f43106921c2fef15bb"}, + {file = "aiohttp-3.10.11-cp312-cp312-win_amd64.whl", hash = "sha256:b5e29706e6389a2283a91611c91bf24f218962717c8f3b4e528ef529d112ee27"}, + {file = "aiohttp-3.10.11-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:703938e22434d7d14ec22f9f310559331f455018389222eed132808cd8f44127"}, + {file = "aiohttp-3.10.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9bc50b63648840854e00084c2b43035a62e033cb9b06d8c22b409d56eb098413"}, + {file = "aiohttp-3.10.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f0463bf8b0754bc744e1feb61590706823795041e63edf30118a6f0bf577461"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6c6dec398ac5a87cb3a407b068e1106b20ef001c344e34154616183fe684288"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bcaf2d79104d53d4dcf934f7ce76d3d155302d07dae24dff6c9fffd217568067"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:25fd5470922091b5a9aeeb7e75be609e16b4fba81cdeaf12981393fb240dd10e"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbde2ca67230923a42161b1f408c3992ae6e0be782dca0c44cb3206bf330dee1"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:249c8ff8d26a8b41a0f12f9df804e7c685ca35a207e2410adbd3e924217b9006"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:878ca6a931ee8c486a8f7b432b65431d095c522cbeb34892bee5be97b3481d0f"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8663f7777ce775f0413324be0d96d9730959b2ca73d9b7e2c2c90539139cbdd6"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6cd3f10b01f0c31481fba8d302b61603a2acb37b9d30e1d14e0f5a58b7b18a31"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:4e8d8aad9402d3aa02fdc5ca2fe68bcb9fdfe1f77b40b10410a94c7f408b664d"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:38e3c4f80196b4f6c3a85d134a534a56f52da9cb8d8e7af1b79a32eefee73a00"}, + {file = "aiohttp-3.10.11-cp313-cp313-win32.whl", hash = "sha256:fc31820cfc3b2863c6e95e14fcf815dc7afe52480b4dc03393c4873bb5599f71"}, + {file = "aiohttp-3.10.11-cp313-cp313-win_amd64.whl", hash = "sha256:4996ff1345704ffdd6d75fb06ed175938c133425af616142e7187f28dc75f14e"}, + {file = "aiohttp-3.10.11-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:74baf1a7d948b3d640badeac333af581a367ab916b37e44cf90a0334157cdfd2"}, + {file = "aiohttp-3.10.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:473aebc3b871646e1940c05268d451f2543a1d209f47035b594b9d4e91ce8339"}, + {file = "aiohttp-3.10.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c2f746a6968c54ab2186574e15c3f14f3e7f67aef12b761e043b33b89c5b5f95"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d110cabad8360ffa0dec8f6ec60e43286e9d251e77db4763a87dcfe55b4adb92"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0099c7d5d7afff4202a0c670e5b723f7718810000b4abcbc96b064129e64bc7"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0316e624b754dbbf8c872b62fe6dcb395ef20c70e59890dfa0de9eafccd2849d"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a5f7ab8baf13314e6b2485965cbacb94afff1e93466ac4d06a47a81c50f9cca"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c891011e76041e6508cbfc469dd1a8ea09bc24e87e4c204e05f150c4c455a5fa"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:9208299251370ee815473270c52cd3f7069ee9ed348d941d574d1457d2c73e8b"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:459f0f32c8356e8125f45eeff0ecf2b1cb6db1551304972702f34cd9e6c44658"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:14cdc8c1810bbd4b4b9f142eeee23cda528ae4e57ea0923551a9af4820980e39"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:971aa438a29701d4b34e4943e91b5e984c3ae6ccbf80dd9efaffb01bd0b243a9"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:9a309c5de392dfe0f32ee57fa43ed8fc6ddf9985425e84bd51ed66bb16bce3a7"}, + {file = "aiohttp-3.10.11-cp38-cp38-win32.whl", hash = "sha256:9ec1628180241d906a0840b38f162a3215114b14541f1a8711c368a8739a9be4"}, + {file = "aiohttp-3.10.11-cp38-cp38-win_amd64.whl", hash = "sha256:9c6e0ffd52c929f985c7258f83185d17c76d4275ad22e90aa29f38e211aacbec"}, + {file = "aiohttp-3.10.11-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cdc493a2e5d8dc79b2df5bec9558425bcd39aff59fc949810cbd0832e294b106"}, + {file = "aiohttp-3.10.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b3e70f24e7d0405be2348da9d5a7836936bf3a9b4fd210f8c37e8d48bc32eca6"}, + {file = "aiohttp-3.10.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:968b8fb2a5eee2770eda9c7b5581587ef9b96fbdf8dcabc6b446d35ccc69df01"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:deef4362af9493d1382ef86732ee2e4cbc0d7c005947bd54ad1a9a16dd59298e"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:686b03196976e327412a1b094f4120778c7c4b9cff9bce8d2fdfeca386b89829"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3bf6d027d9d1d34e1c2e1645f18a6498c98d634f8e373395221121f1c258ace8"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:099fd126bf960f96d34a760e747a629c27fb3634da5d05c7ef4d35ef4ea519fc"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c73c4d3dae0b4644bc21e3de546530531d6cdc88659cdeb6579cd627d3c206aa"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0c5580f3c51eea91559db3facd45d72e7ec970b04528b4709b1f9c2555bd6d0b"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:fdf6429f0caabfd8a30c4e2eaecb547b3c340e4730ebfe25139779b9815ba138"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:d97187de3c276263db3564bb9d9fad9e15b51ea10a371ffa5947a5ba93ad6777"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:0acafb350cfb2eba70eb5d271f55e08bd4502ec35e964e18ad3e7d34d71f7261"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c13ed0c779911c7998a58e7848954bd4d63df3e3575f591e321b19a2aec8df9f"}, + {file = "aiohttp-3.10.11-cp39-cp39-win32.whl", hash = "sha256:22b7c540c55909140f63ab4f54ec2c20d2635c0289cdd8006da46f3327f971b9"}, + {file = "aiohttp-3.10.11-cp39-cp39-win_amd64.whl", hash = "sha256:7b26b1551e481012575dab8e3727b16fe7dd27eb2711d2e63ced7368756268fb"}, + {file = "aiohttp-3.10.11.tar.gz", hash = "sha256:9dc2b8f3dcab2e39e0fa309c8da50c3b55e6f34ab25f1a71d3288f24924d33a7"}, ] [package.dependencies] aiohappyeyeballs = ">=2.3.0" aiosignal = ">=1.1.2" -async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""} +async-timeout = {version = ">=4.0,<6.0", markers = "python_version < \"3.11\""} attrs = ">=17.3.0" frozenlist = ">=1.1.1" multidict = ">=4.5,<7.0" @@ -174,13 +174,13 @@ dev = ["black", "coverage", "isort", "pre-commit", "pyenchant", "pylint"] [[package]] name = "async-timeout" -version = "4.0.3" +version = "5.0.1" description = "Timeout context manager for asyncio programs" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, - {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, + {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, + {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, ] [[package]] @@ -780,6 +780,21 @@ wrapt = ">=1.10,<2" [package.extras] dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] +[[package]] +name = "dill" +version = "0.3.9" +description = "serialize all of Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "dill-0.3.9-py3-none-any.whl", hash = "sha256:468dff3b89520b474c0397703366b7b95eebe6303f108adf9b19da1f702be87a"}, + {file = "dill-0.3.9.tar.gz", hash = "sha256:81aa267dddf68cbfe8029c42ca9ec6a4ab3b22371d1c450abc54422577b4512c"}, +] + +[package.extras] +graph = ["objgraph (>=1.7.2)"] +profile = ["gprof2dot (>=2022.7.29)"] + [[package]] name = "et-xmlfile" version = "2.0.0" @@ -1993,6 +2008,34 @@ files = [ [package.dependencies] typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.11\""} +[[package]] +name = "multiprocess" +version = "0.70.17" +description = "better multiprocessing and multithreading in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "multiprocess-0.70.17-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7ddb24e5bcdb64e90ec5543a1f05a39463068b6d3b804aa3f2a4e16ec28562d6"}, + {file = "multiprocess-0.70.17-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d729f55198a3579f6879766a6d9b72b42d4b320c0dcb7844afb774d75b573c62"}, + {file = "multiprocess-0.70.17-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c2c82d0375baed8d8dd0d8c38eb87c5ae9c471f8e384ad203a36f095ee860f67"}, + {file = "multiprocess-0.70.17-pp38-pypy38_pp73-macosx_10_9_arm64.whl", hash = "sha256:a22a6b1a482b80eab53078418bb0f7025e4f7d93cc8e1f36481477a023884861"}, + {file = "multiprocess-0.70.17-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:349525099a0c9ac5936f0488b5ee73199098dac3ac899d81d326d238f9fd3ccd"}, + {file = "multiprocess-0.70.17-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:27b8409c02b5dd89d336107c101dfbd1530a2cd4fd425fc27dcb7adb6e0b47bf"}, + {file = "multiprocess-0.70.17-pp39-pypy39_pp73-macosx_10_13_arm64.whl", hash = "sha256:2ea0939b0f4760a16a548942c65c76ff5afd81fbf1083c56ae75e21faf92e426"}, + {file = "multiprocess-0.70.17-pp39-pypy39_pp73-macosx_10_13_x86_64.whl", hash = "sha256:2b12e081df87ab755190e227341b2c3b17ee6587e9c82fecddcbe6aa812cd7f7"}, + {file = "multiprocess-0.70.17-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:a0f01cd9d079af7a8296f521dc03859d1a414d14c1e2b6e676ef789333421c95"}, + {file = "multiprocess-0.70.17-py310-none-any.whl", hash = "sha256:38357ca266b51a2e22841b755d9a91e4bb7b937979a54d411677111716c32744"}, + {file = "multiprocess-0.70.17-py311-none-any.whl", hash = "sha256:2884701445d0177aec5bd5f6ee0df296773e4fb65b11903b94c613fb46cfb7d1"}, + {file = "multiprocess-0.70.17-py312-none-any.whl", hash = "sha256:2818af14c52446b9617d1b0755fa70ca2f77c28b25ed97bdaa2c69a22c47b46c"}, + {file = "multiprocess-0.70.17-py313-none-any.whl", hash = "sha256:20c28ca19079a6c879258103a6d60b94d4ffe2d9da07dda93fb1c8bc6243f522"}, + {file = "multiprocess-0.70.17-py38-none-any.whl", hash = "sha256:1d52f068357acd1e5bbc670b273ef8f81d57863235d9fbf9314751886e141968"}, + {file = "multiprocess-0.70.17-py39-none-any.whl", hash = "sha256:c3feb874ba574fbccfb335980020c1ac631fbf2a3f7bee4e2042ede62558a021"}, + {file = "multiprocess-0.70.17.tar.gz", hash = "sha256:4ae2f11a3416809ebc9a48abfc8b14ecce0652a0944731a1493a3c1ba44ff57a"}, +] + +[package.dependencies] +dill = ">=0.3.9" + [[package]] name = "mypy-extensions" version = "1.0.0" @@ -2137,13 +2180,13 @@ files = [ [[package]] name = "packaging" -version = "24.1" +version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, - {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] [[package]] @@ -2744,105 +2787,105 @@ rpds-py = ">=0.7.0" [[package]] name = "regex" -version = "2024.9.11" +version = "2024.11.6" description = "Alternative regular expression module, to replace re." optional = false python-versions = ">=3.8" files = [ - {file = "regex-2024.9.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1494fa8725c285a81d01dc8c06b55287a1ee5e0e382d8413adc0a9197aac6408"}, - {file = "regex-2024.9.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0e12c481ad92d129c78f13a2a3662317e46ee7ef96c94fd332e1c29131875b7d"}, - {file = "regex-2024.9.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:16e13a7929791ac1216afde26f712802e3df7bf0360b32e4914dca3ab8baeea5"}, - {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46989629904bad940bbec2106528140a218b4a36bb3042d8406980be1941429c"}, - {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a906ed5e47a0ce5f04b2c981af1c9acf9e8696066900bf03b9d7879a6f679fc8"}, - {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9a091b0550b3b0207784a7d6d0f1a00d1d1c8a11699c1a4d93db3fbefc3ad35"}, - {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ddcd9a179c0a6fa8add279a4444015acddcd7f232a49071ae57fa6e278f1f71"}, - {file = "regex-2024.9.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6b41e1adc61fa347662b09398e31ad446afadff932a24807d3ceb955ed865cc8"}, - {file = "regex-2024.9.11-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ced479f601cd2f8ca1fd7b23925a7e0ad512a56d6e9476f79b8f381d9d37090a"}, - {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:635a1d96665f84b292e401c3d62775851aedc31d4f8784117b3c68c4fcd4118d"}, - {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:c0256beda696edcf7d97ef16b2a33a8e5a875affd6fa6567b54f7c577b30a137"}, - {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:3ce4f1185db3fbde8ed8aa223fc9620f276c58de8b0d4f8cc86fd1360829edb6"}, - {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:09d77559e80dcc9d24570da3745ab859a9cf91953062e4ab126ba9d5993688ca"}, - {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7a22ccefd4db3f12b526eccb129390942fe874a3a9fdbdd24cf55773a1faab1a"}, - {file = "regex-2024.9.11-cp310-cp310-win32.whl", hash = "sha256:f745ec09bc1b0bd15cfc73df6fa4f726dcc26bb16c23a03f9e3367d357eeedd0"}, - {file = "regex-2024.9.11-cp310-cp310-win_amd64.whl", hash = "sha256:01c2acb51f8a7d6494c8c5eafe3d8e06d76563d8a8a4643b37e9b2dd8a2ff623"}, - {file = "regex-2024.9.11-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2cce2449e5927a0bf084d346da6cd5eb016b2beca10d0013ab50e3c226ffc0df"}, - {file = "regex-2024.9.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b37fa423beefa44919e009745ccbf353d8c981516e807995b2bd11c2c77d268"}, - {file = "regex-2024.9.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:64ce2799bd75039b480cc0360907c4fb2f50022f030bf9e7a8705b636e408fad"}, - {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4cc92bb6db56ab0c1cbd17294e14f5e9224f0cc6521167ef388332604e92679"}, - {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d05ac6fa06959c4172eccd99a222e1fbf17b5670c4d596cb1e5cde99600674c4"}, - {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:040562757795eeea356394a7fb13076ad4f99d3c62ab0f8bdfb21f99a1f85664"}, - {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6113c008a7780792efc80f9dfe10ba0cd043cbf8dc9a76ef757850f51b4edc50"}, - {file = "regex-2024.9.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e5fb5f77c8745a60105403a774fe2c1759b71d3e7b4ca237a5e67ad066c7199"}, - {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:54d9ff35d4515debf14bc27f1e3b38bfc453eff3220f5bce159642fa762fe5d4"}, - {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:df5cbb1fbc74a8305b6065d4ade43b993be03dbe0f8b30032cced0d7740994bd"}, - {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7fb89ee5d106e4a7a51bce305ac4efb981536301895f7bdcf93ec92ae0d91c7f"}, - {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:a738b937d512b30bf75995c0159c0ddf9eec0775c9d72ac0202076c72f24aa96"}, - {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e28f9faeb14b6f23ac55bfbbfd3643f5c7c18ede093977f1df249f73fd22c7b1"}, - {file = "regex-2024.9.11-cp311-cp311-win32.whl", hash = "sha256:18e707ce6c92d7282dfce370cd205098384b8ee21544e7cb29b8aab955b66fa9"}, - {file = "regex-2024.9.11-cp311-cp311-win_amd64.whl", hash = "sha256:313ea15e5ff2a8cbbad96ccef6be638393041b0a7863183c2d31e0c6116688cf"}, - {file = "regex-2024.9.11-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b0d0a6c64fcc4ef9c69bd5b3b3626cc3776520a1637d8abaa62b9edc147a58f7"}, - {file = "regex-2024.9.11-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:49b0e06786ea663f933f3710a51e9385ce0cba0ea56b67107fd841a55d56a231"}, - {file = "regex-2024.9.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5b513b6997a0b2f10e4fd3a1313568e373926e8c252bd76c960f96fd039cd28d"}, - {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee439691d8c23e76f9802c42a95cfeebf9d47cf4ffd06f18489122dbb0a7ad64"}, - {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a8f877c89719d759e52783f7fe6e1c67121076b87b40542966c02de5503ace42"}, - {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:23b30c62d0f16827f2ae9f2bb87619bc4fba2044911e2e6c2eb1af0161cdb766"}, - {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85ab7824093d8f10d44330fe1e6493f756f252d145323dd17ab6b48733ff6c0a"}, - {file = "regex-2024.9.11-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8dee5b4810a89447151999428fe096977346cf2f29f4d5e29609d2e19e0199c9"}, - {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:98eeee2f2e63edae2181c886d7911ce502e1292794f4c5ee71e60e23e8d26b5d"}, - {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:57fdd2e0b2694ce6fc2e5ccf189789c3e2962916fb38779d3e3521ff8fe7a822"}, - {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:d552c78411f60b1fdaafd117a1fca2f02e562e309223b9d44b7de8be451ec5e0"}, - {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a0b2b80321c2ed3fcf0385ec9e51a12253c50f146fddb2abbb10f033fe3d049a"}, - {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:18406efb2f5a0e57e3a5881cd9354c1512d3bb4f5c45d96d110a66114d84d23a"}, - {file = "regex-2024.9.11-cp312-cp312-win32.whl", hash = "sha256:e464b467f1588e2c42d26814231edecbcfe77f5ac414d92cbf4e7b55b2c2a776"}, - {file = "regex-2024.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:9e8719792ca63c6b8340380352c24dcb8cd7ec49dae36e963742a275dfae6009"}, - {file = "regex-2024.9.11-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c157bb447303070f256e084668b702073db99bbb61d44f85d811025fcf38f784"}, - {file = "regex-2024.9.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4db21ece84dfeefc5d8a3863f101995de646c6cb0536952c321a2650aa202c36"}, - {file = "regex-2024.9.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:220e92a30b426daf23bb67a7962900ed4613589bab80382be09b48896d211e92"}, - {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb1ae19e64c14c7ec1995f40bd932448713d3c73509e82d8cd7744dc00e29e86"}, - {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f47cd43a5bfa48f86925fe26fbdd0a488ff15b62468abb5d2a1e092a4fb10e85"}, - {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9d4a76b96f398697fe01117093613166e6aa8195d63f1b4ec3f21ab637632963"}, - {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ea51dcc0835eea2ea31d66456210a4e01a076d820e9039b04ae8d17ac11dee6"}, - {file = "regex-2024.9.11-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7aaa315101c6567a9a45d2839322c51c8d6e81f67683d529512f5bcfb99c802"}, - {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c57d08ad67aba97af57a7263c2d9006d5c404d721c5f7542f077f109ec2a4a29"}, - {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:f8404bf61298bb6f8224bb9176c1424548ee1181130818fcd2cbffddc768bed8"}, - {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dd4490a33eb909ef5078ab20f5f000087afa2a4daa27b4c072ccb3cb3050ad84"}, - {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:eee9130eaad130649fd73e5cd92f60e55708952260ede70da64de420cdcad554"}, - {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6a2644a93da36c784e546de579ec1806bfd2763ef47babc1b03d765fe560c9f8"}, - {file = "regex-2024.9.11-cp313-cp313-win32.whl", hash = "sha256:e997fd30430c57138adc06bba4c7c2968fb13d101e57dd5bb9355bf8ce3fa7e8"}, - {file = "regex-2024.9.11-cp313-cp313-win_amd64.whl", hash = "sha256:042c55879cfeb21a8adacc84ea347721d3d83a159da6acdf1116859e2427c43f"}, - {file = "regex-2024.9.11-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:35f4a6f96aa6cb3f2f7247027b07b15a374f0d5b912c0001418d1d55024d5cb4"}, - {file = "regex-2024.9.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:55b96e7ce3a69a8449a66984c268062fbaa0d8ae437b285428e12797baefce7e"}, - {file = "regex-2024.9.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cb130fccd1a37ed894824b8c046321540263013da72745d755f2d35114b81a60"}, - {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:323c1f04be6b2968944d730e5c2091c8c89767903ecaa135203eec4565ed2b2b"}, - {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be1c8ed48c4c4065ecb19d882a0ce1afe0745dfad8ce48c49586b90a55f02366"}, - {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b5b029322e6e7b94fff16cd120ab35a253236a5f99a79fb04fda7ae71ca20ae8"}, - {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6fff13ef6b5f29221d6904aa816c34701462956aa72a77f1f151a8ec4f56aeb"}, - {file = "regex-2024.9.11-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:587d4af3979376652010e400accc30404e6c16b7df574048ab1f581af82065e4"}, - {file = "regex-2024.9.11-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:079400a8269544b955ffa9e31f186f01d96829110a3bf79dc338e9910f794fca"}, - {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:f9268774428ec173654985ce55fc6caf4c6d11ade0f6f914d48ef4719eb05ebb"}, - {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:23f9985c8784e544d53fc2930fc1ac1a7319f5d5332d228437acc9f418f2f168"}, - {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:ae2941333154baff9838e88aa71c1d84f4438189ecc6021a12c7573728b5838e"}, - {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:e93f1c331ca8e86fe877a48ad64e77882c0c4da0097f2212873a69bbfea95d0c"}, - {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:846bc79ee753acf93aef4184c040d709940c9d001029ceb7b7a52747b80ed2dd"}, - {file = "regex-2024.9.11-cp38-cp38-win32.whl", hash = "sha256:c94bb0a9f1db10a1d16c00880bdebd5f9faf267273b8f5bd1878126e0fbde771"}, - {file = "regex-2024.9.11-cp38-cp38-win_amd64.whl", hash = "sha256:2b08fce89fbd45664d3df6ad93e554b6c16933ffa9d55cb7e01182baaf971508"}, - {file = "regex-2024.9.11-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:07f45f287469039ffc2c53caf6803cd506eb5f5f637f1d4acb37a738f71dd066"}, - {file = "regex-2024.9.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4838e24ee015101d9f901988001038f7f0d90dc0c3b115541a1365fb439add62"}, - {file = "regex-2024.9.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6edd623bae6a737f10ce853ea076f56f507fd7726bee96a41ee3d68d347e4d16"}, - {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c69ada171c2d0e97a4b5aa78fbb835e0ffbb6b13fc5da968c09811346564f0d3"}, - {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02087ea0a03b4af1ed6ebab2c54d7118127fee8d71b26398e8e4b05b78963199"}, - {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:69dee6a020693d12a3cf892aba4808fe168d2a4cef368eb9bf74f5398bfd4ee8"}, - {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:297f54910247508e6e5cae669f2bc308985c60540a4edd1c77203ef19bfa63ca"}, - {file = "regex-2024.9.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ecea58b43a67b1b79805f1a0255730edaf5191ecef84dbc4cc85eb30bc8b63b9"}, - {file = "regex-2024.9.11-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:eab4bb380f15e189d1313195b062a6aa908f5bd687a0ceccd47c8211e9cf0d4a"}, - {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0cbff728659ce4bbf4c30b2a1be040faafaa9eca6ecde40aaff86f7889f4ab39"}, - {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:54c4a097b8bc5bb0dfc83ae498061d53ad7b5762e00f4adaa23bee22b012e6ba"}, - {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:73d6d2f64f4d894c96626a75578b0bf7d9e56dcda8c3d037a2118fdfe9b1c664"}, - {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:e53b5fbab5d675aec9f0c501274c467c0f9a5d23696cfc94247e1fb56501ed89"}, - {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:0ffbcf9221e04502fc35e54d1ce9567541979c3fdfb93d2c554f0ca583a19b35"}, - {file = "regex-2024.9.11-cp39-cp39-win32.whl", hash = "sha256:e4c22e1ac1f1ec1e09f72e6c44d8f2244173db7eb9629cc3a346a8d7ccc31142"}, - {file = "regex-2024.9.11-cp39-cp39-win_amd64.whl", hash = "sha256:faa3c142464efec496967359ca99696c896c591c56c53506bac1ad465f66e919"}, - {file = "regex-2024.9.11.tar.gz", hash = "sha256:6c188c307e8433bcb63dc1915022deb553b4203a70722fc542c363bf120a01fd"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:164d8b7b3b4bcb2068b97428060b2a53be050085ef94eca7f240e7947f1b080e"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3660c82f209655a06b587d55e723f0b813d3a7db2e32e5e7dc64ac2a9e86fde"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d22326fcdef5e08c154280b71163ced384b428343ae16a5ab2b3354aed12436e"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1ac758ef6aebfc8943560194e9fd0fa18bcb34d89fd8bd2af18183afd8da3a2"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:997d6a487ff00807ba810e0f8332c18b4eb8d29463cfb7c820dc4b6e7562d0cf"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f02f93b92358ee3f78660e43b4b0091229260c5d5c408d17d60bf26b6c900e86"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:06eb1be98df10e81ebaded73fcd51989dcf534e3c753466e4b60c4697a003b67"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:040df6fe1a5504eb0f04f048e6d09cd7c7110fef851d7c567a6b6e09942feb7d"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabbfc59f2c6edba2a6622c647b716e34e8e3867e0ab975412c5c2f79b82da2"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8447d2d39b5abe381419319f942de20b7ecd60ce86f16a23b0698f22e1b70008"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:da8f5fc57d1933de22a9e23eec290a0d8a5927a5370d24bda9a6abe50683fe62"}, + {file = "regex-2024.11.6-cp310-cp310-win32.whl", hash = "sha256:b489578720afb782f6ccf2840920f3a32e31ba28a4b162e13900c3e6bd3f930e"}, + {file = "regex-2024.11.6-cp310-cp310-win_amd64.whl", hash = "sha256:5071b2093e793357c9d8b2929dfc13ac5f0a6c650559503bb81189d0a3814519"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45"}, + {file = "regex-2024.11.6-cp311-cp311-win32.whl", hash = "sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9"}, + {file = "regex-2024.11.6-cp311-cp311-win_amd64.whl", hash = "sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad"}, + {file = "regex-2024.11.6-cp312-cp312-win32.whl", hash = "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54"}, + {file = "regex-2024.11.6-cp312-cp312-win_amd64.whl", hash = "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d"}, + {file = "regex-2024.11.6-cp313-cp313-win32.whl", hash = "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff"}, + {file = "regex-2024.11.6-cp313-cp313-win_amd64.whl", hash = "sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3a51ccc315653ba012774efca4f23d1d2a8a8f278a6072e29c7147eee7da446b"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ad182d02e40de7459b73155deb8996bbd8e96852267879396fb274e8700190e3"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ba9b72e5643641b7d41fa1f6d5abda2c9a263ae835b917348fc3c928182ad467"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40291b1b89ca6ad8d3f2b82782cc33807f1406cf68c8d440861da6304d8ffbbd"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cdf58d0e516ee426a48f7b2c03a332a4114420716d55769ff7108c37a09951bf"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a36fdf2af13c2b14738f6e973aba563623cb77d753bbbd8d414d18bfaa3105dd"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1cee317bfc014c2419a76bcc87f071405e3966da434e03e13beb45f8aced1a6"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50153825ee016b91549962f970d6a4442fa106832e14c918acd1c8e479916c4f"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea1bfda2f7162605f6e8178223576856b3d791109f15ea99a9f95c16a7636fb5"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:df951c5f4a1b1910f1a99ff42c473ff60f8225baa1cdd3539fe2819d9543e9df"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:072623554418a9911446278f16ecb398fb3b540147a7828c06e2011fa531e773"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f654882311409afb1d780b940234208a252322c24a93b442ca714d119e68086c"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:89d75e7293d2b3e674db7d4d9b1bee7f8f3d1609428e293771d1a962617150cc"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:f65557897fc977a44ab205ea871b690adaef6b9da6afda4790a2484b04293a5f"}, + {file = "regex-2024.11.6-cp38-cp38-win32.whl", hash = "sha256:6f44ec28b1f858c98d3036ad5d7d0bfc568bdd7a74f9c24e25f41ef1ebfd81a4"}, + {file = "regex-2024.11.6-cp38-cp38-win_amd64.whl", hash = "sha256:bb8f74f2f10dbf13a0be8de623ba4f9491faf58c24064f32b65679b021ed0001"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5704e174f8ccab2026bd2f1ab6c510345ae8eac818b613d7d73e785f1310f839"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:220902c3c5cc6af55d4fe19ead504de80eb91f786dc102fbd74894b1551f095e"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e7e351589da0850c125f1600a4c4ba3c722efefe16b297de54300f08d734fbf"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5056b185ca113c88e18223183aa1a50e66507769c9640a6ff75859619d73957b"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e34b51b650b23ed3354b5a07aab37034d9f923db2a40519139af34f485f77d0"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5670bce7b200273eee1840ef307bfa07cda90b38ae56e9a6ebcc9f50da9c469b"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08986dce1339bc932923e7d1232ce9881499a0e02925f7402fb7c982515419ef"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93c0b12d3d3bc25af4ebbf38f9ee780a487e8bf6954c115b9f015822d3bb8e48"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:764e71f22ab3b305e7f4c21f1a97e1526a25ebdd22513e251cf376760213da13"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f056bf21105c2515c32372bbc057f43eb02aae2fda61052e2f7622c801f0b4e2"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:69ab78f848845569401469da20df3e081e6b5a11cb086de3eed1d48f5ed57c95"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:86fddba590aad9208e2fa8b43b4c098bb0ec74f15718bb6a704e3c63e2cef3e9"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:684d7a212682996d21ca12ef3c17353c021fe9de6049e19ac8481ec35574a70f"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a03e02f48cd1abbd9f3b7e3586d97c8f7a9721c436f51a5245b3b9483044480b"}, + {file = "regex-2024.11.6-cp39-cp39-win32.whl", hash = "sha256:41758407fc32d5c3c5de163888068cfee69cb4c2be844e7ac517a52770f9af57"}, + {file = "regex-2024.11.6-cp39-cp39-win_amd64.whl", hash = "sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983"}, + {file = "regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519"}, ] [[package]] @@ -3178,13 +3221,13 @@ files = [ [[package]] name = "tqdm" -version = "4.66.6" +version = "4.67.0" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.66.6-py3-none-any.whl", hash = "sha256:223e8b5359c2efc4b30555531f09e9f2f3589bcd7fdd389271191031b49b7a63"}, - {file = "tqdm-4.66.6.tar.gz", hash = "sha256:4bdd694238bef1485ce839d67967ab50af8f9272aab687c0d7702a01da0be090"}, + {file = "tqdm-4.67.0-py3-none-any.whl", hash = "sha256:0cd8af9d56911acab92182e88d763100d4788bdf421d251616040cc4d44863be"}, + {file = "tqdm-4.67.0.tar.gz", hash = "sha256:fe5a6f95e6fe0b9755e9469b77b9c3cf850048224ecaa8293d7d2d31f97d869a"}, ] [package.dependencies] @@ -3192,6 +3235,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +discord = ["requests"] notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] @@ -3584,4 +3628,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<4" -content-hash = "d8c3593acde44d26f9899efe4bd5e4c580e1be4462804f1b8121a5f2ab0ae6cf" +content-hash = "4c8403176d9acbf8ee3bbea620acfee78a483e234c668ff6c3acbc0b55677638" diff --git a/pyproject.toml b/pyproject.toml index 546714def..f7b831ada 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,8 @@ pottery = "3.0.0" eth_typing = ">=2.2.0,<5" orjson = "3.10.7" mpire = "2.10.2" +dill = "0.3.9" +multiprocess = "0.70.17" PyYAML = "6.0.2" numpy = "1.24.4" From aedb9741f2a8f5a69274fd78d6a229428f4fd51b Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Fri, 15 Nov 2024 16:01:46 +0800 Subject: [PATCH 11/52] a new pg service --- common/services/postgresql_service.py | 272 ++++++++++++------ indexer/controller/scheduler/job_scheduler.py | 8 +- .../controller/scheduler/reorg_scheduler.py | 7 +- indexer/controller/stream_controller.py | 2 +- indexer/exporters/postgres_item_exporter.py | 169 +++++------ indexer/utils/exception_recorder.py | 18 +- 6 files changed, 267 insertions(+), 209 deletions(-) diff --git a/common/services/postgresql_service.py b/common/services/postgresql_service.py index 5b1da7a86..fbc540616 100644 --- a/common/services/postgresql_service.py +++ b/common/services/postgresql_service.py @@ -1,105 +1,207 @@ import os +import threading from contextlib import contextmanager +from typing import Optional from alembic import command from alembic.config import Config -from psycopg2 import pool +from psycopg2.pool import ThreadedConnectionPool from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker - - -@contextmanager -def session_scope(session): - try: - yield session - session.commit() - except: - session.rollback() - raise - finally: - session.close() - - -class PostgreSQLService(object): - _jdbc_instance = {} - _jdbc_initialized = set() - - def __new__(cls, jdbc_url, *args, **kwargs): - if jdbc_url not in cls._jdbc_instance: - instance = super().__new__(cls) - cls._jdbc_instance[jdbc_url] = instance - return cls._jdbc_instance[jdbc_url] - - def __init__(self, jdbc_url, db_version="head", script_location="migrations", init_schema=False): - if jdbc_url not in self._jdbc_initialized: - self.db_version = db_version - self.engine = create_engine( - jdbc_url, - pool_size=10, - max_overflow=10, - pool_timeout=30, - pool_recycle=60, - connect_args={"application_name": "hemera_indexer"}, - ) - self.jdbc_url = jdbc_url - self.connection_pool = pool.SimpleConnectionPool(1, 10, jdbc_url) - - self.Session = sessionmaker(bind=self.engine) - if init_schema: - self.init_schema(script_location) - self._jdbc_initialized.add(jdbc_url) - - def get_conn(self): - return self.connection_pool.getconn() - - def release_conn(self, conn): - self.connection_pool.putconn(conn) - - def init_schema(self, script_location): +from sqlalchemy.engine import Engine +from sqlalchemy.orm import Session, sessionmaker +from sqlalchemy.pool import QueuePool + + +class PostgreSQLService: + """ + A thread-safe PostgreSQL service class that manages database connections and sessions. + Implements singleton pattern per JDBC URL to avoid multiple connection pools to the same database. + """ + + _instances: dict = {} + _initialized: set = set() + _lock: threading.Lock = threading.Lock() + + def __new__(cls, jdbc_url: str, *args, **kwargs) -> "PostgreSQLService": + """ + Ensures only one instance exists per JDBC URL. + """ + if jdbc_url not in cls._instances: + with cls._lock: + if jdbc_url not in cls._instances: + instance = super().__new__(cls) + cls._instances[jdbc_url] = instance + return cls._instances[jdbc_url] + + def __init__( + self, + jdbc_url: str, + min_connections: int = 5, + max_connections: int = 20, + pool_size: int = 10, + max_overflow: int = 10, + pool_timeout: int = 30, + pool_recycle: int = 1800, # 30 minutes + application_name: str = "postgresql_service", + db_version: str = "head", + script_location: str = "migrations", + init_schema: bool = False, + ): + """ + Initialize the PostgreSQL service with connection pooling. + """ + if jdbc_url in self._initialized: + return + + self.jdbc_url: str = jdbc_url + self.db_version: str = db_version + + # Initialize SQLAlchemy engine with better defaults + self.engine: Engine = create_engine( + jdbc_url, + poolclass=QueuePool, + pool_size=pool_size, + max_overflow=max_overflow, + pool_timeout=pool_timeout, + pool_recycle=pool_recycle, + pool_pre_ping=True, # Enable connection health checks + connect_args={ + "application_name": application_name, + "keepalives": 1, + "keepalives_idle": 30, + "keepalives_interval": 10, + "keepalives_count": 5, + }, + ) + + # Initialize psycopg2 connection pool + self.connection_pool: ThreadedConnectionPool = ThreadedConnectionPool( + min_connections, + max_connections, + jdbc_url, + keepalives=1, + keepalives_idle=30, + keepalives_interval=10, + keepalives_count=5, + application_name=application_name, + ) + + # Initialize session factory + self.Session = sessionmaker(bind=self.engine, expire_on_commit=False) + + if init_schema: + self._init_schema(script_location) + + self._initialized.add(jdbc_url) + + def _init_schema(self, script_location: str) -> None: + """ + Initialize database schema using Alembic migrations. + """ alembic_cfg = Config() - # Set script location and version path separator alembic_cfg.set_main_option("script_location", script_location) - alembic_cfg.set_main_option("version_path_separator", os.pathsep) - - # Set the database connection URL alembic_cfg.set_main_option("sqlalchemy.url", self.jdbc_url) - # Configure log settings - alembic_cfg.set_main_option("loggers", "root,sqlalchemy,alembic") - alembic_cfg.set_main_option("handlers", "console") - alembic_cfg.set_main_option("formatters", "generic") - - # Configure root logger - alembic_cfg.set_section_option("logger_root", "level", "WARN") - alembic_cfg.set_section_option("logger_root", "handlers", "console") - alembic_cfg.set_section_option("logger_root", "qualname", "") + # Configure logging + self._configure_alembic_logging(alembic_cfg) - # Configure SQLAlchemy logger - alembic_cfg.set_section_option("logger_sqlalchemy", "level", "WARN") - alembic_cfg.set_section_option("logger_sqlalchemy", "handlers", "") - alembic_cfg.set_section_option("logger_sqlalchemy", "qualname", "sqlalchemy.engine") + command.upgrade(alembic_cfg, self.db_version) - # Configure Alembic logger - alembic_cfg.set_section_option("logger_alembic", "level", "INFO") - alembic_cfg.set_section_option("logger_alembic", "handlers", "") - alembic_cfg.set_section_option("logger_alembic", "qualname", "alembic") + def _configure_alembic_logging(self, config: Config) -> None: + """ + Configure Alembic logging settings. + """ + config.set_main_option("loggers", "root,sqlalchemy,alembic") + config.set_main_option("handlers", "console") + config.set_main_option("formatters", "generic") + + # Logger configurations + loggers = { + "root": ("WARN", "console", ""), + "sqlalchemy": ("WARN", "", "sqlalchemy.engine"), + "alembic": ("INFO", "", "alembic"), + } + + for logger, (level, handlers, qualname) in loggers.items(): + section = f"logger_{logger}" + config.set_section_option(section, "level", level) + config.set_section_option(section, "handlers", handlers) + config.set_section_option(section, "qualname", qualname) # Configure console handler - alembic_cfg.set_section_option("handler_console", "class", "StreamHandler") - alembic_cfg.set_section_option("handler_console", "args", "(sys.stderr,)") - alembic_cfg.set_section_option("handler_console", "level", "NOTSET") - alembic_cfg.set_section_option("handler_console", "formatter", "generic") - - command.upgrade(alembic_cfg, self.db_version) - - def get_service_uri(self): + config.set_section_option("handler_console", "class", "StreamHandler") + config.set_section_option("handler_console", "args", "(sys.stderr,)") + config.set_section_option("handler_console", "level", "NOTSET") + config.set_section_option("handler_console", "formatter", "generic") + + @contextmanager + def session_scope(self) -> Session: + """ + Provide a transactional scope around a series of operations. + """ + session = self.Session() + try: + yield session + session.commit() + except Exception: + session.rollback() + raise + finally: + session.close() + + @contextmanager + def connection_scope(self): + """ + Provide a scope for raw database connection usage. + """ + conn = self.connection_pool.getconn() + try: + yield conn + finally: + self.connection_pool.putconn(conn) + + @contextmanager + def cursor_scope(self): + """ + Provide a scope for cursor operations. + """ + with self.connection_scope() as conn: + with conn.cursor() as cursor: + yield cursor + + def close(self) -> None: + """ + Close all connections and clean up resources. + """ + if hasattr(self, "connection_pool"): + self.connection_pool.closeall() + if hasattr(self, "engine"): + self.engine.dispose() + + def __del__(self) -> None: + """ + Ensure resources are cleaned up when the instance is deleted. + """ + self.close() + + # Convenience methods for backward compatibility + def get_service_uri(self) -> str: return self.jdbc_url - def get_service_engine(self): + def get_service_engine(self) -> Engine: return self.engine - def get_service_session(self): + def get_service_session(self) -> Session: return self.Session() - def get_service_connection(self): - return self.engine.connect() + def get_connection(self): + """ + @deprecated Use connection_scope instead + """ + return self.connection_pool.getconn() + + def release_connection(self, conn) -> None: + """ + @deprecated Use connection_scope instead + """ + self.connection_pool.putconn(conn) diff --git a/indexer/controller/scheduler/job_scheduler.py b/indexer/controller/scheduler/job_scheduler.py index 75fd14965..76d1dde61 100644 --- a/indexer/controller/scheduler/job_scheduler.py +++ b/indexer/controller/scheduler/job_scheduler.py @@ -2,12 +2,10 @@ from collections import defaultdict, deque from typing import List, Set, Type -import mpire from pottery import RedisDict from redis.client import Redis from common.models.tokens import Tokens -from common.services.postgresql_service import session_scope from common.utils.format_utils import bytes_to_hex_str from common.utils.module_loading import import_submodules from enumeration.record_level import RecordLevel @@ -23,8 +21,8 @@ exception_recorder = ExceptionRecorder() -def get_tokens_from_db(session): - with session_scope(session) as s: +def get_tokens_from_db(service): + with service.session_scope() as s: dict = {} result = s.query(Tokens).all() if result is not None: @@ -96,7 +94,7 @@ def __init__( self.resolved_job_classes = self.resolve_dependencies(self.required_job_classes) token_dict_from_db = defaultdict() if self.pg_service is not None: - token_dict_from_db = get_tokens_from_db(self.pg_service.get_service_session()) + token_dict_from_db = get_tokens_from_db(self.pg_service) if cache is None or cache == "memory": BaseJob.init_token_cache(token_dict_from_db) else: diff --git a/indexer/controller/scheduler/reorg_scheduler.py b/indexer/controller/scheduler/reorg_scheduler.py index e8b2c9698..f1c88c651 100644 --- a/indexer/controller/scheduler/reorg_scheduler.py +++ b/indexer/controller/scheduler/reorg_scheduler.py @@ -6,7 +6,6 @@ from redis.client import Redis from common.models.tokens import Tokens -from common.services.postgresql_service import session_scope from common.utils.format_utils import bytes_to_hex_str from common.utils.module_loading import import_submodules from indexer.jobs import FilterTransactionDataJob @@ -17,8 +16,8 @@ import_submodules("indexer.modules") -def get_tokens_from_db(session): - with session_scope(session) as s: +def get_tokens_from_db(service): + with service.session_scope() as s: dict = {} result = s.query(Tokens).all() if result is not None: @@ -69,7 +68,7 @@ def __init__( self.resolved_job_classes = self.resolve_dependencies(self.required_job_classes) token_dict_from_db = defaultdict() if self.pg_service is not None: - token_dict_from_db = get_tokens_from_db(self.pg_service.get_service_session()) + token_dict_from_db = get_tokens_from_db(self.pg_service) if cache is None or cache == "memory": BaseJob.init_token_cache(token_dict_from_db) else: diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index 7c0033815..7968dd773 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -19,7 +19,7 @@ M_JOBS: int = int(os.environ.get("M_JOBS", 4)) M_TIMEOUT: int = int(os.environ.get("M_TIMEOUT", 30)) -M_SIZE: int = int(os.environ.get("M_SIZE", 1000)) +M_SIZE: int = int(os.environ.get("M_SIZE", 10)) class StreamController(BaseController): diff --git a/indexer/exporters/postgres_item_exporter.py b/indexer/exporters/postgres_item_exporter.py index 7fa79aaad..4d7edb562 100644 --- a/indexer/exporters/postgres_item_exporter.py +++ b/indexer/exporters/postgres_item_exporter.py @@ -1,9 +1,6 @@ import logging -import os -from datetime import datetime from typing import Type -from dateutil.tz import tzlocal from psycopg2.extras import execute_values from tqdm import tqdm @@ -15,10 +12,6 @@ COMMIT_BATCH_SIZE = 500 -from multiprocessing import RLock - -lock = RLock() - class TqdmExtraFormat(tqdm): """Provides both estimated and actual total time format parameters""" @@ -33,9 +26,6 @@ def format_dict(self): return d -M_LOCK_TIME: int = int(os.environ.get("M_LOCK_TIME", 20)) - - class PostgresItemExporter(BaseExporter): def __init__(self, service): self.service = service @@ -43,97 +33,82 @@ def __init__(self, service): self.sub_progress = None def export_items(self, items, **kwargs): - if lock.acquire(timeout=M_LOCK_TIME): - try: + # Initialize main progress bar + if kwargs.get("job_name"): + job_name = kwargs.get("job_name") + desc = f"{job_name}(PG)" + else: + desc = "Exporting items" + self.main_progress = TqdmExtraFormat( + total=len(items), + desc=desc.ljust(35), + unit="items", + position=0, + ncols=90, + bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}] Est: {total_time}", + ) + with self.service.cursor_scope() as cur: - start_time = datetime.now(tzlocal()) - - # Initialize main progress bar - if kwargs.get("job_name"): - job_name = kwargs.get("job_name") - desc = f"{job_name}(PG)" - else: - desc = "Exporting items" - self.main_progress = TqdmExtraFormat( - total=len(items), - desc=desc.ljust(35), - unit="items", - position=0, - ncols=90, - bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}] Est: {total_time}", - ) - - conn = self.service.get_conn() - try: - insert_stmt = "" - items_grouped_by_type = group_by_item_type(items) - tables = [] - - # Process each item type - for item_type in items_grouped_by_type.keys(): - item_group = items_grouped_by_type.get(item_type) - - if item_group: - pg_config = domain_model_mapping[item_type] - table = pg_config["table"] - do_update = pg_config["conflict_do_update"] - update_strategy = pg_config["update_strategy"] - converter = pg_config["converter"] - - # Initialize sub-progress bar for current table - self.sub_progress = TqdmExtraFormat( - total=len(item_group), - desc=f"Processing {table.__tablename__}".ljust(35), - unit="items", - position=1, - leave=False, - ncols=90, - bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]", - ) - - cur = conn.cursor() - data = [] - - # Process items with progress tracking - for item in item_group: - converted_item = converter(table, item, do_update) - data.append(converted_item) - self.sub_progress.update(1) - self.main_progress.update(1) - - if data: - columns = list(data[0].keys()) - values = [tuple(d.values()) for d in data] - - insert_stmt = sql_insert_statement( - table, do_update, columns, where_clause=update_strategy - ) - - # Execute in batches with progress tracking - for i in range(0, len(values), COMMIT_BATCH_SIZE): - batch = values[i : i + COMMIT_BATCH_SIZE] - execute_values(cur, insert_stmt, batch) - conn.commit() - - tables.append(table.__tablename__) - self.sub_progress.close() - - except Exception as e: - logger.error(f"Error exporting items: {e}") - logger.error(f"{insert_stmt}") - raise e - finally: - self.service.release_conn(conn) - if self.main_progress: - self.main_progress.close() - if self.sub_progress: + try: + insert_stmt = "" + items_grouped_by_type = group_by_item_type(items) + tables = [] + + # Process each item type + for item_type in items_grouped_by_type.keys(): + item_group = items_grouped_by_type.get(item_type) + + if item_group: + pg_config = domain_model_mapping[item_type] + table = pg_config["table"] + do_update = pg_config["conflict_do_update"] + update_strategy = pg_config["update_strategy"] + converter = pg_config["converter"] + + # Initialize sub-progress bar for current table + self.sub_progress = TqdmExtraFormat( + total=len(item_group), + desc=f"Processing {table.__tablename__}".ljust(35), + unit="items", + position=1, + leave=False, + ncols=90, + bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]", + ) + + data = [] + + # Process items with progress tracking + for item in item_group: + converted_item = converter(table, item, do_update) + data.append(converted_item) + self.sub_progress.update(1) + self.main_progress.update(1) + + if data: + columns = list(data[0].keys()) + values = [tuple(d.values()) for d in data] + + insert_stmt = sql_insert_statement(table, do_update, columns, where_clause=update_strategy) + + # Execute in batches with progress tracking + for i in range(0, len(values), COMMIT_BATCH_SIZE): + batch = values[i : i + COMMIT_BATCH_SIZE] + execute_values(cur, insert_stmt, batch) + cur.connection.commit() + + tables.append(table.__tablename__) self.sub_progress.close() - end_time = datetime.now(tzlocal()) + except Exception as e: + logger.error(f"Error exporting items: {e}") + logger.error(f"{insert_stmt}") + raise e finally: - lock.release() - else: - logger.error("Lock acquired but not released") + if self.main_progress: + self.main_progress.close() + if self.sub_progress: + self.sub_progress.close() def sql_insert_statement(model: Type[HemeraModel], do_update: bool, columns, where_clause=None): diff --git a/indexer/utils/exception_recorder.py b/indexer/utils/exception_recorder.py index c5ca47897..341fa0ab7 100644 --- a/indexer/utils/exception_recorder.py +++ b/indexer/utils/exception_recorder.py @@ -1,4 +1,3 @@ -import os import threading from queue import Queue @@ -9,13 +8,6 @@ LOG_BUFFER_SIZE = 5000 -from multiprocessing import RLock - -lock = RLock() - -M_LOCK_TIME: int = int(os.environ.get("M_LOCK_TIME", 20)) - - class ExceptionRecorder(object): _instance = None @@ -71,17 +63,9 @@ def _check_and_flush(self): self._flush_logs_to_db(logs) def _flush_logs_to_db(self, logs): - if lock.acquire(timeout=M_LOCK_TIME): - session = self._service.get_service_session() - + with self._service.session_scope() as session: try: statement = insert(ExceptionRecords).values(logs) session.execute(statement) - session.commit() except Exception as e: print(e) - raise e - finally: - session.close() - else: - print("failed to get lock, flush logs to db") \ No newline at end of file From f1ee4db4f9705b3afdd0d6437f6dd0e70efb00f2 Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Fri, 15 Nov 2024 16:26:45 +0800 Subject: [PATCH 12/52] a new pg service --- indexer/controller/stream_controller.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index 7968dd773..fd5d2c652 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -142,7 +142,7 @@ def _do_stream(self, start_block, end_block, steps, retry_errors, period_seconds tries_reset = False if not retry_errors or tries >= self.max_retries: logger.info(f"The number of retry is reached limit {self.max_retries}. Program will exit.") - exception_recorder.force_to_flush() + # exception_recorder.force_to_flush() raise e else: From a7f2f83c390611315db3195e596aed9fbe759c4f Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Fri, 15 Nov 2024 16:29:08 +0800 Subject: [PATCH 13/52] a new pg service --- indexer/utils/exception_recorder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/indexer/utils/exception_recorder.py b/indexer/utils/exception_recorder.py index 341fa0ab7..b3a093768 100644 --- a/indexer/utils/exception_recorder.py +++ b/indexer/utils/exception_recorder.py @@ -67,5 +67,6 @@ def _flush_logs_to_db(self, logs): try: statement = insert(ExceptionRecords).values(logs) session.execute(statement) + session.commit() except Exception as e: print(e) From 7fb38f93b8b421d60bc24da694ff97edf6833810 Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Fri, 15 Nov 2024 16:37:08 +0800 Subject: [PATCH 14/52] a new pg service --- cli/reorg.py | 4 ++-- indexer/controller/reorg_controller.py | 6 +++--- indexer/controller/stream_controller.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cli/reorg.py b/cli/reorg.py index 894692a19..591dcbe5d 100644 --- a/cli/reorg.py +++ b/cli/reorg.py @@ -14,7 +14,7 @@ from indexer.utils.rpc_utils import pick_random_provider_uri from indexer.utils.thread_local_proxy import ThreadLocalProxy -exception_recorder = ExceptionRecorder() +# exception_recorder = ExceptionRecorder() @click.command(context_settings=dict(help_option_names=["-h", "--help"])) @@ -153,7 +153,7 @@ def reorg( if postgres_url: service = PostgreSQLService(postgres_url, db_version=db_version, init_schema=auto_upgrade_db) config = {"db_service": service} - exception_recorder.init_pg_service(service) + # exception_recorder.init_pg_service(service) else: logging.error("No postgres url provided. Exception recorder will not be useful.") exit(1) diff --git a/indexer/controller/reorg_controller.py b/indexer/controller/reorg_controller.py index 83e1291f3..6cd6f48b9 100644 --- a/indexer/controller/reorg_controller.py +++ b/indexer/controller/reorg_controller.py @@ -11,9 +11,9 @@ from common.utils.format_utils import hex_str_to_bytes from common.utils.web3_utils import build_web3 from indexer.controller.base_controller import BaseController -from indexer.utils.exception_recorder import ExceptionRecorder +# from indexer.utils.exception_recorder import ExceptionRecorder -exception_recorder = ExceptionRecorder() +# exception_recorder = ExceptionRecorder() class ReorgController(BaseController): @@ -128,7 +128,7 @@ def _do_fixing(self, fix_block, retry_errors=True): tries_reset = False if not retry_errors or tries >= self.max_retries: logging.info(f"The number of retry is reached limit {self.max_retries}. Program will exit.") - exception_recorder.force_to_flush() + # exception_recorder.force_to_flush() raise e else: logging.info("After 5 seconds will retry the job.") diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index fd5d2c652..e640bdbd3 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -13,7 +13,7 @@ from indexer.utils.limit_reader import LimitReader from indexer.utils.sync_recorder import BaseRecorder -exception_recorder = ExceptionRecorder() +# exception_recorder = ExceptionRecorder() logger = logging.getLogger(__name__) From d3e508df1095dd2bdb3fdec8f285b6b5dbfe9c8f Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Fri, 15 Nov 2024 16:39:44 +0800 Subject: [PATCH 15/52] a new pg service --- indexer/controller/scheduler/job_scheduler.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/indexer/controller/scheduler/job_scheduler.py b/indexer/controller/scheduler/job_scheduler.py index 76d1dde61..5f452d13f 100644 --- a/indexer/controller/scheduler/job_scheduler.py +++ b/indexer/controller/scheduler/job_scheduler.py @@ -18,7 +18,7 @@ from indexer.utils.exception_recorder import ExceptionRecorder import_submodules("indexer.modules") -exception_recorder = ExceptionRecorder() +# exception_recorder = ExceptionRecorder() def get_tokens_from_db(service): @@ -263,14 +263,15 @@ def run_jobs(self, args): key = output_type.type() message = f"{output_type.type()} : {len(self.get_data_buff().get(output_type.type())) if self.get_data_buff().get(output_type.type()) else 0}" self.logger.info(f"{message}") - exception_recorder.log( - block_number=-1, dataclass=key, message_type="item_counter", message=message, level=RecordLevel.INFO - ) + # exception_recorder.log( + # block_number=-1, dataclass=key, message_type="item_counter", message=message, level=RecordLevel.INFO + #) except Exception as e: raise e finally: - exception_recorder.force_to_flush() + pass + # exception_recorder.force_to_flush() def resolve_dependencies(self, required_jobs: Set[Type[BaseJob]]) -> List[Type[BaseJob]]: sorted_order = [] From c01f0a14a1b20423ee485e4b868cc5e9fae4ec21 Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Fri, 15 Nov 2024 17:01:04 +0800 Subject: [PATCH 16/52] a new pg service --- indexer/exporters/postgres_item_exporter.py | 47 ++------------------- 1 file changed, 3 insertions(+), 44 deletions(-) diff --git a/indexer/exporters/postgres_item_exporter.py b/indexer/exporters/postgres_item_exporter.py index 4d7edb562..f5fa1039d 100644 --- a/indexer/exporters/postgres_item_exporter.py +++ b/indexer/exporters/postgres_item_exporter.py @@ -10,27 +10,12 @@ logger = logging.getLogger(__name__) -COMMIT_BATCH_SIZE = 500 - - -class TqdmExtraFormat(tqdm): - """Provides both estimated and actual total time format parameters""" - - @property - def format_dict(self): - d = super().format_dict - d.update( - total_time=self.format_interval(d["total"] / (d["n"] / d["elapsed"]) if d["elapsed"] and d["n"] else 0), - current_total_time=self.format_interval(d["elapsed"]), - ) - return d +COMMIT_BATCH_SIZE = 100 class PostgresItemExporter(BaseExporter): def __init__(self, service): self.service = service - self.main_progress = None - self.sub_progress = None def export_items(self, items, **kwargs): # Initialize main progress bar @@ -39,14 +24,6 @@ def export_items(self, items, **kwargs): desc = f"{job_name}(PG)" else: desc = "Exporting items" - self.main_progress = TqdmExtraFormat( - total=len(items), - desc=desc.ljust(35), - unit="items", - position=0, - ncols=90, - bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}] Est: {total_time}", - ) with self.service.cursor_scope() as cur: try: @@ -66,24 +43,11 @@ def export_items(self, items, **kwargs): converter = pg_config["converter"] # Initialize sub-progress bar for current table - self.sub_progress = TqdmExtraFormat( - total=len(item_group), - desc=f"Processing {table.__tablename__}".ljust(35), - unit="items", - position=1, - leave=False, - ncols=90, - bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]", - ) - data = [] - # Process items with progress tracking for item in item_group: converted_item = converter(table, item, do_update) data.append(converted_item) - self.sub_progress.update(1) - self.main_progress.update(1) if data: columns = list(data[0].keys()) @@ -98,17 +62,12 @@ def export_items(self, items, **kwargs): cur.connection.commit() tables.append(table.__tablename__) - self.sub_progress.close() except Exception as e: logger.error(f"Error exporting items: {e}") logger.error(f"{insert_stmt}") - raise e - finally: - if self.main_progress: - self.main_progress.close() - if self.sub_progress: - self.sub_progress.close() + pass + # raise e def sql_insert_statement(model: Type[HemeraModel], do_update: bool, columns, where_clause=None): From 00b198f214990e66efd9a4de2e3c7d57ade39430 Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Fri, 15 Nov 2024 17:02:12 +0800 Subject: [PATCH 17/52] a new pg service --- indexer/exporters/postgres_item_exporter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indexer/exporters/postgres_item_exporter.py b/indexer/exporters/postgres_item_exporter.py index f5fa1039d..1e1a0dad3 100644 --- a/indexer/exporters/postgres_item_exporter.py +++ b/indexer/exporters/postgres_item_exporter.py @@ -10,7 +10,7 @@ logger = logging.getLogger(__name__) -COMMIT_BATCH_SIZE = 100 +COMMIT_BATCH_SIZE = 1000 class PostgresItemExporter(BaseExporter): From 4a34f433d8143fcc8b3c96a293d645c1a1eaa66b Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Fri, 15 Nov 2024 17:04:21 +0800 Subject: [PATCH 18/52] a new pg service --- indexer/controller/stream_controller.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index e640bdbd3..4e159d27b 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -19,7 +19,7 @@ M_JOBS: int = int(os.environ.get("M_JOBS", 4)) M_TIMEOUT: int = int(os.environ.get("M_TIMEOUT", 30)) -M_SIZE: int = int(os.environ.get("M_SIZE", 10)) +M_SIZE: int = int(os.environ.get("M_SIZE", 100)) class StreamController(BaseController): From b4d36d6b32b9f0e756a7a6b99effa3b00ec67889 Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Fri, 15 Nov 2024 18:15:08 +0800 Subject: [PATCH 19/52] a new pg service --- cli/reorg.py | 2 +- indexer/exporters/item_exporter.py | 2 +- indexer/exporters/postgres_item_exporter.py | 12 +++++++++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/cli/reorg.py b/cli/reorg.py index 591dcbe5d..5a7df691d 100644 --- a/cli/reorg.py +++ b/cli/reorg.py @@ -179,7 +179,7 @@ def reorg( job_scheduler = ReorgScheduler( batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)), batch_web3_debug_provider=ThreadLocalProxy(lambda: get_provider_from_uri(debug_provider_uri, batch=True)), - item_exporters=PostgresItemExporter(config["db_service"]), + item_exporters=PostgresItemExporter(postgres_url=postgres_url, db_version=db_version, init_schema=auto_upgrade_db), batch_size=batch_size, debug_batch_size=debug_batch_size, required_output_types=output_types, diff --git a/indexer/exporters/item_exporter.py b/indexer/exporters/item_exporter.py index f648bcb49..6c7d1f872 100644 --- a/indexer/exporters/item_exporter.py +++ b/indexer/exporters/item_exporter.py @@ -17,7 +17,7 @@ def create_item_exporter(output, config): if item_exporter_type == ItemExporterType.CONSOLE: item_exporter = ConsoleItemExporter() elif item_exporter_type == ItemExporterType.POSTGRES: - item_exporter = PostgresItemExporter(config["db_service"]) + item_exporter = PostgresItemExporter(postgres_url=config['db_service'].jdbc_url) elif item_exporter_type == ItemExporterType.JSONFILE: item_exporter = JSONFileItemExporter(output, config) elif item_exporter_type == ItemExporterType.CSVFILE: diff --git a/indexer/exporters/postgres_item_exporter.py b/indexer/exporters/postgres_item_exporter.py index 1e1a0dad3..914c3c77e 100644 --- a/indexer/exporters/postgres_item_exporter.py +++ b/indexer/exporters/postgres_item_exporter.py @@ -6,6 +6,7 @@ from common.converter.pg_converter import domain_model_mapping from common.models import HemeraModel +from common.services.postgresql_service import PostgreSQLService from indexer.exporters.base_exporter import BaseExporter, group_by_item_type logger = logging.getLogger(__name__) @@ -14,8 +15,11 @@ class PostgresItemExporter(BaseExporter): - def __init__(self, service): - self.service = service + def __init__(self, **service): + self.postgres_url = service['postgres_url'] + self.db_version = service.get('db_version') + self.init_schema = service.get('init_schema') + # self.service = service def export_items(self, items, **kwargs): # Initialize main progress bar @@ -24,7 +28,9 @@ def export_items(self, items, **kwargs): desc = f"{job_name}(PG)" else: desc = "Exporting items" - with self.service.cursor_scope() as cur: + service = PostgreSQLService(self.postgres_url, db_version=self.db_version, init_schema=self.init_schema) + + with service.cursor_scope() as cur: try: insert_stmt = "" From 532b5d7938139df1706d7e8f52228ee6e77eda67 Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Fri, 15 Nov 2024 21:27:47 +0800 Subject: [PATCH 20/52] make job execute concurrently --- cli/stream.py | 13 +- indexer/controller/scheduler/job_scheduler.py | 7 +- indexer/executors/concurrent_job_executor.py | 120 ++++++++++++++++++ 3 files changed, 135 insertions(+), 5 deletions(-) create mode 100644 indexer/executors/concurrent_job_executor.py diff --git a/cli/stream.py b/cli/stream.py index 4b04b60c3..08eee5f28 100644 --- a/cli/stream.py +++ b/cli/stream.py @@ -183,13 +183,22 @@ def wrapper(*args, **kwargs): envvar="BLOCK_BATCH_SIZE", help="How many blocks to batch in single sync round", ) +@click.option( + "-P", + "--max-processors", + default=1, + show_default=True, + type=int, + help="How many sync round to concurrently execute.", + envvar="MAX_PROCESSOR", +) @click.option( "-w", "--max-workers", default=5, show_default=True, type=int, - help="The number of workers", + help="The number of workers during a request to rpc.", envvar="MAX_WORKERS", ) @click.option( @@ -325,6 +334,7 @@ def stream( batch_size=10, debug_batch_size=1, block_batch_size=1, + max_processors=1, max_workers=5, log_file=None, pid_file=None, @@ -419,6 +429,7 @@ def stream( controller = StreamController( batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=False)), + max_processors=max_processors, job_scheduler=job_scheduler, sync_recorder=create_recorder(sync_recorder, config), limit_reader=create_limit_reader( diff --git a/indexer/controller/scheduler/job_scheduler.py b/indexer/controller/scheduler/job_scheduler.py index 5f452d13f..6dd66d8a7 100644 --- a/indexer/controller/scheduler/job_scheduler.py +++ b/indexer/controller/scheduler/job_scheduler.py @@ -252,20 +252,19 @@ def instantiate_jobs(self): ) self.jobs.append(check_job) - def run_jobs(self, args): + def run_jobs(self, start_block, end_block): self.clear_data_buff() try: - start_block = args["start_block"] - end_block = args["end_block"] for job in self.jobs: job.run(start_block=start_block, end_block=end_block) + for output_type in self.required_output_types: key = output_type.type() message = f"{output_type.type()} : {len(self.get_data_buff().get(output_type.type())) if self.get_data_buff().get(output_type.type()) else 0}" self.logger.info(f"{message}") # exception_recorder.log( # block_number=-1, dataclass=key, message_type="item_counter", message=message, level=RecordLevel.INFO - #) + # ) except Exception as e: raise e diff --git a/indexer/executors/concurrent_job_executor.py b/indexer/executors/concurrent_job_executor.py new file mode 100644 index 000000000..fd631de30 --- /dev/null +++ b/indexer/executors/concurrent_job_executor.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Time : 2024/11/14 下午3:21 +Author : xuzh +Project : hemera_indexer +""" +import logging +from queue import Empty, Queue +from threading import Event, Semaphore, Thread + +from mpire import WorkerPool + + +class ConcurrentJobExecutor: + + def __init__(self, max_processors=1, call_back=None, error_callback=None): + self.pool = WorkerPool(n_jobs=max_processors, use_dill=True) + self.call_back = call_back + self.error_callback = error_callback + + self.running_tasks = {} + self.results = {} + self.task_count = 0 + + self.processors = {f"processor-{i}": True for i in range(max_processors)} + self.processor_semaphore = Semaphore(max_processors) + self.shutdown_event = Event() + + self.task_queue = Queue() + + self.task_processor = Thread(target=self._process_tasks) + self.task_processor.daemon = True + self.task_processor.start() + + self.logger = logging.getLogger(__name__) + + def _find_available_processor(self): + for processor in self.processors.keys(): + if self.processors[processor]: + return processor + return None + + def _allocate_processor(self): + processor = self._find_available_processor() + if processor: + self.processors[processor] = False + return processor + return None + + def _release_processor(self, processor): + self.processors[processor] = True + self.processor_semaphore.release() + + def _process_tasks(self): + while not self.shutdown_event.is_set(): + try: + try: + task = self.task_queue.get(timeout=1) + except Empty: + continue + + try: + processor = self._allocate_processor() + + self.pool.apply_async( + task["func"], + task["args"], + task["kwargs"], + callback=lambda result, p=processor, param=task["kwargs"]: self._handle_task_completion( + result, p, param + ), + error_callback=lambda error, p=processor, param=task["kwargs"]: self._handle_task_completion( + error, p, param + ), + ) + except Exception as e: + self.logger.error(f"Error processing task: {str(e)}") + self.processor_semaphore.release() + + except Exception as e: + self.logger.error(f"Unexpected error in task processor: {e}") + + def _handle_task_completion(self, result, processor, param): + self.logger.info(f"Task with parameter:{param} completed successfully by processor: {processor}") + self._release_processor(processor) + + if self.call_back: + param["processor"] = processor + self.call_back(**param) + + def _handle_task_failed(self, error, processor, param): + self.logger.error(f"with parameter:{param} failed in processor:{processor} error: {error}") + self._release_processor(processor) + + if self.error_callback: + try: + param["processor"] = processor + self.error_callback(**param) + except Exception as e: + self.logger.error(f"An exception occurred while execute call back function. error: {e}") + + raise error + + def submit(self, func, *args, **kwargs): + self.processor_semaphore.acquire() + + try: + task = {"func": func, "args": args, "kwargs": kwargs} + self.task_queue.put(task) + + except Exception as e: + self.processor_semaphore.release() + raise e + + def __exit__(self, exc_type, exc_val, exc_tb): + self.shutdown_event.set() + self.task_processor.join() + self.pool.terminate() + self.pool.join() From dca7472678699ac4684c2dd314e0b5f52b86aabb Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Fri, 15 Nov 2024 21:28:02 +0800 Subject: [PATCH 21/52] make format --- cli/reorg.py | 4 +- indexer/controller/reorg_controller.py | 1 + indexer/controller/stream_controller.py | 120 ++++++++++---------- indexer/exporters/item_exporter.py | 2 +- indexer/exporters/postgres_item_exporter.py | 6 +- indexer/utils/sync_recorder.py | 2 - 6 files changed, 68 insertions(+), 67 deletions(-) diff --git a/cli/reorg.py b/cli/reorg.py index 5a7df691d..b600524a5 100644 --- a/cli/reorg.py +++ b/cli/reorg.py @@ -179,7 +179,9 @@ def reorg( job_scheduler = ReorgScheduler( batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)), batch_web3_debug_provider=ThreadLocalProxy(lambda: get_provider_from_uri(debug_provider_uri, batch=True)), - item_exporters=PostgresItemExporter(postgres_url=postgres_url, db_version=db_version, init_schema=auto_upgrade_db), + item_exporters=PostgresItemExporter( + postgres_url=postgres_url, db_version=db_version, init_schema=auto_upgrade_db + ), batch_size=batch_size, debug_batch_size=debug_batch_size, required_output_types=output_types, diff --git a/indexer/controller/reorg_controller.py b/indexer/controller/reorg_controller.py index 6cd6f48b9..816aff5d5 100644 --- a/indexer/controller/reorg_controller.py +++ b/indexer/controller/reorg_controller.py @@ -11,6 +11,7 @@ from common.utils.format_utils import hex_str_to_bytes from common.utils.web3_utils import build_web3 from indexer.controller.base_controller import BaseController + # from indexer.utils.exception_recorder import ExceptionRecorder # exception_recorder = ExceptionRecorder() diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index 4e159d27b..f80596324 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -9,6 +9,7 @@ from common.utils.web3_utils import build_web3 from indexer.controller.base_controller import BaseController from indexer.controller.scheduler.job_scheduler import JobScheduler +from indexer.executors.concurrent_job_executor import ConcurrentJobExecutor from indexer.utils.exception_recorder import ExceptionRecorder from indexer.utils.limit_reader import LimitReader from indexer.utils.sync_recorder import BaseRecorder @@ -27,6 +28,7 @@ class StreamController(BaseController): def __init__( self, batch_web3_provider, + max_processors, sync_recorder: BaseRecorder, job_scheduler: JobScheduler, limit_reader: LimitReader, @@ -36,8 +38,9 @@ def __init__( _manager=None, ): self.entity_types = 1 - self.sync_recorder = sync_recorder self.web3 = build_web3(batch_web3_provider) + self.job_executor = ConcurrentJobExecutor(max_processors=max_processors) + self.sync_recorder = sync_recorder self.job_scheduler = job_scheduler self.limit_reader = limit_reader self.max_retries = max_retries @@ -58,38 +61,19 @@ def action( logger.info("Creating pid file {}".format(pid_file)) write_to_file(pid_file, str(os.getpid())) - self._do_stream(start_block, end_block, block_batch_size, retry_errors, period_seconds) - - finally: - if pid_file is not None: - logger.info("Deleting pid file {}".format(pid_file)) - delete_file(pid_file) - - def _shutdown(self): - pass - - def split_blocks(self, start_block, end_block, step): - blocks = [] - for i in range(start_block, end_block + 1, step): - blocks.append([{"start_block": i, "end_block": min(i + step - 1, end_block)}]) - return blocks + last_synced_block = self.sync_recorder.get_last_synced_block() - def _do_stream(self, start_block, end_block, steps, retry_errors, period_seconds): - last_synced_block = self.sync_recorder.get_last_synced_block() - if start_block is not None: - if ( - not self.retry_from_record - or last_synced_block < start_block - or (end_block is not None and last_synced_block > end_block) - ): - last_synced_block = start_block - 1 + if start_block is not None: + if ( + not self.retry_from_record + or last_synced_block < start_block + or (end_block is not None and last_synced_block > end_block) + ): + last_synced_block = start_block - 1 - tries, tries_reset = 0, True - while True and (end_block is None or last_synced_block < end_block): - synced_blocks = 0 + while True and (end_block is None or last_synced_block < end_block): + synced_blocks = 0 - try: - tries_reset = True current_block = self.limit_reader.get_current_block_number() if current_block is None: raise FastShutdownError( @@ -97,7 +81,9 @@ def _do_stream(self, start_block, end_block, steps, retry_errors, period_seconds "If you're using PGLimitReader, please confirm blocks table has one record at least." ) - target_block = self._calculate_target_block(current_block, last_synced_block, end_block, steps) + target_block = self._calculate_target_block( + current_block, last_synced_block, end_block, block_batch_size + ) synced_blocks = max(target_block - last_synced_block, 0) logger.info( @@ -107,17 +93,39 @@ def _do_stream(self, start_block, end_block, steps, retry_errors, period_seconds ) if synced_blocks != 0: - # ETL program's main logic - splits = self.split_blocks(last_synced_block + 1, target_block, M_SIZE) - - with mpire.WorkerPool(n_jobs=M_JOBS, use_dill=True) as pool: - pool.map(func=self.job_scheduler.run_jobs, iterable_of_args=splits, task_timeout=M_TIMEOUT) - # self.job_scheduler.run_jobs(last_synced_block + 1, target_block) + # submit job and concurrent running + self.job_executor.submit(self._do_stream, start_block=last_synced_block + 1, end_block=target_block) logger.info("Writing last synced block {}".format(target_block)) self.sync_recorder.set_last_synced_block(target_block) last_synced_block = target_block + if synced_blocks <= 0: + logger.info("Nothing to sync. Sleeping for {} seconds...".format(period_seconds)) + time.sleep(period_seconds) + + finally: + if pid_file is not None: + logger.info("Deleting pid file {}".format(pid_file)) + delete_file(pid_file) + + def _shutdown(self): + pass + + def split_blocks(self, start_block, end_block, step): + blocks = [] + for i in range(start_block, end_block + 1, step): + blocks.append([{"start_block": i, "end_block": min(i + step - 1, end_block)}]) + return blocks + + def _do_stream(self, start_block, end_block): + + for retry in range(self.max_retries): + try: + # ETL program's main logic + self.job_scheduler.run_jobs(start_block, end_block) + return + except HemeraBaseException as e: logger.error(f"An rpc response exception occurred while syncing block data. error: {e}") if e.crashable: @@ -125,35 +133,20 @@ def _do_stream(self, start_block, end_block, steps, retry_errors, period_seconds raise e if e.retriable: - tries += 1 - tries_reset = False - if tries >= self.max_retries: - logger.info(f"The number of retry is reached limit {self.max_retries}. Program will exit.") - raise e - else: - logger.info(f"No: {tries} retry is about to start.") + logger.info(f"No: {retry} retry is about to start.") else: logger.error("Mission will not retry, and exit immediately.") raise e except Exception as e: - logger.error("An exception occurred while syncing block data.") - tries += 1 - tries_reset = False - if not retry_errors or tries >= self.max_retries: - logger.info(f"The number of retry is reached limit {self.max_retries}. Program will exit.") - # exception_recorder.force_to_flush() - raise e + logger.error(f"An unknown exception occurred while syncing block data. error: {e}") + raise e - else: - logger.info(f"No: {tries} retry is about to start.") - finally: - if tries_reset: - tries = 0 - - if synced_blocks <= 0: - logger.info("Nothing to sync. Sleeping for {} seconds...".format(period_seconds)) - time.sleep(period_seconds) + logger.info(f"The number of retry is reached limit {self.max_retries}. Program will exit.") + raise FastShutdownError( + f"The job with parameters start_block:{start_block}, end_block:{end_block}" + f"can't be automatically resumed after reached out limit of retries. Program will exit." + ) def _get_current_block_number(self): return int(self.web3.eth.block_number) @@ -162,3 +155,10 @@ def _calculate_target_block(self, current_block, last_synced_block, end_block, s target_block = min(current_block - self.delay, last_synced_block + steps) target_block = min(target_block, end_block) if end_block is not None else target_block return target_block + + def handle_success(self, processor: str, start_block: int, end_block: int): + # self.sync_recorder.set_last_synced_block(target_block) + pass + + def handle_failure(self, processor: str, start_block: int, end_block: int): + pass diff --git a/indexer/exporters/item_exporter.py b/indexer/exporters/item_exporter.py index 6c7d1f872..f5cc515a1 100644 --- a/indexer/exporters/item_exporter.py +++ b/indexer/exporters/item_exporter.py @@ -17,7 +17,7 @@ def create_item_exporter(output, config): if item_exporter_type == ItemExporterType.CONSOLE: item_exporter = ConsoleItemExporter() elif item_exporter_type == ItemExporterType.POSTGRES: - item_exporter = PostgresItemExporter(postgres_url=config['db_service'].jdbc_url) + item_exporter = PostgresItemExporter(postgres_url=config["db_service"].jdbc_url) elif item_exporter_type == ItemExporterType.JSONFILE: item_exporter = JSONFileItemExporter(output, config) elif item_exporter_type == ItemExporterType.CSVFILE: diff --git a/indexer/exporters/postgres_item_exporter.py b/indexer/exporters/postgres_item_exporter.py index 914c3c77e..ec4133d3d 100644 --- a/indexer/exporters/postgres_item_exporter.py +++ b/indexer/exporters/postgres_item_exporter.py @@ -16,9 +16,9 @@ class PostgresItemExporter(BaseExporter): def __init__(self, **service): - self.postgres_url = service['postgres_url'] - self.db_version = service.get('db_version') - self.init_schema = service.get('init_schema') + self.postgres_url = service["postgres_url"] + self.db_version = service.get("db_version") + self.init_schema = service.get("init_schema") # self.service = service def export_items(self, items, **kwargs): diff --git a/indexer/utils/sync_recorder.py b/indexer/utils/sync_recorder.py index bfefb883c..57189ae4c 100644 --- a/indexer/utils/sync_recorder.py +++ b/indexer/utils/sync_recorder.py @@ -62,7 +62,6 @@ def set_last_synced_block(self, last_synced_block): session.execute(statement) session.commit() except Exception as e: - print(e) raise e finally: session.close() @@ -72,7 +71,6 @@ def get_last_synced_block(self): try: result = session.query(SyncRecord.last_block_number).filter(SyncRecord.mission_sign == self.key).scalar() except Exception as e: - print(e) raise e finally: session.close() From 3148a37004626176a9d870e036654f0cd6a6da39 Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Sat, 16 Nov 2024 10:13:15 +0800 Subject: [PATCH 22/52] a new pg service --- common/services/postgresql_service.py | 19 +++++++++++-------- indexer/exporters/postgres_item_exporter.py | 3 ++- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/common/services/postgresql_service.py b/common/services/postgresql_service.py index fbc540616..269a56472 100644 --- a/common/services/postgresql_service.py +++ b/common/services/postgresql_service.py @@ -10,7 +10,7 @@ from sqlalchemy.engine import Engine from sqlalchemy.orm import Session, sessionmaker from sqlalchemy.pool import QueuePool - +from multiprocessing import current_process class PostgreSQLService: """ @@ -19,19 +19,21 @@ class PostgreSQLService: """ _instances: dict = {} - _initialized: set = set() + _initialized: dict = {} _lock: threading.Lock = threading.Lock() def __new__(cls, jdbc_url: str, *args, **kwargs) -> "PostgreSQLService": """ Ensures only one instance exists per JDBC URL. """ - if jdbc_url not in cls._instances: + p_name = current_process().name + + if (p_name, jdbc_url) not in cls._instances: with cls._lock: - if jdbc_url not in cls._instances: + if (p_name, jdbc_url) not in cls._instances: instance = super().__new__(cls) - cls._instances[jdbc_url] = instance - return cls._instances[jdbc_url] + cls._instances[(p_name, jdbc_url)] = instance + return cls._instances[(p_name, jdbc_url)] def __init__( self, @@ -50,7 +52,8 @@ def __init__( """ Initialize the PostgreSQL service with connection pooling. """ - if jdbc_url in self._initialized: + p_name = current_process().name + if (p_name, jdbc_url) in self._initialized: return self.jdbc_url: str = jdbc_url @@ -92,7 +95,7 @@ def __init__( if init_schema: self._init_schema(script_location) - self._initialized.add(jdbc_url) + self._initialized[(p_name, jdbc_url)] =True def _init_schema(self, script_location: str) -> None: """ diff --git a/indexer/exporters/postgres_item_exporter.py b/indexer/exporters/postgres_item_exporter.py index ec4133d3d..c4a1f2d72 100644 --- a/indexer/exporters/postgres_item_exporter.py +++ b/indexer/exporters/postgres_item_exporter.py @@ -12,6 +12,7 @@ logger = logging.getLogger(__name__) COMMIT_BATCH_SIZE = 1000 +from multiprocessing import current_process class PostgresItemExporter(BaseExporter): @@ -29,7 +30,7 @@ def export_items(self, items, **kwargs): else: desc = "Exporting items" service = PostgreSQLService(self.postgres_url, db_version=self.db_version, init_schema=self.init_schema) - + print(current_process().name, service) with service.cursor_scope() as cur: try: From 44356fd7decbad01340fd5c878c3dae587926b07 Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Sat, 16 Nov 2024 13:18:06 +0800 Subject: [PATCH 23/52] a new pg service --- common/services/postgresql_service.py | 5 +++-- indexer/controller/stream_controller.py | 9 ++++++--- indexer/executors/concurrent_job_executor.py | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/common/services/postgresql_service.py b/common/services/postgresql_service.py index 269a56472..c878cf7fd 100644 --- a/common/services/postgresql_service.py +++ b/common/services/postgresql_service.py @@ -1,6 +1,7 @@ import os import threading from contextlib import contextmanager +from multiprocessing import current_process from typing import Optional from alembic import command @@ -10,7 +11,7 @@ from sqlalchemy.engine import Engine from sqlalchemy.orm import Session, sessionmaker from sqlalchemy.pool import QueuePool -from multiprocessing import current_process + class PostgreSQLService: """ @@ -95,7 +96,7 @@ def __init__( if init_schema: self._init_schema(script_location) - self._initialized[(p_name, jdbc_url)] =True + self._initialized[(p_name, jdbc_url)] = True def _init_schema(self, script_location: str) -> None: """ diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index f80596324..8b172edb1 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -94,8 +94,11 @@ def action( if synced_blocks != 0: # submit job and concurrent running - self.job_executor.submit(self._do_stream, start_block=last_synced_block + 1, end_block=target_block) - + # self.job_executor.submit(self._do_stream, start_block=last_synced_block + 1, end_block=target_block) + splits = self.split_blocks(last_synced_block + 1, target_block, M_SIZE) + with mpire.WorkerPool(n_jobs=M_JOBS, use_dill=True) as pool: + pool.map(func=self._do_stream, iterable_of_args=splits, task_timeout=M_TIMEOUT) + # self.job_scheduler.run_jobs(last_synced_block + 1, target_block) logger.info("Writing last synced block {}".format(target_block)) self.sync_recorder.set_last_synced_block(target_block) last_synced_block = target_block @@ -115,7 +118,7 @@ def _shutdown(self): def split_blocks(self, start_block, end_block, step): blocks = [] for i in range(start_block, end_block + 1, step): - blocks.append([{"start_block": i, "end_block": min(i + step - 1, end_block)}]) + blocks.append((i, min(i + step - 1, end_block))) return blocks def _do_stream(self, start_block, end_block): diff --git a/indexer/executors/concurrent_job_executor.py b/indexer/executors/concurrent_job_executor.py index fd631de30..cfef3aec3 100644 --- a/indexer/executors/concurrent_job_executor.py +++ b/indexer/executors/concurrent_job_executor.py @@ -63,7 +63,7 @@ def _process_tasks(self): try: processor = self._allocate_processor() - self.pool.apply_async( + self.pool.apply( task["func"], task["args"], task["kwargs"], From 8bbc88ad18e3a7b122e78ed9d03c7a9a0488bf93 Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Mon, 18 Nov 2024 13:30:28 +0800 Subject: [PATCH 24/52] a new pg service --- indexer/controller/stream_controller.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index 8b172edb1..8549842e7 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -46,6 +46,7 @@ def __init__( self.max_retries = max_retries self.retry_from_record = retry_from_record self.delay = delay + self.pool = mpire.WorkerPool(n_jobs=M_JOBS, use_dill=True) def action( self, @@ -96,8 +97,8 @@ def action( # submit job and concurrent running # self.job_executor.submit(self._do_stream, start_block=last_synced_block + 1, end_block=target_block) splits = self.split_blocks(last_synced_block + 1, target_block, M_SIZE) - with mpire.WorkerPool(n_jobs=M_JOBS, use_dill=True) as pool: - pool.map(func=self._do_stream, iterable_of_args=splits, task_timeout=M_TIMEOUT) + # with mpire.WorkerPool(n_jobs=M_JOBS, use_dill=True) as pool: + self.pool.map(func=self._do_stream, iterable_of_args=splits, task_timeout=M_TIMEOUT) # self.job_scheduler.run_jobs(last_synced_block + 1, target_block) logger.info("Writing last synced block {}".format(target_block)) self.sync_recorder.set_last_synced_block(target_block) @@ -165,3 +166,9 @@ def handle_success(self, processor: str, start_block: int, end_block: int): def handle_failure(self, processor: str, start_block: int, end_block: int): pass + + def __exit__(self, exc_type, exc_val, exc_tb): + try: + self.pool.terminate() + except Exception: + pass From 6951e469c8b66416ac1c7836f47df7b86d8f96d3 Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Mon, 18 Nov 2024 17:40:56 +0800 Subject: [PATCH 25/52] a new pg service --- common/services/postgresql_service.py | 8 +++---- indexer/controller/stream_controller.py | 25 ++++++++++++++++---- indexer/executors/batch_work_executor.py | 2 +- indexer/executors/concurrent_job_executor.py | 2 +- 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/common/services/postgresql_service.py b/common/services/postgresql_service.py index c878cf7fd..68db3486f 100644 --- a/common/services/postgresql_service.py +++ b/common/services/postgresql_service.py @@ -1,8 +1,6 @@ -import os import threading from contextlib import contextmanager from multiprocessing import current_process -from typing import Optional from alembic import command from alembic.config import Config @@ -25,7 +23,7 @@ class PostgreSQLService: def __new__(cls, jdbc_url: str, *args, **kwargs) -> "PostgreSQLService": """ - Ensures only one instance exists per JDBC URL. + Ensures only one instance exists per progress and JDBC URL. """ p_name = current_process().name @@ -39,8 +37,8 @@ def __new__(cls, jdbc_url: str, *args, **kwargs) -> "PostgreSQLService": def __init__( self, jdbc_url: str, - min_connections: int = 5, - max_connections: int = 20, + min_connections: int = 2, + max_connections: int = 10, pool_size: int = 10, max_overflow: int = 10, pool_timeout: int = 30, diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index 8549842e7..e62661f7d 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -19,7 +19,7 @@ logger = logging.getLogger(__name__) M_JOBS: int = int(os.environ.get("M_JOBS", 4)) -M_TIMEOUT: int = int(os.environ.get("M_TIMEOUT", 30)) +M_TIMEOUT: int = int(os.environ.get("M_TIMEOUT", 100)) M_SIZE: int = int(os.environ.get("M_SIZE", 100)) @@ -58,6 +58,12 @@ def action( pid_file=None, ): try: + import cProfile + import pstats + + profiler = cProfile.Profile() + profiler.enable() + if pid_file is not None: logger.info("Creating pid file {}".format(pid_file)) write_to_file(pid_file, str(os.getpid())) @@ -96,10 +102,10 @@ def action( if synced_blocks != 0: # submit job and concurrent running # self.job_executor.submit(self._do_stream, start_block=last_synced_block + 1, end_block=target_block) - splits = self.split_blocks(last_synced_block + 1, target_block, M_SIZE) + # splits = self.split_blocks(last_synced_block + 1, target_block, M_SIZE) # with mpire.WorkerPool(n_jobs=M_JOBS, use_dill=True) as pool: - self.pool.map(func=self._do_stream, iterable_of_args=splits, task_timeout=M_TIMEOUT) - # self.job_scheduler.run_jobs(last_synced_block + 1, target_block) + # self.pool.map(func=self._do_stream, iterable_of_args=splits, task_timeout=M_TIMEOUT) + self._do_stream(start_block, end_block) logger.info("Writing last synced block {}".format(target_block)) self.sync_recorder.set_last_synced_block(target_block) last_synced_block = target_block @@ -113,6 +119,17 @@ def action( logger.info("Deleting pid file {}".format(pid_file)) delete_file(pid_file) + profiler.disable() + stats = pstats.Stats(profiler) + # 按累计时间排序 + stats.sort_stats("cumulative") + # 保存到文件 + stats.dump_stats("output.prof") # 二进制格式 + # 保存可读文本 + with open("output.txt", "w") as f: + stats.stream = f + stats.print_stats() + def _shutdown(self): pass diff --git a/indexer/executors/batch_work_executor.py b/indexer/executors/batch_work_executor.py index 135c790d6..6e7b2b618 100644 --- a/indexer/executors/batch_work_executor.py +++ b/indexer/executors/batch_work_executor.py @@ -40,7 +40,7 @@ def __init__( self.max_workers = max_workers # Using bounded executor prevents unlimited queue growth # and allows monitoring in-progress futures and failing fast in case of errors. - self.executor = BoundedExecutor(1, self.max_workers) + self.executor = BoundedExecutor(100, self.max_workers) self._futures = [] self.retry_exceptions = retry_exceptions self.max_retries = max_retries diff --git a/indexer/executors/concurrent_job_executor.py b/indexer/executors/concurrent_job_executor.py index cfef3aec3..fd631de30 100644 --- a/indexer/executors/concurrent_job_executor.py +++ b/indexer/executors/concurrent_job_executor.py @@ -63,7 +63,7 @@ def _process_tasks(self): try: processor = self._allocate_processor() - self.pool.apply( + self.pool.apply_async( task["func"], task["args"], task["kwargs"], From 6c08780c7ad0974101c7350c4d9785a0071690f2 Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Mon, 18 Nov 2024 17:49:09 +0800 Subject: [PATCH 26/52] a new pg service --- indexer/controller/stream_controller.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index e62661f7d..9e39a470f 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -102,10 +102,10 @@ def action( if synced_blocks != 0: # submit job and concurrent running # self.job_executor.submit(self._do_stream, start_block=last_synced_block + 1, end_block=target_block) - # splits = self.split_blocks(last_synced_block + 1, target_block, M_SIZE) - # with mpire.WorkerPool(n_jobs=M_JOBS, use_dill=True) as pool: - # self.pool.map(func=self._do_stream, iterable_of_args=splits, task_timeout=M_TIMEOUT) - self._do_stream(start_block, end_block) + splits = self.split_blocks(last_synced_block + 1, target_block, M_SIZE) + with mpire.WorkerPool(n_jobs=M_JOBS, use_dill=True) as pool: + self.pool.map(func=self._do_stream, iterable_of_args=splits, task_timeout=M_TIMEOUT) + # self._do_stream(start_block, end_block) logger.info("Writing last synced block {}".format(target_block)) self.sync_recorder.set_last_synced_block(target_block) last_synced_block = target_block From c01c261384e8914d77f7b5fa16273acecd47b6a2 Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Mon, 18 Nov 2024 17:56:23 +0800 Subject: [PATCH 27/52] a new pg service --- indexer/executors/batch_work_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indexer/executors/batch_work_executor.py b/indexer/executors/batch_work_executor.py index 6e7b2b618..a03f81c55 100644 --- a/indexer/executors/batch_work_executor.py +++ b/indexer/executors/batch_work_executor.py @@ -40,7 +40,7 @@ def __init__( self.max_workers = max_workers # Using bounded executor prevents unlimited queue growth # and allows monitoring in-progress futures and failing fast in case of errors. - self.executor = BoundedExecutor(100, self.max_workers) + self.executor = BoundedExecutor(50, self.max_workers) self._futures = [] self.retry_exceptions = retry_exceptions self.max_retries = max_retries From 51be78afb067fc1dc6e44927576533865262edd6 Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Mon, 18 Nov 2024 18:00:01 +0800 Subject: [PATCH 28/52] a new pg service --- indexer/executors/batch_work_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indexer/executors/batch_work_executor.py b/indexer/executors/batch_work_executor.py index a03f81c55..e000db997 100644 --- a/indexer/executors/batch_work_executor.py +++ b/indexer/executors/batch_work_executor.py @@ -40,7 +40,7 @@ def __init__( self.max_workers = max_workers # Using bounded executor prevents unlimited queue growth # and allows monitoring in-progress futures and failing fast in case of errors. - self.executor = BoundedExecutor(50, self.max_workers) + self.executor = BoundedExecutor(10, self.max_workers) self._futures = [] self.retry_exceptions = retry_exceptions self.max_retries = max_retries From 30f4d7a77682779b6555a58af9562967a9399a36 Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Mon, 18 Nov 2024 18:10:08 +0800 Subject: [PATCH 29/52] a new pg service --- indexer/controller/stream_controller.py | 3 +-- indexer/executors/batch_work_executor.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index 9e39a470f..6101e4071 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -103,8 +103,7 @@ def action( # submit job and concurrent running # self.job_executor.submit(self._do_stream, start_block=last_synced_block + 1, end_block=target_block) splits = self.split_blocks(last_synced_block + 1, target_block, M_SIZE) - with mpire.WorkerPool(n_jobs=M_JOBS, use_dill=True) as pool: - self.pool.map(func=self._do_stream, iterable_of_args=splits, task_timeout=M_TIMEOUT) + self.pool.map(func=self._do_stream, iterable_of_args=splits, task_timeout=M_TIMEOUT) # self._do_stream(start_block, end_block) logger.info("Writing last synced block {}".format(target_block)) self.sync_recorder.set_last_synced_block(target_block) diff --git a/indexer/executors/batch_work_executor.py b/indexer/executors/batch_work_executor.py index e000db997..6e7b2b618 100644 --- a/indexer/executors/batch_work_executor.py +++ b/indexer/executors/batch_work_executor.py @@ -40,7 +40,7 @@ def __init__( self.max_workers = max_workers # Using bounded executor prevents unlimited queue growth # and allows monitoring in-progress futures and failing fast in case of errors. - self.executor = BoundedExecutor(10, self.max_workers) + self.executor = BoundedExecutor(100, self.max_workers) self._futures = [] self.retry_exceptions = retry_exceptions self.max_retries = max_retries From d819f77965cd2168945f2a496478cac81532cbef Mon Sep 17 00:00:00 2001 From: will0x0909 Date: Mon, 18 Nov 2024 18:49:41 +0800 Subject: [PATCH 30/52] a new pg service --- indexer/controller/stream_controller.py | 32 ++++++++++++------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index 6101e4071..28d91083d 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -58,12 +58,6 @@ def action( pid_file=None, ): try: - import cProfile - import pstats - - profiler = cProfile.Profile() - profiler.enable() - if pid_file is not None: logger.info("Creating pid file {}".format(pid_file)) write_to_file(pid_file, str(os.getpid())) @@ -118,17 +112,6 @@ def action( logger.info("Deleting pid file {}".format(pid_file)) delete_file(pid_file) - profiler.disable() - stats = pstats.Stats(profiler) - # 按累计时间排序 - stats.sort_stats("cumulative") - # 保存到文件 - stats.dump_stats("output.prof") # 二进制格式 - # 保存可读文本 - with open("output.txt", "w") as f: - stats.stream = f - stats.print_stats() - def _shutdown(self): pass @@ -139,11 +122,26 @@ def split_blocks(self, start_block, end_block, step): return blocks def _do_stream(self, start_block, end_block): + import cProfile + import pstats + + profiler = cProfile.Profile() + profiler.enable() for retry in range(self.max_retries): try: # ETL program's main logic self.job_scheduler.run_jobs(start_block, end_block) + profiler.disable() + stats = pstats.Stats(profiler) + # 按累计时间排序 + stats.sort_stats("cumulative") + # 保存到文件 + stats.dump_stats("output.prof") # 二进制格式 + # 保存可读文本 + with open("output.txt", "w") as f: + stats.stream = f + stats.print_stats() return except HemeraBaseException as e: From 4e011a2cbe7ad5be22d63f43cb1a4d5907b28a2e Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Tue, 19 Nov 2024 13:59:54 +0800 Subject: [PATCH 31/52] minimize runnable version --- cli/stream.py | 8 +- indexer/controller/scheduler/job_scheduler.py | 27 ++--- indexer/controller/stream_controller.py | 99 +++++++++---------- indexer/executors/concurrent_job_executor.py | 15 ++- indexer/jobs/base_job.py | 21 +++- indexer/jobs/export_blocks_job.py | 16 +-- .../jobs/export_transactions_and_logs_job.py | 10 +- 7 files changed, 101 insertions(+), 95 deletions(-) diff --git a/cli/stream.py b/cli/stream.py index 08eee5f28..adefae6d9 100644 --- a/cli/stream.py +++ b/cli/stream.py @@ -371,7 +371,7 @@ def stream( if postgres_url: service = PostgreSQLService(postgres_url, db_version=db_version, init_schema=auto_upgrade_db) - config["db_service"] = service + config["db_service"] = postgres_url exception_recorder.init_pg_service(service) else: logging.getLogger("ROOT").warning("No postgres url provided. Exception recorder will not be useful.") @@ -412,8 +412,8 @@ def stream( source_types = generate_dataclass_type_list_from_parameter(source_types, "source") job_scheduler = JobScheduler( - batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)), - batch_web3_debug_provider=ThreadLocalProxy(lambda: get_provider_from_uri(debug_provider_uri, batch=True)), + web3_provider_uri=provider_uri, + web3_debug_provider_uri=debug_provider_uri, item_exporters=create_item_exporters(output, config), batch_size=batch_size, debug_batch_size=debug_batch_size, @@ -430,7 +430,7 @@ def stream( controller = StreamController( batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=False)), max_processors=max_processors, - job_scheduler=job_scheduler, + scheduled_jobs=job_scheduler.get_scheduled_jobs(), sync_recorder=create_recorder(sync_recorder, config), limit_reader=create_limit_reader( source_path, ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=False)) diff --git a/indexer/controller/scheduler/job_scheduler.py b/indexer/controller/scheduler/job_scheduler.py index 6dd66d8a7..3d0803101 100644 --- a/indexer/controller/scheduler/job_scheduler.py +++ b/indexer/controller/scheduler/job_scheduler.py @@ -51,8 +51,8 @@ def get_source_job_type(source_path: str): class JobScheduler: def __init__( self, - batch_web3_provider, - batch_web3_debug_provider, + web3_provider_uri, + web3_debug_provider_uri, batch_size=100, debug_batch_size=1, max_workers=5, @@ -67,8 +67,8 @@ def __init__( ): self.logger = logging.getLogger(__name__) self.auto_reorg = auto_reorg - self.batch_web3_provider = batch_web3_provider - self.batch_web3_debug_provider = batch_web3_debug_provider + self.web3_provider_uri = web3_provider_uri + self.web3_debug_provider_uri = web3_debug_provider_uri self.item_exporters = item_exporters self.batch_size = batch_size self._is_multicall = multicall @@ -192,8 +192,8 @@ def instantiate_jobs(self): continue job = job_class( required_output_types=self.required_output_types, - batch_web3_provider=self.batch_web3_provider, - batch_web3_debug_provider=self.batch_web3_debug_provider, + web3_provider_uri=self.web3_provider_uri, + web3_debug_provider_uri=self.web3_debug_provider_uri, item_exporters=self.item_exporters, batch_size=self.batch_size, multicall=self._is_multicall, @@ -209,8 +209,8 @@ def instantiate_jobs(self): if ExportBlocksJob in self.resolved_job_classes: export_blocks_job = ExportBlocksJob( required_output_types=self.required_output_types, - batch_web3_provider=self.batch_web3_provider, - batch_web3_debug_provider=self.batch_web3_debug_provider, + web3_provider_uri=self.web3_provider_uri, + web3_debug_provider_uri=self.web3_debug_provider_uri, item_exporters=self.item_exporters, batch_size=self.batch_size, multicall=self._is_multicall, @@ -224,8 +224,8 @@ def instantiate_jobs(self): else: pg_source_job = PGSourceJob( required_output_types=self.required_output_types, - batch_web3_provider=self.batch_web3_provider, - batch_web3_debug_provider=self.batch_web3_debug_provider, + web3_provider_uri=self.web3_provider_uri, + web3_debug_provider_uri=self.web3_debug_provider_uri, item_exporters=self.item_exporters, batch_size=self.batch_size, multicall=self._is_multicall, @@ -240,8 +240,8 @@ def instantiate_jobs(self): if self.auto_reorg: check_job = CheckBlockConsensusJob( required_output_types=self.required_output_types, - batch_web3_provider=self.batch_web3_provider, - batch_web3_debug_provider=self.batch_web3_debug_provider, + web3_provider_uri=self.web3_provider_uri, + web3_debug_provider_uri=self.web3_debug_provider_uri, item_exporters=self.item_exporters, batch_size=self.batch_size, multicall=self._is_multicall, @@ -298,3 +298,6 @@ def resolve_dependencies(self, required_jobs: Set[Type[BaseJob]]) -> List[Type[B raise Exception("Dependency cycle detected") return sorted_order + + def get_scheduled_jobs(self): + return self.jobs diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index 28d91083d..09e2716fb 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -1,6 +1,7 @@ import logging import os import time +from typing import List import mpire @@ -10,6 +11,7 @@ from indexer.controller.base_controller import BaseController from indexer.controller.scheduler.job_scheduler import JobScheduler from indexer.executors.concurrent_job_executor import ConcurrentJobExecutor +from indexer.jobs.base_job import BaseJob from indexer.utils.exception_recorder import ExceptionRecorder from indexer.utils.limit_reader import LimitReader from indexer.utils.sync_recorder import BaseRecorder @@ -30,7 +32,7 @@ def __init__( batch_web3_provider, max_processors, sync_recorder: BaseRecorder, - job_scheduler: JobScheduler, + scheduled_jobs: List[BaseJob], limit_reader: LimitReader, max_retries=1, retry_from_record=False, @@ -41,7 +43,7 @@ def __init__( self.web3 = build_web3(batch_web3_provider) self.job_executor = ConcurrentJobExecutor(max_processors=max_processors) self.sync_recorder = sync_recorder - self.job_scheduler = job_scheduler + self.scheduled_jobs = scheduled_jobs self.limit_reader = limit_reader self.max_retries = max_retries self.retry_from_record = retry_from_record @@ -95,9 +97,9 @@ def action( if synced_blocks != 0: # submit job and concurrent running - # self.job_executor.submit(self._do_stream, start_block=last_synced_block + 1, end_block=target_block) - splits = self.split_blocks(last_synced_block + 1, target_block, M_SIZE) - self.pool.map(func=self._do_stream, iterable_of_args=splits, task_timeout=M_TIMEOUT) + self.job_executor.submit(do_stream, start_block=last_synced_block + 1, end_block=target_block) + # splits = self.split_blocks(last_synced_block + 1, target_block, M_SIZE) + # self.pool.map(func=self._do_stream, iterable_of_args=splits, task_timeout=M_TIMEOUT) # self._do_stream(start_block, end_block) logger.info("Writing last synced block {}".format(target_block)) self.sync_recorder.set_last_synced_block(target_block) @@ -121,54 +123,6 @@ def split_blocks(self, start_block, end_block, step): blocks.append((i, min(i + step - 1, end_block))) return blocks - def _do_stream(self, start_block, end_block): - import cProfile - import pstats - - profiler = cProfile.Profile() - profiler.enable() - - for retry in range(self.max_retries): - try: - # ETL program's main logic - self.job_scheduler.run_jobs(start_block, end_block) - profiler.disable() - stats = pstats.Stats(profiler) - # 按累计时间排序 - stats.sort_stats("cumulative") - # 保存到文件 - stats.dump_stats("output.prof") # 二进制格式 - # 保存可读文本 - with open("output.txt", "w") as f: - stats.stream = f - stats.print_stats() - return - - except HemeraBaseException as e: - logger.error(f"An rpc response exception occurred while syncing block data. error: {e}") - if e.crashable: - logger.error("Mission will crash immediately.") - raise e - - if e.retriable: - logger.info(f"No: {retry} retry is about to start.") - else: - logger.error("Mission will not retry, and exit immediately.") - raise e - - except Exception as e: - logger.error(f"An unknown exception occurred while syncing block data. error: {e}") - raise e - - logger.info(f"The number of retry is reached limit {self.max_retries}. Program will exit.") - raise FastShutdownError( - f"The job with parameters start_block:{start_block}, end_block:{end_block}" - f"can't be automatically resumed after reached out limit of retries. Program will exit." - ) - - def _get_current_block_number(self): - return int(self.web3.eth.block_number) - def _calculate_target_block(self, current_block, last_synced_block, end_block, steps): target_block = min(current_block - self.delay, last_synced_block + steps) target_block = min(target_block, end_block) if end_block is not None else target_block @@ -186,3 +140,42 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.pool.terminate() except Exception: pass + + +def do_stream(jobs, start_block, end_block, max_retries): + for retry in range(max_retries): + try: + # ETL program's main logic + run_jobs(jobs, start_block, end_block) + + except HemeraBaseException as e: + logger.error(f"An rpc response exception occurred while syncing block data. error: {e}") + if e.crashable: + logger.error("Mission will crash immediately.") + raise e + + if e.retriable: + logger.info(f"No: {retry} retry is about to start.") + else: + logger.error("Mission will not retry, and exit immediately.") + raise e + + except Exception as e: + logger.error(f"An unknown exception occurred while syncing block data. error: {e}") + raise e + + logger.info(f"The number of retry is reached limit {self.max_retries}. Program will exit.") + raise FastShutdownError( + f"The job with parameters start_block:{start_block}, end_block:{end_block}" + f"can't be automatically resumed after reached out limit of retries. Program will exit." + ) + + +def run_jobs(jobs, start_block, end_block): + try: + for job in jobs: + job.run(start_block=start_block, end_block=end_block) + except Exception as e: + raise e + finally: + pass diff --git a/indexer/executors/concurrent_job_executor.py b/indexer/executors/concurrent_job_executor.py index fd631de30..b4375cebd 100644 --- a/indexer/executors/concurrent_job_executor.py +++ b/indexer/executors/concurrent_job_executor.py @@ -6,6 +6,7 @@ Project : hemera_indexer """ import logging +from collections import deque from queue import Empty, Queue from threading import Event, Semaphore, Thread @@ -15,7 +16,7 @@ class ConcurrentJobExecutor: def __init__(self, max_processors=1, call_back=None, error_callback=None): - self.pool = WorkerPool(n_jobs=max_processors, use_dill=True) + self.pool = WorkerPool(n_jobs=max_processors, use_dill=True, start_method="spawn") self.call_back = call_back self.error_callback = error_callback @@ -24,6 +25,7 @@ def __init__(self, max_processors=1, call_back=None, error_callback=None): self.task_count = 0 self.processors = {f"processor-{i}": True for i in range(max_processors)} + self.available_processors = deque(f"processor-{i}" for i in range(max_processors)) self.processor_semaphore = Semaphore(max_processors) self.shutdown_event = Event() @@ -35,15 +37,9 @@ def __init__(self, max_processors=1, call_back=None, error_callback=None): self.logger = logging.getLogger(__name__) - def _find_available_processor(self): - for processor in self.processors.keys(): - if self.processors[processor]: - return processor - return None - def _allocate_processor(self): - processor = self._find_available_processor() - if processor: + if len(self.available_processors) > 1: + processor = self.available_processors.popleft() self.processors[processor] = False return processor return None @@ -51,6 +47,7 @@ def _allocate_processor(self): def _release_processor(self, processor): self.processors[processor] = True self.processor_semaphore.release() + self.available_processors.append(processor) def _process_tasks(self): while not self.shutdown_event.is_set(): diff --git a/indexer/jobs/base_job.py b/indexer/jobs/base_job.py index a259fc8cc..b044a6be0 100644 --- a/indexer/jobs/base_job.py +++ b/indexer/jobs/base_job.py @@ -10,7 +10,9 @@ from common.utils.format_utils import to_snake_case from indexer.domain import Domain from indexer.domain.transaction import Transaction +from indexer.utils.provider import get_provider_from_uri from indexer.utils.reorg import should_reorg +from indexer.utils.thread_local_proxy import ThreadLocalProxy class BaseJobMeta(type): @@ -67,13 +69,15 @@ def __init__(self, **kwargs): self._required_output_types = kwargs["required_output_types"] self._item_exporters = kwargs["item_exporters"] - self._batch_web3_provider = kwargs["batch_web3_provider"] - self._web3 = Web3(Web3.HTTPProvider(self._batch_web3_provider.endpoint_uri)) - self.logger = logging.getLogger(self.__class__.__name__) + self._web3_provider_uri = kwargs["web3_provider_uri"] + self._web3_debug_provider_uri = kwargs["web3_debug_provider_uri"] + # self._batch_web3_provider = kwargs["batch_web3_provider"] + self._batch_size = kwargs["batch_size"] + self._max_workers = kwargs["max_workers"] self._is_batch = kwargs["batch_size"] > 1 if kwargs.get("batch_size") else False self._reorg = kwargs["reorg"] if kwargs.get("reorg") else False - self._chain_id = kwargs.get("chain_id") or (self._web3.eth.chain_id if self._batch_web3_provider else None) + self._chain_id = kwargs.get("chain_id", None) self._should_reorg = False self._should_reorg_type = set() @@ -103,7 +107,14 @@ def run(self, **kwargs): self._end() def _start(self, **kwargs): - pass + self.logger = logging.getLogger(self.__class__.__name__) + self._batch_web3_provider = ThreadLocalProxy(lambda: get_provider_from_uri(self._web3_provider_uri, batch=True)) + self._web3 = Web3(Web3.HTTPProvider(self._web3_provider_uri)) + self._chain_id = ( + (self._web3.eth.chain_id if self._batch_web3_provider else None) + if self._chain_id is None + else self._chain_id + ) def _pre_reorg(self, **kwargs): if self._service is None: diff --git a/indexer/jobs/export_blocks_job.py b/indexer/jobs/export_blocks_job.py index 781f6cb3a..7a45d2f0b 100644 --- a/indexer/jobs/export_blocks_job.py +++ b/indexer/jobs/export_blocks_job.py @@ -20,8 +20,6 @@ from indexer.utils.reorg import set_reorg_sign from indexer.utils.rpc_utils import rpc_response_batch_to_results -logger = logging.getLogger(__name__) - # Exports blocks and block number <-> timestamp mapping class ExportBlocksJob(BaseExportJob): @@ -32,17 +30,21 @@ class ExportBlocksJob(BaseExportJob): def __init__(self, **kwargs): super().__init__(**kwargs) - self._batch_work_executor = BatchWorkExecutor( - kwargs["batch_size"], - kwargs["max_workers"], - job_name=self.__class__.__name__, - ) self._is_batch = kwargs["batch_size"] > 1 self._filters = flatten(kwargs.get("filters", [])) self._is_filter = kwargs.get("is_filter", False) self._specification = AlwaysFalseSpecification() if self._is_filter else AlwaysTrueSpecification() self._reorg_jobs = kwargs.get("reorg_jobs", []) + def _start(self, **kwargs): + super()._start(**kwargs) + + self._batch_work_executor = BatchWorkExecutor( + self._batch_size, + self._max_workers, + job_name=self.__class__.__name__, + ) + def _pre_reorg(self, **kwargs): if self._service is None: raise FastShutdownError("PG Service is not set") diff --git a/indexer/jobs/export_transactions_and_logs_job.py b/indexer/jobs/export_transactions_and_logs_job.py index c520c9148..43dfff870 100644 --- a/indexer/jobs/export_transactions_and_logs_job.py +++ b/indexer/jobs/export_transactions_and_logs_job.py @@ -12,8 +12,6 @@ from indexer.utils.json_rpc_requests import generate_get_receipt_json_rpc from indexer.utils.rpc_utils import rpc_response_batch_to_results -logger = logging.getLogger(__name__) - # Exports transactions and logs class ExportTransactionsAndLogsJob(BaseExportJob): @@ -24,12 +22,14 @@ class ExportTransactionsAndLogsJob(BaseExportJob): def __init__(self, **kwargs): super().__init__(**kwargs) + def _start(self, **kwargs): + super()._start(**kwargs) + self._batch_work_executor = BatchWorkExecutor( - kwargs["batch_size"], - kwargs["max_workers"], + self._batch_size, + self._max_workers, job_name=self.__class__.__name__, ) - self._is_batch = kwargs["batch_size"] > 1 def _collect(self, **kwargs): From b78d713007f3264c94b13f87e796e3f4ee3a2371 Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Thu, 21 Nov 2024 17:31:06 +0800 Subject: [PATCH 32/52] refactor for multi-processing --- cli/stream.py | 11 +- indexer/controller/scheduler/job_scheduler.py | 29 ++-- indexer/controller/stream_controller.py | 89 +++++++----- indexer/executors/concurrent_job_executor.py | 11 +- indexer/exporters/item_exporter.py | 2 +- indexer/exporters/postgres_item_exporter.py | 134 +++++++++++------- indexer/jobs/base_job.py | 62 ++++---- indexer/jobs/export_blocks_job.py | 26 +++- .../jobs/export_transactions_and_logs_job.py | 20 ++- indexer/utils/BufferService.py | 129 +++++++++++++++++ indexer/utils/sync_recorder.py | 5 +- 11 files changed, 365 insertions(+), 153 deletions(-) create mode 100644 indexer/utils/BufferService.py diff --git a/cli/stream.py b/cli/stream.py index adefae6d9..31edb5de5 100644 --- a/cli/stream.py +++ b/cli/stream.py @@ -356,6 +356,12 @@ def stream( debug_provider_uri = pick_random_provider_uri(debug_provider_uri) logging.getLogger("ROOT").info("Using provider " + provider_uri) logging.getLogger("ROOT").info("Using debug provider " + debug_provider_uri) + logging.getLogger("ROOT").info( + f"Indexer will run in {'multi' if max_processors > 1 else 'single'}-process mode " + f"{'with ' if max_processors > 1 else ''}" + f"{max_processors if max_processors > 1 else ''}" + f" {'processor' if max_processors > 1 else ''} " + ) # parameter logic checking if source_path: @@ -407,6 +413,7 @@ def stream( output_types = list( set(generate_dataclass_type_list_from_parameter(output_types, "output") + output_types_by_entity_type) ) + output_types.sort(key=lambda x: x.type()) if source_path and source_path.startswith("postgresql://"): source_types = generate_dataclass_type_list_from_parameter(source_types, "source") @@ -414,7 +421,6 @@ def stream( job_scheduler = JobScheduler( web3_provider_uri=provider_uri, web3_debug_provider_uri=debug_provider_uri, - item_exporters=create_item_exporters(output, config), batch_size=batch_size, debug_batch_size=debug_batch_size, max_workers=max_workers, @@ -424,6 +430,7 @@ def stream( cache=cache, auto_reorg=auto_reorg, multicall=multicall, + multiprocess=max_processors > 1, force_filter_mode=force_filter_mode, ) @@ -431,6 +438,8 @@ def stream( batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=False)), max_processors=max_processors, scheduled_jobs=job_scheduler.get_scheduled_jobs(), + item_exporters=create_item_exporters(output, config), + required_output_types=output_types, sync_recorder=create_recorder(sync_recorder, config), limit_reader=create_limit_reader( source_path, ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=False)) diff --git a/indexer/controller/scheduler/job_scheduler.py b/indexer/controller/scheduler/job_scheduler.py index 3d0803101..3180395f7 100644 --- a/indexer/controller/scheduler/job_scheduler.py +++ b/indexer/controller/scheduler/job_scheduler.py @@ -6,6 +6,7 @@ from redis.client import Redis from common.models.tokens import Tokens +from common.services.postgresql_service import PostgreSQLService from common.utils.format_utils import bytes_to_hex_str from common.utils.module_loading import import_submodules from enumeration.record_level import RecordLevel @@ -57,11 +58,11 @@ def __init__( debug_batch_size=1, max_workers=5, config={}, - item_exporters=[ConsoleItemExporter()], required_output_types=[], required_source_types=[], cache="memory", multicall=None, + multiprocess=False, auto_reorg=True, force_filter_mode=False, ): @@ -69,9 +70,9 @@ def __init__( self.auto_reorg = auto_reorg self.web3_provider_uri = web3_provider_uri self.web3_debug_provider_uri = web3_debug_provider_uri - self.item_exporters = item_exporters self.batch_size = batch_size self._is_multicall = multicall + self._is_multiprocess = multiprocess self.debug_batch_size = debug_batch_size self.max_workers = max_workers self.config = config @@ -83,7 +84,8 @@ def __init__( self.job_classes = [] self.job_map = defaultdict(list) self.dependency_map = defaultdict(list) - self.pg_service = config.get("db_service") if "db_service" in config else None + service_url = config.get("db_service") if "db_service" in config else None + self.pg_service = PostgreSQLService(service_url) if service_url is not None else None self.discover_and_register_job_classes() self.required_job_classes, self.is_pipeline_filter = self.get_required_job_classes(required_output_types) @@ -194,9 +196,9 @@ def instantiate_jobs(self): required_output_types=self.required_output_types, web3_provider_uri=self.web3_provider_uri, web3_debug_provider_uri=self.web3_debug_provider_uri, - item_exporters=self.item_exporters, batch_size=self.batch_size, multicall=self._is_multicall, + multiprocess=self._is_multiprocess, debug_batch_size=self.debug_batch_size, max_workers=self.max_workers, config=self.config, @@ -211,9 +213,9 @@ def instantiate_jobs(self): required_output_types=self.required_output_types, web3_provider_uri=self.web3_provider_uri, web3_debug_provider_uri=self.web3_debug_provider_uri, - item_exporters=self.item_exporters, batch_size=self.batch_size, multicall=self._is_multicall, + multiprocess=self._is_multiprocess, debug_batch_size=self.debug_batch_size, max_workers=self.max_workers, config=self.config, @@ -226,9 +228,9 @@ def instantiate_jobs(self): required_output_types=self.required_output_types, web3_provider_uri=self.web3_provider_uri, web3_debug_provider_uri=self.web3_debug_provider_uri, - item_exporters=self.item_exporters, batch_size=self.batch_size, multicall=self._is_multicall, + multiprocess=self._is_multiprocess, debug_batch_size=self.debug_batch_size, max_workers=self.max_workers, config=self.config, @@ -237,21 +239,6 @@ def instantiate_jobs(self): ) self.jobs.insert(0, pg_source_job) - if self.auto_reorg: - check_job = CheckBlockConsensusJob( - required_output_types=self.required_output_types, - web3_provider_uri=self.web3_provider_uri, - web3_debug_provider_uri=self.web3_debug_provider_uri, - item_exporters=self.item_exporters, - batch_size=self.batch_size, - multicall=self._is_multicall, - debug_batch_size=self.debug_batch_size, - max_workers=self.max_workers, - config=self.config, - filters=filters, - ) - self.jobs.append(check_job) - def run_jobs(self, start_block, end_block): self.clear_data_buff() try: diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index 09e2716fb..15f2bdf4e 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -3,15 +3,13 @@ import time from typing import List -import mpire - from common.utils.exception_control import FastShutdownError, HemeraBaseException from common.utils.file_utils import delete_file, write_to_file from common.utils.web3_utils import build_web3 from indexer.controller.base_controller import BaseController -from indexer.controller.scheduler.job_scheduler import JobScheduler from indexer.executors.concurrent_job_executor import ConcurrentJobExecutor from indexer.jobs.base_job import BaseJob +from indexer.utils.BufferService import BufferService from indexer.utils.exception_recorder import ExceptionRecorder from indexer.utils.limit_reader import LimitReader from indexer.utils.sync_recorder import BaseRecorder @@ -33,22 +31,31 @@ def __init__( max_processors, sync_recorder: BaseRecorder, scheduled_jobs: List[BaseJob], + item_exporters, + required_output_types, limit_reader: LimitReader, - max_retries=1, + max_retries=5, retry_from_record=False, delay=0, _manager=None, ): self.entity_types = 1 self.web3 = build_web3(batch_web3_provider) - self.job_executor = ConcurrentJobExecutor(max_processors=max_processors) + self.required_output_types = [output.type() for output in required_output_types] + self.buffer_service = BufferService( + item_exporters, self.required_output_types, export_workers=max_processors, block_size=100 + ) + self.job_executor = ( + ConcurrentJobExecutor(buffer_service=self.buffer_service, max_processors=max_processors) + if max_processors > 1 + else None + ) self.sync_recorder = sync_recorder self.scheduled_jobs = scheduled_jobs self.limit_reader = limit_reader self.max_retries = max_retries self.retry_from_record = retry_from_record self.delay = delay - self.pool = mpire.WorkerPool(n_jobs=M_JOBS, use_dill=True) def action( self, @@ -97,10 +104,23 @@ def action( if synced_blocks != 0: # submit job and concurrent running - self.job_executor.submit(do_stream, start_block=last_synced_block + 1, end_block=target_block) - # splits = self.split_blocks(last_synced_block + 1, target_block, M_SIZE) - # self.pool.map(func=self._do_stream, iterable_of_args=splits, task_timeout=M_TIMEOUT) - # self._do_stream(start_block, end_block) + if self.job_executor: + self.job_executor.submit( + run_jobs, + jobs=self.scheduled_jobs, + start_block=last_synced_block + 1, + end_block=target_block, + max_retries=self.max_retries, + ) + else: + export_data = run_jobs( + jobs=self.scheduled_jobs, + start_block=last_synced_block + 1, + end_block=target_block, + max_retries=self.max_retries, + ) + self.buffer_service.write(export_data) + logger.info("Writing last synced block {}".format(target_block)) self.sync_recorder.set_last_synced_block(target_block) last_synced_block = target_block @@ -128,28 +148,28 @@ def _calculate_target_block(self, current_block, last_synced_block, end_block, s target_block = min(target_block, end_block) if end_block is not None else target_block return target_block - def handle_success(self, processor: str, start_block: int, end_block: int): - # self.sync_recorder.set_last_synced_block(target_block) - pass - def handle_failure(self, processor: str, start_block: int, end_block: int): - pass +def run_jobs(jobs, start_block, end_block, max_retries, processor=None): + try: + jobs_export_data = {} + for job in jobs: + job_export_data = job_with_retires( + job, start_block=start_block, end_block=end_block, max_retries=max_retries, processor=processor + ) + jobs_export_data.update(job_export_data) + except Exception as e: + raise e - def __exit__(self, exc_type, exc_val, exc_tb): - try: - self.pool.terminate() - except Exception: - pass + return jobs_export_data -def do_stream(jobs, start_block, end_block, max_retries): +def job_with_retires(job, start_block, end_block, max_retries, processor=None): for retry in range(max_retries): try: - # ETL program's main logic - run_jobs(jobs, start_block, end_block) + return job.run(start_block=start_block, end_block=end_block, processor=processor) except HemeraBaseException as e: - logger.error(f"An rpc response exception occurred while syncing block data. error: {e}") + logger.error(f"An rpc response exception occurred while running {job.__name__}. error: {e}") if e.crashable: logger.error("Mission will crash immediately.") raise e @@ -161,21 +181,20 @@ def do_stream(jobs, start_block, end_block, max_retries): raise e except Exception as e: - logger.error(f"An unknown exception occurred while syncing block data. error: {e}") + logger.error(f"An unknown exception occurred while running {job.__name__}. error: {e}") raise e - logger.info(f"The number of retry is reached limit {self.max_retries}. Program will exit.") + logger.info(f"The number of retry is reached limit {max_retries}. Program will exit.") raise FastShutdownError( - f"The job with parameters start_block:{start_block}, end_block:{end_block}" + f"The {job} with parameters start_block:{start_block}, end_block:{end_block} " f"can't be automatically resumed after reached out limit of retries. Program will exit." ) -def run_jobs(jobs, start_block, end_block): - try: - for job in jobs: - job.run(start_block=start_block, end_block=end_block) - except Exception as e: - raise e - finally: - pass +def handle_success(processor: str, start_block: int, end_block: int): + # self.sync_recorder.set_last_synced_block(target_block) + pass + + +def handle_failure(processor: str, start_block: int, end_block: int): + pass diff --git a/indexer/executors/concurrent_job_executor.py b/indexer/executors/concurrent_job_executor.py index b4375cebd..805a085af 100644 --- a/indexer/executors/concurrent_job_executor.py +++ b/indexer/executors/concurrent_job_executor.py @@ -15,7 +15,8 @@ class ConcurrentJobExecutor: - def __init__(self, max_processors=1, call_back=None, error_callback=None): + def __init__(self, buffer_service, max_processors=1, call_back=None, error_callback=None): + self.buffer_service = buffer_service self.pool = WorkerPool(n_jobs=max_processors, use_dill=True, start_method="spawn") self.call_back = call_back self.error_callback = error_callback @@ -38,7 +39,7 @@ def __init__(self, max_processors=1, call_back=None, error_callback=None): self.logger = logging.getLogger(__name__) def _allocate_processor(self): - if len(self.available_processors) > 1: + if len(self.available_processors) > 0: processor = self.available_processors.popleft() self.processors[processor] = False return processor @@ -59,7 +60,7 @@ def _process_tasks(self): try: processor = self._allocate_processor() - + task["kwargs"]["processor"] = processor self.pool.apply_async( task["func"], task["args"], @@ -67,7 +68,7 @@ def _process_tasks(self): callback=lambda result, p=processor, param=task["kwargs"]: self._handle_task_completion( result, p, param ), - error_callback=lambda error, p=processor, param=task["kwargs"]: self._handle_task_completion( + error_callback=lambda error, p=processor, param=task["kwargs"]: self._handle_task_failed( error, p, param ), ) @@ -79,6 +80,8 @@ def _process_tasks(self): self.logger.error(f"Unexpected error in task processor: {e}") def _handle_task_completion(self, result, processor, param): + self.buffer_service.write(result) + self.logger.info(f"Task with parameter:{param} completed successfully by processor: {processor}") self._release_processor(processor) diff --git a/indexer/exporters/item_exporter.py b/indexer/exporters/item_exporter.py index f5cc515a1..f648bcb49 100644 --- a/indexer/exporters/item_exporter.py +++ b/indexer/exporters/item_exporter.py @@ -17,7 +17,7 @@ def create_item_exporter(output, config): if item_exporter_type == ItemExporterType.CONSOLE: item_exporter = ConsoleItemExporter() elif item_exporter_type == ItemExporterType.POSTGRES: - item_exporter = PostgresItemExporter(postgres_url=config["db_service"].jdbc_url) + item_exporter = PostgresItemExporter(config["db_service"]) elif item_exporter_type == ItemExporterType.JSONFILE: item_exporter = JSONFileItemExporter(output, config) elif item_exporter_type == ItemExporterType.CSVFILE: diff --git a/indexer/exporters/postgres_item_exporter.py b/indexer/exporters/postgres_item_exporter.py index c4a1f2d72..83de250c5 100644 --- a/indexer/exporters/postgres_item_exporter.py +++ b/indexer/exporters/postgres_item_exporter.py @@ -1,6 +1,8 @@ import logging +from datetime import datetime from typing import Type +from dateutil.tz import tzlocal from psycopg2.extras import execute_values from tqdm import tqdm @@ -8,73 +10,103 @@ from common.models import HemeraModel from common.services.postgresql_service import PostgreSQLService from indexer.exporters.base_exporter import BaseExporter, group_by_item_type +from indexer.utils.progress_logger import TqdmExtraFormat logger = logging.getLogger(__name__) COMMIT_BATCH_SIZE = 1000 -from multiprocessing import current_process class PostgresItemExporter(BaseExporter): - def __init__(self, **service): - self.postgres_url = service["postgres_url"] - self.db_version = service.get("db_version") - self.init_schema = service.get("init_schema") + def __init__(self, service_url): + self.service = PostgreSQLService(service_url) + self.main_progress = None + self.sub_progress = None # self.service = service def export_items(self, items, **kwargs): + start_time = datetime.now(tzlocal()) + # Initialize main progress bar if kwargs.get("job_name"): job_name = kwargs.get("job_name") desc = f"{job_name}(PG)" else: desc = "Exporting items" - service = PostgreSQLService(self.postgres_url, db_version=self.db_version, init_schema=self.init_schema) - print(current_process().name, service) - with service.cursor_scope() as cur: - - try: - insert_stmt = "" - items_grouped_by_type = group_by_item_type(items) - tables = [] - - # Process each item type - for item_type in items_grouped_by_type.keys(): - item_group = items_grouped_by_type.get(item_type) - - if item_group: - pg_config = domain_model_mapping[item_type] - table = pg_config["table"] - do_update = pg_config["conflict_do_update"] - update_strategy = pg_config["update_strategy"] - converter = pg_config["converter"] - - # Initialize sub-progress bar for current table - data = [] - # Process items with progress tracking - for item in item_group: - converted_item = converter(table, item, do_update) - data.append(converted_item) - - if data: - columns = list(data[0].keys()) - values = [tuple(d.values()) for d in data] - - insert_stmt = sql_insert_statement(table, do_update, columns, where_clause=update_strategy) - - # Execute in batches with progress tracking - for i in range(0, len(values), COMMIT_BATCH_SIZE): - batch = values[i : i + COMMIT_BATCH_SIZE] - execute_values(cur, insert_stmt, batch) - cur.connection.commit() - - tables.append(table.__tablename__) - - except Exception as e: - logger.error(f"Error exporting items: {e}") - logger.error(f"{insert_stmt}") - pass - # raise e + self.main_progress = TqdmExtraFormat( + total=len(items), + desc=desc.ljust(35), + unit="items", + position=0, + ncols=90, + bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}] Est: {total_time}", + ) + + conn = self.service.get_connection() + try: + insert_stmt = "" + items_grouped_by_type = group_by_item_type(items) + tables = [] + + # Process each item type + for item_type in items_grouped_by_type.keys(): + item_group = items_grouped_by_type.get(item_type) + + if item_group: + pg_config = domain_model_mapping[item_type] + table = pg_config["table"] + do_update = pg_config["conflict_do_update"] + update_strategy = pg_config["update_strategy"] + converter = pg_config["converter"] + + # Initialize sub-progress bar for current table + self.sub_progress = TqdmExtraFormat( + total=len(item_group), + desc=f"Processing {table.__tablename__}".ljust(35), + unit="items", + position=1, + leave=False, + ncols=90, + bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]", + ) + + cur = conn.cursor() + data = [] + + # Process items with progress tracking + for item in item_group: + converted_item = converter(table, item, do_update) + data.append(converted_item) + self.sub_progress.update(1) + self.main_progress.update(1) + + if data: + columns = list(data[0].keys()) + values = [tuple(d.values()) for d in data] + + insert_stmt = sql_insert_statement(table, do_update, columns, where_clause=update_strategy) + + # Execute in batches with progress tracking + for i in range(0, len(values), COMMIT_BATCH_SIZE): + batch = values[i : i + COMMIT_BATCH_SIZE] + execute_values(cur, insert_stmt, batch) + conn.commit() + + tables.append(table.__tablename__) + self.sub_progress.close() + + except Exception as e: + logger.error(f"Error exporting items: {e}") + logger.error(f"{insert_stmt}") + raise e + finally: + self.service.release_connection(conn) + if self.main_progress: + self.main_progress.close() + if self.sub_progress: + self.sub_progress.close() + + end_time = datetime.now(tzlocal()) def sql_insert_statement(model: Type[HemeraModel], do_update: bool, columns, where_clause=None): diff --git a/indexer/jobs/base_job.py b/indexer/jobs/base_job.py index b044a6be0..ba0af8c15 100644 --- a/indexer/jobs/base_job.py +++ b/indexer/jobs/base_job.py @@ -52,6 +52,7 @@ class BaseJob(metaclass=BaseJobMeta): dependency_types = [] output_types = [] able_to_reorg = False + able_to_multi_process = False @classmethod def discover_jobs(cls): @@ -67,8 +68,8 @@ def init_token_cache(cls, _token=None): def __init__(self, **kwargs): + self._multiprocess = kwargs["multiprocess"] self._required_output_types = kwargs["required_output_types"] - self._item_exporters = kwargs["item_exporters"] self._web3_provider_uri = kwargs["web3_provider_uri"] self._web3_debug_provider_uri = kwargs["web3_debug_provider_uri"] # self._batch_web3_provider = kwargs["batch_web3_provider"] @@ -86,6 +87,25 @@ def __init__(self, **kwargs): job_name_snake = to_snake_case(self.job_name) self.user_defined_config = kwargs["config"][job_name_snake] if kwargs["config"].get(job_name_snake) else {} + if not self.able_to_multi_process and self._multiprocess: + raise FastShutdownError( + f"Job: {self.__class__.__name__} can not run in multiprocessing mode, " + f"please check runtime parameter or modify job code." + ) + + if not self._multiprocess: + self.logger_name = self.__class__.__name__ + self.logger = logging.getLogger(self.logger_name) + self._batch_web3_provider = ThreadLocalProxy( + lambda: get_provider_from_uri(self._web3_provider_uri, batch=True) + ) + self._web3 = Web3(Web3.HTTPProvider(self._web3_provider_uri)) + self._chain_id = ( + (self._web3.eth.chain_id if self._batch_web3_provider else None) + if self._chain_id is None + else self._chain_id + ) + def run(self, **kwargs): try: self._start(**kwargs) @@ -100,21 +120,27 @@ def run(self, **kwargs): self._collect(**kwargs) self._process(**kwargs) - if not self._reorg: - self._export() - finally: self._end() + return {dataclass.type(): self._data_buff[dataclass.type()] for dataclass in self.output_types} + def _start(self, **kwargs): - self.logger = logging.getLogger(self.__class__.__name__) - self._batch_web3_provider = ThreadLocalProxy(lambda: get_provider_from_uri(self._web3_provider_uri, batch=True)) - self._web3 = Web3(Web3.HTTPProvider(self._web3_provider_uri)) - self._chain_id = ( - (self._web3.eth.chain_id if self._batch_web3_provider else None) - if self._chain_id is None - else self._chain_id - ) + if self._multiprocess: + self.logger_name = f"{self.__class__.__name__}-{kwargs['processor']}" + self.logger = logging.getLogger(self.logger_name) + self._batch_web3_provider = ThreadLocalProxy( + lambda: get_provider_from_uri(self._web3_provider_uri, batch=True) + ) + self._web3 = Web3(Web3.HTTPProvider(self._web3_provider_uri)) + self._chain_id = ( + (self._web3.eth.chain_id if self._batch_web3_provider else None) + if self._chain_id is None + else self._chain_id + ) + + for dataclass in self.output_types: + self._data_buff[dataclass.type()].clear() def _pre_reorg(self, **kwargs): if self._service is None: @@ -180,18 +206,6 @@ def _extract_from_buff(self, keys=None): return items - def _export(self): - items = [] - - for output_type in self.output_types: - if output_type in self._required_output_types: - items.extend(self._extract_from_buff([output_type.type()])) - - for item_exporter in self._item_exporters: - item_exporter.open() - item_exporter.export_items(items, job_name=self.job_name) - item_exporter.close() - def get_buff(self): return self._data_buff diff --git a/indexer/jobs/export_blocks_job.py b/indexer/jobs/export_blocks_job.py index 7a45d2f0b..d018a3528 100644 --- a/indexer/jobs/export_blocks_job.py +++ b/indexer/jobs/export_blocks_job.py @@ -26,6 +26,7 @@ class ExportBlocksJob(BaseExportJob): dependency_types = [] output_types = [Block, BlockTsMapper] able_to_reorg = True + able_to_multi_process = True def __init__(self, **kwargs): super().__init__(**kwargs) @@ -36,14 +37,22 @@ def __init__(self, **kwargs): self._specification = AlwaysFalseSpecification() if self._is_filter else AlwaysTrueSpecification() self._reorg_jobs = kwargs.get("reorg_jobs", []) + if not self._multiprocess: + self._batch_work_executor = BatchWorkExecutor( + self._batch_size, + self._max_workers, + job_name=self.logger_name, + ) + def _start(self, **kwargs): super()._start(**kwargs) - self._batch_work_executor = BatchWorkExecutor( - self._batch_size, - self._max_workers, - job_name=self.__class__.__name__, - ) + if self._multiprocess: + self._batch_work_executor = BatchWorkExecutor( + self._batch_size, + self._max_workers, + job_name=self.logger_name, + ) def _pre_reorg(self, **kwargs): if self._service is None: @@ -96,13 +105,16 @@ def _collect_batch(self, block_number_batch): for block_rpc_dict in results: block_entity = Block.from_rpc(block_rpc_dict) self._collect_item(Block.type(), block_entity) + + satisfied_transactions = [] for transaction_entity in block_entity.transactions: if self._specification.is_satisfied_by(transaction_entity): - self._collect_item(Transaction.type(), transaction_entity) + satisfied_transactions.append(transaction_entity) + + block_entity.transactions = satisfied_transactions def _process(self, **kwargs): self._data_buff[Block.type()].sort(key=lambda x: x.number) - self._data_buff[Transaction.type()].sort(key=lambda x: (x.block_number, x.transaction_index)) # block_list = list(self._shared_data_buff[Block.type()]) # block_list.sort(key=lambda x: x.number) diff --git a/indexer/jobs/export_transactions_and_logs_job.py b/indexer/jobs/export_transactions_and_logs_job.py index 43dfff870..b38750936 100644 --- a/indexer/jobs/export_transactions_and_logs_job.py +++ b/indexer/jobs/export_transactions_and_logs_job.py @@ -1,4 +1,3 @@ -import logging from typing import List import orjson @@ -18,22 +17,28 @@ class ExportTransactionsAndLogsJob(BaseExportJob): dependency_types = [Block] output_types = [Transaction, Log] able_to_reorg = True + able_to_multi_process = True def __init__(self, **kwargs): super().__init__(**kwargs) + if not self._multiprocess: + self._batch_work_executor = BatchWorkExecutor( + self._batch_size, self._max_workers, job_name=self.logger_name + ) def _start(self, **kwargs): super()._start(**kwargs) - self._batch_work_executor = BatchWorkExecutor( - self._batch_size, - self._max_workers, - job_name=self.__class__.__name__, - ) + if self._multiprocess: + self._batch_work_executor = BatchWorkExecutor( + self._batch_size, self._max_workers, job_name=self.logger_name + ) def _collect(self, **kwargs): - transactions: List[Transaction] = self._data_buff.get(Transaction.type(), []) + transactions: List[Transaction] = [ + transaction for block in self._data_buff.get(Block.type(), []) for transaction in block.transactions + ] self._batch_work_executor.execute(transactions, self._collect_batch, total_items=len(transactions)) self._batch_work_executor.wait() @@ -60,6 +65,7 @@ def _collect_batch(self, transactions: List[Transaction]): self._collect_item(Log.type(), log) def _process(self, **kwargs): + self._data_buff[Transaction.type()].sort(key=lambda x: (x.block_number, x.transaction_index)) self._data_buff[Log.type()].sort(key=lambda x: (x.block_number, x.log_index)) diff --git a/indexer/utils/BufferService.py b/indexer/utils/BufferService.py new file mode 100644 index 000000000..91a263891 --- /dev/null +++ b/indexer/utils/BufferService.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Time : 2024/11/19 下午6:07 +Author : xuzh +Project : hemera_indexer +""" +import logging +import signal +import threading +import time +from collections import defaultdict +from concurrent.futures import Future, ThreadPoolExecutor +from threading import Event, Thread +from typing import Dict, Set + + +class BufferService: + + def __init__( + self, + item_exporters, + required_output_types, + block_size: int = 1000, + linger_ms: int = 50000000, + max_buffer_size: int = 10000, + export_workers: int = 5, + ): + self.block_size = block_size + self.linger_ms = linger_ms + self.max_buffer_size = max_buffer_size + + self.item_exporters = item_exporters + self.required_output_types = required_output_types + + self.buffer = defaultdict(list) + self.buffer_lock = threading.Lock() + self.pending_futures: Set[Future] = set() + self.futures_lock = threading.Lock() + + self._shutdown_event = Event() + self._last_flush_time = time.time() + + self.submit_export_pool = ThreadPoolExecutor(max_workers=export_workers) + self._flush_thread = Thread(target=self._flush_loop) + self._flush_thread.daemon = True + self._flush_thread.start() + + self._setup_signal_handlers() + + self.logger = logging.getLogger(__name__) + + def _setup_signal_handlers(self): + signal.signal(signal.SIGTERM, self._handle_shutdown) + signal.signal(signal.SIGINT, self._handle_shutdown) + + def _handle_shutdown(self, signum, frame): + self.logger.info("Received shutdown signal, flushing buffer...") + self.flush_buffer() + self._shutdown_event.set() + + def _handle_export_completion(self, future: Future): + with self.futures_lock: + self.pending_futures.discard(future) + + try: + future.result() + except Exception as e: + raise e + + def write(self, records: Dict): + with self.buffer_lock: + for dataclass in records.keys(): + if dataclass in self.required_output_types: + self.buffer[dataclass].extend(records[dataclass]) + + if len(self.buffer["block"]) >= self.max_buffer_size: + self.flush_buffer() + + def _should_flush(self) -> bool: + current_time = time.time() + time_since_last_flush = (current_time - self._last_flush_time) * 1000 + + return len(self.buffer["block"]) >= self.block_size or time_since_last_flush >= self.linger_ms + + def export_items(self, items): + for item_exporter in self.item_exporters: + item_exporter.open() + item_exporter.export_items(items) + item_exporter.close() + + def flush_buffer(self): + + with self.buffer_lock: + if len(self.buffer["block"]) == 0: + return + + flush_items = [] + for key in self.buffer: + flush_items.extend(self.buffer[key]) + + self.buffer.clear() + + future = self.submit_export_pool.submit(self.export_items, flush_items) + future.add_done_callback(self._handle_export_completion) + + with self.futures_lock: + self.pending_futures.add(future) + + self._last_flush_time = time.time() + + def _flush_loop(self): + while not self._shutdown_event.is_set(): + try: + if self._should_flush(): + self.flush_buffer() + time.sleep(0.1) + except Exception as e: + self.logger.error(f"Error in flush loop: {e}") + + def shutdown(self): + if self._shutdown_event.is_set(): + return + + self.logger.info("Shutting down buffer service...") + self._handle_shutdown(None, None) + self._flush_thread.join() + self.submit_export_pool.shutdown(wait=True) + self.logger.info("Buffer service shut down completed") diff --git a/indexer/utils/sync_recorder.py b/indexer/utils/sync_recorder.py index 57189ae4c..9a555d0a8 100644 --- a/indexer/utils/sync_recorder.py +++ b/indexer/utils/sync_recorder.py @@ -5,6 +5,7 @@ from sqlalchemy.dialects.postgresql import insert from common.models.sync_record import SyncRecord +from common.services.postgresql_service import PostgreSQLService from common.utils.file_utils import smart_open, write_to_file @@ -34,9 +35,9 @@ def get_last_synced_block(self): class PGSyncRecorder(BaseRecorder): - def __init__(self, key, service): + def __init__(self, key, service_url): self.key = key - self.service = service + self.service = PostgreSQLService(service_url) def set_last_synced_block(self, last_synced_block): session = self.service.get_service_session() From 91f9b642f2896431e33569c2117ec5c744c32121 Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Thu, 21 Nov 2024 17:40:02 +0800 Subject: [PATCH 33/52] modify export_tokens_and_transfers_job for multiprocessing --- .../jobs/export_tokens_and_transfers_job.py | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/indexer/jobs/export_tokens_and_transfers_job.py b/indexer/jobs/export_tokens_and_transfers_job.py index 53641f8d6..0b2100e38 100644 --- a/indexer/jobs/export_tokens_and_transfers_job.py +++ b/indexer/jobs/export_tokens_and_transfers_job.py @@ -62,15 +62,17 @@ class ExportTokensAndTransfersJob(FilterTransactionDataJob): dependency_types = [Log] output_types = output_transfer_types + output_token_types able_to_reorg = True + able_to_multi_process = True def __init__(self, **kwargs): super().__init__(**kwargs) - self._batch_work_executor = BatchWorkExecutor( - kwargs["batch_size"], - kwargs["max_workers"], - job_name=self.__class__.__name__, - ) + if not self._multiprocess: + self._batch_work_executor = BatchWorkExecutor( + kwargs["batch_size"], + kwargs["max_workers"], + job_name=self.logger_name, + ) self._is_batch = kwargs["batch_size"] > 1 self.weth_address = self.user_defined_config.get("weth_address") @@ -98,6 +100,15 @@ def get_filter(self): ) return TransactionFilterByLogs(filters) + def _start(self, **kwargs): + super()._start(**kwargs) + if self._multiprocess: + self._batch_work_executor = BatchWorkExecutor( + kwargs["batch_size"], + kwargs["max_workers"], + job_name=self.logger_name, + ) + def _collect(self, **kwargs): filtered_logs = [ From ce027e0bb7f6f202c00a35e3f57707af0d1c07a5 Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Thu, 21 Nov 2024 17:44:01 +0800 Subject: [PATCH 34/52] modify error code --- common/utils/exception_control.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/common/utils/exception_control.py b/common/utils/exception_control.py index e4cddbdf0..2287173d8 100644 --- a/common/utils/exception_control.py +++ b/common/utils/exception_control.py @@ -99,21 +99,19 @@ def decode_response_error(error): if "InvalidJump" in message: return None - if code == -32000: - if ( - message == "execution reverted" - or message == "out of gas" - or message == "gas uint64 overflow" - or message == "invalid jump destination" - or message.lower().find("stack underflow") != -1 - ): - return None - elif message.find("required historical state unavailable") != -1: - raise HistoryUnavailableError(message) - else: - # print(error) - logging.error(error) - raise RPCNotReachable(message) + if ( + message == "execution reverted" + or message == "out of gas" + or message == "gas uint64 overflow" + or message == "invalid jump destination" + or message.lower().find("stack underflow") != -1 + ): + return None + elif message.find("required historical state unavailable") != -1: + raise HistoryUnavailableError(message) + elif code == -32000: + logging.error(error) + raise RPCNotReachable(message) elif code == -32700 or code == -32600 or code == -32602: raise FastShutdownError(message) elif (-32000 > code >= -32099) or code == -32603: From 0eaccee563853626200876717940aa665ebb5b2e Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Thu, 21 Nov 2024 17:54:01 +0800 Subject: [PATCH 35/52] bug fix --- indexer/controller/stream_controller.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index 15f2bdf4e..d1e23a172 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -121,8 +121,8 @@ def action( ) self.buffer_service.write(export_data) - logger.info("Writing last synced block {}".format(target_block)) - self.sync_recorder.set_last_synced_block(target_block) + # logger.info("Writing last synced block {}".format(target_block)) + # self.sync_recorder.set_last_synced_block(target_block) last_synced_block = target_block if synced_blocks <= 0: @@ -169,7 +169,7 @@ def job_with_retires(job, start_block, end_block, max_retries, processor=None): return job.run(start_block=start_block, end_block=end_block, processor=processor) except HemeraBaseException as e: - logger.error(f"An rpc response exception occurred while running {job.__name__}. error: {e}") + logger.error(f"An rpc response exception occurred while running {job.__class__.__name__}. error: {e}") if e.crashable: logger.error("Mission will crash immediately.") raise e @@ -181,7 +181,7 @@ def job_with_retires(job, start_block, end_block, max_retries, processor=None): raise e except Exception as e: - logger.error(f"An unknown exception occurred while running {job.__name__}. error: {e}") + logger.error(f"An unknown exception occurred while running {job.__class__.__name__}. error: {e}") raise e logger.info(f"The number of retry is reached limit {max_retries}. Program will exit.") From 751698398af4e37524331a2e795b036cc1b28f84 Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Thu, 21 Nov 2024 18:29:02 +0800 Subject: [PATCH 36/52] update buffer default parameter --- indexer/utils/BufferService.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/indexer/utils/BufferService.py b/indexer/utils/BufferService.py index 91a263891..b347bb940 100644 --- a/indexer/utils/BufferService.py +++ b/indexer/utils/BufferService.py @@ -21,8 +21,8 @@ def __init__( self, item_exporters, required_output_types, - block_size: int = 1000, - linger_ms: int = 50000000, + block_size: int = 100, + linger_ms: int = 5000, max_buffer_size: int = 10000, export_workers: int = 5, ): From 62a4119521378ae9f3b154bb5a34bdf7c2ecce20 Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Thu, 21 Nov 2024 19:09:40 +0800 Subject: [PATCH 37/52] fix transaction collecting --- indexer/jobs/export_transactions_and_logs_job.py | 1 + 1 file changed, 1 insertion(+) diff --git a/indexer/jobs/export_transactions_and_logs_job.py b/indexer/jobs/export_transactions_and_logs_job.py index b38750936..26d9e69ad 100644 --- a/indexer/jobs/export_transactions_and_logs_job.py +++ b/indexer/jobs/export_transactions_and_logs_job.py @@ -61,6 +61,7 @@ def _collect_batch(self, transactions: List[Transaction]): ) transaction.fill_with_receipt(receipt_entity) + self._collect_item(Transaction.type(), transaction) for log in transaction.receipt.logs: self._collect_item(Log.type(), log) From f02efba6fee207031060932b4dd753d6b10762f3 Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Thu, 21 Nov 2024 19:50:00 +0800 Subject: [PATCH 38/52] shutdown gracefully --- cli/stream.py | 2 ++ indexer/controller/stream_controller.py | 5 +++-- indexer/executors/concurrent_job_executor.py | 9 ++++++--- indexer/utils/BufferService.py | 7 ++++--- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/cli/stream.py b/cli/stream.py index 31edb5de5..2282454fc 100644 --- a/cli/stream.py +++ b/cli/stream.py @@ -455,3 +455,5 @@ def stream( period_seconds=period_seconds, pid_file=pid_file, ) + + controller.shutdown() diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index d1e23a172..07f089279 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -134,8 +134,9 @@ def action( logger.info("Deleting pid file {}".format(pid_file)) delete_file(pid_file) - def _shutdown(self): - pass + def shutdown(self): + self.job_executor.shutdown() + self.buffer_service.shutdown() def split_blocks(self, start_block, end_block, step): blocks = [] diff --git a/indexer/executors/concurrent_job_executor.py b/indexer/executors/concurrent_job_executor.py index 805a085af..4ebdbb8bc 100644 --- a/indexer/executors/concurrent_job_executor.py +++ b/indexer/executors/concurrent_job_executor.py @@ -33,7 +33,7 @@ def __init__(self, buffer_service, max_processors=1, call_back=None, error_callb self.task_queue = Queue() self.task_processor = Thread(target=self._process_tasks) - self.task_processor.daemon = True + self.task_processor.daemon = False self.task_processor.start() self.logger = logging.getLogger(__name__) @@ -82,7 +82,7 @@ def _process_tasks(self): def _handle_task_completion(self, result, processor, param): self.buffer_service.write(result) - self.logger.info(f"Task with parameter:{param} completed successfully by processor: {processor}") + self.logger.debug(f"Task with parameter:{param} completed successfully by processor: {processor}") self._release_processor(processor) if self.call_back: @@ -113,8 +113,11 @@ def submit(self, func, *args, **kwargs): self.processor_semaphore.release() raise e - def __exit__(self, exc_type, exc_val, exc_tb): + def shutdown(self): self.shutdown_event.set() self.task_processor.join() self.pool.terminate() self.pool.join() + + def __exit__(self, exc_type, exc_val, exc_tb): + self.shutdown() diff --git a/indexer/utils/BufferService.py b/indexer/utils/BufferService.py index b347bb940..bae055753 100644 --- a/indexer/utils/BufferService.py +++ b/indexer/utils/BufferService.py @@ -43,7 +43,7 @@ def __init__( self.submit_export_pool = ThreadPoolExecutor(max_workers=export_workers) self._flush_thread = Thread(target=self._flush_loop) - self._flush_thread.daemon = True + self._flush_thread.daemon = False self._flush_thread.start() self._setup_signal_handlers() @@ -71,7 +71,7 @@ def _handle_export_completion(self, future: Future): def write(self, records: Dict): with self.buffer_lock: for dataclass in records.keys(): - if dataclass in self.required_output_types: + if dataclass in self.required_output_types or dataclass == "block": self.buffer[dataclass].extend(records[dataclass]) if len(self.buffer["block"]) >= self.max_buffer_size: @@ -97,7 +97,8 @@ def flush_buffer(self): flush_items = [] for key in self.buffer: - flush_items.extend(self.buffer[key]) + if key in self.required_output_types: + flush_items.extend(self.buffer[key]) self.buffer.clear() From e0ad0fa6d6198edf7a8853d173e5e9b3119ed10c Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Thu, 21 Nov 2024 21:33:02 +0800 Subject: [PATCH 39/52] update async sync-record submit and failure job record --- common/models/failures_records.py | 16 ++++++++ common/utils/exception_control.py | 14 +++++++ indexer/controller/stream_controller.py | 39 ++++++++++++-------- indexer/executors/concurrent_job_executor.py | 15 ++++++-- indexer/utils/BufferService.py | 29 ++++++++++++--- indexer/utils/sync_recorder.py | 34 +++++++++++++++++ 6 files changed, 121 insertions(+), 26 deletions(-) create mode 100644 common/models/failures_records.py diff --git a/common/models/failures_records.py b/common/models/failures_records.py new file mode 100644 index 000000000..2c1eb654e --- /dev/null +++ b/common/models/failures_records.py @@ -0,0 +1,16 @@ +from sqlalchemy import Column +from sqlalchemy.dialects.postgresql import BIGINT, JSON, TIMESTAMP, VARCHAR + +from common.models import HemeraModel + + +class FailuresRecords(HemeraModel): + __tablename__ = "failures_records" + record_id = Column(BIGINT, primary_key=True, autoincrement=True) + mission_sign = Column(VARCHAR) + output_types = Column(VARCHAR) + start_block_number = Column(BIGINT) + end_block_number = Column(BIGINT) + exception_stage = Column(VARCHAR) + exception = Column(JSON) + crash_time = Column(TIMESTAMP) diff --git a/common/utils/exception_control.py b/common/utils/exception_control.py index 2287173d8..d862deec9 100644 --- a/common/utils/exception_control.py +++ b/common/utils/exception_control.py @@ -1,4 +1,6 @@ import logging +import sys +import traceback from werkzeug.exceptions import HTTPException @@ -118,3 +120,15 @@ def decode_response_error(error): raise RetriableError(message) else: return None + + +def get_exception_details(e: Exception) -> dict: + exc_type, exc_value, exc_traceback = sys.exc_info() + + return { + "type": exc_type.__name__, + "module": exc_type.__module__, + "message": str(exc_value), + "traceback": traceback.format_exc(), + "line_number": exc_traceback.tb_lineno, + } diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index 07f089279..6a9737684 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -43,10 +43,17 @@ def __init__( self.web3 = build_web3(batch_web3_provider) self.required_output_types = [output.type() for output in required_output_types] self.buffer_service = BufferService( - item_exporters, self.required_output_types, export_workers=max_processors, block_size=100 + item_exporters=item_exporters, + required_output_types=self.required_output_types, + export_workers=max_processors, + block_size=100, + success_callback=self.handle_success, + exception_callback=self.handle_failure, ) self.job_executor = ( - ConcurrentJobExecutor(buffer_service=self.buffer_service, max_processors=max_processors) + ConcurrentJobExecutor( + buffer_service=self.buffer_service, max_processors=max_processors, error_callback=self.handle_failure + ) if max_processors > 1 else None ) @@ -57,6 +64,15 @@ def __init__( self.retry_from_record = retry_from_record self.delay = delay + def handle_success(self, last_block_number): + self.sync_recorder.set_last_synced_block(last_block_number) + logger.info("Writing last synced block {}".format(last_block_number)) + + def handle_failure( + self, output_types: List[str], start_block: int, end_block: int, exception_stage: str, exception: str + ): + self.sync_recorder.set_failures_record(output_types, start_block, end_block, exception_stage, exception) + def action( self, start_block=None, @@ -68,7 +84,7 @@ def action( ): try: if pid_file is not None: - logger.info("Creating pid file {}".format(pid_file)) + logger.debug("Creating pid file {}".format(pid_file)) write_to_file(pid_file, str(os.getpid())) last_synced_block = self.sync_recorder.get_last_synced_block() @@ -126,12 +142,12 @@ def action( last_synced_block = target_block if synced_blocks <= 0: - logger.info("Nothing to sync. Sleeping for {} seconds...".format(period_seconds)) + logger.debug("Nothing to sync. Sleeping for {} seconds...".format(period_seconds)) time.sleep(period_seconds) finally: if pid_file is not None: - logger.info("Deleting pid file {}".format(pid_file)) + logger.debug("Deleting pid file {}".format(pid_file)) delete_file(pid_file) def shutdown(self): @@ -176,7 +192,7 @@ def job_with_retires(job, start_block, end_block, max_retries, processor=None): raise e if e.retriable: - logger.info(f"No: {retry} retry is about to start.") + logger.debug(f"No: {retry} retry is about to start.") else: logger.error("Mission will not retry, and exit immediately.") raise e @@ -185,17 +201,8 @@ def job_with_retires(job, start_block, end_block, max_retries, processor=None): logger.error(f"An unknown exception occurred while running {job.__class__.__name__}. error: {e}") raise e - logger.info(f"The number of retry is reached limit {max_retries}. Program will exit.") + logger.debug(f"The number of retry is reached limit {max_retries}. Program will exit.") raise FastShutdownError( f"The {job} with parameters start_block:{start_block}, end_block:{end_block} " f"can't be automatically resumed after reached out limit of retries. Program will exit." ) - - -def handle_success(processor: str, start_block: int, end_block: int): - # self.sync_recorder.set_last_synced_block(target_block) - pass - - -def handle_failure(processor: str, start_block: int, end_block: int): - pass diff --git a/indexer/executors/concurrent_job_executor.py b/indexer/executors/concurrent_job_executor.py index 4ebdbb8bc..65bc461ca 100644 --- a/indexer/executors/concurrent_job_executor.py +++ b/indexer/executors/concurrent_job_executor.py @@ -12,6 +12,8 @@ from mpire import WorkerPool +from common.utils.exception_control import get_exception_details + class ConcurrentJobExecutor: @@ -96,12 +98,17 @@ def _handle_task_failed(self, error, processor, param): if self.error_callback: try: param["processor"] = processor - self.error_callback(**param) + error_details = get_exception_details(error) + self.error_callback( + output_types=self.buffer_service.required_output_types, + start_block=param["start_block"], + end_block=param["end_block"], + exception_stage="Job Running", + exception=error_details, + ) except Exception as e: self.logger.error(f"An exception occurred while execute call back function. error: {e}") - raise error - def submit(self, func, *args, **kwargs): self.processor_semaphore.acquire() @@ -116,8 +123,8 @@ def submit(self, func, *args, **kwargs): def shutdown(self): self.shutdown_event.set() self.task_processor.join() + self.pool.join(keep_alive=True) self.pool.terminate() - self.pool.join() def __exit__(self, exc_type, exc_val, exc_tb): self.shutdown() diff --git a/indexer/utils/BufferService.py b/indexer/utils/BufferService.py index bae055753..ffcfc5caa 100644 --- a/indexer/utils/BufferService.py +++ b/indexer/utils/BufferService.py @@ -12,7 +12,9 @@ from collections import defaultdict from concurrent.futures import Future, ThreadPoolExecutor from threading import Event, Thread -from typing import Dict, Set +from typing import Callable, Dict + +from common.utils.exception_control import get_exception_details class BufferService: @@ -25,6 +27,8 @@ def __init__( linger_ms: int = 5000, max_buffer_size: int = 10000, export_workers: int = 5, + success_callback: Callable = None, + exception_callback: Callable = None, ): self.block_size = block_size self.linger_ms = linger_ms @@ -35,7 +39,7 @@ def __init__( self.buffer = defaultdict(list) self.buffer_lock = threading.Lock() - self.pending_futures: Set[Future] = set() + self.pending_futures: dict[Future, (int, int)] = dict() self.futures_lock = threading.Lock() self._shutdown_event = Event() @@ -48,6 +52,9 @@ def __init__( self._setup_signal_handlers() + self.success_callback = success_callback + self.exception_callback = exception_callback + self.logger = logging.getLogger(__name__) def _setup_signal_handlers(self): @@ -61,12 +68,21 @@ def _handle_shutdown(self, signum, frame): def _handle_export_completion(self, future: Future): with self.futures_lock: - self.pending_futures.discard(future) + start_block, end_block = self.pending_futures[future] + self.pending_futures.pop(future) try: future.result() + + try: + self.success_callback(end_block) + except Exception as e: + self.logger.error(f"Writing last synced block number {end_block} error.") + except Exception as e: - raise e + exception_details = get_exception_details(e) + self.exception_callback(self.required_output_types, start_block, end_block, "export", exception_details) + self.logger.error(f"Exporting items error: {exception_details}") def write(self, records: Dict): with self.buffer_lock: @@ -94,7 +110,8 @@ def flush_buffer(self): with self.buffer_lock: if len(self.buffer["block"]) == 0: return - + self.buffer["block"].sort(key=lambda x: x.number) + block_range = (self.buffer["block"][0].number, self.buffer["block"][-1].number) flush_items = [] for key in self.buffer: if key in self.required_output_types: @@ -106,7 +123,7 @@ def flush_buffer(self): future.add_done_callback(self._handle_export_completion) with self.futures_lock: - self.pending_futures.add(future) + self.pending_futures[future] = block_range self._last_flush_time = time.time() diff --git a/indexer/utils/sync_recorder.py b/indexer/utils/sync_recorder.py index 9a555d0a8..6a3a3c0f5 100644 --- a/indexer/utils/sync_recorder.py +++ b/indexer/utils/sync_recorder.py @@ -4,6 +4,7 @@ from sqlalchemy import func from sqlalchemy.dialects.postgresql import insert +from common.models.failures_records import FailuresRecords from common.models.sync_record import SyncRecord from common.services.postgresql_service import PostgreSQLService from common.utils.file_utils import smart_open, write_to_file @@ -16,6 +17,9 @@ def set_last_synced_block(self, last_synced_block): def get_last_synced_block(self): pass + def set_failures_record(self, output_types, start_block, end_block, exception_stage, exception): + pass + class FileSyncRecorder(BaseRecorder): @@ -32,6 +36,9 @@ def get_last_synced_block(self): with smart_open(self.file_name, "r") as last_synced_block_file: return int(last_synced_block_file.read()) + def set_failures_record(self, output_types, start_block, end_block, exception_stage, exception): + pass + class PGSyncRecorder(BaseRecorder): @@ -58,6 +65,7 @@ def set_last_synced_block(self, last_synced_block): "last_block_number": last_synced_block, "update_time": update_time, }, + where=(SyncRecord.last_block_number <= last_synced_block), ) ) session.execute(statement) @@ -79,6 +87,32 @@ def get_last_synced_block(self): return result return 0 + def set_failures_record(self, output_types, start_block, end_block, exception_stage, exception): + session = self.service.get_service_session() + try: + crash_time = func.to_timestamp(int(datetime.now(timezone.utc).timestamp())) + + statement = insert(FailuresRecords).values( + { + "mission_sign": self.key, + "output_types": ",".join(output_types), + "start_block_number": start_block, + "end_block_number": end_block, + "exception_stage": exception_stage, + "exception": exception, + "crash_time": crash_time, + } + ) + + session.execute(statement) + session.commit() + + except Exception as e: + raise e + + finally: + session.close() + def create_recorder(sync_recorder: str, config: dict) -> BaseRecorder: recorder_sign = sync_recorder.find(":") From 39316bb62808a88554f558362d8abae7c04af0ca Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Thu, 21 Nov 2024 21:41:54 +0800 Subject: [PATCH 40/52] add failure records table scripts --- .../20241121_add_failure_records_table.sql | 19 +++++++++ .../20241121_add_failure_records_table.py | 41 +++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 migrations/manual_versions/20241121_add_failure_records_table.sql create mode 100644 migrations/versions/20241121_add_failure_records_table.py diff --git a/migrations/manual_versions/20241121_add_failure_records_table.sql b/migrations/manual_versions/20241121_add_failure_records_table.sql new file mode 100644 index 000000000..d6c552d1b --- /dev/null +++ b/migrations/manual_versions/20241121_add_failure_records_table.sql @@ -0,0 +1,19 @@ +BEGIN; + +-- Running upgrade 3bd2e3099bae -> f846e3abeb18 + +CREATE TABLE IF NOT EXISTS failures_records ( + record_id BIGSERIAL NOT NULL, + mission_sign VARCHAR, + output_types VARCHAR, + start_block_number BIGINT, + end_block_number BIGINT, + exception_stage VARCHAR, + exception JSON, + crash_time TIMESTAMP WITHOUT TIME ZONE, + PRIMARY KEY (record_id) +); + +UPDATE alembic_version SET version_num='f846e3abeb18' WHERE alembic_version.version_num = '3bd2e3099bae'; + +COMMIT; \ No newline at end of file diff --git a/migrations/versions/20241121_add_failure_records_table.py b/migrations/versions/20241121_add_failure_records_table.py new file mode 100644 index 000000000..0ae7af2a3 --- /dev/null +++ b/migrations/versions/20241121_add_failure_records_table.py @@ -0,0 +1,41 @@ +"""add_failure_records_table + +Revision ID: f846e3abeb18 +Revises: 3bd2e3099bae +Create Date: 2024-11-21 21:37:25.662986 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = 'f846e3abeb18' +down_revision: Union[str, None] = '3bd2e3099bae' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('failures_records', + sa.Column('record_id', sa.BIGINT(), autoincrement=True, nullable=False), + sa.Column('mission_sign', sa.VARCHAR(), nullable=True), + sa.Column('output_types', sa.VARCHAR(), nullable=True), + sa.Column('start_block_number', sa.BIGINT(), nullable=True), + sa.Column('end_block_number', sa.BIGINT(), nullable=True), + sa.Column('exception_stage', sa.VARCHAR(), nullable=True), + sa.Column('exception', postgresql.JSON(astext_type=sa.Text()), nullable=True), + sa.Column('crash_time', postgresql.TIMESTAMP(), nullable=True), + sa.PrimaryKeyConstraint('record_id'), + if_not_exists=True + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('failures_records', if_exists=True) + # ### end Alembic commands ### From 37e99e9e806a10f7169df8370d0a92682d401232 Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Fri, 22 Nov 2024 18:29:47 +0800 Subject: [PATCH 41/52] file rename --- ...failures_records.py => failure_records.py} | 4 +-- indexer/controller/stream_controller.py | 16 +++++----- .../{BufferService.py => buffer_service.py} | 0 .../20241105_add_address_index_and_stats.sql | 4 +-- .../20241121_add_failure_records_table.sql | 2 +- .../20241121_add_failure_records_table.py | 32 ++++++++++--------- 6 files changed, 30 insertions(+), 28 deletions(-) rename common/models/{failures_records.py => failure_records.py} (86%) rename indexer/utils/{BufferService.py => buffer_service.py} (100%) diff --git a/common/models/failures_records.py b/common/models/failure_records.py similarity index 86% rename from common/models/failures_records.py rename to common/models/failure_records.py index 2c1eb654e..d48bf4680 100644 --- a/common/models/failures_records.py +++ b/common/models/failure_records.py @@ -4,8 +4,8 @@ from common.models import HemeraModel -class FailuresRecords(HemeraModel): - __tablename__ = "failures_records" +class FailureRecords(HemeraModel): + __tablename__ = "failure_records" record_id = Column(BIGINT, primary_key=True, autoincrement=True) mission_sign = Column(VARCHAR) output_types = Column(VARCHAR) diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index 6a9737684..c11f2526d 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -9,7 +9,7 @@ from indexer.controller.base_controller import BaseController from indexer.executors.concurrent_job_executor import ConcurrentJobExecutor from indexer.jobs.base_job import BaseJob -from indexer.utils.BufferService import BufferService +from indexer.utils.buffer_service import BufferService from indexer.utils.exception_recorder import ExceptionRecorder from indexer.utils.limit_reader import LimitReader from indexer.utils.sync_recorder import BaseRecorder @@ -18,10 +18,6 @@ logger = logging.getLogger(__name__) -M_JOBS: int = int(os.environ.get("M_JOBS", 4)) -M_TIMEOUT: int = int(os.environ.get("M_TIMEOUT", 100)) -M_SIZE: int = int(os.environ.get("M_SIZE", 100)) - class StreamController(BaseController): @@ -65,13 +61,13 @@ def __init__( self.delay = delay def handle_success(self, last_block_number): - self.sync_recorder.set_last_synced_block(last_block_number) + self.sync_recorder.set_last_synced_block(last_block_number, multiprocess=self.job_executor is not None) logger.info("Writing last synced block {}".format(last_block_number)) def handle_failure( self, output_types: List[str], start_block: int, end_block: int, exception_stage: str, exception: str ): - self.sync_recorder.set_failures_record(output_types, start_block, end_block, exception_stage, exception) + self.sync_recorder.set_failure_record(output_types, start_block, end_block, exception_stage, exception) def action( self, @@ -144,6 +140,9 @@ def action( if synced_blocks <= 0: logger.debug("Nothing to sync. Sleeping for {} seconds...".format(period_seconds)) time.sleep(period_seconds) + except Exception as e: + self.shutdown() + raise e finally: if pid_file is not None: @@ -151,7 +150,8 @@ def action( delete_file(pid_file) def shutdown(self): - self.job_executor.shutdown() + if self.job_executor: + self.job_executor.shutdown() self.buffer_service.shutdown() def split_blocks(self, start_block, end_block, step): diff --git a/indexer/utils/BufferService.py b/indexer/utils/buffer_service.py similarity index 100% rename from indexer/utils/BufferService.py rename to indexer/utils/buffer_service.py diff --git a/migrations/manual_versions/20241105_add_address_index_and_stats.sql b/migrations/manual_versions/20241105_add_address_index_and_stats.sql index 9f05b3829..5ab53222f 100644 --- a/migrations/manual_versions/20241105_add_address_index_and_stats.sql +++ b/migrations/manual_versions/20241105_add_address_index_and_stats.sql @@ -1,6 +1,6 @@ BEGIN; --- Running upgrade bc23aa19668e -> 872094559593 +-- Running upgrade bc23aa19668e -> 3bd2e3099bae CREATE TABLE IF NOT EXISTS address_contract_operations ( address BYTEA NOT NULL, @@ -287,6 +287,6 @@ DROP TABLE IF EXISTS daily_transactions_aggregates; DROP TABLE IF EXISTS statistics_wallet_addresses; -UPDATE alembic_version SET version_num='872094559593' WHERE alembic_version.version_num = 'bc23aa19668e'; +UPDATE alembic_version SET version_num='3bd2e3099bae' WHERE alembic_version.version_num = 'bc23aa19668e'; COMMIT; \ No newline at end of file diff --git a/migrations/manual_versions/20241121_add_failure_records_table.sql b/migrations/manual_versions/20241121_add_failure_records_table.sql index d6c552d1b..9e8d7a71b 100644 --- a/migrations/manual_versions/20241121_add_failure_records_table.sql +++ b/migrations/manual_versions/20241121_add_failure_records_table.sql @@ -2,7 +2,7 @@ BEGIN; -- Running upgrade 3bd2e3099bae -> f846e3abeb18 -CREATE TABLE IF NOT EXISTS failures_records ( +CREATE TABLE IF NOT EXISTS failure_records ( record_id BIGSERIAL NOT NULL, mission_sign VARCHAR, output_types VARCHAR, diff --git a/migrations/versions/20241121_add_failure_records_table.py b/migrations/versions/20241121_add_failure_records_table.py index 0ae7af2a3..58a6984f8 100644 --- a/migrations/versions/20241121_add_failure_records_table.py +++ b/migrations/versions/20241121_add_failure_records_table.py @@ -5,37 +5,39 @@ Create Date: 2024-11-21 21:37:25.662986 """ + from typing import Sequence, Union -from alembic import op import sqlalchemy as sa +from alembic import op from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision: str = 'f846e3abeb18' -down_revision: Union[str, None] = '3bd2e3099bae' +revision: str = "f846e3abeb18" +down_revision: Union[str, None] = "3bd2e3099bae" branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.create_table('failures_records', - sa.Column('record_id', sa.BIGINT(), autoincrement=True, nullable=False), - sa.Column('mission_sign', sa.VARCHAR(), nullable=True), - sa.Column('output_types', sa.VARCHAR(), nullable=True), - sa.Column('start_block_number', sa.BIGINT(), nullable=True), - sa.Column('end_block_number', sa.BIGINT(), nullable=True), - sa.Column('exception_stage', sa.VARCHAR(), nullable=True), - sa.Column('exception', postgresql.JSON(astext_type=sa.Text()), nullable=True), - sa.Column('crash_time', postgresql.TIMESTAMP(), nullable=True), - sa.PrimaryKeyConstraint('record_id'), - if_not_exists=True + op.create_table( + "failure_records", + sa.Column("record_id", sa.BIGINT(), autoincrement=True, nullable=False), + sa.Column("mission_sign", sa.VARCHAR(), nullable=True), + sa.Column("output_types", sa.VARCHAR(), nullable=True), + sa.Column("start_block_number", sa.BIGINT(), nullable=True), + sa.Column("end_block_number", sa.BIGINT(), nullable=True), + sa.Column("exception_stage", sa.VARCHAR(), nullable=True), + sa.Column("exception", postgresql.JSON(astext_type=sa.Text()), nullable=True), + sa.Column("crash_time", postgresql.TIMESTAMP(), nullable=True), + sa.PrimaryKeyConstraint("record_id"), + if_not_exists=True, ) # ### end Alembic commands ### def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('failures_records', if_exists=True) + op.drop_table("failure_records", if_exists=True) # ### end Alembic commands ### From 797272592f8250ce4bdb2d8191fb7c34c61e87de Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Fri, 22 Nov 2024 18:30:16 +0800 Subject: [PATCH 42/52] update file sync recorder for multiprocessing --- indexer/utils/sync_recorder.py | 66 +++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/indexer/utils/sync_recorder.py b/indexer/utils/sync_recorder.py index 6a3a3c0f5..931d1bdc8 100644 --- a/indexer/utils/sync_recorder.py +++ b/indexer/utils/sync_recorder.py @@ -1,23 +1,24 @@ +import json import os from datetime import datetime, timezone from sqlalchemy import func from sqlalchemy.dialects.postgresql import insert -from common.models.failures_records import FailuresRecords +from common.models.failure_records import FailureRecords from common.models.sync_record import SyncRecord from common.services.postgresql_service import PostgreSQLService from common.utils.file_utils import smart_open, write_to_file class BaseRecorder(object): - def set_last_synced_block(self, last_synced_block): + def set_last_synced_block(self, last_synced_block, multiprocess): pass def get_last_synced_block(self): pass - def set_failures_record(self, output_types, start_block, end_block, exception_stage, exception): + def set_failure_record(self, output_types, start_block, end_block, exception_stage, exception): pass @@ -26,18 +27,40 @@ class FileSyncRecorder(BaseRecorder): def __init__(self, file_name): self.file_name = file_name - def set_last_synced_block(self, last_synced_block): - write_to_file(self.file_name, str(last_synced_block) + "\n") + def set_last_synced_block(self, last_synced_block, multiprocess=False): + if multiprocess: + wrote_synced_block = self.get_last_synced_block() + if wrote_synced_block < last_synced_block: + write_to_file(self.file_name, str(last_synced_block) + "\n") + else: + write_to_file(self.file_name, str(last_synced_block) + "\n") def get_last_synced_block(self): if not os.path.isfile(self.file_name): self.set_last_synced_block(0) return 0 - with smart_open(self.file_name, "r") as last_synced_block_file: - return int(last_synced_block_file.read()) - def set_failures_record(self, output_types, start_block, end_block, exception_stage, exception): - pass + with smart_open(self.file_name, "r") as last_synced_block_file: + last_synced_block = last_synced_block_file.read() + try: + last_synced_block = int(last_synced_block) + except ValueError as e: + last_synced_block = 0 + return last_synced_block + + def set_failure_record(self, output_types, start_block, end_block, exception_stage, exception): + failure_file = self.file_name + "failure_records" + crash_time = int(datetime.now(timezone.utc).timestamp()) + content = { + "output_types": ",".join(output_types), + "start_block_number": start_block, + "end_block_number": end_block, + "exception_stage": exception_stage, + "exception": exception, + "crash_time": crash_time, + } + + write_to_file(failure_file, json.dumps(content) + "\n", "a+") class PGSyncRecorder(BaseRecorder): @@ -46,10 +69,20 @@ def __init__(self, key, service_url): self.key = key self.service = PostgreSQLService(service_url) - def set_last_synced_block(self, last_synced_block): + def set_last_synced_block(self, last_synced_block, multiprocess=False): session = self.service.get_service_session() update_time = func.to_timestamp(int(datetime.now(timezone.utc).timestamp())) try: + conflict_args = { + 'index_elements': [SyncRecord.mission_sign], + 'set_': { + "last_block_number": last_synced_block, + "update_time": update_time, + }, + } + if multiprocess: + conflict_args['where'] = (SyncRecord.last_block_number <= last_synced_block) + statement = ( insert(SyncRecord) .values( @@ -59,14 +92,7 @@ def set_last_synced_block(self, last_synced_block): "update_time": update_time, } ) - .on_conflict_do_update( - index_elements=[SyncRecord.mission_sign], - set_={ - "last_block_number": last_synced_block, - "update_time": update_time, - }, - where=(SyncRecord.last_block_number <= last_synced_block), - ) + .on_conflict_do_update(**conflict_args) ) session.execute(statement) session.commit() @@ -87,12 +113,12 @@ def get_last_synced_block(self): return result return 0 - def set_failures_record(self, output_types, start_block, end_block, exception_stage, exception): + def set_failure_record(self, output_types, start_block, end_block, exception_stage, exception): session = self.service.get_service_session() try: crash_time = func.to_timestamp(int(datetime.now(timezone.utc).timestamp())) - statement = insert(FailuresRecords).values( + statement = insert(FailureRecords).values( { "mission_sign": self.key, "output_types": ",".join(output_types), From 40bf98690bdc1ee8ce457a231c55f6b6e848aee2 Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Fri, 22 Nov 2024 19:25:26 +0800 Subject: [PATCH 43/52] disable progress bar --- indexer/executors/batch_work_executor.py | 8 ++-- indexer/exporters/postgres_item_exporter.py | 48 ++++++++++----------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/indexer/executors/batch_work_executor.py b/indexer/executors/batch_work_executor.py index 6e7b2b618..ab6ccd96a 100644 --- a/indexer/executors/batch_work_executor.py +++ b/indexer/executors/batch_work_executor.py @@ -48,7 +48,7 @@ def __init__( self.progress_logger = ProgressLogger(name=job_name, logger=self.logger) def execute(self, work_iterable, work_handler, total_items=None, split_method=None): - self.progress_logger.start(total_items=total_items) + # self.progress_logger.start(total_items=total_items) submit_batches = ( dynamic_batch_iterator(work_iterable, lambda: self.batch_size) if split_method is None @@ -80,7 +80,7 @@ def _fail_safe_execute(self, work_handler, batch, custom_splitting): retry_exceptions=self.retry_exceptions, ) - self.progress_logger.track(len(batch)) + # self.progress_logger.track(len(batch)) # Some acceptable race conditions are possible def _try_decrease_batch_size(self, current_batch_size): @@ -110,7 +110,7 @@ def wait(self): if len(self._futures) != 0: raise FastShutdownError("Futures failed to complete successfully.") - self.progress_logger.finish() + # self.progress_logger.finish() def shutdown(self): self.executor.shutdown(wait=10) @@ -118,7 +118,7 @@ def shutdown(self): if len(self._futures) != 0: raise FastShutdownError("Futures failed to complete successfully.") - self.progress_logger.finish() + # self.progress_logger.finish() def _check_completed_futures(self): """Fail safe in this case means fail fast. TODO: Add retry logic""" diff --git a/indexer/exporters/postgres_item_exporter.py b/indexer/exporters/postgres_item_exporter.py index 83de250c5..1b051f886 100644 --- a/indexer/exporters/postgres_item_exporter.py +++ b/indexer/exporters/postgres_item_exporter.py @@ -33,14 +33,14 @@ def export_items(self, items, **kwargs): desc = f"{job_name}(PG)" else: desc = "Exporting items" - self.main_progress = TqdmExtraFormat( - total=len(items), - desc=desc.ljust(35), - unit="items", - position=0, - ncols=90, - bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}] Est: {total_time}", - ) + # self.main_progress = TqdmExtraFormat( + # total=len(items), + # desc=desc.ljust(35), + # unit="items", + # position=0, + # ncols=90, + # bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}] Est: {total_time}", + # ) conn = self.service.get_connection() try: @@ -60,15 +60,15 @@ def export_items(self, items, **kwargs): converter = pg_config["converter"] # Initialize sub-progress bar for current table - self.sub_progress = TqdmExtraFormat( - total=len(item_group), - desc=f"Processing {table.__tablename__}".ljust(35), - unit="items", - position=1, - leave=False, - ncols=90, - bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]", - ) + # self.sub_progress = TqdmExtraFormat( + # total=len(item_group), + # desc=f"Processing {table.__tablename__}".ljust(35), + # unit="items", + # position=1, + # leave=False, + # ncols=90, + # bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]", + # ) cur = conn.cursor() data = [] @@ -77,8 +77,8 @@ def export_items(self, items, **kwargs): for item in item_group: converted_item = converter(table, item, do_update) data.append(converted_item) - self.sub_progress.update(1) - self.main_progress.update(1) + # self.sub_progress.update(1) + # self.main_progress.update(1) if data: columns = list(data[0].keys()) @@ -93,7 +93,7 @@ def export_items(self, items, **kwargs): conn.commit() tables.append(table.__tablename__) - self.sub_progress.close() + # self.sub_progress.close() except Exception as e: logger.error(f"Error exporting items: {e}") @@ -101,10 +101,10 @@ def export_items(self, items, **kwargs): raise e finally: self.service.release_connection(conn) - if self.main_progress: - self.main_progress.close() - if self.sub_progress: - self.sub_progress.close() + # if self.main_progress: + # self.main_progress.close() + # if self.sub_progress: + # self.sub_progress.close() end_time = datetime.now(tzlocal()) From 1755bb56996ec231a636bc7314d42d4917c173b0 Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Fri, 22 Nov 2024 19:25:46 +0800 Subject: [PATCH 44/52] enable daemon --- indexer/executors/concurrent_job_executor.py | 2 +- indexer/utils/buffer_service.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/indexer/executors/concurrent_job_executor.py b/indexer/executors/concurrent_job_executor.py index 65bc461ca..4202ecde9 100644 --- a/indexer/executors/concurrent_job_executor.py +++ b/indexer/executors/concurrent_job_executor.py @@ -35,7 +35,7 @@ def __init__(self, buffer_service, max_processors=1, call_back=None, error_callb self.task_queue = Queue() self.task_processor = Thread(target=self._process_tasks) - self.task_processor.daemon = False + self.task_processor.daemon = True self.task_processor.start() self.logger = logging.getLogger(__name__) diff --git a/indexer/utils/buffer_service.py b/indexer/utils/buffer_service.py index ffcfc5caa..8363db4c4 100644 --- a/indexer/utils/buffer_service.py +++ b/indexer/utils/buffer_service.py @@ -47,7 +47,7 @@ def __init__( self.submit_export_pool = ThreadPoolExecutor(max_workers=export_workers) self._flush_thread = Thread(target=self._flush_loop) - self._flush_thread.daemon = False + self._flush_thread.daemon = True self._flush_thread.start() self._setup_signal_handlers() From 9b2bc5ee9cb320693787cf9a3c0e4f2813852e83 Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Fri, 22 Nov 2024 19:25:53 +0800 Subject: [PATCH 45/52] make format --- indexer/utils/sync_recorder.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/indexer/utils/sync_recorder.py b/indexer/utils/sync_recorder.py index 931d1bdc8..3d8b79e70 100644 --- a/indexer/utils/sync_recorder.py +++ b/indexer/utils/sync_recorder.py @@ -74,14 +74,14 @@ def set_last_synced_block(self, last_synced_block, multiprocess=False): update_time = func.to_timestamp(int(datetime.now(timezone.utc).timestamp())) try: conflict_args = { - 'index_elements': [SyncRecord.mission_sign], - 'set_': { + "index_elements": [SyncRecord.mission_sign], + "set_": { "last_block_number": last_synced_block, "update_time": update_time, }, } if multiprocess: - conflict_args['where'] = (SyncRecord.last_block_number <= last_synced_block) + conflict_args["where"] = SyncRecord.last_block_number <= last_synced_block statement = ( insert(SyncRecord) From 827b01facd24d51c8e7a320d850e3f71513acacc Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Mon, 25 Nov 2024 09:55:33 +0800 Subject: [PATCH 46/52] parameter tuning --- indexer/executors/concurrent_job_executor.py | 6 +++--- indexer/exporters/postgres_item_exporter.py | 2 +- indexer/utils/buffer_service.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/indexer/executors/concurrent_job_executor.py b/indexer/executors/concurrent_job_executor.py index 4202ecde9..cf93de598 100644 --- a/indexer/executors/concurrent_job_executor.py +++ b/indexer/executors/concurrent_job_executor.py @@ -82,10 +82,10 @@ def _process_tasks(self): self.logger.error(f"Unexpected error in task processor: {e}") def _handle_task_completion(self, result, processor, param): - self.buffer_service.write(result) - - self.logger.debug(f"Task with parameter:{param} completed successfully by processor: {processor}") self._release_processor(processor) + self.logger.info(f"Task with parameter:{param} completed successfully by processor: {processor}") + + self.buffer_service.write(result) if self.call_back: param["processor"] = processor diff --git a/indexer/exporters/postgres_item_exporter.py b/indexer/exporters/postgres_item_exporter.py index 1b051f886..51165a44a 100644 --- a/indexer/exporters/postgres_item_exporter.py +++ b/indexer/exporters/postgres_item_exporter.py @@ -14,7 +14,7 @@ logger = logging.getLogger(__name__) -COMMIT_BATCH_SIZE = 1000 +COMMIT_BATCH_SIZE = 50000 class PostgresItemExporter(BaseExporter): diff --git a/indexer/utils/buffer_service.py b/indexer/utils/buffer_service.py index 8363db4c4..924ae770b 100644 --- a/indexer/utils/buffer_service.py +++ b/indexer/utils/buffer_service.py @@ -118,7 +118,7 @@ def flush_buffer(self): flush_items.extend(self.buffer[key]) self.buffer.clear() - + self.logger.info(f"Flush data between block range: {block_range}") future = self.submit_export_pool.submit(self.export_items, flush_items) future.add_done_callback(self._handle_export_completion) From 7d74f5e806928258683701c58b74f4dca68f8df8 Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Mon, 25 Nov 2024 10:48:49 +0800 Subject: [PATCH 47/52] running log --- indexer/controller/stream_controller.py | 3 +++ indexer/exporters/console_item_exporter.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index c11f2526d..e5c9049dd 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -168,6 +168,8 @@ def _calculate_target_block(self, current_block, last_synced_block, end_block, s def run_jobs(jobs, start_block, end_block, max_retries, processor=None): try: + if processor: + logger.info(f"Task in {processor} begin, run block range between {start_block} and {end_block}") jobs_export_data = {} for job in jobs: job_export_data = job_with_retires( @@ -183,6 +185,7 @@ def run_jobs(jobs, start_block, end_block, max_retries, processor=None): def job_with_retires(job, start_block, end_block, max_retries, processor=None): for retry in range(max_retries): try: + logger.info(f"Task in {processor} run {job.__class__.__name__}") return job.run(start_block=start_block, end_block=end_block, processor=processor) except HemeraBaseException as e: diff --git a/indexer/exporters/console_item_exporter.py b/indexer/exporters/console_item_exporter.py index 7df212393..412dd5aa7 100644 --- a/indexer/exporters/console_item_exporter.py +++ b/indexer/exporters/console_item_exporter.py @@ -14,6 +14,8 @@ class ConsoleItemExporter(BaseExporter): def export_items(self, items, **kwargs): + print(f"Items exported: {len(items)}") + return for item in items: self.export_item(item, **kwargs) From f6b7ec7ea552652694afd5eb6da9b65e94b4b7f3 Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Mon, 25 Nov 2024 16:18:58 +0800 Subject: [PATCH 48/52] modify log --- common/utils/exception_control.py | 8 ++++---- indexer/controller/stream_controller.py | 4 +++- indexer/executors/concurrent_job_executor.py | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/common/utils/exception_control.py b/common/utils/exception_control.py index d862deec9..fdeb937da 100644 --- a/common/utils/exception_control.py +++ b/common/utils/exception_control.py @@ -126,9 +126,9 @@ def get_exception_details(e: Exception) -> dict: exc_type, exc_value, exc_traceback = sys.exc_info() return { - "type": exc_type.__name__, - "module": exc_type.__module__, - "message": str(exc_value), + "type": exc_type.__name__ if exc_type else None, + "module": exc_type.__module__ if exc_type else None, + "message": str(exc_value) if exc_value else str(e), "traceback": traceback.format_exc(), - "line_number": exc_traceback.tb_lineno, + "line_number": exc_traceback.tb_lineno if exc_traceback else None, } diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index e5c9049dd..c6eb6cce2 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -168,8 +168,10 @@ def _calculate_target_block(self, current_block, last_synced_block, end_block, s def run_jobs(jobs, start_block, end_block, max_retries, processor=None): try: - if processor: + if processor and processor != 'None': logger.info(f"Task in {processor} begin, run block range between {start_block} and {end_block}") + else: + logger.info(f"Task begin, run block range between {start_block} and {end_block}") jobs_export_data = {} for job in jobs: job_export_data = job_with_retires( diff --git a/indexer/executors/concurrent_job_executor.py b/indexer/executors/concurrent_job_executor.py index cf93de598..dbc485df8 100644 --- a/indexer/executors/concurrent_job_executor.py +++ b/indexer/executors/concurrent_job_executor.py @@ -92,8 +92,8 @@ def _handle_task_completion(self, result, processor, param): self.call_back(**param) def _handle_task_failed(self, error, processor, param): - self.logger.error(f"with parameter:{param} failed in processor:{processor} error: {error}") self._release_processor(processor) + self.logger.error(f"with parameter:{param} failed in processor:{processor} error: {error}") if self.error_callback: try: From e59374838462d413dce193566c9b3ca7b61c1667 Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Mon, 25 Nov 2024 16:35:25 +0800 Subject: [PATCH 49/52] modify log --- indexer/controller/stream_controller.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index c6eb6cce2..ea63612b8 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -187,7 +187,10 @@ def run_jobs(jobs, start_block, end_block, max_retries, processor=None): def job_with_retires(job, start_block, end_block, max_retries, processor=None): for retry in range(max_retries): try: - logger.info(f"Task in {processor} run {job.__class__.__name__}") + if processor and processor != 'None': + logger.info(f"Task in {processor} run {job.__class__.__name__}") + else: + logger.info(f"Task run {job.__class__.__name__}") return job.run(start_block=start_block, end_block=end_block, processor=processor) except HemeraBaseException as e: From 85df4b414a5242c652a3c9477416289fa2e09e1b Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Tue, 26 Nov 2024 14:28:09 +0800 Subject: [PATCH 50/52] rebuild exception detail while missing context --- common/utils/exception_control.py | 4 ++++ indexer/executors/batch_work_executor.py | 25 ++++++++++++++------ indexer/executors/concurrent_job_executor.py | 1 + 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/common/utils/exception_control.py b/common/utils/exception_control.py index fdeb937da..6b6b8373e 100644 --- a/common/utils/exception_control.py +++ b/common/utils/exception_control.py @@ -32,8 +32,12 @@ def __init__(self, message): self.crashable = None self.retriable = None self.message = message + self.detail = None super().__init__(message) + def update_detail(self, detail): + self.detail = detail + class RetriableError(HemeraBaseException): def __init__(self, message=""): diff --git a/indexer/executors/batch_work_executor.py b/indexer/executors/batch_work_executor.py index ab6ccd96a..abd791d8a 100644 --- a/indexer/executors/batch_work_executor.py +++ b/indexer/executors/batch_work_executor.py @@ -7,7 +7,7 @@ from requests.exceptions import TooManyRedirects from web3._utils.threads import Timeout as Web3Timeout -from common.utils.exception_control import FastShutdownError, RetriableError +from common.utils.exception_control import FastShutdownError, RetriableError, get_exception_details from indexer.executors.bounded_executor import BoundedExecutor from indexer.utils.progress_logger import ProgressLogger @@ -73,12 +73,23 @@ def _fail_safe_execute(self, work_handler, batch, custom_splitting): for sub_batch in dynamic_batch_iterator(batch, lambda: self.batch_size): self._fail_safe_execute(work_handler, sub_batch, custom_splitting) else: - execute_with_retries( - work_handler, - batch, - max_retries=self.max_retries, - retry_exceptions=self.retry_exceptions, - ) + try: + execute_with_retries( + work_handler, + batch, + max_retries=self.max_retries, + retry_exceptions=self.retry_exceptions, + ) + except Exception as e: + error_details = get_exception_details(e) + warped_exception = FastShutdownError(str(e)) + warped_exception.update_detail(error_details) + raise warped_exception + except Exception as e: + error_details = get_exception_details(e) + warped_exception = FastShutdownError(str(e)) + warped_exception.update_detail(error_details) + raise warped_exception # self.progress_logger.track(len(batch)) diff --git a/indexer/executors/concurrent_job_executor.py b/indexer/executors/concurrent_job_executor.py index dbc485df8..9c7a18d5f 100644 --- a/indexer/executors/concurrent_job_executor.py +++ b/indexer/executors/concurrent_job_executor.py @@ -99,6 +99,7 @@ def _handle_task_failed(self, error, processor, param): try: param["processor"] = processor error_details = get_exception_details(error) + error_details = error_details if error_details["type"] or not hasattr(error, "detail") else error.detail self.error_callback( output_types=self.buffer_service.required_output_types, start_block=param["start_block"], From f424fe4d8f3550dc9e8ec7a53de51409225fc1b8 Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Tue, 26 Nov 2024 17:32:47 +0800 Subject: [PATCH 51/52] compatible change for reorg --- cli/reorg.py | 83 +++++++++---------- indexer/controller/reorg_controller.py | 9 +- indexer/controller/scheduler/job_scheduler.py | 2 - .../controller/scheduler/reorg_scheduler.py | 26 +++--- indexer/jobs/base_job.py | 12 +-- indexer/jobs/export_blocks_job.py | 6 +- indexer/jobs/export_reorg_job.py | 6 +- indexer/jobs/export_traces_job.py | 34 ++++++-- indexer/utils/reorg.py | 51 +++++++++++- 9 files changed, 144 insertions(+), 85 deletions(-) diff --git a/cli/reorg.py b/cli/reorg.py index b600524a5..420a33851 100644 --- a/cli/reorg.py +++ b/cli/reorg.py @@ -11,6 +11,7 @@ from indexer.utils.exception_recorder import ExceptionRecorder from indexer.utils.logging_utils import configure_logging, configure_signals from indexer.utils.provider import get_provider_from_uri +from indexer.utils.reorg import check_reorg from indexer.utils.rpc_utils import pick_random_provider_uri from indexer.utils.thread_local_proxy import ThreadLocalProxy @@ -46,18 +47,6 @@ envvar="POSTGRES_URL", help="The required postgres connection url." "e.g. postgresql+psycopg2://postgres:admin@127.0.0.1:5432/ethereum", ) -@click.option( - "-v", - "--db-version", - default="head", - show_default=True, - type=str, - envvar="DB_VERSION", - help="The database version to initialize the database. using the alembic script's revision ID to " - "specify a version." - " e.g. head, indicates the latest version." - "or base, indicates the empty database without any table.", -) @click.option( "-b", "--batch-size", @@ -77,6 +66,7 @@ ) @click.option( "--block-number", + default=None, show_default=True, type=int, envvar="BLOCK_NUMBER", @@ -85,12 +75,20 @@ @click.option( "-r", "--ranges", - default=1000, + default=10, show_default=True, type=int, envvar="RANGES", help="Specify the range limit for data fixing.", ) +@click.option( + "--check-ranges", + default=None, + show_default=True, + type=int, + envvar="CHECK_RANGES", + help="Specify the range for block continuous checking.", +) @click.option( "--log-file", default=None, @@ -109,14 +107,6 @@ envvar="MULTI_CALL_ENABLE", ) @click.option("--cache", default=None, show_default=True, type=str, envvar="CACHE", help="Cache") -@click.option( - "--auto-upgrade-db", - default=True, - show_default=True, - type=bool, - envvar="AUTO_UPGRADE_DB", - help="Whether to automatically run database migration scripts to update the database to the latest version.", -) @click.option( "--log-level", default="INFO", @@ -131,14 +121,13 @@ def reorg( postgres_url, block_number, ranges, + check_ranges, batch_size, debug_batch_size, - db_version="head", multicall=True, log_file=None, cache=None, config_file=None, - auto_upgrade_db=True, log_level="INFO", ): configure_logging(log_level=log_level, log_file=log_file) @@ -151,8 +140,8 @@ def reorg( # build postgresql service if postgres_url: - service = PostgreSQLService(postgres_url, db_version=db_version, init_schema=auto_upgrade_db) - config = {"db_service": service} + service = PostgreSQLService(postgres_url) + config = {"db_service": postgres_url} # exception_recorder.init_pg_service(service) else: logging.error("No postgres url provided. Exception recorder will not be useful.") @@ -177,11 +166,9 @@ def reorg( output_types = list(generate_output_types(entity_types)) job_scheduler = ReorgScheduler( - batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)), - batch_web3_debug_provider=ThreadLocalProxy(lambda: get_provider_from_uri(debug_provider_uri, batch=True)), - item_exporters=PostgresItemExporter( - postgres_url=postgres_url, db_version=db_version, init_schema=auto_upgrade_db - ), + web3_provider_uri=provider_uri, + web3_debug_provider_uri=debug_provider_uri, + item_exporters=PostgresItemExporter(service_url=postgres_url), batch_size=batch_size, debug_batch_size=debug_batch_size, required_output_types=output_types, @@ -194,23 +181,29 @@ def reorg( batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=False)), job_scheduler=job_scheduler, ranges=ranges, - config=config, + service=service, ) - job = None + current_block = controller.get_current_block_number() + if check_ranges: + check_begin = current_block - check_ranges + check_reorg(service, check_begin) + else: + check_reorg(service) + while True: - if job: - controller.action( - job_id=job.job_id, - block_number=job.last_fixed_block_number - 1, - remains=job.remain_process, - ) - else: + if block_number: controller.action(block_number=block_number) - - job = controller.wake_up_next_job() - if job: - logging.info(f"Waking up uncompleted job: {job.job_id}.") else: - logging.info("No more uncompleted jobs to wake-up, reorg process will terminate.") - break + job = controller.wake_up_next_job() + if job: + logging.info(f"Waking up uncompleted job: {job.job_id}.") + + controller.action( + job_id=job.job_id, + block_number=job.last_fixed_block_number - 1, + remains=job.remain_process, + ) + else: + logging.info("No more uncompleted jobs to wake-up, reorg process will terminate.") + break diff --git a/indexer/controller/reorg_controller.py b/indexer/controller/reorg_controller.py index 816aff5d5..1cd615e0d 100644 --- a/indexer/controller/reorg_controller.py +++ b/indexer/controller/reorg_controller.py @@ -19,16 +19,17 @@ class ReorgController(BaseController): - def __init__(self, batch_web3_provider, job_scheduler, ranges, config, max_retries=5): + def __init__(self, batch_web3_provider, job_scheduler, ranges, service, max_retries=5): self.ranges = ranges self.web3 = build_web3(batch_web3_provider) - self.db_service = config.get("db_service") + self.db_service = service self.job_scheduler = job_scheduler self.max_retries = max_retries + def get_current_block_number(self): + return int(self.web3.eth.block_number) + def action(self, job_id=None, block_number=None, remains=None, retry_errors=True): - if block_number is None: - raise ValueError("Reorging mission must provide a block_number.") if remains is None: remains = self.ranges diff --git a/indexer/controller/scheduler/job_scheduler.py b/indexer/controller/scheduler/job_scheduler.py index 3180395f7..e13c85a55 100644 --- a/indexer/controller/scheduler/job_scheduler.py +++ b/indexer/controller/scheduler/job_scheduler.py @@ -10,10 +10,8 @@ from common.utils.format_utils import bytes_to_hex_str from common.utils.module_loading import import_submodules from enumeration.record_level import RecordLevel -from indexer.exporters.console_item_exporter import ConsoleItemExporter from indexer.jobs import CSVSourceJob from indexer.jobs.base_job import BaseExportJob, BaseJob, ExtensionJob, FilterTransactionDataJob -from indexer.jobs.check_block_consensus_job import CheckBlockConsensusJob from indexer.jobs.export_blocks_job import ExportBlocksJob from indexer.jobs.source_job.pg_source_job import PGSourceJob from indexer.utils.exception_recorder import ExceptionRecorder diff --git a/indexer/controller/scheduler/reorg_scheduler.py b/indexer/controller/scheduler/reorg_scheduler.py index f1c88c651..3130d5ad4 100644 --- a/indexer/controller/scheduler/reorg_scheduler.py +++ b/indexer/controller/scheduler/reorg_scheduler.py @@ -6,6 +6,7 @@ from redis.client import Redis from common.models.tokens import Tokens +from common.services.postgresql_service import PostgreSQLService from common.utils.format_utils import bytes_to_hex_str from common.utils.module_loading import import_submodules from indexer.jobs import FilterTransactionDataJob @@ -37,8 +38,8 @@ def get_tokens_from_db(service): class ReorgScheduler: def __init__( self, - batch_web3_provider, - batch_web3_debug_provider, + web3_provider_uri, + web3_debug_provider_uri, batch_size=100, debug_batch_size=1, max_workers=5, @@ -48,8 +49,8 @@ def __init__( cache="memory", multicall=None, ): - self.batch_web3_provider = batch_web3_provider - self.batch_web3_debug_provider = batch_web3_debug_provider + self.web3_provider_uri = web3_provider_uri + self.web3_debug_provider_uri = web3_debug_provider_uri self.item_exporters = item_exporters self.batch_size = batch_size self.debug_batch_size = debug_batch_size @@ -60,7 +61,7 @@ def __init__( self.job_classes = [] self.job_map = defaultdict(list) self.dependency_map = defaultdict(list) - self.pg_service = config.get("db_service") if "db_service" in config else None + self.pg_service = PostgreSQLService(config.get("db_service")) if "db_service" in config else None self._is_multicall = multicall self.discover_and_register_job_classes() @@ -112,9 +113,8 @@ def instantiate_jobs(self): continue job = job_class( required_output_types=self.required_output_types, - batch_web3_provider=self.batch_web3_provider, - batch_web3_debug_provider=self.batch_web3_debug_provider, - item_exporters=self.item_exporters, + web3_provider_uri=self.web3_provider_uri, + web3_debug_provider_uri=self.web3_debug_provider_uri, batch_size=self.batch_size, debug_batch_size=self.debug_batch_size, max_workers=self.max_workers, @@ -130,9 +130,8 @@ def instantiate_jobs(self): if ExportBlocksJob in self.resolved_job_classes: export_blocks_job = ExportBlocksJob( required_output_types=self.required_output_types, - batch_web3_provider=self.batch_web3_provider, - batch_web3_debug_provider=self.batch_web3_debug_provider, - item_exporters=self.item_exporters, + web3_provider_uri=self.web3_provider_uri, + web3_debug_provider_uri=self.web3_debug_provider_uri, batch_size=self.batch_size, debug_batch_size=self.debug_batch_size, max_workers=self.max_workers, @@ -146,9 +145,8 @@ def instantiate_jobs(self): export_reorg_job = ExportReorgJob( required_output_types=self.required_output_types, - batch_web3_provider=self.batch_web3_provider, - batch_web3_debug_provider=self.batch_web3_debug_provider, - item_exporters=self.item_exporters, + web3_provider_uri=self.web3_provider_uri, + web3_debug_provider_uri=self.web3_debug_provider_uri, batch_size=self.batch_size, debug_batch_size=self.debug_batch_size, max_workers=self.max_workers, diff --git a/indexer/jobs/base_job.py b/indexer/jobs/base_job.py index ba0af8c15..178c2c8ab 100644 --- a/indexer/jobs/base_job.py +++ b/indexer/jobs/base_job.py @@ -6,6 +6,7 @@ from web3 import Web3 from common.converter.pg_converter import domain_model_mapping +from common.services.postgresql_service import PostgreSQLService from common.utils.exception_control import FastShutdownError from common.utils.format_utils import to_snake_case from indexer.domain import Domain @@ -68,7 +69,7 @@ def init_token_cache(cls, _token=None): def __init__(self, **kwargs): - self._multiprocess = kwargs["multiprocess"] + self._multiprocess = kwargs.get("multiprocess", False) self._required_output_types = kwargs["required_output_types"] self._web3_provider_uri = kwargs["web3_provider_uri"] self._web3_debug_provider_uri = kwargs["web3_debug_provider_uri"] @@ -82,7 +83,7 @@ def __init__(self, **kwargs): self._should_reorg = False self._should_reorg_type = set() - self._service = kwargs["config"].get("db_service", None) + self._service_url = kwargs["config"].get("db_service", None) job_name_snake = to_snake_case(self.job_name) self.user_defined_config = kwargs["config"][job_name_snake] if kwargs["config"].get(job_name_snake) else {} @@ -143,18 +144,19 @@ def _start(self, **kwargs): self._data_buff[dataclass.type()].clear() def _pre_reorg(self, **kwargs): - if self._service is None: + if self._service_url is None: raise FastShutdownError("PG Service is not set") + service = PostgreSQLService(self._service_url) reorg_block = int(kwargs["start_block"]) output_table = {} for domain in self.output_types: - output_table[domain_model_mapping[domain.__name__]["table"]] = domain.type() + output_table[domain_model_mapping[domain]["table"]] = domain.type() # output_table.add(domain_model_mapping[domain.__name__]["table"]) for table in output_table.keys(): - if should_reorg(reorg_block, table, self._service): + if should_reorg(reorg_block, table, service): self._should_reorg_type.add(output_table[table]) self._should_reorg = True diff --git a/indexer/jobs/export_blocks_job.py b/indexer/jobs/export_blocks_job.py index d018a3528..3b3d175cb 100644 --- a/indexer/jobs/export_blocks_job.py +++ b/indexer/jobs/export_blocks_job.py @@ -2,6 +2,7 @@ import orjson +from common.services.postgresql_service import PostgreSQLService from common.utils.exception_control import FastShutdownError from indexer.domain.block import Block from indexer.domain.block_ts_mapper import BlockTsMapper @@ -55,11 +56,12 @@ def _start(self, **kwargs): ) def _pre_reorg(self, **kwargs): - if self._service is None: + if self._service_url is None: raise FastShutdownError("PG Service is not set") + service = PostgreSQLService(self._service_url) reorg_block = int(kwargs["start_block"]) - set_reorg_sign(self._reorg_jobs, reorg_block, self._service) + set_reorg_sign(self._reorg_jobs, reorg_block, service) self._should_reorg_type.add(Block.type()) self._should_reorg = True diff --git a/indexer/jobs/export_reorg_job.py b/indexer/jobs/export_reorg_job.py index 114c5a14f..0ad052065 100644 --- a/indexer/jobs/export_reorg_job.py +++ b/indexer/jobs/export_reorg_job.py @@ -3,6 +3,7 @@ from psycopg2.extras import execute_values from common.converter.pg_converter import domain_model_mapping +from common.services.postgresql_service import PostgreSQLService from indexer.exporters.postgres_item_exporter import sql_insert_statement from indexer.jobs.base_job import BaseJob @@ -14,10 +15,11 @@ class ExportReorgJob(BaseJob): def __init__(self, **kwargs): super().__init__(**kwargs) self._should_reorg = True + self._service = PostgreSQLService(self._service_url) def _process(self, **kwargs): block_number = int(kwargs["start_block"]) - conn = self._service.get_conn() + conn = self._service.get_connection() cur = conn.cursor() try: @@ -57,7 +59,7 @@ def _process(self, **kwargs): # print(item_type, insert_stmt, [i[-1] for i in data]) raise Exception("Reorg chain data error") finally: - self._service.release_conn(conn) + self._service.release_connection(conn) self._data_buff.clear() @staticmethod diff --git a/indexer/jobs/export_traces_job.py b/indexer/jobs/export_traces_job.py index b1c1b2f40..9314679ac 100644 --- a/indexer/jobs/export_traces_job.py +++ b/indexer/jobs/export_traces_job.py @@ -15,7 +15,9 @@ from indexer.jobs.base_job import BaseExportJob from indexer.utils.exception_recorder import ExceptionRecorder from indexer.utils.json_rpc_requests import generate_trace_block_by_number_json_rpc +from indexer.utils.provider import get_provider_from_uri from indexer.utils.rpc_utils import rpc_response_to_result, zip_rpc_response +from indexer.utils.thread_local_proxy import ThreadLocalProxy logger = logging.getLogger(__name__) exception_recorder = ExceptionRecorder() @@ -26,17 +28,35 @@ class ExportTracesJob(BaseExportJob): dependency_types = [Block] output_types = [Trace, ContractInternalTransaction, UpdateBlockInternalCount] able_to_reorg = True + able_to_multi_process = True def __init__(self, **kwargs): super().__init__(**kwargs) - self._batch_web3_provider = kwargs["batch_web3_debug_provider"] - self._batch_work_executor = BatchWorkExecutor( - kwargs["debug_batch_size"], - kwargs["max_workers"], - job_name=self.__class__.__name__, - ) - self._is_batch = kwargs["debug_batch_size"] > 1 + if not self._multiprocess: + self._batch_work_provider = ThreadLocalProxy( + lambda: get_provider_from_uri(self._web3_debug_provider_uri, batch=True) + ) + + self._batch_work_executor = BatchWorkExecutor( + self._batch_size, + self._max_workers, + job_name=self.logger_name, + ) + + def _start(self, **kwargs): + super()._start(**kwargs) + + if self._multiprocess: + self._batch_work_provider = ThreadLocalProxy( + lambda: get_provider_from_uri(self._web3_debug_provider_uri, batch=True) + ) + + self._batch_work_executor = BatchWorkExecutor( + self._batch_size, + self._max_workers, + job_name=self.logger_name, + ) def _collect(self, **kwargs): self._batch_work_executor.execute( diff --git a/indexer/utils/reorg.py b/indexer/utils/reorg.py index d84e22baf..0cc8044d4 100644 --- a/indexer/utils/reorg.py +++ b/indexer/utils/reorg.py @@ -1,22 +1,24 @@ import logging from datetime import datetime, timezone -from sqlalchemy import and_ +from sqlalchemy import and_, func, insert, literal, select from common.converter.pg_converter import domain_model_mapping from common.models import HemeraModel +from common.models.blocks import Blocks +from common.models.fix_record import FixRecord from common.services.postgresql_service import PostgreSQLService from common.utils.exception_control import RetriableError def set_reorg_sign(jobs, block_number, service): - conn = service.get_conn() + conn = service.get_connection() cur = conn.cursor() try: table_done = set() for job in jobs: for output in job.output_types: - model = domain_model_mapping[output.__name__] + model = domain_model_mapping[output] table = model["table"] if table.__name__ in table_done: continue @@ -47,7 +49,7 @@ def set_reorg_sign(jobs, block_number, service): logging.error(e) raise RetriableError(e) finally: - service.release_conn(conn) + service.release_connection(conn) def should_reorg(block_number: int, table: HemeraModel, service: PostgreSQLService): @@ -67,3 +69,44 @@ def should_reorg(block_number: int, table: HemeraModel, service: PostgreSQLServi finally: session.close() return result is not None + + +def check_reorg(service: PostgreSQLService, check_range: int = None): + check_where = and_(Blocks.reorg == False, Blocks.number >= check_range) if check_range else Blocks.reorg == False + + inner_query = ( + select( + Blocks.number, + Blocks.hash, + Blocks.parent_hash, + func.lag(Blocks.number, 1).over(order_by=Blocks.number).label("parent_number"), + func.lag(Blocks.hash, 1).over(order_by=Blocks.number).label("lag_hash"), + ) + .where(check_where) + .alias("align_table") + ) + + select_stmt = select( + inner_query.c.number.label("start_block_number"), + (inner_query.c.number + 1).label("last_fixed_block_number"), + literal(5).label("remain_process"), + literal("submitted").label("job_status"), + ).where( + and_( + inner_query.c.parent_hash != inner_query.c.lag_hash, inner_query.c.number == inner_query.c.parent_number + 1 + ) + ) + + insert_stmt = insert(FixRecord).from_select( + ["start_block_number", "last_fixed_block_number", "remain_process", "job_status"], select_stmt + ) + + db_session = service.get_service_session() + db_session.execute(insert_stmt) + db_session.commit() + db_session.close() + + +if __name__ == "__main__": + service = PostgreSQLService("postgresql://postgres:admin@localhost:5432/hemera_indexer") + check_reorg(service) From 961688a21ade3dac3dd150e7570b1c6c6d1dba39 Mon Sep 17 00:00:00 2001 From: "zihao.xu" Date: Tue, 26 Nov 2024 17:33:04 +0800 Subject: [PATCH 52/52] make format --- indexer/controller/stream_controller.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/indexer/controller/stream_controller.py b/indexer/controller/stream_controller.py index ea63612b8..b9f131a7b 100644 --- a/indexer/controller/stream_controller.py +++ b/indexer/controller/stream_controller.py @@ -168,7 +168,7 @@ def _calculate_target_block(self, current_block, last_synced_block, end_block, s def run_jobs(jobs, start_block, end_block, max_retries, processor=None): try: - if processor and processor != 'None': + if processor and processor != "None": logger.info(f"Task in {processor} begin, run block range between {start_block} and {end_block}") else: logger.info(f"Task begin, run block range between {start_block} and {end_block}") @@ -187,7 +187,7 @@ def run_jobs(jobs, start_block, end_block, max_retries, processor=None): def job_with_retires(job, start_block, end_block, max_retries, processor=None): for retry in range(max_retries): try: - if processor and processor != 'None': + if processor and processor != "None": logger.info(f"Task in {processor} run {job.__class__.__name__}") else: logger.info(f"Task run {job.__class__.__name__}")