From 1a49df3a6174eb8502b93f3ea2d997500a010324 Mon Sep 17 00:00:00 2001 From: tackhwa <55059307+tackhwa@users.noreply.github.com> Date: Wed, 14 Aug 2024 16:58:21 +0800 Subject: [PATCH 1/9] support brave search api --- lagent/actions/brave_search.py | 144 +++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 lagent/actions/brave_search.py diff --git a/lagent/actions/brave_search.py b/lagent/actions/brave_search.py new file mode 100644 index 00000000..b9abaf10 --- /dev/null +++ b/lagent/actions/brave_search.py @@ -0,0 +1,144 @@ +import os +from typing import List, Optional, Tuple, Type, Union + +import requests + +from lagent.schema import ActionReturn, ActionStatusCode +from .base_action import BaseAction, tool_api +from .parser import BaseParser, JsonParser + + +class BraveSearch(BaseAction): + """Wrapper around the Brave Search API. + + To use, you should pass your Brave API key to the constructor. + + Code is modified from lang-chain BraveSearchWrapper + (https://github.com/daver987/langchain/blob/c5016e2 + b0b4878b0a920e809a5169d80b409288b/libs/community/ + langchain_community/utilities/brave_search.py) + + Args: + api_key (str): API KEY to use brave search API, + You can create a free API key at https://brave.com/search/api/. + timeout (int): Upper bound of waiting time for a brave request. + search_type (str): Brave API support ['web', 'images', 'news', + 'videos'] types of search. + description (dict): The description of the action. Defaults to ``None``. + parser (Type[BaseParser]): The parser class to process the + action's inputs and outputs. Defaults to :class:`JsonParser`. + enable (bool): Whether the action is enabled. Defaults to ``True``. + """ + # result_key_for_type = { + # 'news': 'news', + # 'places': 'places', + # 'images': 'images', + # 'search': 'organic', + # } + + def __init__(self, + api_key: Optional[str] = None, + timeout: int = 5, + search_type: str = 'web', + description: Optional[dict] = None, + parser: Type[BaseParser] = JsonParser, + enable: bool = True): + super().__init__(description, parser, enable) + api_key = os.environ.get('BRAVE_API_KEY', api_key) + if api_key is None: + raise ValueError( + 'Please set BRAVE API key either in the environment ' + 'as BRAVE_API_KEY or pass it as `api_key` parameter.') + self.api_key = api_key + self.timeout = timeout + self.search_type = search_type + + @tool_api + def run(self, query: str, k: int = 10) -> ActionReturn: + """一个可以从BRAVE浏览器搜索结果的API。当你需要对于一个特定问题找到简短明了的回答时,可以使用它。输入应该是一个搜索查询。 + + Args: + query (str): the search content + k (int): select first k results in the search results as response + """ + tool_return = ActionReturn(type=self.name) + status_code, response = self._search(query, count=k) + # convert search results to ToolReturn format + if status_code == -1: + tool_return.errmsg = response + tool_return.state = ActionStatusCode.HTTP_ERROR + elif status_code == 200: + parsed_res = self._parse_results(response) + tool_return.result = [dict(type='text', content=str(parsed_res))] + tool_return.state = ActionStatusCode.SUCCESS + else: + tool_return.errmsg = str(status_code) + tool_return.state = ActionStatusCode.API_ERROR + return tool_return + + def _parse_results(self, results: dict) -> Union[str, List[str]]: + """Parse the search results from Brave API. + + Args: + results (dict): The search content from Brave API + in json format. + + Returns: + List[str]: The parsed search results. + """ + + if self.search_type=="web": + filtered_result=results.get("web", {}).get("results", []) + else: + filtered_result=results.get("results", {}) + + snippets = [ + { + "title": item.get("title"), + "snippets": " ".join( + filter( + None, [item.get("description"), *item.get("extra_snippets", [])] + ) + ), + } + for item in filtered_result + ] + + if len(snippets) == 0: + return ['No good Brave Search Result was found'] + return snippets + + def _search(self, + search_term: str, + **kwargs) -> Tuple[int, Union[dict, str]]: + """HTTP requests to Brave API. + + Args: + search_term (str): The search query. + + Returns: + tuple: the return value is a tuple contains: + - status_code (int): HTTP status code from Serper API. + - response (dict): response context with json format. + """ + headers = { + "X-Subscription-Token": self.api_key or '', + "Accept": "application/json", + } + + params = { + 'q': search_term, + **{ + key: value + for key, value in kwargs.items() if value is not None + }, + } + try: + response = requests.get( + f'https://api.search.brave.com/res/v1/{self.search_type}/search', + headers=headers, + params=params, + timeout=self.timeout) + except Exception as e: + return -1, str(e) + return response.status_code, response.json() \ No newline at end of file From d441612afe4127fea59bfc578f1f32691d55118a Mon Sep 17 00:00:00 2001 From: tackhwa <55059307+tackhwa@users.noreply.github.com> Date: Wed, 14 Aug 2024 16:59:02 +0800 Subject: [PATCH 2/9] Update brave_search.py --- lagent/actions/brave_search.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/lagent/actions/brave_search.py b/lagent/actions/brave_search.py index b9abaf10..7c9f8b8a 100644 --- a/lagent/actions/brave_search.py +++ b/lagent/actions/brave_search.py @@ -29,12 +29,6 @@ class BraveSearch(BaseAction): action's inputs and outputs. Defaults to :class:`JsonParser`. enable (bool): Whether the action is enabled. Defaults to ``True``. """ - # result_key_for_type = { - # 'news': 'news', - # 'places': 'places', - # 'images': 'images', - # 'search': 'organic', - # } def __init__(self, api_key: Optional[str] = None, @@ -141,4 +135,4 @@ def _search(self, timeout=self.timeout) except Exception as e: return -1, str(e) - return response.status_code, response.json() \ No newline at end of file + return response.status_code, response.json() From a2fd718ab50f4217163d513a57b7e676ac8a4d01 Mon Sep 17 00:00:00 2001 From: tackhwa <55059307+tackhwa@users.noreply.github.com> Date: Wed, 14 Aug 2024 17:21:15 +0800 Subject: [PATCH 3/9] Update __init__.py --- lagent/actions/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lagent/actions/__init__.py b/lagent/actions/__init__.py index fdffebeb..c03b4e44 100644 --- a/lagent/actions/__init__.py +++ b/lagent/actions/__init__.py @@ -5,6 +5,7 @@ from .base_action import TOOL_REGISTRY, BaseAction, tool_api from .bing_browser import BingBrowser from .bing_map import BINGMap +from .brave_search import BraveSearch from .builtin_actions import FinishAction, InvalidAction, NoAction from .google_scholar_search import GoogleScholar from .google_search import GoogleSearch @@ -21,7 +22,7 @@ 'GoogleScholar', 'IPythonInterpreter', 'IPythonInteractive', 'IPythonInteractiveManager', 'PythonInterpreter', 'PPT', 'BaseParser', 'JsonParser', 'TupleParser', 'tool_api', 'list_tools', 'get_tool_cls', - 'get_tool', 'BingBrowser' + 'get_tool', 'BingBrowser','BraveSearch' ] From 246288c8bc8030dc119e76d38f623be5d543369e Mon Sep 17 00:00:00 2001 From: tackhwa <55059307+tackhwa@users.noreply.github.com> Date: Thu, 15 Aug 2024 15:23:03 +0800 Subject: [PATCH 4/9] support brave search api and refractor google serper api --- lagent/actions/bing_browser.py | 156 +++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) diff --git a/lagent/actions/bing_browser.py b/lagent/actions/bing_browser.py index c5596f1a..f633a898 100755 --- a/lagent/actions/bing_browser.py +++ b/lagent/actions/bing_browser.py @@ -149,6 +149,162 @@ def _parse_response(self, response: dict) -> dict: return self._filter_results(raw_results) +class BraveSearch(BaseSearch): + def __init__(self, + api_key: str, + region: str = 'ALL', + language: str = 'zh-hans', + extra_snippests: bool = True, + topk: int = 3, + black_list: List[str] = [ + 'enoN', + 'youtube.com', + 'bilibili.com', + 'researchgate.net', + ], + **kwargs): + self.api_key = api_key + self.market = region + self.proxy = kwargs.get('proxy') + self.language = language + self.extra_snippests = extra_snippests + self.search_type = kwargs.get('search_type', 'web') + self.kwargs=kwargs + super().__init__(topk, black_list) + + @cached(cache=TTLCache(maxsize=100, ttl=600)) + def search(self, query: str, max_retry: int = 3) -> dict: + for attempt in range(max_retry): + try: + response = self._call_brave_api(query) + return self._parse_response(response) + except Exception as e: + logging.exception(str(e)) + warnings.warn( + f'Retry {attempt + 1}/{max_retry} due to error: {e}') + time.sleep(random.randint(2, 5)) + raise Exception( + 'Failed to get search results from Brave Search after retries.') + + def _call_brave_api(self, query: str) -> dict: + endpoint = f'https://api.search.brave.com/res/v1/{self.search_type}/search' + params = {'q': query, 'country':self.market, 'search_lang':self.language, 'extra_snippets':self.extra_snippests, 'count':self.topk, + **{ + key: value + for key, value in self.kwargs.items() if value is not None + }, + } + headers = {"X-Subscription-Token": self.api_key or '', "Accept": "application/json"} + response = requests.get( + endpoint, headers=headers, params=params, proxies=self.proxy) + response.raise_for_status() + return response.json() + + def _parse_response(self, response: dict) -> dict: + if self.search_type=="web": + filtered_result=response.get("web", {}).get("results", []) + else: + filtered_result=response.get("results", {}) + raw_results = [] + + for item in filtered_result: + raw_results.append(( + item.get("url",""), + " ".join( + filter( + None, [item.get("description"), *item.get("extra_snippets", [])] + ) + ), + item.get("title",""), + )) + return self._filter_results(raw_results) + +class GoogleSearch(BaseSearch): + + result_key_for_type = { + 'news': 'news', + 'places': 'places', + 'images': 'images', + 'search': 'organic', + } + + def __init__(self, + api_key: str, + topk: int = 3, + black_list: List[str] = [ + 'enoN', + 'youtube.com', + 'bilibili.com', + 'researchgate.net', + ], + **kwargs): + self.api_key = api_key + self.proxy = kwargs.get('proxy') + self.search_type = kwargs.get('search_type', 'search') + self.kwargs=kwargs + super().__init__(topk, black_list) + + @cached(cache=TTLCache(maxsize=100, ttl=600)) + def search(self, query: str, max_retry: int = 3) -> dict: + for attempt in range(max_retry): + try: + response = self._call_serper_api(query) + return self._parse_response(response) + except Exception as e: + logging.exception(str(e)) + warnings.warn( + f'Retry {attempt + 1}/{max_retry} due to error: {e}') + time.sleep(random.randint(2, 5)) + raise Exception( + 'Failed to get search results from Google Serper Search after retries.') + + def _call_serper_api(self, query: str) -> dict: + endpoint = f'https://google.serper.dev/{self.search_type}' + params = { + 'q': query, + 'num': self.topk, + **{ + key: value + for key, value in self.kwargs.items() if value is not None + }, + } + headers = {"X-API-KEY": self.api_key or '',"Content-Type": "application/json"} + response = requests.get( + endpoint, headers=headers, params=params, proxies=self.proxy) + response.raise_for_status() + return response.json() + + def _parse_response(self, response: dict) -> dict: + raw_results = [] + + if response.get('answerBox'): + answer_box = response.get('answerBox', {}) + if answer_box.get('answer'): + raw_results.append(("",answer_box.get('answer'),"")) + elif answer_box.get('snippet'): + raw_results.append(("",answer_box.get('snippet').replace('\n', ' '),"")) + elif answer_box.get('snippetHighlighted'): + raw_results.append(("",answer_box.get('snippetHighlighted'),"")) + + if response.get('knowledgeGraph'): + kg = response.get('knowledgeGraph', {}) + description = kg.get('description', '') + attributes = '. '.join(f'{attribute}: {value}' for attribute, value in kg.get('attributes', {}).items()) + raw_results.append((kg.get("descriptionLink", ""), + f"{description}. {attributes}" if attributes else description, + f"{kg.get('title', '')}: {kg.get('type', '')}.")) + + for result in response[self.result_key_for_type[ + self.search_type]][:self.topk]: + description = result.get('snippet', '') + attributes = '. '.join(f'{attribute}: {value}' for attribute, value in result.get('attributes', {}).items()) + raw_results.append((result.get('link', '') , + f'{description}. {attributes}' if attributes else description, + result.get('title', ''))) + + return self._filter_results(raw_results) + + class ContentFetcher: From b119a7440a102ed924970330d287f287ced48055 Mon Sep 17 00:00:00 2001 From: tackhwa <55059307+tackhwa@users.noreply.github.com> Date: Thu, 15 Aug 2024 15:29:24 +0800 Subject: [PATCH 5/9] pre-commit --- lagent/actions/bing_browser.py | 98 +++++++++++++++++++++------------- 1 file changed, 60 insertions(+), 38 deletions(-) diff --git a/lagent/actions/bing_browser.py b/lagent/actions/bing_browser.py index f633a898..1de4a893 100755 --- a/lagent/actions/bing_browser.py +++ b/lagent/actions/bing_browser.py @@ -149,7 +149,9 @@ def _parse_response(self, response: dict) -> dict: return self._filter_results(raw_results) + class BraveSearch(BaseSearch): + def __init__(self, api_key: str, region: str = 'ALL', @@ -169,7 +171,7 @@ def __init__(self, self.language = language self.extra_snippests = extra_snippests self.search_type = kwargs.get('search_type', 'web') - self.kwargs=kwargs + self.kwargs = kwargs super().__init__(topk, black_list) @cached(cache=TTLCache(maxsize=100, ttl=600)) @@ -185,40 +187,49 @@ def search(self, query: str, max_retry: int = 3) -> dict: time.sleep(random.randint(2, 5)) raise Exception( 'Failed to get search results from Brave Search after retries.') - + def _call_brave_api(self, query: str) -> dict: endpoint = f'https://api.search.brave.com/res/v1/{self.search_type}/search' - params = {'q': query, 'country':self.market, 'search_lang':self.language, 'extra_snippets':self.extra_snippests, 'count':self.topk, - **{ + params = { + 'q': query, + 'country': self.market, + 'search_lang': self.language, + 'extra_snippets': self.extra_snippests, + 'count': self.topk, + **{ key: value for key, value in self.kwargs.items() if value is not None }, - } - headers = {"X-Subscription-Token": self.api_key or '', "Accept": "application/json"} + } + headers = { + 'X-Subscription-Token': self.api_key or '', + 'Accept': 'application/json' + } response = requests.get( endpoint, headers=headers, params=params, proxies=self.proxy) response.raise_for_status() return response.json() - + def _parse_response(self, response: dict) -> dict: - if self.search_type=="web": - filtered_result=response.get("web", {}).get("results", []) + if self.search_type == 'web': + filtered_result = response.get('web', {}).get('results', []) else: - filtered_result=response.get("results", {}) + filtered_result = response.get('results', {}) raw_results = [] for item in filtered_result: raw_results.append(( - item.get("url",""), - " ".join( - filter( - None, [item.get("description"), *item.get("extra_snippets", [])] - ) - ), - item.get("title",""), - )) + item.get('url', ''), + ' '.join( + filter(None, [ + item.get('description'), + *item.get('extra_snippets', []) + ])), + item.get('title', ''), + )) return self._filter_results(raw_results) - + + class GoogleSearch(BaseSearch): result_key_for_type = { @@ -241,9 +252,9 @@ def __init__(self, self.api_key = api_key self.proxy = kwargs.get('proxy') self.search_type = kwargs.get('search_type', 'search') - self.kwargs=kwargs + self.kwargs = kwargs super().__init__(topk, black_list) - + @cached(cache=TTLCache(maxsize=100, ttl=600)) def search(self, query: str, max_retry: int = 3) -> dict: for attempt in range(max_retry): @@ -256,8 +267,9 @@ def search(self, query: str, max_retry: int = 3) -> dict: f'Retry {attempt + 1}/{max_retry} due to error: {e}') time.sleep(random.randint(2, 5)) raise Exception( - 'Failed to get search results from Google Serper Search after retries.') - + 'Failed to get search results from Google Serper Search after retries.' + ) + def _call_serper_api(self, query: str) -> dict: endpoint = f'https://google.serper.dev/{self.search_type}' params = { @@ -268,7 +280,10 @@ def _call_serper_api(self, query: str) -> dict: for key, value in self.kwargs.items() if value is not None }, } - headers = {"X-API-KEY": self.api_key or '',"Content-Type": "application/json"} + headers = { + 'X-API-KEY': self.api_key or '', + 'Content-Type': 'application/json' + } response = requests.get( endpoint, headers=headers, params=params, proxies=self.proxy) response.raise_for_status() @@ -280,30 +295,37 @@ def _parse_response(self, response: dict) -> dict: if response.get('answerBox'): answer_box = response.get('answerBox', {}) if answer_box.get('answer'): - raw_results.append(("",answer_box.get('answer'),"")) + raw_results.append(('', answer_box.get('answer'), '')) elif answer_box.get('snippet'): - raw_results.append(("",answer_box.get('snippet').replace('\n', ' '),"")) + raw_results.append( + ('', answer_box.get('snippet').replace('\n', ' '), '')) elif answer_box.get('snippetHighlighted'): - raw_results.append(("",answer_box.get('snippetHighlighted'),"")) + raw_results.append( + ('', answer_box.get('snippetHighlighted'), '')) if response.get('knowledgeGraph'): kg = response.get('knowledgeGraph', {}) description = kg.get('description', '') - attributes = '. '.join(f'{attribute}: {value}' for attribute, value in kg.get('attributes', {}).items()) - raw_results.append((kg.get("descriptionLink", ""), - f"{description}. {attributes}" if attributes else description, - f"{kg.get('title', '')}: {kg.get('type', '')}.")) - + attributes = '. '.join( + f'{attribute}: {value}' + for attribute, value in kg.get('attributes', {}).items()) + raw_results.append( + (kg.get('descriptionLink', ''), + f'{description}. {attributes}' if attributes else description, + f"{kg.get('title', '')}: {kg.get('type', '')}.")) + for result in response[self.result_key_for_type[ self.search_type]][:self.topk]: description = result.get('snippet', '') - attributes = '. '.join(f'{attribute}: {value}' for attribute, value in result.get('attributes', {}).items()) - raw_results.append((result.get('link', '') , - f'{description}. {attributes}' if attributes else description, - result.get('title', ''))) - + attributes = '. '.join( + f'{attribute}: {value}' + for attribute, value in result.get('attributes', {}).items()) + raw_results.append( + (result.get('link', ''), + f'{description}. {attributes}' if attributes else description, + result.get('title', ''))) + return self._filter_results(raw_results) - class ContentFetcher: From 36ee5d329890ee5e9de17b1a43dd50cd7b05e7d6 Mon Sep 17 00:00:00 2001 From: tackhwa <55059307+tackhwa@users.noreply.github.com> Date: Thu, 15 Aug 2024 15:52:06 +0800 Subject: [PATCH 6/9] Delete lagent/actions/brave_search.py --- lagent/actions/brave_search.py | 138 --------------------------------- 1 file changed, 138 deletions(-) delete mode 100644 lagent/actions/brave_search.py diff --git a/lagent/actions/brave_search.py b/lagent/actions/brave_search.py deleted file mode 100644 index 7c9f8b8a..00000000 --- a/lagent/actions/brave_search.py +++ /dev/null @@ -1,138 +0,0 @@ -import os -from typing import List, Optional, Tuple, Type, Union - -import requests - -from lagent.schema import ActionReturn, ActionStatusCode -from .base_action import BaseAction, tool_api -from .parser import BaseParser, JsonParser - - -class BraveSearch(BaseAction): - """Wrapper around the Brave Search API. - - To use, you should pass your Brave API key to the constructor. - - Code is modified from lang-chain BraveSearchWrapper - (https://github.com/daver987/langchain/blob/c5016e2 - b0b4878b0a920e809a5169d80b409288b/libs/community/ - langchain_community/utilities/brave_search.py) - - Args: - api_key (str): API KEY to use brave search API, - You can create a free API key at https://brave.com/search/api/. - timeout (int): Upper bound of waiting time for a brave request. - search_type (str): Brave API support ['web', 'images', 'news', - 'videos'] types of search. - description (dict): The description of the action. Defaults to ``None``. - parser (Type[BaseParser]): The parser class to process the - action's inputs and outputs. Defaults to :class:`JsonParser`. - enable (bool): Whether the action is enabled. Defaults to ``True``. - """ - - def __init__(self, - api_key: Optional[str] = None, - timeout: int = 5, - search_type: str = 'web', - description: Optional[dict] = None, - parser: Type[BaseParser] = JsonParser, - enable: bool = True): - super().__init__(description, parser, enable) - api_key = os.environ.get('BRAVE_API_KEY', api_key) - if api_key is None: - raise ValueError( - 'Please set BRAVE API key either in the environment ' - 'as BRAVE_API_KEY or pass it as `api_key` parameter.') - self.api_key = api_key - self.timeout = timeout - self.search_type = search_type - - @tool_api - def run(self, query: str, k: int = 10) -> ActionReturn: - """一个可以从BRAVE浏览器搜索结果的API。当你需要对于一个特定问题找到简短明了的回答时,可以使用它。输入应该是一个搜索查询。 - - Args: - query (str): the search content - k (int): select first k results in the search results as response - """ - tool_return = ActionReturn(type=self.name) - status_code, response = self._search(query, count=k) - # convert search results to ToolReturn format - if status_code == -1: - tool_return.errmsg = response - tool_return.state = ActionStatusCode.HTTP_ERROR - elif status_code == 200: - parsed_res = self._parse_results(response) - tool_return.result = [dict(type='text', content=str(parsed_res))] - tool_return.state = ActionStatusCode.SUCCESS - else: - tool_return.errmsg = str(status_code) - tool_return.state = ActionStatusCode.API_ERROR - return tool_return - - def _parse_results(self, results: dict) -> Union[str, List[str]]: - """Parse the search results from Brave API. - - Args: - results (dict): The search content from Brave API - in json format. - - Returns: - List[str]: The parsed search results. - """ - - if self.search_type=="web": - filtered_result=results.get("web", {}).get("results", []) - else: - filtered_result=results.get("results", {}) - - snippets = [ - { - "title": item.get("title"), - "snippets": " ".join( - filter( - None, [item.get("description"), *item.get("extra_snippets", [])] - ) - ), - } - for item in filtered_result - ] - - if len(snippets) == 0: - return ['No good Brave Search Result was found'] - return snippets - - def _search(self, - search_term: str, - **kwargs) -> Tuple[int, Union[dict, str]]: - """HTTP requests to Brave API. - - Args: - search_term (str): The search query. - - Returns: - tuple: the return value is a tuple contains: - - status_code (int): HTTP status code from Serper API. - - response (dict): response context with json format. - """ - headers = { - "X-Subscription-Token": self.api_key or '', - "Accept": "application/json", - } - - params = { - 'q': search_term, - **{ - key: value - for key, value in kwargs.items() if value is not None - }, - } - try: - response = requests.get( - f'https://api.search.brave.com/res/v1/{self.search_type}/search', - headers=headers, - params=params, - timeout=self.timeout) - except Exception as e: - return -1, str(e) - return response.status_code, response.json() From 121923f426cd9fe9adddea7a4a8daebc6202fed5 Mon Sep 17 00:00:00 2001 From: tackhwa <55059307+tackhwa@users.noreply.github.com> Date: Thu, 15 Aug 2024 16:01:39 +0800 Subject: [PATCH 7/9] Update __init__.py --- lagent/actions/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lagent/actions/__init__.py b/lagent/actions/__init__.py index c03b4e44..fdffebeb 100644 --- a/lagent/actions/__init__.py +++ b/lagent/actions/__init__.py @@ -5,7 +5,6 @@ from .base_action import TOOL_REGISTRY, BaseAction, tool_api from .bing_browser import BingBrowser from .bing_map import BINGMap -from .brave_search import BraveSearch from .builtin_actions import FinishAction, InvalidAction, NoAction from .google_scholar_search import GoogleScholar from .google_search import GoogleSearch @@ -22,7 +21,7 @@ 'GoogleScholar', 'IPythonInterpreter', 'IPythonInteractive', 'IPythonInteractiveManager', 'PythonInterpreter', 'PPT', 'BaseParser', 'JsonParser', 'TupleParser', 'tool_api', 'list_tools', 'get_tool_cls', - 'get_tool', 'BingBrowser','BraveSearch' + 'get_tool', 'BingBrowser' ] From 6cb08e7343a2740f584a2ac0992707f158d9f107 Mon Sep 17 00:00:00 2001 From: tackhwa <55059307+tackhwa@users.noreply.github.com> Date: Mon, 19 Aug 2024 15:00:12 +0800 Subject: [PATCH 8/9] add docstring --- lagent/actions/bing_browser.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/lagent/actions/bing_browser.py b/lagent/actions/bing_browser.py index 1de4a893..0cb032a9 100755 --- a/lagent/actions/bing_browser.py +++ b/lagent/actions/bing_browser.py @@ -151,6 +151,23 @@ def _parse_response(self, response: dict) -> dict: class BraveSearch(BaseSearch): + """ + Wrapper around the Brave Search API. + + To use, you should pass your Brave Search API key to the constructor. + + Args: + api_key (str): API KEY to use Brave Search API. + You can create a free API key at https://api.search.brave.com/app/keys. + search_type (str): Brave Search API supports ['web', 'news', 'images', 'videos'], + currently only supports 'news' and 'web'. + topk (int): The number of search results returned in response from API search results. + region (str): The country code string. Specifies the country where the search results come from. + language (str): The language code string. Specifies the preferred language for the search results. + extra_snippets (bool): Allows retrieving up to 5 additional snippets, which are alternative excerpts from the search results. + **kwargs: Any other parameters related to the Brave Search API. Find more details at + https://api.search.brave.com/app/documentation/web-search/get-started. + """ def __init__(self, api_key: str, @@ -231,6 +248,20 @@ def _parse_response(self, response: dict) -> dict: class GoogleSearch(BaseSearch): + """ + Wrapper around the Serper.dev Google Search API. + + To use, you should pass your serper API key to the constructor. + + Args: + api_key (str): API KEY to use serper google search API. + You can create a free API key at https://serper.dev. + search_type (str): Serper API supports ['search', 'images', 'news', + 'places'] types of search, currently we only support 'search' and 'news'. + topk (int): The number of search results returned in response from api search results. + **kwargs: Any other parameters related to the Serper API. Find more details at + https://serper.dev/playground + """ result_key_for_type = { 'news': 'news', From aa5edd0e9afa67ef34988348f2614c2c47b06e49 Mon Sep 17 00:00:00 2001 From: tackhwa <55059307+tackhwa@users.noreply.github.com> Date: Mon, 26 Aug 2024 19:01:10 +0800 Subject: [PATCH 9/9] support BoCha web search api --- lagent/actions/bing_browser.py | 98 ++++++++++++++++++++++++++++++++-- 1 file changed, 94 insertions(+), 4 deletions(-) diff --git a/lagent/actions/bing_browser.py b/lagent/actions/bing_browser.py index 0cb032a9..db6dbec3 100755 --- a/lagent/actions/bing_browser.py +++ b/lagent/actions/bing_browser.py @@ -52,7 +52,7 @@ def __init__(self, ], **kwargs): self.proxy = kwargs.get('proxy') - self.timeout = kwargs.get('timeout', 10) + self.timeout = kwargs.get('timeout', 30) super().__init__(topk, black_list) @cached(cache=TTLCache(maxsize=100, ttl=600)) @@ -70,12 +70,26 @@ def search(self, query: str, max_retry: int = 3) -> dict: raise Exception( 'Failed to get search results from DuckDuckGo after retries.') + async def _async_call_ddgs(self, query: str, **kwargs) -> dict: + ddgs = DDGS(**kwargs) + try: + response = await asyncio.wait_for( + asyncio.to_thread(ddgs.text, query.strip("'"), max_results=10), + timeout=self.timeout) + return response + except asyncio.TimeoutError: + logging.exception('Request to DDGS timed out.') + raise + def _call_ddgs(self, query: str, **kwargs) -> dict: loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) - ddgs = DDGS(**kwargs) - response = ddgs.text(query.strip("'"), max_results=10) - return response + try: + response = loop.run_until_complete( + self._async_call_ddgs(query, **kwargs)) + return response + finally: + loop.close() def _parse_response(self, response: dict) -> dict: raw_results = [] @@ -359,6 +373,82 @@ def _parse_response(self, response: dict) -> dict: return self._filter_results(raw_results) +class BoChaSearch(BaseSearch): + """ + Wrapper around the BoCha Web Search API. + + To use, you should pass your BoCha API key to the constructor. + + Args: + api_key (str): API KEY to use BoCha web search API. + You can create a API key at https://bochaai.com/. + summary (bool): Indicates whether the content of the website should be summarized LLM. + If True, the summary will be retrieved as part of the result from the web search API. + topk (int): The number of search results returned in response from api search results. + **kwargs: Any other parameters related to the BoCha API. Find more details at + https://bochaai.com/ + """ + + def __init__(self, + api_key: str, + topk: int = 3, + black_list: List[str] = [ + 'enoN', + 'youtube.com', + 'bilibili.com', + 'researchgate.net', + ], + **kwargs): + self.api_key = api_key + self.summary = True + self.proxy = kwargs.get('proxy') + self.kwargs = kwargs + super().__init__(topk, black_list) + + @cached(cache=TTLCache(maxsize=100, ttl=600)) + def search(self, query: str, max_retry: int = 3) -> dict: + for attempt in range(max_retry): + try: + response = self._call_bocha_api(query) + return self._parse_response(response) + except Exception as e: + logging.exception(str(e)) + warnings.warn( + f'Retry {attempt + 1}/{max_retry} due to error: {e}') + time.sleep(random.randint(2, 5)) + raise Exception( + 'Failed to get search results from BoCha Search after retries.') + + def _call_bocha_api(self, query: str) -> dict: + endpoint = 'https://api.bochaai.com/v1/web-search' + params = json.dumps({ + 'query': query, + 'count': self.topk, + 'summary': self.summary, + **{ + key: value + for key, value in self.kwargs.items() if value is not None + }, + }) + headers = { + 'Authorization': f'Bearer {self.api_key}', + 'Content-Type': 'application/json' + } + response = requests.request( + 'POST', endpoint, headers=headers, data=params) + response.raise_for_status() + return response.json() + + def _parse_response(self, response: dict) -> dict: + raw_results = [(w.get('url', + ''), w.get('snippet', '') + w.get('summary', ''), + w.get('name', + '')) for w in response.get('data', {}).get( + 'webPages', {}).get('value', [])] + + return self._filter_results(raw_results) + + class ContentFetcher: def __init__(self, timeout: int = 5):