deedy5 · scarletkc · Nov 17, 2025 · Nov 20, 2025 · Nov 20, 2025 · Nov 20, 2025
diff --git a/.gitignore b/.gitignore
@@ -137,7 +137,7 @@ celerybeat.pid
 # Environments
 .env
 .envrc
-.venv
+.venv*
 env/
 venv/
 ENV/

diff --git a/README.md b/README.md
@@ -5,21 +5,23 @@ A metasearch library that aggregates results from diverse web search services.
 
 
 ## Table of Contents
-* [Install](#install)
-* [CLI version](#cli-version)
-* [DDGS search operators](#ddgs-search-operators)
-* [Regions](#regions)
-* [Engines](#engines)
-* [Tips](#tips)
-* [DDGS class](#ddgs-class)
-* [Proxy](#proxy)
-* [Exceptions](#exceptions)
-* [1. text()](#1-text)
-* [2. images()](#2-images)
-* [3. videos()](#3-videos)
-* [4. news()](#4-news)
-* [5. books()](#5-books)
-* [Disclaimer](#disclaimer)
+- [DDGS | Dux Distributed Global Search](#ddgs--dux-distributed-global-search)
+  - [Table of Contents](#table-of-contents)
+  - [Install](#install)
+  - [CLI version](#cli-version)
+  - [DDGS search operators](#ddgs-search-operators)
+  - [Regions](#regions)
+  - [Engines](#engines)
+  - [Tips](#tips)
+  - [DDGS class](#ddgs-class)
+  - [Proxy](#proxy)
+  - [Exceptions](#exceptions)
+  - [1. text()](#1-text)
+  - [2. images()](#2-images)
+  - [3. videos()](#3-videos)
+  - [4. news()](#4-news)
+  - [5. books()](#5-books)
+  - [Disclaimer](#disclaimer)
 
 ## Install
 ```python
@@ -156,7 +158,7 @@ ___
 
 | DDGS function | Available backends |
 | --------------|:-------------------|
-| text()        | `bing`, `brave`, `duckduckgo`, `google`, `mojeek`, `yandex`, `yahoo`, `wikipedia`|
+| text()        | `bing`, `brave`, `duckduckgo`, `google`, `mojeek`, `sogou`, `yandex`, `yahoo`, `wikipedia`|
 | images()      | `duckduckgo` |
 | videos()      | `duckduckgo` |
 | news()        | `bing`, `duckduckgo`, `yahoo` |

diff --git a/ddgs/cli.py b/ddgs/cli.py
@@ -190,6 +190,7 @@ def version() -> str:
             "duckduckgo",
             "google",
             "mojeek",
+            "sogou",
             "yandex",
             "yahoo",
             "wikipedia",

diff --git a/ddgs/engines/sogou.py b/ddgs/engines/sogou.py
@@ -0,0 +1,126 @@
+"""Sogou search engine implementation."""
+
+from __future__ import annotations
+
+import logging
+import re
+from typing import TYPE_CHECKING, Any, ClassVar
+from urllib.parse import urljoin
+
+if TYPE_CHECKING:
+    from collections.abc import Mapping
+
+from ddgs.base import BaseSearchEngine
+from ddgs.results import TextResult
+
+logger = logging.getLogger(__name__)
+
+
+class Sogou(BaseSearchEngine[TextResult]):
+    """Sogou text search engine.
+
+    Result links of the form ``sogou.com/link?url=...`` are resolved to the URL found
+    in the link page, at the cost of one extra HTTP request per distinct link.
+    """
+
+    name = "sogou"
+    category = "text"
+    provider = "sogou"
+
+    search_url = "https://www.sogou.com/web"
+    search_method = "GET"
+
+    items_xpath = "//div[contains(@class, 'vrwrap') and not(contains(@class, 'hint'))]"
+    elements_xpath: ClassVar[Mapping[str, str]] = {
+        "title": ".//h3//a//text()",
+        "href": ".//h3//a/@href",
+        "body": ".//div[contains(@class, 'space-txt')]//text()",
+    }
+
+    # Value of the ``tsn`` request parameter for each supported ``timelimit`` code.
+    _timelimit_tsn: ClassVar[Mapping[str, str]] = {"d": "1", "w": "7", "m": "30", "y": "365"}
+
+    # Patterns searched for, in this order, in a link page body to find the target URL.
+    _redirect_pattern = re.compile(r"window\.location\.replace\([\"'](?P<url>[^\"']+)[\"']\)")
+    _meta_refresh_pattern = re.compile(r"URL='?(?P<url>[^'\"]+)", re.IGNORECASE)
+
+    def __init__(self, proxy: str | None = None, timeout: int | None = None, *, verify: bool | str = True) -> None:
+        """Initialize the engine with an empty cache of resolved links."""
+        super().__init__(proxy=proxy, timeout=timeout, verify=verify)
+        # Sogou link URL -> outcome of resolving it.
+        self._href_cache: dict[str, str] = {}
+
+    def build_payload(
+        self,
+        query: str,
+        region: str,  # noqa: ARG002
+        safesearch: str,  # noqa: ARG002
+        timelimit: str | None,
+        page: int = 1,
+        **kwargs: str,  # noqa: ARG002
+    ) -> dict[str, Any]:
+        """Build the query parameters for a Sogou search request.
+
+        Args:
+            query: Search terms.
+            region: Unused.
+            safesearch: Unused.
+            timelimit: One of ``"d"``, ``"w"``, ``"m"``, ``"y"``, or ``None``. Any other
+                value is ignored (logged at debug level) instead of raising ``KeyError``.
+            page: Page number; only sent when greater than 1.
+            **kwargs: Unused.
+
+        Returns:
+            The request parameters as a dict of strings.
+        """
+        payload = {"query": query, "ie": "utf8", "p": "40040100", "dp": "1"}
+        if timelimit:
+            tsn = self._timelimit_tsn.get(timelimit)
+            if tsn is None:
+                logger.debug("Unsupported Sogou timelimit %r ignored", timelimit)
+            else:
+                payload["tsn"] = tsn
+        if page > 1:
+            payload["page"] = str(page)
+        return payload
+
+    def post_extract_results(self, results: list[TextResult]) -> list[TextResult]:
+        """Drop results lacking a title or link and normalize the links of the rest."""
+        post_results = []
+        for result in results:
+            if result.href and result.title:
+                result.href = self._normalize_href(result.href)
+                post_results.append(result)
+        return post_results
+
+    def _normalize_href(self, href: str) -> str:
+        """Make a result link absolute and resolve Sogou redirect links when possible.
+
+        Args:
+            href: Link taken from a result item; may be relative to ``search_url``.
+
+        Returns:
+            The URL found in the Sogou link page, or the absolute form of ``href`` when
+            it is not a Sogou link or no target URL could be found.
+        """
+        href = urljoin(self.search_url, href)
+        if "sogou.com/link?url=" not in href:
+            return href
+
+        if href in self._href_cache:
+            return self._href_cache[href]
+
+        # Keep the Sogou link itself if the target cannot be determined.
+        resolved = href
+        try:
+            resp = self.http_client.request("GET", href)
+        except Exception as exc:  # noqa: BLE001
+            logger.debug("Error resolving Sogou link %s: %r", href, exc)
+        else:
+            if resp.status_code == 200 and resp.text:
+                match = self._redirect_pattern.search(resp.text) or self._meta_refresh_pattern.search(resp.text)
+                if match:
+                    resolved = match.group("url")
+        # The fallback is cached as well, so a failed link is not requested again by this instance.
+        self._href_cache[href] = resolved
+        return resolved
-Original file line number
+Diff line change
@@ Expand Up / @@ -137,7 +137,7 @@ celerybeat.pid @@
     # Environments
     .env
     .envrc
-    .venv
+    .venv*
     env/
     venv/
     ENV/
@@ Expand Down @@