Commits (38)
183748c
Initial plan
Copilot Jan 19, 2026
9654117
Add Qwen3 language model support to pythainlp.lm
Copilot Jan 19, 2026
0e4b368
Address code review feedback: Fix type annotations and docstrings
Copilot Jan 19, 2026
e53c777
Fix device validation and improve docstring clarity
Copilot Jan 19, 2026
fe937d2
Fix device handling: use .to() instead of device_map parameter
Copilot Jan 19, 2026
2018813
Move Qwen3 tests from tests/core to tests/extra
Copilot Jan 19, 2026
69fbb42
Update copyright year and sort qwen3 dependency alphabetically
Copilot Jan 19, 2026
c816818
Update copyright year in testx_lm.py
bact Jan 19, 2026
0d104dd
Merge branch 'dev' of https://github.com/PyThaiNLP/pythainlp into cop…
Copilot Jan 19, 2026
772a80b
Update pythainlp/lm/qwen3.py
bact Jan 23, 2026
91b7bda
Merge branch 'dev' of https://github.com/PyThaiNLP/pythainlp into cop…
Copilot Feb 1, 2026
e2a14de
Update CITATION.cff from codemeta.json
Feb 1, 2026
b29e085
Merge branch 'dev' of https://github.com/PyThaiNLP/pythainlp into cop…
Copilot Feb 1, 2026
f55c1c6
Merge branch 'dev' of https://github.com/PyThaiNLP/pythainlp into cop…
Copilot Feb 2, 2026
c73b2a3
Apply code review feedback: Add input validation, error handling, and…
Copilot Feb 2, 2026
79a4204
Apply second round of code review feedback: Fix copyright headers, im…
Copilot Feb 2, 2026
e775b9e
Add type annotations for torch_dtype in WangChanGLM and ChatBotModel …
Copilot Feb 2, 2026
0b031fc
Merge branch 'dev' of https://github.com/PyThaiNLP/pythainlp into cop…
Copilot Feb 3, 2026
94d4314
Merge branch 'dev' of https://github.com/PyThaiNLP/pythainlp into cop…
Copilot Feb 3, 2026
1339261
Merge branch 'dev' into copilot/add-qwen3-0-6b-model
Copilot Feb 3, 2026
5b3416a
Merge branch 'dev' into copilot/add-qwen3-0-6b-model
bact Feb 4, 2026
5ecd247
Complete type annotations for Qwen3 class
Copilot Feb 4, 2026
88762b0
Replace Any type annotations with specific types from transformers li…
Copilot Feb 4, 2026
1e3f0b6
Fix type annotation reassignment in ChatBotModel
Copilot Feb 4, 2026
28869e0
Import torch in TYPE_CHECKING block
bact Feb 4, 2026
06b5d5f
Add import for torch in ChatBotModel
bact Feb 4, 2026
2d06f22
Merge branch 'dev' into copilot/add-qwen3-0-6b-model
bact Feb 4, 2026
aa8751b
Remove duplicate import statement
bact Feb 4, 2026
3d550ce
Fix ruff import sorting error in chat/core.py
Copilot Feb 4, 2026
0b52d0d
Merge branch 'dev' into copilot/add-qwen3-0-6b-model
Copilot Feb 5, 2026
b9bf7fc
Apply code review feedback: Improve code quality and documentation
Copilot Feb 5, 2026
ff07202
Apply dependency and import improvements from code review
Copilot Feb 5, 2026
df97e0c
Fix torch import scope and type annotation consistency
Copilot Feb 6, 2026
ffb73ee
Remove AutoTokenizer import from core.py
bact Feb 6, 2026
2a9f6e9
Merge branch 'dev' of https://github.com/PyThaiNLP/pythainlp into cop…
Copilot Feb 6, 2026
91303d3
Move Qwen3 tests from extra to noauto_torch suite and sync with dev b…
Copilot Feb 6, 2026
432c2ab
Improve dependency error handling and type annotations in lm module
Copilot Feb 6, 2026
f529a9b
Reorder import statements in __init__.py
bact Feb 6, 2026
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -143,6 +143,8 @@ wtp = ["transformers>=4.22.1", "wtpsplit>=1.0.1"]

wunsen = ["wunsen>=0.0.3"]

qwen3 = ["torch>=1.0.0", "transformers>=4.22.1"]

# Compact dependencies - safe small set of optional dependencies
compact = [
"nlpo3>=1.3.1",
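For context, a minimal sketch of how a caller could check that the new optional extra is installed before using the class. The probe itself is illustrative; the install command is the one given by the ImportError message in pythainlp/lm/__init__.py below.

# Hedged sketch: probe for the qwen3 extra before importing the heavy model class.
# Install with: pip install pythainlp[qwen3]
try:
    import torch  # noqa: F401
    import transformers  # noqa: F401
except ImportError as exc:
    raise SystemExit(
        "The qwen3 extra is not installed; run: pip install pythainlp[qwen3]"
    ) from exc

from pythainlp.lm import Qwen3

model = Qwen3()  # instantiation alone does not load any model weights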
14 changes: 13 additions & 1 deletion pythainlp/lm/__init__.py
@@ -2,9 +2,21 @@
# SPDX-FileType: SOURCE
# SPDX-License-Identifier: Apache-2.0

__all__ = ["calculate_ngram_counts", "remove_repeated_ngrams"]
__all__ = ["calculate_ngram_counts", "remove_repeated_ngrams", "Qwen3"]

from pythainlp.lm.text_util import (
calculate_ngram_counts,
remove_repeated_ngrams,
)

try:
from pythainlp.lm.qwen3 import Qwen3
except ImportError:
# If dependencies are not installed, make Qwen3 available but raise
# error when instantiated
class Qwen3: # type: ignore
def __init__(self):
raise ImportError(
"Qwen3 requires additional dependencies. "
"Install with: pip install pythainlp[qwen3]"
)

Copilot AI Feb 5, 2026


The fallback Qwen3 class defined when ImportError occurs (lines 17-22) lacks type annotations. According to the custom coding guidelines, the codebase should maintain near-100% type annotation coverage. Add a -> None return type annotation to the __init__ method for consistency with the codebase's type annotation standards.

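For illustration, a sketch of the annotated fallback along the lines this comment suggests (a sketch only, not the merged code; the error message is taken from the diff above):

# Hedged sketch: the fallback stub with the suggested -> None annotation.
class Qwen3:  # type: ignore
    def __init__(self) -> None:
        raise ImportError(
            "Qwen3 requires additional dependencies. "
            "Install with: pip install pythainlp[qwen3]"
        )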
197 changes: 197 additions & 0 deletions pythainlp/lm/qwen3.py
@@ -0,0 +1,197 @@
# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
# SPDX-FileType: SOURCE
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

from typing import Any

import torch


class Qwen3:
"""Qwen3-0.6B language model for Thai text generation.

A small but capable language model from Alibaba Cloud's Qwen family,
optimized for various NLP tasks including Thai language processing.
"""

def __init__(self):
self.model = None
self.tokenizer = None
self.device = None
self.torch_dtype = None
self.model_path = None

def load_model(
self,
model_path: str = "Qwen/Qwen3-0.6B",
device: str = "cuda",
torch_dtype=torch.float16,

Copilot AI Feb 2, 2026


The torch_dtype parameter lacks a type annotation. While it's documented as accepting torch data types (e.g., torch.float16, torch.bfloat16), it should have a proper type hint. Consider using typing.Optional[torch.dtype] or similar to maintain consistency with the codebase's use of type hints.

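For illustration, a sketch of the signature with the annotation this comment proposes (a sketch under the comment's Optional[torch.dtype] assumption, not the merged implementation; the method body is elided):

from typing import Optional

import torch


class Qwen3:
    # Hedged sketch: signature only, showing the proposed torch_dtype annotation.
    def load_model(
        self,
        model_path: str = "Qwen/Qwen3-0.6B",
        device: str = "cuda",
        torch_dtype: Optional[torch.dtype] = torch.float16,
        low_cpu_mem_usage: bool = True,
    ) -> None:
        ...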
low_cpu_mem_usage: bool = True,
):
"""Load Qwen3 model.

:param str model_path: model path or HuggingFace model ID
:param str device: device (cpu, cuda or other)
:param torch_dtype: torch data type (e.g., torch.float16, torch.bfloat16)

Copilot AI Feb 2, 2026


The documentation for the torch_dtype parameter is missing the type specification. The codebase convention is to use the format :param type name: description. This should be :param Optional[torch.dtype] torch_dtype: torch data type (e.g., torch.float16, torch.bfloat16) to match the rest of the codebase style and provide complete documentation.

:param bool low_cpu_mem_usage: low cpu mem usage

:Example:
::

from pythainlp.lm import Qwen3
import torch

model = Qwen3()
model.load_model(device="cpu", torch_dtype=torch.bfloat16)
"""
from transformers import AutoModelForCausalLM, AutoTokenizer

self.device = device
self.torch_dtype = torch_dtype
self.model_path = model_path

self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
self.model = AutoModelForCausalLM.from_pretrained(
self.model_path,
torch_dtype=torch_dtype,
low_cpu_mem_usage=low_cpu_mem_usage,
)
self.model.to(device)


Copilot AI Feb 2, 2026


The load_model method does not handle potential exceptions that could occur during model loading (e.g., network errors, invalid model paths, insufficient memory, or CUDA availability issues). Consider adding try-except blocks with informative error messages to help users diagnose common problems, especially for device-related errors.

Suggested change
self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
self.model = AutoModelForCausalLM.from_pretrained(
self.model_path,
torch_dtype=torch_dtype,
low_cpu_mem_usage=low_cpu_mem_usage,
)
self.model.to(device)
try:
self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
except OSError as exc:
raise RuntimeError(
f"Failed to load tokenizer from '{self.model_path}'. "
"Check the model path or your network connection."
) from exc
try:
self.model = AutoModelForCausalLM.from_pretrained(
self.model_path,
torch_dtype=torch_dtype,
low_cpu_mem_usage=low_cpu_mem_usage,
)
except OSError as exc:
raise RuntimeError(
f"Failed to load model from '{self.model_path}'. "
"This can happen due to an invalid model path, missing files, "
"or insufficient disk space."
) from exc
except RuntimeError as exc:
raise RuntimeError(
"Failed to load model weights. "
"This can be caused by insufficient memory or an incompatible "
"torch_dtype setting."
) from exc
if isinstance(device, str) and device.startswith("cuda"):
if not torch.cuda.is_available():
raise RuntimeError(
"CUDA device requested but CUDA is not available. "
"Check your PyTorch installation and GPU drivers, or use "
"device='cpu' instead."
)
try:
self.model.to(device)
except RuntimeError as exc:
raise RuntimeError(
f"Failed to move model to device '{device}'. "
"Ensure the device exists and has enough memory, and that your "
"PyTorch installation supports this device."
) from exc

def generate(
self,
text: str,

Copilot AI Feb 2, 2026


The generate method lacks input validation for the text parameter. If an empty string or None is passed, it may cause unclear errors downstream. Consider adding validation to check that text is a non-empty string before processing.

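For illustration, a minimal validation sketch along the lines this comment suggests; the helper name and error message are illustrative, not part of the PR:

def _validate_prompt(text: str) -> str:
    # Hedged sketch: reject None, empty, or whitespace-only prompts early.
    if not isinstance(text, str) or not text.strip():
        raise ValueError("text must be a non-empty string")
    return text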
max_new_tokens: int = 512,
temperature: float = 0.7,
top_p: float = 0.9,
top_k: int = 50,
do_sample: bool = True,
skip_special_tokens: bool = True,
) -> str:
"""Generate text from a prompt.

:param str text: input text prompt
:param int max_new_tokens: maximum number of new tokens to generate
:param float temperature: temperature for sampling (higher = more random)
:param float top_p: top p for nucleus sampling
:param int top_k: top k for top-k sampling
:param bool do_sample: whether to use sampling or greedy decoding
:param bool skip_special_tokens: skip special tokens in output
:return: generated text
:rtype: str

:Example:
::

from pythainlp.lm import Qwen3
import torch

model = Qwen3()
model.load_model(device="cpu", torch_dtype=torch.bfloat16)

result = model.generate("สวัสดี")
print(result)
"""
if self.model is None or self.tokenizer is None or self.device is None:
raise RuntimeError(
"Model not loaded. Please call load_model() first."
)

inputs = self.tokenizer(text, return_tensors="pt")
input_ids = inputs["input_ids"].to(self.device)

with torch.inference_mode():
output_ids = self.model.generate(
input_ids,
max_new_tokens=max_new_tokens,
temperature=temperature,
top_p=top_p,
top_k=top_k,
do_sample=do_sample,
)
Comment on lines +167 to +174

Copilot AI Feb 5, 2026


When do_sample=False (greedy decoding), the temperature, top_p, and top_k parameters are ignored by the transformers library. Consider adding validation to warn users or handle this case explicitly. The current implementation may mislead users who set do_sample=False but also provide temperature values expecting them to have an effect.

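For illustration, a sketch of the explicit handling this comment asks for; the helper name and warning text are illustrative:

import warnings


def _warn_if_sampling_args_ignored(do_sample: bool) -> None:
    # Hedged sketch: make it explicit that greedy decoding ignores the sampling knobs.
    if not do_sample:
        warnings.warn(
            "do_sample=False selects greedy decoding; temperature, top_p and "
            "top_k will have no effect.",
            UserWarning,
        )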

# Decode only the newly generated tokens
generated_text = self.tokenizer.decode(
output_ids[0][len(input_ids[0]) :],
Comment on lines 176 to 180

Copilot AI Feb 5, 2026


The slicing operation output_ids[0][len(input_ids[0]):] assumes that output_ids[0] and input_ids[0] are present. While this should generally be safe given the model.generate call, it would be more defensive to check the shapes or handle potential IndexError. Consider adding a check or comment explaining why this is safe in this context.

Suggested change
# Decode only the newly generated tokens
generated_text = self.tokenizer.decode(
output_ids[0][len(input_ids[0]) :],
# Decode only the newly generated tokens.
if (
output_ids.dim() == 2
and input_ids.dim() == 2
and output_ids.size(0) > 0
and input_ids.size(0) > 0
):
start_idx = input_ids.size(1)
generated_ids = output_ids[0, start_idx:]
else:
raise RuntimeError(
"Unexpected tensor shape from model.generate(); "
"expected 2D tensors with non-empty batch dimension."
)
generated_text = self.tokenizer.decode(
generated_ids,

skip_special_tokens=skip_special_tokens,
)

return generated_text

def chat(
self,
messages: list[dict[str, Any]],

Copilot AI Feb 2, 2026


The chat method lacks input validation for the messages parameter. If an empty list is passed, the method will proceed without error but may produce unexpected behavior. Consider adding a check to validate that messages is not empty and contains properly formatted message dictionaries with 'role' and 'content' keys.

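For illustration, a minimal validation sketch along the lines this comment suggests; the helper name and error messages are illustrative:

from __future__ import annotations

from typing import Any


def _validate_messages(messages: list[dict[str, Any]]) -> None:
    # Hedged sketch: require a non-empty list of dicts with 'role' and 'content' keys.
    if not isinstance(messages, list) or not messages:
        raise ValueError("messages must be a non-empty list")
    for msg in messages:
        if not isinstance(msg, dict) or "role" not in msg or "content" not in msg:
            raise ValueError(
                "each message must be a dict with 'role' and 'content' keys"
            )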
max_new_tokens: int = 512,
temperature: float = 0.7,
top_p: float = 0.9,
top_k: int = 50,
do_sample: bool = True,
skip_special_tokens: bool = True,
) -> str:
"""Generate text using chat format.

:param list[dict[str, Any]] messages: list of message dictionaries with 'role' and 'content' keys
:param int max_new_tokens: maximum number of new tokens to generate
:param float temperature: temperature for sampling
:param float top_p: top p for nucleus sampling
:param int top_k: top k for top-k sampling
:param bool do_sample: whether to use sampling
:param bool skip_special_tokens: skip special tokens in output
:return: generated response
:rtype: str

:Example:
::

from pythainlp.lm import Qwen3
import torch

model = Qwen3()
model.load_model(device="cpu", torch_dtype=torch.bfloat16)

messages = [{"role": "user", "content": "สวัสดีครับ"}]
response = model.chat(messages)
print(response)
"""
if self.model is None or self.tokenizer is None or self.device is None:
raise RuntimeError(
"Model not loaded. Please call load_model() first."
)

# Apply chat template if available, otherwise format manually
if hasattr(self.tokenizer, "apply_chat_template"):
text = self.tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
)
else:
# Simple fallback format
text = ""
for msg in messages:
role = msg.get("role", "user")
content = msg.get("content", "")

Copilot AI Feb 2, 2026


The fallback chat template formatting (lines 172-177) could produce ambiguous output if message content contains newline characters. Consider either sanitizing the content to remove/escape newlines, or using a more robust delimiter that won't be present in natural text.

Suggested change
role = msg.get("role", "user")
content = msg.get("content", "")
role = str(msg.get("role", "user")).replace("\n", " ")
content = str(msg.get("content", "")).replace("\n", "\\n")

text += f"{role}: {content}\n"
text += "assistant: "

inputs = self.tokenizer(text, return_tensors="pt")
input_ids = inputs["input_ids"].to(self.device)

with torch.inference_mode():
output_ids = self.model.generate(
input_ids,
max_new_tokens=max_new_tokens,
temperature=temperature,
top_p=top_p,
top_k=top_k,
do_sample=do_sample,
)

# Decode only the newly generated tokens
generated_text = self.tokenizer.decode(
output_ids[0][len(input_ids[0]) :],
skip_special_tokens=skip_special_tokens,
)

return generated_text
40 changes: 40 additions & 0 deletions tests/extra/testx_lm.py
@@ -0,0 +1,40 @@
# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
# SPDX-FileType: SOURCE
# SPDX-License-Identifier: Apache-2.0

import unittest

from pythainlp.lm import Qwen3


class LMTestCaseX(unittest.TestCase):

Copilot AI Feb 2, 2026


The test class name "LMTestCaseX" does not follow the naming convention established in tests/extra/ and documented in PR #1248. According to the 4-tier test organization, extra-tier test classes should use a descriptive module-based prefix followed by "TestCaseX" suffix. Based on the pattern in other extra test files (e.g., "GenerateTestCaseX", "AugmentTestCaseX"), this class should be named "LmTestCaseX" or more descriptively, "LanguageModelTestCaseX".

def test_qwen3_initialization(self):
# Test that Qwen3 can be instantiated
try:
model = Qwen3()
self.assertIsNotNone(model)
self.assertIsNone(model.model)
self.assertIsNone(model.tokenizer)
except ImportError:
# Skip if dependencies not installed
self.skipTest("Qwen3 dependencies not installed")

def test_qwen3_generate_without_load(self):
# Test that generate raises error when model is not loaded
try:
model = Qwen3()
with self.assertRaises(RuntimeError):
model.generate("test")
except ImportError:
# Skip if dependencies not installed
self.skipTest("Qwen3 dependencies not installed")

def test_qwen3_chat_without_load(self):
# Test that chat raises error when model is not loaded
try:
model = Qwen3()
with self.assertRaises(RuntimeError):
model.chat([{"role": "user", "content": "test"}])
except ImportError:
# Skip if dependencies not installed
self.skipTest("Qwen3 dependencies not installed")