diff --git a/apps/pre-processing-service/app/api/endpoints/product.py b/apps/pre-processing-service/app/api/endpoints/product.py
index 0e5f6e79..4e8c6682 100644
--- a/apps/pre-processing-service/app/api/endpoints/product.py
+++ b/apps/pre-processing-service/app/api/endpoints/product.py
@@ -18,7 +18,7 @@ async def search(request: RequestSadaguSearch):
     """
     Product search endpoint
     """
-    return search_products(request)
+    return await search_products(request)
 
 
 @router.post("/match", response_model=ResponseSadaguMatch)
 async def match(request: RequestSadaguMatch):
diff --git a/apps/pre-processing-service/app/core/config.py b/apps/pre-processing-service/app/core/config.py
index 536e3ddc..a35f048b 100644
--- a/apps/pre-processing-service/app/core/config.py
+++ b/apps/pre-processing-service/app/core/config.py
@@ -1,9 +1,65 @@
 # Import SettingsConfigDict along with BaseSettings from pydantic_settings.
 from pydantic_settings import BaseSettings, SettingsConfigDict
 import os
+import platform
+import subprocess
 from typing import Optional
+
+
+def detect_mecab_dicdir() -> Optional[str]:
+    """Auto-detect the MeCab dictionary directory."""
+
+    # 1. Ask mecab-config for the dictionary path (the most reliable method)
+    try:
+        result = subprocess.run(['mecab-config', '--dicdir'],
+                                capture_output=True, text=True, timeout=5)
+        if result.returncode == 0:
+            dicdir = result.stdout.strip()
+            if os.path.exists(dicdir):
+                print(f"mecab-config에서 사전 경로 발견: {dicdir}")
+                return dicdir
+    except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
+        pass
+
+    # 2. Check the usual per-platform locations
+    system = platform.system().lower()
+
+    if system == "darwin":  # macOS
+        candidate_paths = [
+            "/opt/homebrew/lib/mecab/dic/mecab-ko-dic",  # Apple Silicon
+            "/usr/local/lib/mecab/dic/mecab-ko-dic",     # Intel Mac
+            "/opt/homebrew/lib/mecab/dic/mecab-ipadic",  # default dictionary
+            "/usr/local/lib/mecab/dic/mecab-ipadic"
+        ]
+    elif system == "linux":
+        candidate_paths = [
+            "/usr/lib/x86_64-linux-gnu/mecab/dic/mecab-ko-dic",
+            "/usr/lib/mecab/dic/mecab-ko-dic",
+            "/usr/local/lib/mecab/dic/mecab-ko-dic",
+            "/usr/share/mecab/dic/mecab-ko-dic",
+            "/usr/lib/mecab/dic/mecab-ipadic",
+            "/usr/local/lib/mecab/dic/mecab-ipadic"
+        ]
+    elif system == "windows":
+        candidate_paths = [
+            "C:/Program Files/MeCab/dic/mecab-ko-dic",
+            "C:/mecab/dic/mecab-ko-dic",
+            "C:/Program Files/MeCab/dic/mecab-ipadic"
+        ]
+    else:
+        candidate_paths = []
+
+    # Check whether any candidate path exists
+    for path in candidate_paths:
+        if os.path.exists(path):
+            # Check for a dicrc file (verifies it is an actual dictionary)
+            dicrc_path = os.path.join(path, "dicrc")
+            if os.path.exists(dicrc_path):
+                print(f"플랫폼 기본 경로에서 사전 발견: {path}")
+                return path
+
+    return None
+
 
 # Settings base class shared across environments
 class BaseSettingsConfig(BaseSettings):
@@ -13,7 +69,19 @@ class BaseSettingsConfig(BaseSettings):
     db_user: str
     db_pass: str
     db_name: str
-    env_name: str = "dev"
+    env_name: str = ".dev"
+
+    # MeCab dictionary path (auto-detected)
+    mecab_path: Optional[str] = None
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+        # Auto-detect when mecab_path is not explicitly configured
+        if not self.mecab_path:
+            self.mecab_path = detect_mecab_dicdir()
+            if not self.mecab_path:
+                print("MeCab 사전 경로를 찾을 수 없어 기본 설정으로 실행합니다.")
 
     @property
     def db_url(self) -> str:
@@ -25,11 +93,11 @@ def db_url(self) -> str:
 
 # Environment-specific settings classes
 class DevSettings(BaseSettingsConfig):
-    model_config = SettingsConfigDict(env_file=['.env', 'dev.env'])
+    model_config = SettingsConfigDict(env_file=['.env', '.dev.env'])
 
 
 class PrdSettings(BaseSettingsConfig):
-    model_config = SettingsConfigDict(env_file=['.env', 'prd.env'])
+    model_config = SettingsConfigDict(env_file=['.env', '.prd.env'])
 
 
 def get_settings() -> BaseSettingsConfig:
     """Return the settings object appropriate for the current environment."""
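Note: the detection above feeds the MeCab.Tagger construction in app/utils/keyword_matcher.py further down. A minimal smoke test of the detected path — a sketch only, assuming the module-level `settings` object that keyword_matcher.py imports:

    from app.core.config import settings
    import MeCab

    # mecab_path was filled in by detect_mecab_dicdir() inside __init__; when it
    # is None, MeCab falls back to its compiled-in default dictionary.
    tagger = MeCab.Tagger(f"-d {settings.mecab_path}") if settings.mecab_path else MeCab.Tagger()
    print(tagger.parse("테스트"))  # one morpheme line per token, terminated by EOS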
diff --git a/apps/pre-processing-service/app/model/schemas.py b/apps/pre-processing-service/app/model/schemas.py
index b811a4e5..5e72fcfb 100644
--- a/apps/pre-processing-service/app/model/schemas.py
+++ b/apps/pre-processing-service/app/model/schemas.py
@@ -1,40 +1,31 @@
 from datetime import datetime
-from typing import Optional, List, Dict
+from typing import Optional, List, Dict, Union
 from pydantic import BaseModel, Field, HttpUrl
 
-#Base request
+# Base request
 class RequestBase(BaseModel):
     job_id: int
     schedule_id: int
-    sschdule_his_id: Optional[int] = None
+    schedule_his_id: Optional[int] = None
 
-#Base response
+# Base response
 class ResponseBase(BaseModel):
     job_id: int
     schedule_id: int
-    sschdule_his_id : Optional[int] = None
+    schedule_his_id: Optional[int] = None
     status: str
-
-#Naver keyword extraction
+# Naver keyword extraction
 class RequestNaverSearch(RequestBase):
     tag: str
     category: Optional[str] = None
-    start_date : Optional[str] = None
-    end_date : Optional[str] = None
+    start_date: Optional[str] = None
+    end_date: Optional[str] = None
 
 class ResponseNaverSearch(ResponseBase):
     category: Optional[str] = None
     keyword: str
-    total_keyword: dict[int, str]
-
-# #Keyword validation against Ssadagu mall
-# class RequestSadaguValidate(RequestBase):
-#     tag: str
-#     category: str
-#
-# class ResponsetSadaguValidate(ResponseBase):
-#     keyword: str
+    total_keyword: Dict[int, str]
 
 # Step 2: search
 class RequestSadaguSearch(RequestBase):
@@ -42,43 +33,51 @@ class ResponseSadaguSearch(ResponseBase):
     keyword: str
-    search_results: list[dict]
+    search_results: List[Dict]
 
 # Step 3: matching
 class RequestSadaguMatch(RequestBase):
     keyword: str
-    search_results: list[dict]
+    search_results: List[Dict]
 
 class ResponseSadaguMatch(ResponseBase):
     keyword: str
-    matched_products: list[dict]
+    matched_products: List[Dict]
 
 # Step 4: similarity
 class RequestSadaguSimilarity(RequestBase):
     keyword: str
-    matched_products: list[dict]
+    matched_products: List[Dict]
+    search_results: Optional[List[Dict]] = None  # fallback when step-3 matching fails
 
 class ResponseSadaguSimilarity(ResponseBase):
     keyword: str
-    selected_product: dict | None = None
-    reason: str | None = None
-
-#Ssadagu mall crawling
-class RequestSadaguCrawl(RequestBase):
+    selected_product: Optional[Dict] = None
+    reason: Optional[str] = None
+
+# Ssadagu mall crawling
+class RequestSadaguCrawl(BaseModel):
+    job_id: int = Field(..., description="작업 ID")
+    schedule_id: int = Field(..., description="스케줄 ID")
+    schedule_his_id: int = Field(..., description="스케줄 히스토리 ID")
     tag: str = Field(..., description="크롤링 태그 (예: 'detail')")
     product_url: HttpUrl = Field(..., description="크롤링할 상품의 URL")
     use_selenium: bool = Field(default=True, description="Selenium 사용 여부")
     include_images: bool = Field(default=False, description="이미지 정보 포함 여부")
 
-class ResponseSadaguCrawl(ResponseBase):
+class ResponseSadaguCrawl(BaseModel):
+    job_id: int
+    schedule_id: int
+    schedule_his_id: int
     tag: str
     product_url: str
     use_selenium: bool
     include_images: bool
-    product_detail: Optional[dict] = None
+    product_detail: Optional[Dict] = None
+    status: str
     crawled_at: Optional[str] = None
 
-#Blog creation
+# Blog creation
 class RequestBlogCreate(RequestBase):
     tag: str
     category: str
@@ -86,10 +85,10 @@ class RequestBlogCreate(RequestBase):
 class ResponseBlogCreate(ResponseBase):
     pass
 
-#Blog publishing
+# Blog publishing
 class RequestBlogPublish(RequestBase):
     tag: str
     category: str
 
 class ResponseBlogPublish(ResponseBase):
-    pass
+    pass
\ No newline at end of file
diff --git a/apps/pre-processing-service/app/service/crawl_service.py b/apps/pre-processing-service/app/service/crawl_service.py
index c2a4e13a..11844ead 100644
--- a/apps/pre-processing-service/app/service/crawl_service.py
+++ b/apps/pre-processing-service/app/service/crawl_service.py
@@ -1,270 +1,49 @@
-import json
+# app/service/crawl_service.py
 import time
-import re
-import httpx
-from bs4 import BeautifulSoup
-from selenium import webdriver
-from selenium.webdriver.chrome.options import Options
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.common.exceptions import TimeoutException, NoSuchElementException
-
+from app.utils.crawler_utils import DetailCrawler
 from app.errors.CustomException import InvalidItemDataException
 from app.model.schemas import RequestSadaguCrawl
 
+
 async def crawl_product_detail(request: RequestSadaguCrawl) -> dict:
     """
-    Business logic that crawls the selected product's detail data.
+    Business logic that crawls the selected product's detail data. (Step 5)
    Takes a product URL and returns the crawled detail data as a dict.
     """
-    crawler = ProductDetailCrawler(use_selenium=request.use_selenium)
+    crawler = DetailCrawler(use_selenium=request.use_selenium)
 
     try:
+        print(f"상품 상세 크롤링 시작: {request.product_url}")
+
         # Run the detail crawl
         product_detail = await crawler.crawl_detail(
-            product_url=str(request.product_url),  # convert HttpUrl to str
+            product_url=str(request.product_url),
             include_images=request.include_images
         )
 
         if not product_detail:
             raise InvalidItemDataException("상품 상세 정보 크롤링 실패")
 
+        print(f"크롤링 완료: {product_detail.get('title', 'Unknown')[:50]}")
+
         # Build the response payload
         response_data = {
             "job_id": request.job_id,
             "schedule_id": request.schedule_id,
+            "schedule_his_id": request.schedule_his_id,
             "tag": request.tag,
             "product_url": str(request.product_url),
             "use_selenium": request.use_selenium,
             "include_images": request.include_images,
             "product_detail": product_detail,
-            "status": "success",  # changed from "200" to "success"
+            "status": "success",
             "crawled_at": time.strftime('%Y-%m-%d %H:%M:%S')
         }
 
         return response_data
 
     except Exception as e:
+        print(f"크롤링 서비스 오류: {e}")
         raise InvalidItemDataException(f"상품 상세 크롤링 오류: {e}")
 
     finally:
-        await crawler.close()
-
-
-class ProductDetailCrawler:
-    def __init__(self, use_selenium=True):
-        self.base_url = "https://ssadagu.kr"
-        self.use_selenium = use_selenium
-
-        if use_selenium:
-            self._setup_selenium()
-        else:
-            self._setup_httpx()
-
-    def _setup_selenium(self):
-        """Initialize the Selenium WebDriver."""
-        chrome_options = Options()
-        chrome_options.add_argument('--headless')
-        chrome_options.add_argument('--no-sandbox')
-        chrome_options.add_argument('--disable-dev-shm-usage')
-        chrome_options.add_argument('--disable-gpu')
-        chrome_options.add_argument('--window-size=1920,1080')
-        chrome_options.add_argument(
-            '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
-
-        try:
-            self.driver = webdriver.Chrome(options=chrome_options)
-            self.wait = WebDriverWait(self.driver, 10)
-        except Exception as e:
-            print(f"Selenium 초기화 실패, httpx로 대체: {e}")
-            self.use_selenium = False
-            self._setup_httpx()
-
-    def _setup_httpx(self):
-        """Initialize the httpx client."""
-        self.client = httpx.AsyncClient(
-            headers={
-                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
-            },
-            timeout=30.0
-        )
-
-    async def crawl_detail(self, product_url: str, include_images: bool = False) -> dict:
-        """Crawl product detail data."""
-        try:
-            if self.use_selenium:
-                soup = await self._get_soup_selenium(product_url)
-            else:
-                soup = await self._get_soup_httpx(product_url)
-
-            # Extract the basic fields
-            title = self._extract_title(soup)
-            price = self._extract_price(soup)
-            rating = self._extract_rating(soup)
-            options = self._extract_options(soup)
-            material_info = self._extract_material_info(soup)
-
-            product_data = {
-                'url': product_url,
-                'title': title,
-                'price': price,
-                'rating': rating,
-                'options': options,
-                'material_info': material_info,
-                'crawled_at': time.strftime('%Y-%m-%d %H:%M:%S')
-            }
-
-            # Add image info (optional)
-            if include_images:
-                product_images = self._extract_images(soup)
-                product_data['product_images'] = [{'original_url': img_url} for img_url in product_images]
-            else:
-                product_data['product_images'] = []
-
-            return product_data
-
-        except Exception as e:
-            print(f"크롤링 오류: {e}")
-            raise InvalidItemDataException(f"크롤링 실패: {str(e)}")
-
-    async def _get_soup_selenium(self, product_url: str) -> BeautifulSoup:
-        """Fetch HTML via Selenium."""
-        try:
-            self.driver.get(product_url)
-            self.wait.until(lambda driver: driver.execute_script("return document.readyState") == "complete")
-            return BeautifulSoup(self.driver.page_source, 'html.parser')
-        except Exception as e:
-            raise Exception(f"Selenium HTML 로딩 실패: {e}")
-
-    async def _get_soup_httpx(self, product_url: str) -> BeautifulSoup:
-        """Fetch HTML via httpx."""
-        try:
-            response = await self.client.get(product_url)
-            response.raise_for_status()
-            return BeautifulSoup(response.content, 'html.parser')
-        except Exception as e:
-            raise Exception(f"HTTP 요청 실패: {e}")
-
-    def _extract_title(self, soup: BeautifulSoup) -> str:
-        """Extract the title."""
-        title_element = soup.find('h1', {'id': 'kakaotitle'})
-        return title_element.get_text(strip=True) if title_element else "제목 없음"
-
-    def _extract_price(self, soup: BeautifulSoup) -> int:
-        """Extract the price."""
-        price_selectors = [
-            'span.price.gsItemPriceKWR',
-            '.pdt_price span.price',
-            'span.price',
-            '.price'
-        ]
-
-        for selector in price_selectors:
-            price_element = soup.select_one(selector)
-            if price_element:
-                price_text = price_element.get_text(strip=True).replace(',', '').replace('원', '')
-                price_match = re.search(r'(\d+)', price_text)
-                if price_match:
-                    return int(price_match.group(1))
-        return 0
-
-    def _extract_rating(self, soup: BeautifulSoup) -> float:
-        """Extract the star rating."""
-        rating = 0.0
-        star_containers = [
-            soup.find('a', class_='start'),
-            soup.find('div', class_=re.compile(r'star|rating')),
-            soup.find('a', href='#reviews_wrap')
-        ]
-
-        for container in star_containers:
-            if container:
-                star_imgs = container.find_all('img')
-                for img in star_imgs:
-                    src = img.get('src', '')
-                    if 'icon_star.svg' in src:
-                        rating += 1
-                    elif 'icon_star_half.svg' in src:
-                        rating += 0.5
-                break
-        return rating
-
-    def _extract_options(self, soup: BeautifulSoup) -> list[dict]:
-        """Extract product options."""
-        options = []
-        sku_list = soup.find('ul', {'id': 'skubox'})
-
-        if sku_list:
-            option_items = sku_list.find_all('li', class_=re.compile(r'imgWrapper'))
-            for item in option_items:
-                title_element = item.find('a', title=True)
-                if title_element:
-                    option_name = title_element.get('title', '').strip()
-
-                    # Extract stock info
-                    stock = 0
-                    item_text = item.get_text()
-                    stock_match = re.search(r'재고\s*:\s*(\d+)', item_text)
-                    if stock_match:
-                        stock = int(stock_match.group(1))
-
-                    # Extract the image URL
-                    img_element = item.find('img', class_='colorSpec_hashPic')
-                    image_url = ""
-                    if img_element and img_element.get('src'):
-                        image_url = img_element['src']
-
-                    if option_name:
-                        options.append({
-                            'name': option_name,
-                            'stock': stock,
-                            'image_url': image_url
-                        })
-        return options
-
-    def _extract_material_info(self, soup: BeautifulSoup) -> dict:
-        """Extract material/ingredient info."""
-        material_info = {}
-        info_items = soup.find_all('div', class_='pro-info-item')
-
-        for item in info_items:
-            title_element = item.find('div', class_='pro-info-title')
-            info_element = item.find('div', class_='pro-info-info')
-
-            if title_element and info_element:
-                title = title_element.get_text(strip=True)
-                info = info_element.get_text(strip=True)
-                material_info[title] = info
-
-        return material_info
-
-    def _extract_images(self, soup: BeautifulSoup) -> list[str]:
-        """Extract product image URLs."""
-        images = []
-        img_elements = soup.find_all('img', {'id': re.compile(r'img_translate_\d+')})
-
-        for img in img_elements:
-            src = img.get('src', '')
-            if src:
-                if src.startswith('//'):
-                    src = 'https:' + src
-                elif src.startswith('/'):
-                    src = self.base_url + src
-                elif src.startswith('http'):
-                    pass
-                else:
-                    continue
-                images.append(src)
-
-        return images
-
-    async def close(self):
-        """Clean up resources."""
-        if self.use_selenium and hasattr(self, 'driver'):
-            try:
-                self.driver.quit()
-            except Exception:
-                pass
-        elif hasattr(self, 'client'):
-            try:
-                await self.client.aclose()
-            except Exception:
-                pass
\ No newline at end of file
+        await crawler.close()
\ No newline at end of file
diff --git a/apps/pre-processing-service/app/service/match_service.py b/apps/pre-processing-service/app/service/match_service.py
index 5e0926c3..6b1cc171 100644
--- a/apps/pre-processing-service/app/service/match_service.py
+++ b/apps/pre-processing-service/app/service/match_service.py
@@ -1,21 +1,66 @@
-import urllib
-
+from app.utils.keyword_matcher import KeywordMatcher
+from app.errors.CustomException import InvalidItemDataException
 from ..model.schemas import RequestSadaguMatch
 
+
 def match_products(request: RequestSadaguMatch) -> dict:
     """
-    Keyword matching logic (using MeCab, etc.)
+    Keyword matching logic (using MeCab, etc.) - Step 3
     """
     keyword = request.keyword
    products = request.search_results
 
-    # TODO: real keyword matching logic still needs to be applied
-    matched = [p for p in products if keyword in p["title"]]
+    if not products:
+        return {
+            "job_id": request.job_id,
+            "schedule_id": request.schedule_id,
+            "schedule_his_id": request.schedule_his_id,
+            "keyword": keyword,
+            "matched_products": [],
+            "status": "success"
+        }
+
+    try:
+        matcher = KeywordMatcher()
+        matched_products = []
+
+        print(f"키워드 '{keyword}'와 {len(products)}개 상품 매칭 분석 시작...")
+
+        for i, product in enumerate(products):
+            title = product.get('title', '')
+            if not title:
+                continue
+
+            # Analyze how well the title matches the keyword
+            match_result = matcher.analyze_keyword_match(title, keyword)
+
+            print(f"상품 {i + 1}: {title[:50]} | {match_result['reason']}")
+
+            if match_result['is_match']:
+                # Attach match info to the matched product
+                matched_product = product.copy()
+                matched_product['match_info'] = {
+                    'match_type': match_result['match_type'],
+                    'match_score': match_result['score'],
+                    'match_reason': match_result['reason']
+                }
+                matched_products.append(matched_product)
+                print(f"  ✅ 매칭됨!")
+
+        print(f"매칭 결과: {len(matched_products)}개 상품")
+
+        # Sort by match score (descending)
+        matched_products.sort(key=lambda x: x['match_info']['match_score'], reverse=True)
+
+        return {
+            "job_id": request.job_id,
+            "schedule_id": request.schedule_id,
+            "schedule_his_id": request.schedule_his_id,
+            "keyword": keyword,
+            "matched_products": matched_products,
+            "status": "success"
+        }
 
-    return {
-        "job_id": request.job_id,
-        "schedule_id": request.schedule_id,
-        "keyword": keyword,
-        "matched_products": matched,
-        "status": "success"
-    }
\ No newline at end of file
+    except Exception as e:
+        print(f"매칭 서비스 오류: {e}")
+        raise InvalidItemDataException(f"키워드 매칭 실패: {str(e)}")
\ No newline at end of file
diff --git a/apps/pre-processing-service/app/service/search_service.py b/apps/pre-processing-service/app/service/search_service.py
index 45acb34a..da7aa1fd 100644
--- a/apps/pre-processing-service/app/service/search_service.py
+++ b/apps/pre-processing-service/app/service/search_service.py
@@ -1,23 +1,81 @@
+from app.utils.crawler_utils import SearchCrawler
+from app.errors.CustomException import InvalidItemDataException
 from ..model.schemas import RequestSadaguSearch
-import urllib.parse
 
-def search_products(request: RequestSadaguSearch) -> dict:
+
+async def search_products(request: RequestSadaguSearch) -> dict:
     """
-    Business logic that searches for products by keyword
+    Business logic that searches for products by keyword (Step 2)
     """
     keyword = request.keyword
-    encoded_keyword = urllib.parse.quote(keyword)
-
-    # TODO: Selenium/requests logic still needs to be added
-    search_results = [
-        {"url": f"https://ssadagu.kr/view.php?id=123"},
-        {"url": f"https://ssadagu.kr/view.php?id=456"}
-    ]
-
-    return {
-        "job_id": request.job_id,
-        "schedule_id": request.schedule_id,
-        "keyword": keyword,
-        "search_results": search_results,
-        "status": "success"
-    }
+    crawler = SearchCrawler(use_selenium=True)
+
+    try:
+        print(f"키워드 '{keyword}'로 상품 검색 시작...")
+
+        # Search products via Selenium or httpx
+        if crawler.use_selenium:
+            search_results = await crawler.search_products_selenium(keyword)
+        else:
+            search_results = await crawler.search_products_httpx(keyword)
+
+        if not search_results:
+            print("검색 결과가 없습니다.")
+            return {
+                "job_id": request.job_id,
+                "schedule_id": request.schedule_id,
+                "schedule_his_id": request.schedule_his_id,
+                "keyword": keyword,
+                "search_results": [],
+                "status": "success"
+            }
+
+        # Collect basic info per product (re-crawl when a title is missing)
+        enriched_results = []
+        print(f"총 {len(search_results)}개 상품의 기본 정보를 수집 중...")
+
+        for i, product in enumerate(search_results):
+            try:
+                # Use the product as-is when it already has a valid title
+                if product.get('title') and product['title'] != 'Unknown Title' and len(product['title'].strip()) > 0:
+                    enriched_results.append(product)
+                else:
+                    # Otherwise re-crawl to recover the title
+                    print(f"상품 {i + 1}: 제목 재수집 중... ({product['url']})")
+                    basic_info = await crawler.get_basic_product_info(product['url'])
+
+                    if basic_info and basic_info['title'] != "제목 없음":
+                        enriched_results.append({
+                            'url': product['url'],
+                            'title': basic_info['title']
+                        })
+                    else:
+                        # Exclude the product if the title still cannot be recovered
+                        print(f"  제목 추출 실패, 제외")
+                        continue
+
+                # Process at most 20 products
+                if len(enriched_results) >= 20:
+                    break
+
+            except Exception as e:
+                print(f"상품 {i + 1} 처리 중 오류: {e}")
+                continue
+
+        print(f"최종 수집된 유효 상품: {len(enriched_results)}개")
+
+        return {
+            "job_id": request.job_id,
+            "schedule_id": request.schedule_id,
+            "schedule_his_id": request.schedule_his_id,
+            "keyword": keyword,
+            "search_results": enriched_results,
+            "status": "success"
+        }
+
+    except Exception as e:
+        print(f"검색 서비스 오류: {e}")
+        raise InvalidItemDataException(f"상품 검색 실패: {str(e)}")
+
+    finally:
+        await crawler.close()
\ No newline at end of file
diff --git a/apps/pre-processing-service/app/service/similarity_service.py b/apps/pre-processing-service/app/service/similarity_service.py
index 29aa3b20..27823e9e 100644
--- a/apps/pre-processing-service/app/service/similarity_service.py
+++ b/apps/pre-processing-service/app/service/similarity_service.py
@@ -1,20 +1,137 @@
+from app.utils.similarity_analyzer import SimilarityAnalyzer
+from app.errors.CustomException import InvalidItemDataException
 from ..model.schemas import RequestSadaguSimilarity
 
+
 def select_product_by_similarity(request: RequestSadaguSimilarity) -> dict:
     """
-    Select a product after BERT-based similarity analysis
+    Select a product after BERT-based similarity analysis - Step 4
     """
     keyword = request.keyword
     candidates = request.matched_products
+    fallback_products = request.search_results or []
+
+    # Fall back to the full search results when nothing was matched
+    if not candidates:
+        if not fallback_products:
+            return {
+                "job_id": request.job_id,
+                "schedule_id": request.schedule_id,
+                "schedule_his_id": request.schedule_his_id,
+                "keyword": keyword,
+                "selected_product": None,
+                "reason": "매칭된 상품과 검색 결과가 모두 없음",
+                "status": "success"
+            }
+
+        print("매칭된 상품 없음 → 전체 검색 결과에서 유사도 분석")
+        candidates = fallback_products
+        analysis_mode = "fallback_similarity_only"
+    else:
+        analysis_mode = "matched_products"
+
+    try:
+        analyzer = SimilarityAnalyzer()
+
+        print(f"키워드 '{keyword}'와 {len(candidates)}개 상품의 유사도 분석 시작... (모드: {analysis_mode})")
+
+        # With a single candidate, select it directly
+        if len(candidates) == 1:
+            selected_product = candidates[0]
+
+            # Compute its similarity
+            similarity = analyzer.calculate_similarity(keyword, selected_product['title'])
+
+            # In fallback mode, enforce the similarity threshold
+            if analysis_mode == "fallback_similarity_only":
+                similarity_threshold = 0.3
+                if similarity < similarity_threshold:
+                    return {
+                        "job_id": request.job_id,
+                        "schedule_id": request.schedule_id,
+                        "schedule_his_id": request.schedule_his_id,
+                        "keyword": keyword,
+                        "selected_product": None,
+                        "reason": f"단일 상품 유사도({similarity:.4f}) < 기준({similarity_threshold})",
+                        "status": "success"
+                    }
+
+            selected_product['similarity_info'] = {
+                'similarity_score': float(similarity),
+                'analysis_type': 'single_candidate',
+                'analysis_mode': analysis_mode
+            }
+
+            return {
+                "job_id": request.job_id,
+                "schedule_id": request.schedule_id,
+                "schedule_his_id": request.schedule_his_id,
+                "keyword": keyword,
+                "selected_product": selected_product,
+                "reason": f"단일 상품 - 유사도: {similarity:.4f} ({analysis_mode})",
+                "status": "success"
+            }
+
+        # With multiple candidates, compare similarities
+        print("여러 상품 중 최고 유사도로 선택...")
+
+        # Batch-analyze just the titles
+        titles = [product['title'] for product in candidates]
+        similarity_results = analyzer.analyze_similarity_batch(keyword, titles)
+
+        # Log the results
+        for result in similarity_results:
+            print(f"  {result['title'][:40]} | 유사도: {result['similarity']:.4f}")
+
+        # Take the highest similarity
+        best_result = similarity_results[0]
+        selected_product = candidates[best_result['index']].copy()
+
+        # In fallback mode, enforce the similarity threshold
+        similarity_threshold = 0.3
+        if analysis_mode == "fallback_similarity_only" and best_result['similarity'] < similarity_threshold:
+            return {
+                "job_id": request.job_id,
+                "schedule_id": request.schedule_id,
+                "schedule_his_id": request.schedule_his_id,
+                "keyword": keyword,
+                "selected_product": None,
+                "reason": f"최고 유사도({best_result['similarity']:.4f}) < 기준({similarity_threshold})",
+                "status": "success"
+            }
+
+        # Attach similarity info
+        selected_product['similarity_info'] = {
+            'similarity_score': best_result['similarity'],
+            'analysis_type': 'multi_candidate_bert',
+            'analysis_mode': analysis_mode,
+            'rank': 1,
+            'total_candidates': len(candidates)
+        }
+
+        # In matched mode, also compute a composite score
+        if analysis_mode == "matched_products" and 'match_info' in selected_product:
+            match_score = selected_product['match_info']['match_score']
+            similarity_score = best_result['similarity']
+            # Weights: matching 40%, similarity 60%
+            final_score = match_score * 0.4 + similarity_score * 0.6
+            selected_product['final_score'] = final_score
+            reason = f"종합점수({final_score:.4f}) = 매칭({match_score:.4f})*0.4 + 유사도({similarity_score:.4f})*0.6"
+        else:
+            reason = f"유사도({best_result['similarity']:.4f}) 기준 선택 ({analysis_mode})"
+
+        print(f"선택됨: {selected_product['title'][:50]} | {reason}")
+
+        return {
+            "job_id": request.job_id,
+            "schedule_id": request.schedule_id,
+            "schedule_his_id": request.schedule_his_id,
+            "keyword": keyword,
+            "selected_product": selected_product,
+            "reason": reason,
+            "status": "success"
+        }
 
-    # TODO: real similarity analysis logic still needs to be applied
-    selected = candidates[0] if candidates else None
-
-    return {
-        "job_id": request.job_id,
-        "schedule_id": request.schedule_id,
-        "keyword": keyword,
-        "selected_product": selected,
-        "reason": "샘플 로직: 첫 번째 매칭 선택",
-        "status": "success"
-    }
+    except Exception as e:
+        print(f"유사도 분석 서비스 오류: {e}")
+        raise InvalidItemDataException(f"유사도 분석 실패: {str(e)}")
\ No newline at end of file
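Note: a worked example of the composite score computed above in matched-products mode, with illustrative numbers:

    match_score = 0.8        # e.g. a morphological match from step 3
    similarity_score = 0.91  # BERT cosine similarity from step 4
    # Weights: matching 40%, similarity 60%
    final_score = match_score * 0.4 + similarity_score * 0.6  # 0.32 + 0.546 = 0.866

In fallback mode no match_info exists, so the raw similarity (gated by the 0.3 threshold) decides alone.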
diff --git a/apps/pre-processing-service/app/test/test_keyword.py b/apps/pre-processing-service/app/test/test_keyword.py
index 572cfd41..e0432139 100644
--- a/apps/pre-processing-service/app/test/test_keyword.py
+++ b/apps/pre-processing-service/app/test/test_keyword.py
@@ -1,6 +1,6 @@
 import pytest
 from fastapi.testclient import TestClient
-from ..main import app  # import the FastAPI app object from main.py
+from app.main import app
 
 client = TestClient(app)
 
@@ -9,39 +9,36 @@
 SCHEDULE_HIS_ID = 1
 
-
 def test_read_root():
-    # Send the request to the API using the client.
     response = client.get("/keyword/")
-    # Verify that the HTTP status code is 200 OK.
     assert response.status_code == 200
-    # Verify that the response body (JSON) matches expectations.
     assert response.json() == {"message": "keyword API"}
 
 @pytest.mark.parametrize("tag, category, start_date, end_date", [
-    ("naver","50000000","2025-09-01","2025-09-02"),
-    ("naver","50000001","2025-09-01","2025-09-02"),
-    ("naver","50000002","2025-09-01","2025-09-02"),
-    # ("naver","50000002","2025-08-08","2025-08-09"),
-    ("naver_store","","2025-09-01","2025-09-02"),
+    ("naver", "50000000", "2025-09-01", "2025-09-02"),
+    ("naver", "50000001", "2025-09-01", "2025-09-02"),
+    ("naver", "50000002", "2025-09-01", "2025-09-02"),
+    ("naver_store", "", "2025-09-01", "2025-09-02"),
 ])
-def test_search(tag,category, start_date, end_date):
-
+def test_search(tag, category, start_date, end_date):
     body = {
-        "job_id":JOB_ID,
+        "job_id": JOB_ID,
         "schedule_id": SCHEDULE_ID,
-        "sschdule_his_id":SCHEDULE_HIS_ID,
-        "tag":tag,
-        "category":category,
-        "start_date":start_date,
-        "end_date":end_date
+        "schedule_his_id": SCHEDULE_HIS_ID,  # typo fixed
+        "tag": tag,
+        "category": category,
+        "start_date": start_date,
+        "end_date": end_date
     }
-    response = client.post("/keyword/search",json=body)
-    assert response.json()["job_id"] == body["job_id"]
-    assert response.json()["schedule_id"] == body["schedule_id"]
-    assert response.json()["sschdule_his_id"] == body["sschdule_his_id"]
-    assert response.json()["status"] == "success"
-    assert "keyword" in response.json()
-    assert isinstance(response.json()["total_keyword"], dict)
-    assert response.status_code == 200
\ No newline at end of file
+
+    response = client.post("/keyword/search", json=body)
+    assert response.status_code == 200
+
+    response_data = response.json()
+    assert response_data["job_id"] == body["job_id"]
+    assert response_data["schedule_id"] == body["schedule_id"]
+    assert response_data["schedule_his_id"] == body["schedule_his_id"]  # typo fixed
+    assert response_data["status"] == "success"
+    assert "keyword" in response_data
+    assert isinstance(response_data["total_keyword"], dict)
\ No newline at end of file
diff --git a/apps/pre-processing-service/app/test/test_match_service.py b/apps/pre-processing-service/app/test/test_match_service.py
new file mode 100644
index 00000000..7b80c258
--- /dev/null
+++ b/apps/pre-processing-service/app/test/test_match_service.py
@@ -0,0 +1,97 @@
+import pytest
+from fastapi.testclient import TestClient
+from app.main import app
+
+client = TestClient(app)
+
+
+def test_match_success():
+    """Keyword matching success test."""
+    sample_search_results = [
+        {
+            "url": "https://ssadagu.kr/shop/view.php?platform=1688&num_iid=123",
+            "title": "925 실버 반지 여성용 결혼반지"
+        },
+        {
+            "url": "https://ssadagu.kr/shop/view.php?platform=1688&num_iid=456",
+            "title": "골드 목걸이 체인 펜던트"
+        },
+        {
+            "url": "https://ssadagu.kr/shop/view.php?platform=1688&num_iid=789",
+            "title": "반지 세트 커플링 약혼반지"
+        }
+    ]
+
+    body = {
+        "job_id": 1,
+        "schedule_id": 1,
+        "schedule_his_id": 1,
+        "keyword": "반지",
+        "search_results": sample_search_results
+    }
+
+    response = client.post("/product/match", json=body)
+    print(f"Match Response: {response.json()}")
+
+    assert response.status_code == 200
+    data = response.json()
+    assert data["job_id"] == body["job_id"]
+    assert data["keyword"] == body["keyword"]
+    assert data["status"] == "success"
+    assert isinstance(data["matched_products"], list)
+
+    # Products whose titles contain '반지' should be matched
+    if data["matched_products"]:
+        for product in data["matched_products"]:
+            assert "match_info" in product
+            assert "match_type" in product["match_info"]
+            assert "match_score" in product["match_info"]
+
+
+def test_match_no_results():
+    """Case with no search results."""
+    body = {
+        "job_id": 2,
+        "schedule_id": 2,
+        "schedule_his_id": 2,
+        "keyword": "반지",
+        "search_results": []
+    }
+
+    response = client.post("/product/match", json=body)
+    print(f"No results response: {response.json()}")
+
+    assert response.status_code == 200
+    data = response.json()
+    assert data["matched_products"] == []
+
+
+def test_match_no_matches():
+    """Products that do not match the keyword."""
+    sample_search_results = [
+        {
+            "url": "https://ssadagu.kr/shop/view.php?platform=1688&num_iid=123",
+            "title": "컴퓨터 키보드 게이밍"
+        },
+        {
+            "url": "https://ssadagu.kr/shop/view.php?platform=1688&num_iid=456",
+            "title": "스마트폰 케이스 투명"
+        }
+    ]
+
+    body = {
+        "job_id": 3,
+        "schedule_id": 3,
+        "schedule_his_id": 3,
+        "keyword": "반지",
+        "search_results": sample_search_results
+    }
+
+    response = client.post("/product/match", json=body)
+    print(f"No matches response: {response.json()}")
+
+    assert response.status_code == 200
+    data = response.json()
+    # No matches is still treated as success
+    assert data["status"] == "success"
+    assert isinstance(data["matched_products"], list)
\ No newline at end of file
diff --git a/apps/pre-processing-service/app/test/test_sadagu_crawl.py b/apps/pre-processing-service/app/test/test_sadagu_crawl.py
index 3f336cdf..d034be43 100644
--- a/apps/pre-processing-service/app/test/test_sadagu_crawl.py
+++ b/apps/pre-processing-service/app/test/test_sadagu_crawl.py
@@ -1,16 +1,14 @@
-# app/test/test_sadagu_crawl.py
 import pytest
 from fastapi.testclient import TestClient
 from app.main import app
-from app.errors.CustomException import InvalidItemDataException, ItemNotFoundException
 
 client = TestClient(app)
 
 
 def test_crawl_success():
     body = {
-        "job_id": "test-job-001",
-        "schedule_id": "schedule-001",
+        "job_id": 1,  # fixed: string -> int
+        "schedule_id": 1,  # fixed: string -> int
         "schedule_his_id": 1,
         "tag": "detail",
         "product_url": "https://ssadagu.kr/shop/view.php?platform=1688&num_iid=886788894790",
@@ -19,7 +17,8 @@ def test_crawl_success():
     }
 
     response = client.post("/product/crawl", json=body)
-    print(response.json())
+    print(f"Response: {response.json()}")
+
     assert response.status_code == 200
     data = response.json()
     assert data["job_id"] == body["job_id"]
@@ -29,10 +28,10 @@ def test_crawl_success():
 
 
 def test_crawl_invalid_url():
-    """Invalid URL but the page exists - succeeds with empty data"""
+    """Invalid URL but the page exists"""
     body = {
-        "job_id": "test-job-002",
-        "schedule_id": "schedule-002",
+        "job_id": 2,
+        "schedule_id": 2,
         "schedule_his_id": 2,
         "tag": "detail",
         "product_url": "https://ssadagu.kr/shop/view.php?platform=1688&num_iid=invalid",
@@ -41,25 +40,22 @@ def test_crawl_invalid_url():
     }
 
     response = client.post("/product/crawl", json=body)
-    print(response.json())
+    print(f"Response: {response.json()}")
 
-    # Tests the case that returns 200 but carries no valid data
     assert response.status_code == 200
     data = response.json()
 
-    # Check for empty data or default values
     product_detail = data.get("product_detail", {})
-    assert product_detail.get("title") in ["제목 없음", "제목 추출 실패"]
-    assert product_detail.get("price") == 0
-    assert len(product_detail.get("options", [])) == 0
+    assert product_detail.get("title") in ["제목 없음", "제목 추출 실패", None]
+    assert product_detail.get("price", 0) == 0
 
 
 def test_crawl_completely_invalid_url():
-    """Completely nonexistent domain - a real error occurs"""
+    """Completely nonexistent domain"""
     body = {
-        "job_id": "test-job-002-invalid",
-        "schedule_id": "schedule-002-invalid",
-        "schedule_his_id": 2,
+        "job_id": 3,
+        "schedule_id": 3,
+        "schedule_his_id": 3,
         "tag": "detail",
         "product_url": "https://nonexistent-domain-12345.com/invalid",
         "use_selenium": False,
@@ -67,17 +63,16 @@ def test_crawl_completely_invalid_url():
     }
 
     response = client.post("/product/crawl", json=body)
-    print(response.json())
+    print(f"Response: {response.json()}")
 
-    # In this case a real error must occur
     assert response.status_code in (400, 422, 500)
 
 
 def test_crawl_include_images():
     body = {
-        "job_id": "test-job-003",
-        "schedule_id": "schedule-003",
-        "schedule_his_id": 3,
+        "job_id": 4,
+        "schedule_id": 4,
+        "schedule_his_id": 4,
         "tag": "detail",
         "product_url": "https://ssadagu.kr/shop/view.php?platform=1688&num_iid=886788894790",
         "use_selenium": False,
@@ -85,7 +80,8 @@ def test_crawl_include_images():
     }
 
     response = client.post("/product/crawl", json=body)
-    print(response.json())
+    print(f"Response: {response.json()}")
+
     assert response.status_code == 200
     data = response.json()
     assert data["include_images"] is True
diff --git a/apps/pre-processing-service/app/test/test_search_service.py b/apps/pre-processing-service/app/test/test_search_service.py
new file mode 100644
index 00000000..6dd415e0
--- /dev/null
+++ b/apps/pre-processing-service/app/test/test_search_service.py
@@ -0,0 +1,62 @@
+import pytest
+from fastapi.testclient import TestClient
+from app.main import app
+
+client = TestClient(app)
+
+
+def test_search_success():
+    """Product search success test."""
+    body = {
+        "job_id": 1,
+        "schedule_id": 1,
+        "schedule_his_id": 1,
+        "keyword": "반지"
+    }
+
+    response = client.post("/product/search", json=body)
+    print(f"Search Response: {response.json()}")
+
+    assert response.status_code == 200
+    data = response.json()
+    assert data["job_id"] == body["job_id"]
+    assert data["keyword"] == body["keyword"]
+    assert data["status"] == "success"
+    assert isinstance(data["search_results"], list)
+
+
+def test_search_empty_keyword():
+    """Empty-keyword search test."""
+    body = {
+        "job_id": 2,
+        "schedule_id": 2,
+        "schedule_his_id": 2,
+        "keyword": ""
+    }
+
+    response = client.post("/product/search", json=body)
+    print(f"Empty keyword response: {response.json()}")
+
+    # Even an empty keyword should return empty results, not an error
+    assert response.status_code == 200
+    data = response.json()
+    assert data["search_results"] == []
+
+
+def test_search_nonexistent_keyword():
+    """Search with a keyword that should not exist."""
+    body = {
+        "job_id": 3,
+        "schedule_id": 3,
+        "schedule_his_id": 3,
+        "keyword": "zxcvbnmasdfghjklqwertyuiop123456789"
+    }
+
+    response = client.post("/product/search", json=body)
+    print(f"Nonexistent keyword response: {response.json()}")
+
+    assert response.status_code == 200
+    data = response.json()
+    # No search results is still treated as success
+    assert data["status"] == "success"
+    assert isinstance(data["search_results"], list)
\ No newline at end of file
diff --git a/apps/pre-processing-service/app/test/test_similarity_service.py b/apps/pre-processing-service/app/test/test_similarity_service.py
new file mode 100644
index 00000000..1888b873
--- /dev/null
+++ b/apps/pre-processing-service/app/test/test_similarity_service.py
@@ -0,0 +1,136 @@
+import pytest
+from fastapi.testclient import TestClient
+from app.main import app
+
+client = TestClient(app)
+
+
+def test_similarity_with_matched_products():
+    """Similarity analysis over the matched products."""
+    matched_products = [
+        {
+            "url": "https://ssadagu.kr/shop/view.php?platform=1688&num_iid=123",
+            "title": "925 실버 반지 여성용",
+            "match_info": {
+                "match_type": "exact",
+                "match_score": 1.0,
+                "match_reason": "완전 매칭"
+            }
+        },
+        {
+            "url": "https://ssadagu.kr/shop/view.php?platform=1688&num_iid=456",
+            "title": "반지 세트 커플링",
+            "match_info": {
+                "match_type": "morphological",
+                "match_score": 0.8,
+                "match_reason": "형태소 매칭"
+            }
+        }
+    ]
+
+    body = {
+        "job_id": 1,
+        "schedule_id": 1,
+        "schedule_his_id": 1,
+        "keyword": "반지",
+        "matched_products": matched_products
+    }
+
+    response = client.post("/product/similarity", json=body)
+    print(f"Similarity Response: {response.json()}")
+
+    assert response.status_code == 200
+    data = response.json()
+    assert data["job_id"] == body["job_id"]
+    assert data["keyword"] == body["keyword"]
+    assert data["status"] == "success"
+
+    if data["selected_product"]:
+        assert "similarity_info" in data["selected_product"]
+        assert "similarity_score" in data["selected_product"]["similarity_info"]
+        assert data["reason"] is not None
+
+
+def test_similarity_fallback_to_search_results():
+    """Similarity analysis over the full search results when matching fails."""
+    search_results = [
+        {
+            "url": "https://ssadagu.kr/shop/view.php?platform=1688&num_iid=123",
+            "title": "실버 링 악세서리"
+        },
+        {
+            "url": "https://ssadagu.kr/shop/view.php?platform=1688&num_iid=456",
+            "title": "골드 반지 여성"
+        }
+    ]
+
+    body = {
+        "job_id": 2,
+        "schedule_id": 2,
+        "schedule_his_id": 2,
+        "keyword": "반지",
+        "matched_products": [],  # no matched products
+        "search_results": search_results  # fallback data
+    }
+
+    response = client.post("/product/similarity", json=body)
+    print(f"Fallback Response: {response.json()}")
+
+    assert response.status_code == 200
+    data = response.json()
+    assert data["status"] == "success"
+
+    # In fallback mode a product is selected only when it passes the threshold
+    if data["selected_product"]:
+        assert "similarity_info" in data["selected_product"]
+        assert data["selected_product"]["similarity_info"]["analysis_mode"] == "fallback_similarity_only"
+
+
+def test_similarity_single_candidate():
+    """Case with a single candidate."""
+    single_product = [
+        {
+            "url": "https://ssadagu.kr/shop/view.php?platform=1688&num_iid=123",
+            "title": "925 실버 반지 여성용",
+            "match_info": {
+                "match_type": "exact",
+                "match_score": 1.0
+            }
+        }
+    ]
+
+    body = {
+        "job_id": 3,
+        "schedule_id": 3,
+        "schedule_his_id": 3,
+        "keyword": "반지",
+        "matched_products": single_product
+    }
+
+    response = client.post("/product/similarity", json=body)
+    print(f"Single candidate response: {response.json()}")
+
+    assert response.status_code == 200
+    data = response.json()
+    assert data["selected_product"] is not None
+    assert data["selected_product"]["similarity_info"]["analysis_type"] == "single_candidate"
+
+
+def test_similarity_no_candidates():
+    """Case with no candidates at all."""
+    body = {
+        "job_id": 4,
+        "schedule_id": 4,
+        "schedule_his_id": 4,
+        "keyword": "반지",
+        "matched_products": [],
+        "search_results": []
+    }
+
+    response = client.post("/product/similarity", json=body)
+    print(f"No candidates response: {response.json()}")
+
+    assert response.status_code == 200
+    data = response.json()
+    assert data["selected_product"] is None
+    assert "검색 결과가 모두 없음" in data["reason"]
\ No newline at end of file
diff --git a/apps/pre-processing-service/app/utils/__init__.py b/apps/pre-processing-service/app/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/apps/pre-processing-service/app/utils/crawler_utils.py b/apps/pre-processing-service/app/utils/crawler_utils.py
new file mode 100644
index 00000000..8246788a
--- /dev/null
+++ b/apps/pre-processing-service/app/utils/crawler_utils.py
@@ -0,0 +1,340 @@
+import urllib.parse
+import httpx
+import re
+import time
+from bs4 import BeautifulSoup
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.common.exceptions import TimeoutException, NoSuchElementException
+
+
+class SearchCrawler:
+    def __init__(self, use_selenium=True):
+        self.base_url = "https://ssadagu.kr"
+        self.use_selenium = use_selenium
+
+        if use_selenium:
+            self._setup_selenium()
+        else:
+            self._setup_httpx()
+
+    def _setup_selenium(self):
+        """Initialize the Selenium WebDriver."""
+        chrome_options = Options()
+        chrome_options.add_argument('--headless')
+        chrome_options.add_argument('--no-sandbox')
+        chrome_options.add_argument('--disable-dev-shm-usage')
+        chrome_options.add_argument('--disable-gpu')
+        chrome_options.add_argument('--window-size=1920,1080')
+        chrome_options.add_argument(
+            '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
+        )
+
+        try:
+            self.driver = webdriver.Chrome(options=chrome_options)
+            self.wait = WebDriverWait(self.driver, 10)
+            print("Selenium WebDriver 초기화 완료")
+        except Exception as e:
+            print(f"Selenium 초기화 실패, httpx로 대체: {e}")
+            self.use_selenium = False
+            self._setup_httpx()
+
+    def _setup_httpx(self):
+        """Initialize the httpx client."""
+        self.client = httpx.AsyncClient(
+            headers={
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
+            },
+            timeout=30.0
+        )
+
+    async def search_products_selenium(self, keyword: str) -> list[dict]:
+        """Search products using Selenium."""
+        encoded_keyword = urllib.parse.quote(keyword)
+        search_url = f"{self.base_url}/shop/search.php?ss_tx={encoded_keyword}"
+
+        try:
+            self.driver.get(search_url)
+            time.sleep(5)
+
+            product_links = []
+            link_elements = self.driver.find_elements(By.TAG_NAME, "a")
+
+            for element in link_elements:
+                href = element.get_attribute('href')
+                if href and 'view.php' in href and ('platform=1688' in href or 'num_iid' in href):
+                    try:
+                        title = element.get_attribute('title') or element.text.strip()
+                        if title:
+                            product_links.append({
+                                'url': href,
+                                'title': title
+                            })
+                    except Exception:
+                        product_links.append({
+                            'url': href,
+                            'title': 'Unknown Title'
+                        })
+
+            # Remove duplicates
+            seen_urls = set()
+            unique_products = []
+            for product in product_links:
+                if product['url'] not in seen_urls:
+                    seen_urls.add(product['url'])
+                    unique_products.append(product)
+
+            print(f"Selenium으로 발견한 상품 링크: {len(unique_products)}개")
+            return unique_products[:20]
+
+        except Exception as e:
+            print(f"Selenium 검색 오류: {e}")
+            return []
+
+    async def search_products_httpx(self, keyword: str) -> list[dict]:
+        """Search products using httpx."""
+        encoded_keyword = urllib.parse.quote(keyword)
+        search_url = f"{self.base_url}/shop/search.php?ss_tx={encoded_keyword}"
+
+        try:
+            response = await self.client.get(search_url)
+            response.raise_for_status()
+            soup = BeautifulSoup(response.content, 'html.parser')
+
+            product_links = []
+            all_links = soup.find_all('a', href=True)
+
+            for link in all_links:
+                href = link['href']
+                if 'view.php' in href and ('platform=1688' in href or 'num_iid' in href):
+                    full_url = f"{self.base_url}{href}" if href.startswith('/') else href
+                    title = link.get('title', '') or link.get_text(strip=True) or 'Unknown Title'
+
+                    product_links.append({
+                        'url': full_url,
+                        'title': title
+                    })
+
+            print(f"httpx로 발견한 상품 링크: {len(product_links)}개")
+            return product_links[:20]
+
+        except Exception as e:
+            print(f"httpx 검색 오류: {e}")
+            return []
+
+    async def get_basic_product_info(self, product_url: str) -> dict:
+        """Crawl only the basic product info."""
+        try:
+            if self.use_selenium:
+                self.driver.get(product_url)
+                self.wait.until(lambda driver: driver.execute_script("return document.readyState") == "complete")
+                soup = BeautifulSoup(self.driver.page_source, 'html.parser')
+            else:
+                response = await self.client.get(product_url)
+                response.raise_for_status()
+                soup = BeautifulSoup(response.content, 'html.parser')
+
+            title_element = soup.find('h1', {'id': 'kakaotitle'})
+            title = title_element.get_text(strip=True) if title_element else "제목 없음"
+
+            return {
+                'url': product_url,
+                'title': title
+            }
+
+        except Exception as e:
+            print(f"기본 상품 크롤링 오류 ({product_url}): {e}")
+            return None
+
+    async def close(self):
+        """Clean up resources."""
+        if self.use_selenium and hasattr(self, 'driver'):
+            try:
+                self.driver.quit()
+            except Exception:
+                pass
+        elif hasattr(self, 'client'):
+            try:
+                await self.client.aclose()
+            except Exception:
+                pass
+
+
+class DetailCrawler(SearchCrawler):
+    """Detail-crawling class extending SearchCrawler."""
+
+    async def crawl_detail(self, product_url: str, include_images: bool = False) -> dict:
+        """Crawl product detail data."""
+        try:
+            if self.use_selenium:
+                soup = await self._get_soup_selenium(product_url)
+            else:
+                soup = await self._get_soup_httpx(product_url)
+
+            # Extract the basic fields
+            title = self._extract_title(soup)
+            price = self._extract_price(soup)
+            rating = self._extract_rating(soup)
+            options = self._extract_options(soup)
+            material_info = self._extract_material_info(soup)
+
+            product_data = {
+                'url': product_url,
+                'title': title,
+                'price': price,
+                'rating': rating,
+                'options': options,
+                'material_info': material_info,
+                'crawled_at': time.strftime('%Y-%m-%d %H:%M:%S')
+            }
+
+            if include_images:
+                print("이미지 정보 추출 중...")
+                product_images = self._extract_images(soup)
+                product_data['product_images'] = [{'original_url': img_url} for img_url in product_images]
+                print(f"추출된 이미지: {len(product_images)}개")
+            else:
+                product_data['product_images'] = []
+
+            return product_data
+
+        except Exception as e:
+            print(f"크롤링 오류: {e}")
+            raise Exception(f"크롤링 실패: {str(e)}")
+
+    async def _get_soup_selenium(self, product_url: str) -> BeautifulSoup:
+        """Fetch HTML via Selenium."""
+        try:
+            self.driver.get(product_url)
+            self.wait.until(lambda driver: driver.execute_script("return document.readyState") == "complete")
+            time.sleep(2)
+            return BeautifulSoup(self.driver.page_source, 'html.parser')
+        except Exception as e:
+            raise Exception(f"Selenium HTML 로딩 실패: {e}")
+
+    async def _get_soup_httpx(self, product_url: str) -> BeautifulSoup:
+        """Fetch HTML via httpx."""
+        try:
+            response = await self.client.get(product_url)
+            response.raise_for_status()
+            return BeautifulSoup(response.content, 'html.parser')
+        except Exception as e:
+            raise Exception(f"HTTP 요청 실패: {e}")
+
+    def _extract_title(self, soup: BeautifulSoup) -> str:
+        """Extract the title."""
+        title_element = soup.find('h1', {'id': 'kakaotitle'})
+        return title_element.get_text(strip=True) if title_element else "제목 없음"
+
+    def _extract_price(self, soup: BeautifulSoup) -> int:
+        """Extract the price."""
+        price = 0
+        price_selectors = [
+            'span.price.gsItemPriceKWR',
+            '.pdt_price span.price',
+            'span.price',
+            '.price'
+        ]
+
+        for selector in price_selectors:
+            price_element = soup.select_one(selector)
+            if price_element:
+                price_text = price_element.get_text(strip=True).replace(',', '').replace('원', '')
+                price_match = re.search(r'(\d+)', price_text)
+                if price_match:
+                    price = int(price_match.group(1))
+                    break
+        return price
+
+    def _extract_rating(self, soup: BeautifulSoup) -> float:
+        """Extract the rating."""
+        rating = 0.0
+        star_containers = [
+            soup.find('a', class_='start'),
+            soup.find('div', class_=re.compile(r'star|rating')),
+            soup.find('a', href='#reviews_wrap')
+        ]
+
+        for container in star_containers:
+            if container:
+                star_imgs = container.find_all('img')
+                for img in star_imgs:
+                    src = img.get('src', '')
+                    if 'icon_star.svg' in src:
+                        rating += 1
+                    elif 'icon_star_half.svg' in src:
+                        rating += 0.5
+                break
+        return rating
+
+    def _extract_options(self, soup: BeautifulSoup) -> list[dict]:
+        """Extract product options."""
+        options = []
+        sku_list = soup.find('ul', {'id': 'skubox'})
+
+        if sku_list:
+            option_items = sku_list.find_all('li', class_=re.compile(r'imgWrapper'))
+            for item in option_items:
+                title_element = item.find('a', title=True)
+                if title_element:
+                    option_name = title_element.get('title', '').strip()
+
+                    # Extract stock info
+                    stock = 0
+                    item_text = item.get_text()
+                    stock_match = re.search(r'재고\s*:\s*(\d+)', item_text)
+                    if stock_match:
+                        stock = int(stock_match.group(1))
+
+                    # Extract the image URL
+                    img_element = item.find('img', class_='colorSpec_hashPic')
+                    image_url = ""
+                    if img_element and img_element.get('src'):
+                        image_url = img_element['src']
+
+                    if option_name:
+                        options.append({
+                            'name': option_name,
+                            'stock': stock,
+                            'image_url': image_url
+                        })
+
+        return options
+
+    def _extract_material_info(self, soup: BeautifulSoup) -> dict:
+        """Extract material info."""
+        material_info = {}
+        info_items = soup.find_all('div', class_='pro-info-item')
+
+        for item in info_items:
+            title_element = item.find('div', class_='pro-info-title')
+            info_element = item.find('div', class_='pro-info-info')
+
+            if title_element and info_element:
+                title = title_element.get_text(strip=True)
+                info = info_element.get_text(strip=True)
+                material_info[title] = info
+
+        return material_info
+
+    def _extract_images(self, soup: BeautifulSoup) -> list[str]:
+        """Extract product image URLs."""
+        images = []
+        img_elements = soup.find_all('img', {'id': re.compile(r'img_translate_\d+')})
+
+        for img in img_elements:
+            src = img.get('src', '')
+            if src:
+                if src.startswith('//'):
+                    src = 'https:' + src
+                elif src.startswith('/'):
+                    src = self.base_url + src
+                elif src.startswith('http'):
+                    pass
+                else:
+                    continue
+                images.append(src)
+
+        return images
\ No newline at end of file
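Note: a minimal async sketch of how the two crawlers compose, assuming only the classes above; the keyword is illustrative:

    import asyncio
    from app.utils.crawler_utils import SearchCrawler, DetailCrawler

    async def demo():
        # use_selenium=False takes the httpx path, so no browser is required
        search = SearchCrawler(use_selenium=False)
        try:
            results = await search.search_products_httpx("반지")
        finally:
            await search.close()

        if results:
            detail = DetailCrawler(use_selenium=False)
            try:
                product = await detail.crawl_detail(results[0]['url'], include_images=False)
                print(product['title'], product['price'])
            finally:
                await detail.close()

    asyncio.run(demo())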
diff --git a/apps/pre-processing-service/app/utils/keyword_matcher.py b/apps/pre-processing-service/app/utils/keyword_matcher.py
new file mode 100644
index 00000000..8fab2730
--- /dev/null
+++ b/apps/pre-processing-service/app/utils/keyword_matcher.py
@@ -0,0 +1,148 @@
+from app.core.config import settings  # pydantic_settings-based config
+
+try:
+    import MeCab
+
+    print("MeCab 라이브러리 로딩 성공")
+    MECAB_AVAILABLE = True
+except ImportError:
+    print("MeCab 라이브러리를 찾을 수 없습니다. pip install mecab-python3 를 실행해주세요.")
+    MeCab = None
+    MECAB_AVAILABLE = False
+
+
+class KeywordMatcher:
+    """Keyword match analyzer."""
+
+    def __init__(self):
+        self.konlpy_available = False
+
+        # Check whether MeCab is usable
+        if MECAB_AVAILABLE:
+            try:
+                # Use the configured dictionary path if present, else the default
+                if settings.mecab_path:
+                    self.mecab = MeCab.Tagger(f"-d {settings.mecab_path}")
+                else:
+                    self.mecab = MeCab.Tagger()  # default path
+
+                # Smoke test
+                test_result = self.mecab.parse("테스트")
+                if test_result and test_result.strip():
+                    self.konlpy_available = True
+                    print(f"MeCab 형태소 분석기 사용 가능 (경로: {settings.mecab_path or '기본'})")
+                else:
+                    print("MeCab 테스트 실패")
+            except Exception as e:
+                print(f"MeCab 사용 불가 (규칙 기반으로 대체): {e}")
+        else:
+            print("MeCab 라이브러리가 설치되지 않았습니다. 규칙 기반으로 대체합니다.")
+
+    def analyze_keyword_match(self, title: str, keyword: str) -> dict:
+        """Return the keyword match analysis result."""
+        title_lower = title.lower().strip()
+        keyword_lower = keyword.lower().strip()
+
+        # 1. Exact-substring check
+        exact_match = keyword_lower in title_lower
+        if exact_match:
+            return {
+                'is_match': True,
+                'match_type': 'exact',
+                'score': 1.0,
+                'reason': f"완전 포함: '{keyword}' in '{title[:50]}'"
+            }
+
+        # 2. Morphological analysis (via MeCab)
+        if self.konlpy_available:
+            morphological_result = self._morphological_match(title_lower, keyword_lower)
+            if morphological_result['is_match']:
+                return morphological_result
+
+        # 3. Rule-based analysis (when MeCab is unavailable or fails)
+        simple_result = self._simple_keyword_match(title_lower, keyword_lower)
+        return simple_result
+
+    def _morphological_match(self, title: str, keyword: str) -> dict:
+        """Morphology-based matching."""
+        try:
+            # Analyze the keyword morphemes
+            keyword_result = self.mecab.parse(keyword)
+            keyword_morphs = []
+            for line in keyword_result.split('\n'):
+                if line == 'EOS' or line == '':
+                    continue
+                parts = line.split('\t')
+                if len(parts) >= 1:
+                    morph = parts[0].strip()
+                    if len(morph) >= 1:
+                        keyword_morphs.append(morph)
+
+            # Analyze the title morphemes
+            title_result = self.mecab.parse(title)
+            title_morphs = []
+            for line in title_result.split('\n'):
+                if line == 'EOS' or line == '':
+                    continue
+                parts = line.split('\t')
+                if len(parts) >= 1:
+                    morph = parts[0].strip()
+                    if len(morph) >= 1:
+                        title_morphs.append(morph)
+
+            # Match morphemes
+            matched = 0
+            for kw in keyword_morphs:
+                if len(kw) >= 2:  # check only meaningful morphemes
+                    for tw in title_morphs:
+                        if kw == tw or kw in tw or tw in kw:
+                            matched += 1
+                            break
+
+            match_ratio = matched / len(keyword_morphs) if keyword_morphs else 0
+            threshold = 0.4
+
+            if match_ratio >= threshold:
+                return {
+                    'is_match': True,
+                    'match_type': 'morphological',
+                    'score': match_ratio,
+                    'reason': f"형태소 매칭: {matched}/{len(keyword_morphs)} = {match_ratio:.3f}"
+                }
+
+        except Exception as e:
+            print(f"형태소 분석 오류: {e}")
+
+        return {'is_match': False, 'match_type': 'morphological', 'score': 0.0, 'reason': '형태소 분석 실패'}
+
+    def _simple_keyword_match(self, title: str, keyword: str) -> dict:
+        """Simple rule-based keyword matching."""
+        # Split on whitespace
+        title_words = title.split()
+        keyword_words = keyword.split()
+
+        matched = 0
+        for kw in keyword_words:
+            if len(kw) >= 2:
+                for tw in title_words:
+                    if kw in tw or tw in kw:
+                        matched += 1
+                        break
+
+        match_ratio = matched / len(keyword_words) if keyword_words else 0
+        threshold = 0.3
+
+        if match_ratio >= threshold:
+            return {
+                'is_match': True,
+                'match_type': 'simple',
+                'score': match_ratio,
+                'reason': f"규칙 기반 매칭: {matched}/{len(keyword_words)} = {match_ratio:.3f}"
+            }
+
+        return {
+            'is_match': False,
+            'match_type': 'simple',
+            'score': match_ratio,
+            'reason': f"규칙 기반 미달: {matched}/{len(keyword_words)} = {match_ratio:.3f} < {threshold}"
+        }
\ No newline at end of file
diff --git a/apps/pre-processing-service/app/utils/similarity_analyzer.py b/apps/pre-processing-service/app/utils/similarity_analyzer.py
new file mode 100644
index 00000000..d155ee2e
--- /dev/null
+++ b/apps/pre-processing-service/app/utils/similarity_analyzer.py
@@ -0,0 +1,65 @@
+import torch
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
+from transformers import AutoTokenizer, AutoModel
+
+
+class SimilarityAnalyzer:
+    """Text similarity analyzer."""
+
+    def __init__(self):
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained('klue/bert-base')
+            self.model = AutoModel.from_pretrained('klue/bert-base')
+            print("KLUE BERT 모델 로딩 성공")
+        except Exception as e:
+            print(f"KLUE BERT 로딩 실패, 다국어 BERT로 대체: {e}")
+            try:
+                self.tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-cased')
+                self.model = AutoModel.from_pretrained('bert-base-multilingual-cased')
+                print("다국어 BERT 모델 로딩 성공")
+            except Exception as e2:
+                print(f"모든 BERT 모델 로딩 실패: {e2}")
+                raise e2
+
+    def get_embedding(self, text: str) -> np.ndarray:
+        """Create a text embedding (the [CLS] token's last hidden state)."""
+        inputs = self.tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=128)
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+        return outputs.last_hidden_state[:, 0, :].numpy()
+
+    def calculate_similarity(self, text1: str, text2: str) -> float:
+        """Compute the cosine similarity between two texts."""
+        embedding1 = self.get_embedding(text1)
+        embedding2 = self.get_embedding(text2)
+        return cosine_similarity(embedding1, embedding2)[0][0]
+
+    def analyze_similarity_batch(self, keyword: str, product_titles: list[str]) -> list[dict]:
+        """Analyze similarities for a batch of titles."""
+        keyword_embedding = self.get_embedding(keyword)
+        results = []
+
+        for i, title in enumerate(product_titles):
+            try:
+                title_embedding = self.get_embedding(title)
+                similarity = cosine_similarity(keyword_embedding, title_embedding)[0][0]
+
+                results.append({
+                    'index': i,
+                    'title': title,
+                    'similarity': float(similarity),
+                    'score': float(similarity)
+                })
+            except Exception as e:
+                print(f"유사도 계산 오류 (제목: {title[:30]}): {e}")
+                results.append({
+                    'index': i,
+                    'title': title,
+                    'similarity': 0.0,
+                    'score': 0.0
+                })
+
+        # Sort by similarity, descending
+        results.sort(key=lambda x: x['similarity'], reverse=True)
+        return results
\ No newline at end of file
diff --git a/apps/pre-processing-service/pyproject.toml b/apps/pre-processing-service/pyproject.toml
index 35b2e563..af7d2124 100644
--- a/apps/pre-processing-service/pyproject.toml
+++ b/apps/pre-processing-service/pyproject.toml
@@ -26,12 +26,9 @@ dependencies = [
     "scikit-learn (>=1.7.1,<2.0.0)",
     "python-dotenv (>=1.1.1,<2.0.0)",
    "mecab-python3 (>=1.0.10,<2.0.0)",
-    "python-mecab-ko (>=1.3.7,<2.0.0)",
-    "python-mecab-ko-dic (>=2.1.1.post2,<3.0.0)",
     "httpx (>=0.28.1,<0.29.0)",
     "asyncpg (>=0.30.0,<0.31.0)",
     "gunicorn (>=23.0.0,<24.0.0)",
-    "httpx (>=0.28.1,<0.29.0)"
 ]
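Note: a quick interactive check of the analyzer above (the first call downloads klue/bert-base from the Hugging Face hub):

    from app.utils.similarity_analyzer import SimilarityAnalyzer

    analyzer = SimilarityAnalyzer()
    print(analyzer.calculate_similarity("반지", "925 실버 반지 여성용"))   # relatively high
    print(analyzer.calculate_similarity("반지", "컴퓨터 키보드 게이밍"))  # relatively low
    ranked = analyzer.analyze_similarity_batch("반지", ["골드 반지 여성", "실버 링 악세서리"])
    print(ranked[0]['title'])  # best match first (results are sorted descending)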