Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/pre-processing-service/app/api/endpoints/product.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ async def search(request: RequestSadaguSearch):
"""
상품 검색 엔드포인트
"""
return search_products(request)
return await search_products(request)

@router.post("/match", response_model=ResponseSadaguMatch)
async def match(request: RequestSadaguMatch):
Expand Down
74 changes: 71 additions & 3 deletions apps/pre-processing-service/app/core/config.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,65 @@
# pydantic_settings에서 SettingsConfigDict를 추가로 import 합니다.
from pydantic_settings import BaseSettings, SettingsConfigDict
import os
import platform
import subprocess
from typing import Optional


def detect_mecab_dicdir() -> Optional[str]:
    """Auto-detect the MeCab dictionary directory.

    Detection order:

    1. Run ``mecab-config --dicdir`` and return its (stripped) output when
       the command succeeds and the reported path exists.
    2. Probe a fixed list of common install locations for the current
       platform and return the first one that contains a ``dicrc`` file.

    Returns:
        The detected dictionary path, or ``None`` when neither strategy
        finds one. Failures to run ``mecab-config`` are never propagated.
    """
    # 1. Ask mecab-config for the dictionary directory.
    try:
        result = subprocess.run(
            ["mecab-config", "--dicdir"],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers a missing binary (FileNotFoundError) as well as a
        # present-but-not-executable one (PermissionError); SubprocessError
        # covers TimeoutExpired. Detection is best-effort, so any of these
        # simply falls through to the path probe below.
        result = None

    if result is not None and result.returncode == 0:
        dicdir = result.stdout.strip()
        # NOTE(review): this path is accepted without the dicrc check that
        # the fallback candidates get -- confirm mecab-config reports a
        # concrete dictionary directory in the target environments.
        if os.path.exists(dicdir):
            print(f"mecab-config에서 사전 경로 발견: {dicdir}")
            return dicdir

    # 2. Fall back to well-known install locations, in priority order.
    candidates_by_platform = {
        "darwin": [  # macOS
            "/opt/homebrew/lib/mecab/dic/mecab-ko-dic",  # Apple Silicon
            "/usr/local/lib/mecab/dic/mecab-ko-dic",  # Intel Mac
            "/opt/homebrew/lib/mecab/dic/mecab-ipadic",  # default dictionary
            "/usr/local/lib/mecab/dic/mecab-ipadic",
        ],
        "linux": [
            "/usr/lib/x86_64-linux-gnu/mecab/dic/mecab-ko-dic",
            "/usr/lib/mecab/dic/mecab-ko-dic",
            "/usr/local/lib/mecab/dic/mecab-ko-dic",
            "/usr/share/mecab/dic/mecab-ko-dic",
            "/usr/lib/mecab/dic/mecab-ipadic",
            "/usr/local/lib/mecab/dic/mecab-ipadic",
        ],
        "windows": [
            "C:/Program Files/MeCab/dic/mecab-ko-dic",
            "C:/mecab/dic/mecab-ko-dic",
            "C:/Program Files/MeCab/dic/mecab-ipadic",
        ],
    }
    system = platform.system().lower()

    for path in candidates_by_platform.get(system, []):
        # A dicrc file inside the directory is what marks it as a real
        # dictionary; its presence also implies the directory exists.
        if os.path.exists(os.path.join(path, "dicrc")):
            print(f"플랫폼 기본 경로에서 사전 발견: {path}")
            return path

    return None

# 공통 설정을 위한 BaseSettings
class BaseSettingsConfig(BaseSettings):

Expand All @@ -13,7 +69,19 @@ class BaseSettingsConfig(BaseSettings):
db_user: str
db_pass: str
db_name: str
env_name: str = "dev"
env_name: str = ".dev"

# MeCab 사전 경로 (자동 감지)
mecab_path: Optional[str] = None

def __init__(self, **kwargs):
super().__init__(**kwargs)

# mecab_path가 설정되지 않았으면 자동 감지
if not self.mecab_path:
self.mecab_path = detect_mecab_dicdir()
if not self.mecab_path:
print("MeCab 사전 경로를 찾을 수 없어 기본 설정으로 실행합니다.")

@property
def db_url(self) -> str:
Expand All @@ -25,11 +93,11 @@ def db_url(self) -> str:

# 환경별 설정 클래스
class DevSettings(BaseSettingsConfig):
model_config = SettingsConfigDict(env_file=['.env', 'dev.env'])
model_config = SettingsConfigDict(env_file=['.env', '.dev.env'])


class PrdSettings(BaseSettingsConfig):
model_config = SettingsConfigDict(env_file=['.env', 'prd.env'])
model_config = SettingsConfigDict(env_file=['.env', '.prd.env'])

def get_settings() -> BaseSettingsConfig:
"""환경 변수에 따라 적절한 설정 객체를 반환하는 함수"""
Expand Down
63 changes: 31 additions & 32 deletions apps/pre-processing-service/app/model/schemas.py
Original file line number Diff line number Diff line change
@@ -1,95 +1,94 @@
from datetime import datetime
from typing import Optional, List, Dict
from typing import Optional, List, Dict, Union
from pydantic import BaseModel, Field, HttpUrl

#기본 요청
# 기본 요청
class RequestBase(BaseModel):
job_id: int
schedule_id: int
sschdule_his_id: Optional[int] = None
schedule_his_id: Optional[int] = None

#기본 응답
# 기본 응답
class ResponseBase(BaseModel):
job_id: int
schedule_id: int
sschdule_his_id : Optional[int] = None
schedule_his_id: Optional[int] = None
status: str


#네이버 키워드 추출
# 네이버 키워드 추출
class RequestNaverSearch(RequestBase):
tag: str
category: Optional[str] = None
start_date : Optional[str] = None
end_date : Optional[str] = None
start_date: Optional[str] = None
end_date: Optional[str] = None

class ResponseNaverSearch(ResponseBase):
category: Optional[str] = None
keyword: str
total_keyword: dict[int, str]

# #키워드 사다구몰 검증
# class RequestSadaguValidate(RequestBase):
# tag: str
# category: str
#
# class ResponsetSadaguValidate(ResponseBase):
# keyword: str
total_keyword: Dict[int, str]

# 2단계: 검색
# Request model for the search step: extends RequestBase with one
# required string field, `keyword`.
class RequestSadaguSearch(RequestBase):
    keyword: str  # required; no default

class ResponseSadaguSearch(ResponseBase):
keyword: str
search_results: list[dict]
search_results: List[Dict]

# 3단계: 매칭
class RequestSadaguMatch(RequestBase):
keyword: str
search_results: list[dict]
search_results: List[Dict]

class ResponseSadaguMatch(ResponseBase):
keyword: str
matched_products: list[dict]
matched_products: List[Dict]

# 4단계: 유사도
class RequestSadaguSimilarity(RequestBase):
keyword: str
matched_products: list[dict]
matched_products: List[Dict]
search_results: Optional[List[Dict]] = None # 3단계에서 매칭 실패시 폴백용

class ResponseSadaguSimilarity(ResponseBase):
keyword: str
selected_product: dict | None = None
reason: str | None = None

#사다구몰 크롤링
class RequestSadaguCrawl(RequestBase):
selected_product: Optional[Dict] = None
reason: Optional[str] = None

# 사다구몰 크롤링
# Request model for crawling a single product page.
# Derives from BaseModel directly and declares job_id / schedule_id /
# schedule_his_id itself; all three are required here (Field(...)).
class RequestSadaguCrawl(BaseModel):
    job_id: int = Field(..., description="작업 ID")
    schedule_id: int = Field(..., description="스케줄 ID")
    schedule_his_id: int = Field(..., description="스케줄 히스토리 ID")
    tag: str = Field(..., description="크롤링 태그 (예: 'detail')")
    # HttpUrl makes pydantic validate the value as an HTTP(S) URL.
    product_url: HttpUrl = Field(..., description="크롤링할 상품의 URL")
    # Optional flags: use_selenium defaults to True, include_images to False.
    use_selenium: bool = Field(default=True, description="Selenium 사용 여부")
    include_images: bool = Field(default=False, description="이미지 정보 포함 여부")

class ResponseSadaguCrawl(ResponseBase):
class ResponseSadaguCrawl(BaseModel):
job_id: int
schedule_id: int
schedule_his_id: int
tag: str
product_url: str
use_selenium: bool
include_images: bool
product_detail: Optional[dict] = None
product_detail: Optional[Dict] = None
status: str
crawled_at: Optional[str] = None

#블로그 생성
# 블로그 생성
# Request model for blog creation: extends RequestBase with two required
# string fields, `tag` and `category`.
class RequestBlogCreate(RequestBase):
    tag: str
    category: str

# Response model for blog creation: declares no fields of its own, so its
# shape is exactly that of ResponseBase.
class ResponseBlogCreate(ResponseBase):
    pass

#블로그 배포
# 블로그 배포
# Request model for blog publishing: extends RequestBase with two required
# string fields, `tag` and `category`.
class RequestBlogPublish(RequestBase):
    tag: str
    category: str

class ResponseBlogPublish(ResponseBase):
pass
pass
Loading
Loading