Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions src/cl_sii/cte/data_models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import annotations

from collections.abc import Sequence
from datetime import date
from typing import Optional

import pydantic

Expand Down Expand Up @@ -38,3 +40,49 @@ class LegalRepresentative:
"""
Fecha de incorporación.
"""


@pydantic.dataclasses.dataclass(
    config=pydantic.ConfigDict(
        extra='forbid',
        arbitrary_types_allowed=True,
    ),
    frozen=True,
)
class TaxpayerData:
    """
    Taxpayer data of the "Datos del Contribuyente" section of the CTE.

    Instances are immutable, and unknown fields are rejected on construction.
    """

    start_of_activities_date: Optional[date]
    """
    Date of start of activities ("Fecha de inicio de actividades").
    """
    economic_activities: str
    """
    Economic activities ("Actividades Económicas").
    """
    tax_category: str
    """
    Tax category ("Categoría Tributaria").
    """
    address: str
    """
    Address ("Domicilio").
    """
    branches: Sequence[str]
    """
    Branches ("Sucursales").
    """
    last_filed_documents: Sequence[LastFiledDocument]
    """
    Last stamped documents ("Últimos documentos timbrados").
    """
    tax_observations: Optional[str] = None
    """
    Tax observations ("Observaciones tributarias").
    """


@pydantic.dataclasses.dataclass(frozen=True)
class LastFiledDocument:
    """
    Entry of the "last filed documents" list of a taxpayer: a document name
    paired with a date.

    Instances are immutable.
    """

    name: str
    """
    Name of the document (for example, a document type such as
    "FACTURA ELECTRONICA").
    """
    date: date
    """
    Date reported alongside the document name.
    """
89 changes: 88 additions & 1 deletion src/cl_sii/cte/parsers.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from __future__ import annotations

from datetime import datetime

from bs4 import BeautifulSoup

from .data_models import LegalRepresentative, TaxpayerProvidedInfo
from .data_models import LastFiledDocument, LegalRepresentative, TaxpayerData, TaxpayerProvidedInfo


def parse_taxpayer_provided_info(html_content: str) -> TaxpayerProvidedInfo:
Expand Down Expand Up @@ -89,3 +91,88 @@ def parse_taxpayer_provided_info(html_content: str) -> TaxpayerProvidedInfo:
company_formation=company_formation,
participation_in_existing_companies=participation_in_companies,
)


def parse_taxpayer_data(html_content: str) -> TaxpayerData:
    """
    Parse the CTE HTML content to extract the content of the section:
    "Datos del Contribuyente"

    Every value is read from the table whose ``id`` is ``tbl_dbcontribuyente``.
    A cell that is absent yields the "empty" value of its field (``None``,
    ``""`` or an empty list) instead of raising.

    Args:
        html_content: HTML string containing the taxpayer information table

    Returns:
        TaxpayerData instance with the parsed data

    Raises:
        ValueError: If the table is not found in ``html_content``, or if a
            non-empty date is not formatted as ``DD-MM-YYYY``.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table', id='tbl_dbcontribuyente')
    if not table:
        raise ValueError("Could not find 'Datos del Contribuyente' table in HTML")

    # Start of activities: a missing cell and an empty cell both mean "no date".
    start_of_activities_date = None
    fecha_inicio_elem = table.find(id='td_fecha_inicio')  # type: ignore[attr-defined]
    if fecha_inicio_elem:
        start_text = fecha_inicio_elem.get_text(strip=True)
        if start_text:
            start_of_activities_date = datetime.strptime(start_text, "%d-%m-%Y").date()

    # Economic activities: one activity per line.
    actividades_elem = table.find(id='td_actividades')  # type: ignore[attr-defined]
    economic_activities = (
        actividades_elem.get_text(separator="\n", strip=True) if actividades_elem else ""
    )

    # Tax category
    categoria_elem = table.find(id='td_categoria')  # type: ignore[attr-defined]
    tax_category = categoria_elem.get_text(strip=True) if categoria_elem else ""

    # Address
    domicilio_elem = table.find(id='td_domicilio')  # type: ignore[attr-defined]
    address = domicilio_elem.get_text(strip=True) if domicilio_elem else ""

    # Branches: the values live in the cell next to the one labelled "Sucursales:".
    branches = []
    sucursales_row = table.find(  # type: ignore[attr-defined]
        'td',
        string=lambda s: s and 'Sucursales:' in s,
    )
    if sucursales_row:
        sucursales_td = sucursales_row.find_next_sibling('td')
        if sucursales_td:
            # Iterate the text nodes directly instead of joining them with "\n" and
            # splitting again, so that a newline inside a single text node does not
            # create spurious entries. Empty strings are already skipped.
            branches = list(sucursales_td.stripped_strings)

    # Last stamped documents: names and dates come from two parallel cells.
    last_filed_documents = []
    tim_nombre_elem = table.find(id='td_tim_nombre')  # type: ignore[attr-defined]
    tim_fecha_elem = table.find(id='td_tim_fecha')  # type: ignore[attr-defined]
    if tim_nombre_elem and tim_fecha_elem:
        # Pair the i-th name with the i-th date. Reading the text nodes directly
        # keeps both sequences aligned even if a text node contains a newline.
        # `zip` stops at the shorter sequence, so unpaired entries are ignored.
        last_filed_documents = [
            LastFiledDocument(
                name=name,
                date=datetime.strptime(date_str, "%d-%m-%Y").date(),
            )
            for name, date_str in zip(
                tim_nombre_elem.stripped_strings,
                tim_fecha_elem.stripped_strings,
            )
        ]

    # Tax observations: `None` only when the cell is absent; a cell that is
    # present but empty yields an empty string.
    tax_observations = None
    observaciones_elem = table.find(id='td_observaciones')  # type: ignore[attr-defined]
    if observaciones_elem:
        tax_observations = observaciones_elem.get_text(strip=True)

    return TaxpayerData(
        start_of_activities_date=start_of_activities_date,
        economic_activities=economic_activities,
        tax_category=tax_category,
        address=address,
        branches=branches,
        last_filed_documents=last_filed_documents,
        tax_observations=tax_observations,
    )
56 changes: 56 additions & 0 deletions src/tests/test_cte_parsers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

from datetime import date
from unittest import TestCase

from cl_sii.cte import data_models, parsers
Expand Down Expand Up @@ -49,3 +50,58 @@ def test_parse_taxpayer_provided_info(self) -> None:
assert_raises_cm.exception.args,
("Could not find taxpayer information table in HTML",),
)

def test_parse_taxpayer_data(self) -> None:
    """
    Check `parse_taxpayer_data` against a full CTE document, empty input, and
    a document whose taxpayer table has no data.
    """
    cte_html = read_test_file_str_utf8('test_data/sii-cte/cte_empty_f29.html')
    with self.subTest("Parsing ok"):
        parsed = parsers.parse_taxpayer_data(cte_html)
        expected_documents = [
            data_models.LastFiledDocument(name=doc_name, date=doc_date)
            for doc_name, doc_date in (
                ("FACTURA ELECTRONICA", date(2025, 7, 24)),
                ("FACTURA NO AFECTA O EXENTA ELECTRONICA", date(2025, 7, 17)),
                ("GUIA DESPACHO ELECTRONICA", date(2025, 5, 14)),
                ("NOTA CREDITO ELECTRONICA", date(2025, 7, 18)),
            )
        ]
        expected = data_models.TaxpayerData(
            start_of_activities_date=date(2023, 11, 15),
            economic_activities=(
                "SERVICIOS DE ASESORIA Y CONSULTORIA EN MATERIA DE ADMINISTRACION DE EMPRESAS "
                "Y OTROS SERVICIOS DE ASESORIA ADMINISTRATIVA Y DE NEGOCIOS N.C.P.\n"
                "ACTIVIDADES DE OTRAS ORGANIZACIONES EMPRESARIALES N.C.P.\n"
                "OTRAS ACTIVIDADES DE SERVICIOS PERSONALES N.C.P."
            ),
            tax_category="Primera categoría",
            address="AV REAL, LAS CONDES",
            branches=[],
            last_filed_documents=expected_documents,
            tax_observations="No tiene observaciones.",
        )
        self.assertEqual(parsed, expected)

    with self.subTest("Parsing empty content"):
        # Empty input has no table at all, so the parser must refuse it.
        with self.assertRaises(ValueError) as raised:
            parsers.parse_taxpayer_data("")
        self.assertEqual(
            raised.exception.args,
            ("Could not find 'Datos del Contribuyente' table in HTML",),
        )

    with self.subTest("Parsing content with empty table"):
        empty_table_html = read_test_file_str_utf8('test_data/sii-cte/cte_empty_table.html')
        parsed = parsers.parse_taxpayer_data(empty_table_html)
        expected = data_models.TaxpayerData(
            start_of_activities_date=None,
            economic_activities="",
            tax_category="",
            address="",
            branches=[],
            last_filed_documents=[],
            tax_observations=None,
        )
        self.assertEqual(parsed, expected)
Loading
Loading