1010import re
1111import subprocess
1212import sys
13+ import shutil
1314from pathlib import Path
1415
1516from setuptools import Extension , find_packages , setup
1617from setuptools .command .build_ext import build_ext
18+ from setuptools .command .build_py import build_py as build_py_orig
1719
1820# Read the README file
1921with open ("README.md" , "r" ) as f :
@@ -126,19 +128,52 @@ def build_extension(self, ext): # noqa C901
126128 )
127129
128130
class BuildPy(build_py_orig):
    """Custom ``build_py`` step that bundles the header files with the package.

    Once the stock ``build_py`` command has finished, every regular file found
    under the ``include`` directory (a path relative to the current working
    directory) is mirrored into ``<build_lib>/pytorch_tokenizers/include``,
    keeping the same relative directory layout.
    """

    def run(self):
        """Run the standard build, then copy the header tree if it exists."""
        super().run()

        source_root = Path("include")
        if source_root.exists():
            target_root = Path(self.build_lib, "pytorch_tokenizers", "include")
            # Walk the tree lazily and keep only files; the directories they
            # live in are created on demand right before each copy.
            for header in (p for p in source_root.rglob("*") if p.is_file()):
                target = target_root.joinpath(header.relative_to(source_root))
                target.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(header, target)
147+
129148setup (
130149 name = "pytorch-tokenizers" ,
131- version = "0. 1.0" ,
150+ version = "1.0.1 " ,
132151 long_description = long_description ,
133152 long_description_content_type = "text/markdown" ,
134153 url = "https://github.com/meta-pytorch/tokenizers" ,
135154 packages = find_packages (),
155+ include_package_data = True ,
156+ package_data = {
157+ "pytorch_tokenizers" : [
158+ "include/*.h" ,
159+ "include/**/*.h" ,
160+ "include/*.hpp" ,
161+ "include/**/*.hpp" ,
162+ ]
163+ },
136164 ext_modules = [CMakeExtension ("pytorch_tokenizers.pytorch_tokenizers_cpp" )],
137- cmdclass = {"build_ext" : CMakeBuild },
165+ cmdclass = {
166+ "build_ext" : CMakeBuild ,
167+ "build_py" : BuildPy ,
168+ },
138169 zip_safe = False ,
139170 python_requires = ">=3.10" ,
140171 install_requires = [
141172 "pybind11>=2.6.0" ,
173+ "sentencepiece" ,
174+ "mistral-common" ,
175+ "tokenizers" ,
176+ "tiktoken" ,
142177 ],
143178 setup_requires = [
144179 "pybind11>=2.6.0" ,
@@ -150,8 +185,6 @@ def build_extension(self, ext): # noqa C901
150185 "License :: OSI Approved :: BSD License" ,
151186 "Operating System :: OS Independent" ,
152187 "Programming Language :: Python :: 3" ,
153- "Programming Language :: Python :: 3.8" ,
154- "Programming Language :: Python :: 3.9" ,
155188 "Programming Language :: Python :: 3.10" ,
156189 "Programming Language :: Python :: 3.11" ,
157190 "Programming Language :: Python :: 3.12" ,
0 commit comments