@@ -94,76 +94,83 @@ type modelfile struct {
94
94
quantization string
95
95
}
96
96
97
- // File type patterns, ignore the case of the file extensions
97
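+ // File type patterns (config, model, code, skip); configFilePatterns below lists supported configuration and resource files. NOTE(review): the replaced comment said file-extension case is ignored - confirm and keep that note if still true.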
98
98
var (
99
- // Config file patterns
100
99
configFilePatterns = []string {
101
- // Common config files
102
- "*.json" ,
103
- "*.jsonl" ,
104
- "*.yaml" ,
105
- "*.yml" ,
106
- "*.toml" ,
107
- "*.txt" ,
108
- "*.config" ,
109
- "*.modelcard" ,
110
- "*.meta" ,
111
- "*.ini" ,
112
-
113
- // Common doc files
114
- "*.md" ,
115
- "LICENSE*" ,
116
- "README*" ,
117
- "SETUP*" ,
118
- "*requirements*" ,
119
-
120
- // Image file patterns
121
- "*.jpg" ,
122
- "*.jpeg" ,
123
- "*.png" ,
124
- "*.gif" ,
125
- "*.bmp" ,
126
- "*.tiff" ,
127
- "*.ico" ,
128
-
129
- // Other files
130
- "*tokenizer.model*" , // For mistral tokenizer.model.v3
131
- "config.json.*" ,
132
- }
133
-
134
- // Model file patterns
100
+ // Configuration formats
101
+ "*.json" , // JSON configuration files
102
+ "*.jsonl" , // JSON Lines format
103
+ "*.yaml" , // YAML configuration files
104
+ "*.yml" , // YAML alternative extension
105
+ "*.toml" , // TOML configuration files
106
+ "*.ini" , // INI configuration files
107
+ "*.config" , // Generic config files
108
+ "*.txt" , // Text files
109
+ "*.modelcard" , // Model card metadata
110
+ "*.meta" , // Model metadata
111
+
112
+ // Documentation files
113
+ "*.md" , // Markdown documentation
114
+ "LICENSE*" , // License files
115
+ "README*" , // Project documentation
116
+ "SETUP*" , // Setup instructions
117
+ "*requirements*" , // Dependency specifications
118
+
119
+ // Image assets
120
+ "*.jpg" , // JPEG image format
121
+ "*.jpeg" , // JPEG alternative extension
122
+ "*.png" , // PNG image format
123
+ "*.gif" , // GIF image format
124
+ "*.bmp" , // Bitmap image format
125
+ "*.tiff" , // TIFF image format
126
+ "*.ico" , // Icon format
127
+
128
+ // Model-specific files
129
+ "*tokenizer.model*" , // Tokenizer files (e.g., Mistral v3)
130
+ "config.json.*" , // Model configuration variants
131
+ }
132
+
133
+ // Model file patterns - supported model file extensions
135
134
modelFilePatterns = []string {
136
- "*.bin" ,
137
- "*.safetensors" ,
138
- "*.pt" ,
139
- "*.pth" ,
140
- "*.onnx" ,
141
- "*.gguf" ,
142
- "*.msgpack" ,
143
- "*.tflite" , // tensorflow lite
144
- "*.h5" , // keras
145
- "*.hdf" , // keras
146
- "*.hdf5" , // keras
147
- "*.ot" , // openvino
148
- "*.engine" , // tensorrt
149
- "*.trt" , // tensorrt
150
- }
151
-
152
- // Code file patterns
135
+ // Huggingface formats
136
+ "*.safetensors" , // Safe and efficient tensor serialization format
137
+
138
+ // PyTorch formats
139
+ "*.bin" , // General binary format
140
+ "*.pt" , // PyTorch model
141
+ "*.pth" , // PyTorch model (alternative extension)
142
+
143
+ // TensorFlow formats
144
+ "*.tflite" , // TensorFlow Lite
145
+ "*.h5" , // Keras HDF5 format
146
+ "*.hdf" , // Hierarchical Data Format
147
+ "*.hdf5" , // HDF5 (alternative extension)
148
+
149
+ // Other ML frameworks
150
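+ "*.ot" , // tch-rs / rust-bert weights (e.g. rust_model.ot); NOTE(review): previously labeled OpenVINO, whose IR format is .xml + .bin - confirm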
151
+ "*.engine" , // TensorRT format
152
+ "*.trt" , // TensorRT format (alternative extension)
153
+ "*.onnx" , // Open Neural Network Exchange format
154
+ "*.gguf" , // GGML Universal Format
155
+ "*.msgpack" , // MessagePack serialization
156
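+ "*.model" , // Generic ".model" files used by some NLP frameworks; NOTE(review): overlaps with "*tokenizer.model*" in configFilePatterns - confirm which list takes precedence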
157
+ }
158
+
159
+ // Code file patterns - supported script and notebook files
153
160
codeFilePatterns = []string {
154
- "*.py" ,
155
- "*.sh" ,
156
- "*.ipynb" ,
161
+ "*.py" , // Python source files
162
+ "*.sh" , // Shell scripts
163
+ "*.ipynb" , // Jupyter notebooks
157
164
}
158
165
159
- // Skip files/ directories that match these patterns
166
+ // Skip patterns - files and directories to ignore during processing
160
167
skipPatterns = []string {
161
- ".*" ,
162
- "modelfile" ,
163
- "__pycache__" ,
164
- "*.pyc" ,
165
- "*.pyo" ,
166
- "*.pyd" ,
168
+ ".*" , // Hidden files and directories
169
+ "modelfile" , // Modelfile configuration
170
+ "__pycache__" , // Python bytecode cache directory
171
+ "*.pyc" , // Python compiled bytecode
172
+ "*.pyo" , // Python optimized bytecode
173
+ "*.pyd" , // Python extension modules (Windows DLLs)
167
174
}
168
175
)
169
176
0 commit comments