@@ -45,7 +45,6 @@ use crate::utils::load_node_config;
45
45
pub struct IngestArgs {
46
46
pub input_path : Uri ,
47
47
pub input_format : SourceInputFormat ,
48
- pub overwrite : bool ,
49
48
pub vrl_script : Option < String > ,
50
49
pub clear_cache : bool ,
51
50
}
@@ -65,7 +64,6 @@ pub async fn ingest(args: IngestArgs) -> anyhow::Result<IndexingStatistics> {
65
64
& mut metastore,
66
65
& storage_resolver,
67
66
& config. default_index_root_uri ,
68
- args. overwrite ,
69
67
& source_config,
70
68
)
71
69
. await ?;
@@ -123,3 +121,131 @@ pub async fn ingest(args: IngestArgs) -> anyhow::Result<IndexingStatistics> {
123
121
}
124
122
Ok ( statistics)
125
123
}
124
+
125
+ #[ cfg( all( test, feature = "s3-localstack-tests" ) ) ]
126
+ mod tests {
127
+ use std:: path:: PathBuf ;
128
+ use std:: str:: FromStr ;
129
+
130
+ use quickwit_common:: new_coolid;
131
+ use quickwit_storage:: StorageResolver ;
132
+
133
+ use super :: * ;
134
+
135
+ async fn put_object (
136
+ storage_resolver : StorageResolver ,
137
+ bucket : & str ,
138
+ prefix : & str ,
139
+ filename : & str ,
140
+ data : Vec < u8 > ,
141
+ ) -> Uri {
142
+ let src_location = format ! ( "s3://{}/{}" , bucket, prefix) ;
143
+ let storage_uri = Uri :: from_str ( & src_location) . unwrap ( ) ;
144
+ let storage = storage_resolver. resolve ( & storage_uri) . await . unwrap ( ) ;
145
+ storage
146
+ . put ( & PathBuf :: from ( filename) , Box :: new ( data) )
147
+ . await
148
+ . unwrap ( ) ;
149
+ storage_uri. join ( filename) . unwrap ( )
150
+ }
151
+
152
+ #[ tokio:: test]
153
+ async fn test_ingest ( ) -> anyhow:: Result < ( ) > {
154
+ quickwit_common:: setup_logging_for_tests ( ) ;
155
+ let bucket = "quickwit-integration-tests" ;
156
+ let prefix = new_coolid ( "lambda-ingest-test" ) ;
157
+ let storage_resolver = StorageResolver :: unconfigured ( ) ;
158
+
159
+ let index_config = br#"
160
+ version: 0.8
161
+ index_id: lambda-test
162
+ doc_mapping:
163
+ field_mappings:
164
+ - name: timestamp
165
+ type: datetime
166
+ input_formats:
167
+ - unix_timestamp
168
+ fast: true
169
+ timestamp_field: timestamp
170
+ "# ;
171
+ let config_uri = put_object (
172
+ storage_resolver. clone ( ) ,
173
+ bucket,
174
+ & prefix,
175
+ "index-config.yaml" ,
176
+ index_config. to_vec ( ) ,
177
+ )
178
+ . await ;
179
+
180
+ // TODO use dependency injection instead of lazy static for env configs
181
+ std:: env:: set_var ( "QW_LAMBDA_METASTORE_BUCKET" , bucket) ;
182
+ std:: env:: set_var ( "QW_LAMBDA_INDEX_BUCKET" , bucket) ;
183
+ std:: env:: set_var ( "QW_LAMBDA_METASTORE_PREFIX" , & prefix) ;
184
+ std:: env:: set_var ( "QW_LAMBDA_INDEX_PREFIX" , & prefix) ;
185
+ std:: env:: set_var ( "QW_LAMBDA_INDEX_CONFIG_URI" , config_uri. as_str ( ) ) ;
186
+ std:: env:: set_var ( "QW_LAMBDA_INDEX_ID" , "lambda-test" ) ;
187
+
188
+ // first ingestion creates the index metadata
189
+ let test_data_1 = br#"{"timestamp": 1724140899, "field1": "value1"}"# ;
190
+ let test_data_1_uri = put_object (
191
+ storage_resolver. clone ( ) ,
192
+ bucket,
193
+ & prefix,
194
+ "data.json" ,
195
+ test_data_1. to_vec ( ) ,
196
+ )
197
+ . await ;
198
+
199
+ {
200
+ let args = IngestArgs {
201
+ input_path : test_data_1_uri. clone ( ) ,
202
+ input_format : SourceInputFormat :: Json ,
203
+ vrl_script : None ,
204
+ clear_cache : true ,
205
+ } ;
206
+ let stats = ingest ( args) . await ?;
207
+ assert_eq ! ( stats. num_invalid_docs, 0 ) ;
208
+ assert_eq ! ( stats. num_docs, 1 ) ;
209
+ }
210
+
211
+ tokio:: time:: sleep ( std:: time:: Duration :: from_secs ( 1 ) ) . await ;
212
+
213
+ {
214
+ // ingesting the same data again is a no-op
215
+ let args = IngestArgs {
216
+ input_path : test_data_1_uri,
217
+ input_format : SourceInputFormat :: Json ,
218
+ vrl_script : None ,
219
+ clear_cache : true ,
220
+ } ;
221
+ let stats = ingest ( args) . await ?;
222
+ assert_eq ! ( stats. num_invalid_docs, 0 ) ;
223
+ assert_eq ! ( stats. num_docs, 0 ) ;
224
+ }
225
+
226
+ {
227
+ // second ingestion should not fail when metadata already exists
228
+ let test_data = br#"{"timestamp": 1724149900, "field1": "value2"}"# ;
229
+ let test_data_uri = put_object (
230
+ storage_resolver. clone ( ) ,
231
+ bucket,
232
+ & prefix,
233
+ "data2.json" ,
234
+ test_data. to_vec ( ) ,
235
+ )
236
+ . await ;
237
+
238
+ let args = IngestArgs {
239
+ input_path : test_data_uri,
240
+ input_format : SourceInputFormat :: Json ,
241
+ vrl_script : None ,
242
+ clear_cache : true ,
243
+ } ;
244
+ let stats = ingest ( args) . await ?;
245
+ assert_eq ! ( stats. num_invalid_docs, 0 ) ;
246
+ assert_eq ! ( stats. num_docs, 1 ) ;
247
+ }
248
+
249
+ Ok ( ( ) )
250
+ }
251
+ }
0 commit comments