@@ -303,3 +303,92 @@ def test_check_jsonl_empty_messages(tmp_path: Path):
303
303
assert (
304
304
"Expected a non-empty list of messages. Found empty list" in report ["message" ]
305
305
)
306
+
307
+
308
def test_check_jsonl_valid_weights_all_messages(tmp_path: Path) -> None:
    """Check that a file passes when every message carries a weight of 0 or 1.

    Two conversations are written to a JSONL file; the report from
    ``check_file`` must flag the check as passed and count both samples.
    """
    first_conversation = {
        "messages": [
            {"role": "user", "content": "Hello", "weight": 1},
            {"role": "assistant", "content": "Hi there!", "weight": 0},
            {"role": "user", "content": "How are you?", "weight": 1},
            {"role": "assistant", "content": "I'm doing well!", "weight": 1},
        ]
    }
    second_conversation = {
        "messages": [
            {"role": "system", "content": "You are helpful", "weight": 0},
            {"role": "user", "content": "What's the weather?", "weight": 1},
            {"role": "assistant", "content": "It's sunny today!", "weight": 1},
        ]
    }
    samples = [first_conversation, second_conversation]

    jsonl_path = tmp_path / "valid_weights_all.jsonl"
    jsonl_path.write_text("\n ".join(map(json.dumps, samples)))

    result = check_file(jsonl_path)
    assert result["is_check_passed"]
    assert result["num_samples"] == len(samples)
333
+
334
+
335
def test_check_jsonl_valid_weights_mixed_with_none(tmp_path: Path) -> None:
    """Check that a file passes when only some messages specify a weight.

    The first conversation mixes weighted and unweighted messages; the second
    has no weights at all. ``check_file`` must report a passing check and
    count both samples.
    """
    partially_weighted = {
        "messages": [
            {"role": "user", "content": "Hello", "weight": 1},
            {"role": "assistant", "content": "Hi there!", "weight": 0},
            {"role": "user", "content": "How are you?"},
            {"role": "assistant", "content": "I'm doing well!"},
        ]
    }
    unweighted = {
        "messages": [
            {"role": "user", "content": "What's the weather?"},
            {"role": "assistant", "content": "It's sunny today!"},
        ]
    }
    samples = [partially_weighted, unweighted]

    jsonl_path = tmp_path / "valid_weights_mixed.jsonl"
    jsonl_path.write_text("\n ".join(map(json.dumps, samples)))

    result = check_file(jsonl_path)
    assert result["is_check_passed"]
    assert result["num_samples"] == len(samples)
359
+
360
+
361
def test_check_jsonl_invalid_weight_float(tmp_path: Path) -> None:
    """Check that a float weight (``1.0``) makes the file fail validation.

    The report from ``check_file`` must mark the check as failed and its
    message must contain "Weight must be an integer".
    """
    samples = [
        {
            "messages": [
                # The float weight on this message is the value under test.
                {"role": "user", "content": "Hello", "weight": 1.0},
                {"role": "assistant", "content": "Hi there!", "weight": 0},
            ]
        }
    ]

    jsonl_path = tmp_path / "invalid_weight_float.jsonl"
    jsonl_path.write_text("\n ".join(map(json.dumps, samples)))

    result = check_file(jsonl_path)
    assert not result["is_check_passed"]
    assert "Weight must be an integer" in result["message"]
377
+
378
+
379
def test_check_jsonl_invalid_weight(tmp_path: Path) -> None:
    """Check that an integer weight other than 0 or 1 fails validation.

    The report from ``check_file`` must mark the check as failed and its
    message must contain "Weight must be either 0 or 1".
    """
    samples = [
        {
            "messages": [
                # A weight of 2 is the out-of-range value under test.
                {"role": "user", "content": "Hello", "weight": 2},
                {"role": "assistant", "content": "Hi there!", "weight": 0},
            ]
        }
    ]

    jsonl_path = tmp_path / "invalid_weight.jsonl"
    jsonl_path.write_text("\n ".join(map(json.dumps, samples)))

    result = check_file(jsonl_path)
    assert not result["is_check_passed"]
    assert "Weight must be either 0 or 1" in result["message"]
0 commit comments