@@ -31,12 +31,17 @@ The CLI client uses the instructlab SDG library and provides it a run configurat
31
31
# cli_driver.py
32
32
33
33
from sdg import SDG
34
+ from run_config import SynthDataFlow
35
+ from pipeline import Pipeline
34
36
import yaml
35
37
36
38
client = openai_client(endpoint)
37
- with open (' run_config.yaml' , ' r' ) as file :
38
- run_config = yaml.safe_load(file )
39
- cli_sdg = SDG(run_config, client) # run config has all the variables like num_samples, pipelinesteps etc
39
+ model = " model-version"
40
+
41
+ synth_skills_flow = SynthDataFlow(client, model).get_flow()
42
+ skills_pipe = Pipeline(synth_skills_flow)
43
+
44
+ cli_sdg = SDG([synth_skills_flow]) # the flow's block configs carry the variables like num_samples, max_retry, etc.
40
45
generated_samples = cli_sdg.generate()
41
46
```
42
47
@@ -62,20 +67,35 @@ The run configuration includes the necessary parameters for executing the SDG co
62
67
)
63
68
```
64
69
65
- ``` yaml
66
- # run_config.yaml
67
-
68
- num_samples : 30
69
- max_retry : 5
70
- pipeline_steps :
71
- gen_q :
72
- prompt_template : " configs/gen_q.yaml"
73
- filter_q :
74
- prompt_template : " configs/filter_q.yaml"
75
- max_new_tokens : 10000
76
- # model parameters for generation
77
- model_name : mixtral-model
78
- model_prompt : ' <s> [INST] {prompt} [/INST]'
79
- client : client
80
- num_procs : 8
81
- ```
70
+ ``` python
71
+ # run_config.py
72
+ class Flow (ABC ):
73
+ def __init__ (self , client , model_id ) -> None :
74
+ self .client = client
75
+ self .model_id = model_id
76
+
77
+ @abstractmethod
78
+ def get_flow (self ) -> list :
79
+ pass
80
+
81
+
82
+ class SynthDataFlow (Flow ):
83
+ def get_flow (self ) -> list :
84
+ return [
85
+ {
86
+ ' block_type' : LLMBlock,
87
+ ' block_config' : {
88
+ ' block_name' : " gen_q" ,
89
+ ' config_path' : " configs/gen_q.yaml" ,
90
+ ' client' : self .client,
91
+ ' model_id' : self .model_id,
92
+ ' model_prompt' : ' <s> [INST] {prompt} [/INST]' ,
93
+ ' output_cols' : [' question' ],
94
+ ' batch_kwargs' : {
95
+ ' num_procs' : 8 ,
96
+ ' num_samples' : 30 ,
97
+ ' batched' : True ,
98
+ },
99
+ ' max_retry' : 5 ,
100
+ ' max_new_tokens' : 10000
101
+ ```
0 commit comments