Skip to content

Commit 2be6833

Browse files
committed
updated the config format
Signed-off-by: Oindrilla Chatterjee <[email protected]>
1 parent 5b83bd2 commit 2be6833

File tree

1 file changed

+40
-20
lines changed

1 file changed

+40
-20
lines changed

docs/sdg/sdg-api-interface.md

+40-20
Original file line number | Diff line number | Diff line change
@@ -31,12 +31,17 @@ The CLI client uses the instructlab SDG library and provides it a run configurat
3131
# cli_driver.py
3232

3333
from sdg import SDG
34+
from run_config import SynthDataFlow
35+
from pipeline import Pipeline
3436
import yaml
3537

3638
client = openai_client(endpoint)
37-
with open('run_config.yaml', 'r') as file:
38-
run_config = yaml.safe_load(file)
39-
cli_sdg = SDG(run_config, client) # run config has all the variables like num_samples, pipelinesteps etc
39+
model = "model-version"
40+
41+
synth_skills_flow = SynthDataFlow(client, model).get_flow()
42+
skills_pipe = Pipeline(synth_skills_flow)
43+
44+
cli_sdg = SDG([synth_skills_flow]) # the flow's block configs have all the variables like num_samples, max_retry, etc.
4045
generated_samples = cli_sdg.generate()
4146
```
4247

@@ -62,20 +67,35 @@ The run configuration includes the necessary parameters for executing the SDG co
6267
)
6368
```
6469

65-
```yaml
66-
# run_config.yaml
67-
68-
num_samples : 30
69-
max_retry : 5
70-
pipeline_steps:
71-
gen_q:
72-
prompt_template: "configs/gen_q.yaml"
73-
filter_q:
74-
prompt_template: "configs/filter_q.yaml"
75-
max_new_tokens: 10000
76-
# model parameters for generation
77-
model_name: mixtral-model
78-
model_prompt: '<s> [INST] {prompt} [/INST]'
79-
client: client
80-
num_procs: 8
81-
```
70+
```python
71+
# run_config.py
72+
class Flow(ABC):
73+
def __init__(self, client, model_id) -> None:
74+
self.client = client
75+
self.model_id = model_id
76+
77+
@abstractmethod
78+
def get_flow(self) -> list:
79+
pass
80+
81+
82+
class SynthDataFlow(Flow):
83+
def get_flow(self) -> list:
84+
return [
85+
{
86+
'block_type': LLMBlock,
87+
'block_config': {
88+
'block_name': "gen_q",
89+
'config_path': "configs/gen_q.yaml",
90+
'client': self.client,
91+
'model_id': self.model_id,
92+
'model_prompt': '<s> [INST] {prompt} [/INST]',
93+
'output_cols': ['question'],
94+
'batch_kwargs': {
95+
'num_procs': 8,
96+
'num_samples': 30,
97+
'batched': True,
98+
},
99+
'max_retry' : 5,
100+
'max_new_tokens': 10000
101+
```

0 commit comments

Comments
 (0)