Skip to content

Conversation

@1stprinciple
Copy link
Owner

observation = json.loads(observation)

Update dataset_registry.json

prepare dataset

with _timer(

only necessary dependencies

16

revert

env

bring them back

clip_advantages

mask_truncated_samples

remove chat_scheduler

cleanup

_target_

critic: enable: False

revert

trainer.device=cuda

Timer

Timer

timer

only import ToolASTAgent

from rllm.agents.tool_ast_agent import ToolASTAgent

examples.tool_calling.train_apigen_mt

trainer.experiment_name='rllm-apigen-mt-16k-stage2'

batch_size=128

comment to the back

back to full finetune

comment

double parse

update ground_truth

ground_truth

update ground_truth = [tool_call["function"] for tool_call in ground_truth]

\n instead of \\n

lora

remove nulls in tools

tool_calls = [tool_call.to_dict() for tool_call in tool_calls]

tool_call_str

question = json.loads(question)

agent_args = {}

tool calling AST environment

observation = json.loads(observation)

Update dataset_registry.json

prepare dataset

with _timer(

only necessary dependencies

16

revert

env

bring them back

clip_advantages

mask_truncated_samples

remove chat_scheduler

cleanup

_target_

critic:   enable: False

revert

trainer.device=cuda

Timer

Timer

timer

only import ToolASTAgent

from rllm.agents.tool_ast_agent import ToolASTAgent

examples.tool_calling.train_apigen_mt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants