# Source: adaptive_cot_framework/research_config.yaml
# (GitHub repository macto94/adaptive_cot_framework, branch main)
---
# Research Configuration for Adaptive CoT Framework
# Focused on parallel test-time scaling with self-consistency

# Models, keyed by identifier (a single entry is defined here).
models:
  deepseek_r1_distill_qwen:
    # NOTE(review): absolute path on the local filesystem; it has to be
    # adjusted on any machine where this directory does not exist.
    model_name: '/raid/LLM/deepseek-r1-distill-qwen-1.5b'
    model_type: 'deepseek'

    # Sampling settings. The key names mirror Hugging Face `generate()`
    # arguments; presumably forwarded as-is -- confirm against the loader.
    generation_params:
      max_new_tokens: 2048
      temperature: 0.6
      top_p: 0.95
      top_k: 50
      repetition_penalty: 1.1
      do_sample: true

# Adaptive branching: bounds on the branch count and allocation settings.
adaptive_branching:
  enabled: true
  min_branches: 1
  max_branches: 15
  # Default for static baseline
  default_branches: 8

  # Thresholds for the prefill analysis signals
  prefill_analysis:
    entropy_threshold: 2.5
    kl_divergence_threshold: 0.5
    confidence_threshold: 0.7

  # Branch allocation.
  # `strategy` options: entropy_based, kl_based, hybrid, confidence_based
  # The three weights below sum to 1.0.
  # NOTE(review): presumably the weights only matter for `hybrid` -- confirm.
  branch_allocation:
    strategy: 'entropy_based'
    entropy_weight: 0.6
    kl_weight: 0.3
    confidence_weight: 0.1

# Self-Consistency Configuration
self_consistency:
  enabled: true
  consensus_method: "majority_vote"
  confidence_threshold: 0.6

  # Answer extraction
  answer_extraction:
    method: "regex"
    # Regular expressions; each has exactly one capture group holding the
    # answer text.
    #
    # YAML has no raw-string prefix: a plain scalar written as r"..." keeps
    # the `r` and the quotes as part of the value, and any `: ` inside it
    # turns the list item into a mapping. Single-quoted scalars are used
    # instead, because they pass backslashes through untouched, so what is
    # written here is exactly what the regex engine receives.
    # NOTE(review): assumes the consumer compiles these with a
    # Python-`re`-compatible engine -- confirm against the extraction code.
    patterns:
      - '\\boxed\{([^}]+)\}'         # LaTeX \boxed{...}
      - '\$([^$]+)\$'                # inline math $...$
      - 'Answer: ([^\n]+)'           # rest of the line after "Answer: "
      - 'Final answer: ([^\n]+)'     # rest of the line after "Final answer: "
      - 'The answer is:? ([^\n]+)'   # the colon is optional

# Research logging: destination, verbosity and per-artifact switches.
research_logging:
  enabled: true
  output_dir: 'research_logs'
  log_level: 'detailed'

  # Individual toggles for each kind of record
  log_prefill_signals: true
  log_branch_allocations: true
  log_reasoning_paths: true
  log_consensus_data: true
  log_performance_metrics: true

# Evaluation: output location, what to persist, and the benchmark set.
evaluation:
  output_dir: 'results'
  save_predictions: true
  save_metrics: true
  # NOTE(review): unit is not stated here; presumably seconds -- confirm.
  timeout: 300

  # Benchmarks: each entry has an on/off switch, a sample cap and a split.
  benchmarks:
    gsm8k:
      enabled: true
      max_samples: 1000
      split: 'test'
    aime:
      enabled: true
      max_samples: 100
      split: 'test'
    olympiad:
      enabled: true
      max_samples: 100
      split: 'test'
    math:
      enabled: true
      max_samples: 1000
      split: 'test'

# Performance Monitoring
# A master switch (`enabled`) plus one boolean per tracked metric.
# NOTE(review): whether `enabled: false` overrides the individual track_*
# flags is decided by the consuming code -- confirm there.
performance_monitoring:
  enabled: true
  track_memory: true
  track_generation_time: true
  track_branch_efficiency: true