41 changes: 32 additions & 9 deletions iris/src/iris/pipeline/autonomous_tutor_pipeline.py
@@ -12,6 +12,10 @@
AbstractAgentPipeline,
AgentPipelineExecutionState,
)
from iris.pipeline.shared.confidence_scoring import (
is_large_model,
parse_confidence_response,
)
from iris.pipeline.shared.utils import (
REDACTED_ANSWER_PLACEHOLDER,
format_post_discussion,
@@ -74,6 +78,12 @@ def __init__(self):
self.system_prompt_template = self.jinja_env.get_template(
"autonomous_tutor_system_prompt.j2"
)
self.confidence_combo_template = self.jinja_env.get_template(
"autonomous_tutor_confidence_combo.j2"
)
self.confidence_basic_template = self.jinja_env.get_template(
"autonomous_tutor_confidence_basic.j2"
)

self.tokens = []

@@ -189,7 +199,15 @@ def build_system_message(
else "the course"
),
}
return self.system_prompt_template.render(template_context)
base_prompt = self.system_prompt_template.render(template_context)
model_id = state.llm.model_name if state.llm else ""
if is_large_model(model_id):
logger.info("Using combo confidence prompt | model=%s", model_id)
confidence_section = self.confidence_combo_template.render()
else:
logger.info("Using basic confidence prompt | model=%s", model_id)
confidence_section = self.confidence_basic_template.render()
return base_prompt + "\n\n" + confidence_section

def get_memiris_tenant(self, dto: AutonomousTutorPipelineExecutionDTO) -> str:
"""
@@ -238,12 +256,15 @@ def post_agent_hook(
)
return ""

# TODO(IRIS-22): Implement Confidence Evaluation
# For now, use a placeholder confidence value
confidence = self._estimate_confidence(state)
should_post_directly = confidence >= self.DIRECT_POST_CONFIDENCE_THRESHOLD

logger.info("Generated response: %s", state.result)
logger.info(
"Confidence score | score=%.4f should_post_directly=%s",
confidence,
should_post_directly,
)

state.callback.done(
"Response generated",
@@ -256,24 +277,26 @@

def _estimate_confidence(
self,
state: AgentPipelineExecutionState[ # pylint: disable=unused-argument
state: AgentPipelineExecutionState[
AutonomousTutorPipelineExecutionDTO, Variant
],
) -> float:
"""
Estimate confidence score for the generated response.
"""Parse the verbalized confidence score from the agent's response.

Mutates state.result to contain only the clean answer text (without the
trailing Probability line), and returns the extracted probability.

Confidence thresholds:
- >= 0.95: Post immediately
- 0.80 - 0.95: Forward to verification queue
- < 0.80: Do not post, forward to verification queue

TODO: Implement actual confidence estimation

Returns:
float: Confidence score between 0.0 and 1.0
"""
return 0.99
answer_text, confidence = parse_confidence_response(state.result)
state.result = answer_text
return confidence

def _generate_retrieval_query_text(self, discussion: str) -> str:
"""Generate query text for retrieval tools."""
14 changes: 14 additions & 0 deletions autonomous_tutor_confidence_basic.j2
@@ -0,0 +1,14 @@
{# Confidence scoring addon — basic_probscore method (Yang et al. 2024) for small models.
This section is appended to the main system prompt. #}
---

## Confidence Scoring

After your answer, state the probability between 0.0 and 1.0 that your answer is correct.

**Output format — you MUST follow this exactly:**

Answer: <your response to the student>
Probability: <a single decimal between 0.0 and 1.0>

Do not include any text after the Probability line.
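For reference, a small-model response following this format would look like (invented example):

Answer: Arrays in Java are zero-indexed, so the last element is at index length - 1.
Probability: 0.82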
50 changes: 50 additions & 0 deletions autonomous_tutor_confidence_combo.j2
@@ -0,0 +1,50 @@
{# Confidence scoring addon — combo method (Yang et al. 2024) for large models.
This section is appended to the main system prompt. #}
---

## Confidence Scoring

Your response must include a probability that your answer is correct, expressed as a decimal between 0.0 and 1.0.

When assigning the probability, consider:
- **Task difficulty**: Is the question straightforward or does it require deep reasoning?
- **Knowledge availability**: Did you have access to sufficient, reliable information (via tools or training) to answer confidently?
- **Uncertainty in the question**: Is the question ambiguous or could it be interpreted in multiple ways?

Do not anchor on a comfortable middle value. Calibrate honestly: if you are nearly certain, use a high value; if you are mostly guessing, use a low value.

Here are examples of how to format your response:

---
**Example 1** (very low confidence — topic outside course scope with no tool access):
Guess: I'm not sure this is covered in the course materials, but binary search trees store elements such that each node's left subtree contains only smaller values and the right subtree only larger values, enabling O(log n) average-case search.
Probability: 0.08

---
**Example 2** (low confidence — question is vague and tools returned limited information):
Guess: The submission deadline is likely the end of the semester, but I couldn't find a specific date in the course FAQ or exercise details. I recommend checking the course announcements.
Probability: 0.24

---
**Example 3** (moderate confidence — general knowledge, no direct course evidence):
Guess: The gradient descent algorithm updates model parameters by moving in the direction of the negative gradient of the loss function with respect to those parameters, scaled by a learning rate.
Probability: 0.47

---
**Example 4** (high confidence — answered from retrieved lecture content):
Guess: According to the lecture slides, a mutex (mutual exclusion lock) ensures that only one thread can access a critical section at a time, preventing race conditions.
Probability: 0.77

---
**Example 5** (very high confidence — directly found in course FAQ):
Guess: Yes, you can submit up to 3 days late with a 10% penalty per day, as stated in the course FAQ.
Probability: 0.89

---

**Output format — you MUST follow this exactly:**

Guess: <your best response to the student>
Probability: <a single decimal between 0.0 and 1.0>

Do not include any text after the Probability line.
87 changes: 87 additions & 0 deletions iris/src/iris/pipeline/shared/confidence_scoring.py
@@ -0,0 +1,87 @@
import re

_LARGE_MODEL_PATTERNS = [
"70b",
"72b",
"110b",
"32b",
"gpt-4",
"gpt-5",
"gpt-oss",
]

_ANSWER_PREFIX_RE = re.compile(
r"^(?:answer|guess)\s*:\s*",
re.IGNORECASE,
)

_PROBABILITY_LINE_RE = re.compile(
r"(?:probability|confidence|p)\s*:\s*(-?\d+(?:\.\d+)?)(\s*%)?",
re.IGNORECASE,
)
Comment on lines +12 to +15 (Contributor):

⚠️ Potential issue | 🟠 Major

Probability parsing is too permissive and can misread normal answer text as confidence.

Using an unanchored regex with search() means any trailing p: <number> inside ordinary text can be parsed as confidence, which may inflate should_post_directly decisions.

Proposed fix
 _PROBABILITY_LINE_RE = re.compile(
-    r"(?:probability|confidence|p)\s*:\s*(-?\d+(?:\.\d+)?)(\s*%)?",
+    r"^\s*(?:probability|confidence|p)\s*:\s*(-?(?:\d+(?:\.\d+)?|\.\d+))\s*(%)?\s*$",
     re.IGNORECASE,
 )
@@
-            m = _PROBABILITY_LINE_RE.search(lines[i])
+            m = _PROBABILITY_LINE_RE.match(lines[i])

Also applies to: 55-56
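
A minimal repro of the misread the comment describes (input string invented for illustration; the regex is copied from the committed code):

import re

_PROBABILITY_LINE_RE = re.compile(
    r"(?:probability|confidence|p)\s*:\s*(-?\d+(?:\.\d+)?)(\s*%)?",
    re.IGNORECASE,
)

# "step: 5" ends with "p: 5", so the unanchored search() finds a
# "probability" in ordinary answer text; the parser would then clamp
# the raw value 5.0 to 1.0, maximizing confidence.
m = _PROBABILITY_LINE_RE.search("Please repeat step: 5 before resubmitting.")
print(m.group(1))  # -> "5"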




def is_large_model(model_id: str) -> bool:
"""Return True if the model should use the combo confidence prompt.

Large models include any GPT-4/GPT-5 generation model (including mini
variants and gpt-oss) and open-source models with ≥32B parameters.
Everything else is treated as small.
"""
lower = model_id.lower()
return any(pattern in lower for pattern in _LARGE_MODEL_PATTERNS)
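
A quick illustration of the substring check (model IDs invented for the example):

is_large_model("gpt-4.1-mini")          # True (contains "gpt-4")
is_large_model("qwen2.5-72b-instruct")  # True (contains "72b")
is_large_model("llama-3.1-8b")          # False, falls back to the basic prompt
is_large_model("")                      # False (empty ID when state.llm is None)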


def parse_confidence_response(raw_response: str) -> tuple[str, float]:
"""Extract (answer_text, probability) from a verbalized confidence response.

Handles both large-model format (Guess: ... / Probability: ...) and
small-model format (Answer: ... / Probability: ...). Also accepts
"Confidence:" and "P:" as alternatives to "Probability:", and values
expressed as percentages (e.g. "85%" → 0.85). The probability is
clamped to [0.0, 1.0].

If parsing fails for any reason this function returns (raw_response, 0.0)
so that callers never receive an exception. A score of 0.0 will be
treated as below threshold and discarded by Artemis.
"""
try:
lines = raw_response.strip().splitlines()

# Find the last line that matches a probability pattern.
prob_line_index = None
probability = 0.0
for i in range(len(lines) - 1, -1, -1):
m = _PROBABILITY_LINE_RE.search(lines[i])
if m:
prob_line_index = i
raw_value = float(m.group(1))
is_percent = bool(m.group(2) and m.group(2).strip() == "%")
if is_percent:
probability = raw_value / 100.0
else:
probability = raw_value
probability = max(0.0, min(1.0, probability))
break

if prob_line_index is None:
# No probability line found — safe fallback.
return raw_response, 0.0

# Everything before the probability line is the answer block.
answer_lines = lines[:prob_line_index]

# Strip the "Answer:" / "Guess:" prefix from the first line if present.
if answer_lines:
answer_lines[0] = _ANSWER_PREFIX_RE.sub("", answer_lines[0])

answer_text = "\n".join(answer_lines).strip()

# If nothing is left after stripping, fall back to the raw response.
if not answer_text:
answer_text = raw_response

return answer_text, probability

except Exception: # pylint: disable=broad-except
return raw_response, 0.0
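
A usage sketch of the happy path (sample responses invented for illustration):

raw = "Guess: A mutex ensures only one thread enters the critical section.\nProbability: 0.77"
answer, confidence = parse_confidence_response(raw)
# answer == "A mutex ensures only one thread enters the critical section."
# confidence == 0.77

# Percentages are normalized and the "Answer:" prefix is stripped as well:
_, c = parse_confidence_response("Answer: See the course FAQ.\nConfidence: 85%")
# c == 0.85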