Skip to content

Fix DataFrame Error Handling for Non-existent Columns #625 #737

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions examples/bug_simulation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import traceback

import pandas as pd

from preswald.engine.runner import validate_dataframe_operation


def simulate_bug_and_fix():
    """Print a three-scenario walkthrough of column access on a sample frame.

    The scenarios run in order against a small in-memory weather DataFrame:

    1. Filter on the non-existent ``value`` column with no validation and
       print the resulting error together with its traceback.
    2. Call ``validate_dataframe_operation`` for ``value`` first and print
       the ``ValueError`` it is expected to raise (only ``ValueError`` is
       handled in this scenario).
    3. Validate and filter on the existing ``Humidity`` column.

    Everything is written to stdout; the function returns ``None``.
    """
    print("🔍 Bug Simulation: Column 'value' not found in DataFrame")
    print("==================================================")

    # Small stand-in for the weather dataset; note there is no "value" column.
    weather_sample = {
        "Humidity": [0.81, 0.76, 0.68, 0.92, 0.95],
        "Temperature": [21.2, 22.1, 19.5, 18.7, 16.3],
        "Summary": ["Partly Cloudy", "Mostly Cloudy", "Clear", "Foggy", "Rain"],
    }
    df = pd.DataFrame(weather_sample)

    print("\n📊 Sample DataFrame:")
    print(df.head())
    column_listing = ", ".join(df.columns)
    print("\n📋 Available columns:", column_listing)

    # Both failing scenarios compare against the same cutoff.
    value_cutoff = 50

    # --- Scenario 1: unvalidated access to the missing column -------------
    print("\n❌ Scenario 1: Original bug - trying to access 'value' column")
    try:
        # The column lookup itself raises, so nothing below it in the try
        # body is reached.
        missing_mask = df["value"] > value_cutoff
        print(df[missing_mask])
    except Exception as err:
        print(f"Error: {err}")
        frames = traceback.format_tb(err.__traceback__)
        print("Traceback:", "".join(frames))

    # --- Scenario 2: validate first, then attempt the same filter ---------
    print("\n✅ Scenario 2: With our fix - validation before operation")
    try:
        # Expected to raise ValueError before the filter is attempted.
        validate_dataframe_operation(df, "value", "filtering")
        missing_mask = df["value"] > value_cutoff
        print(df[missing_mask])
    except ValueError as err:
        print(f"Error: {err}")

    # --- Scenario 3: validated access to a column that exists -------------
    print("\n✅ Scenario 3: Correct usage with valid column")
    humidity_cutoff = 0.8
    try:
        validate_dataframe_operation(df, "Humidity", "filtering")
        humid_mask = df["Humidity"] > humidity_cutoff
        print(df[humid_mask])
    except Exception as err:
        print(f"Error: {err}")

    closing_lines = (
        "\n🎯 Summary of fixes:",
        "1. Added early validation of DataFrame operations",
        "2. Improved error messages with available columns",
        "3. Made error handling consistent between preview and published modes",
        "4. Added helpful suggestions to guide users",
    )
    for line in closing_lines:
        print(line)


# Run the walkthrough only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    simulate_bug_and_fix()
31 changes: 31 additions & 0 deletions examples/dataframe_validation_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from preswald import connect, get_df, slider, table, text
from preswald.engine.runner import validate_dataframe_operation


# Preswald example script: validate a column name before filtering on it.
text("# DataFrame Validation Example")

# Open the data connection, then load the sample dataset by source name.
connect()
df = get_df("weatherhistory_csv")

# List the dataset's columns so the reader can see which names are valid.
text("## Available Columns")
column_listing = ", ".join(df.columns)
text(f"Columns in the dataset: {column_listing}")

# Example 1: the column exists, so validation is followed by the filter.
text("## Example 1: Valid Column Access")
threshold = slider("Humidity Threshold", min_val=0, max_val=1, default=0.5)
validate_dataframe_operation(df, "Humidity", "filtering")
humidity_mask = df["Humidity"] > threshold
table(df[humidity_mask], title="Filtered by Humidity")

# Example 2: "value" is used as a non-existent column; a ValueError from the
# validation step is rendered as text instead of aborting the script.
text("## Example 2: Invalid Column Access")
threshold2 = slider("Value Threshold", min_val=0, max_val=100, default=50)
try:
    validate_dataframe_operation(df, "value", "filtering")
    value_mask = df["value"] > threshold2
    table(df[value_mask], title="Filtered by Value")
except ValueError as err:
    text(f"Error: {err!s}")
Loading