Validation¶
GraphBit provides comprehensive validation capabilities to ensure workflow integrity, data quality, and execution reliability. This guide covers all aspects of validation in GraphBit workflows.
Overview¶
GraphBit validation operates at multiple levels: - Workflow Validation: Structure and connectivity validation - Input Validation: Data format and content validation
- Execution Validation: Runtime validation and error handling - Configuration Validation: LLM and embedding configuration validation - Output Validation: Result quality and format validation
Workflow Validation¶
Basic Workflow Validation¶
import graphbit
# Initialize GraphBit
graphbit.init()
def validate_workflow_basic():
"""Basic workflow validation example."""
# Create a workflow
workflow = graphbit.Workflow("Basic Validation Test")
# Add nodes
processor = graphbit.Node.agent(
name="Data Processor",
prompt="Process this data: {input}",
agent_id="processor"
)
validator = graphbit.Node.agent(
name="Result Validator",
prompt="Validate these results: {processed_data}",
agent_id="validator"
)
processor_id = workflow.add_node(processor)
validator_id = workflow.add_node(validator)
# Connect nodes
workflow.connect(processor_id, validator_id)
# Validate workflow structure
try:
workflow.validate()
print("โ
Workflow validation passed")
return True
except Exception as e:
print(f"โ Workflow validation failed: {e}")
return False
def validate_workflow_comprehensive():
"""Comprehensive workflow validation with detailed checks."""
workflow = graphbit.Workflow("Comprehensive Validation Test")
# Create nodes with various types
input_node = graphbit.Node.agent(
name="Input Handler",
prompt="Handle input: {input}",
agent_id="input_handler"
)
transform_node = graphbit.Node.transform(
name="Data Transformer",
transformation="uppercase"
)
condition_node = graphbit.Node.condition(
name="Quality Check",
expression="quality_score > 0.8"
)
output_node = graphbit.Node.agent(
name="Output Generator",
prompt="Generate output: {transformed_data}",
agent_id="output_generator"
)
# Add nodes to workflow
input_id = workflow.add_node(input_node)
transform_id = workflow.add_node(transform_node)
condition_id = workflow.add_node(condition_node)
output_id = workflow.add_node(output_node)
# Create connections
workflow.connect(input_id, transform_id)
workflow.connect(transform_id, condition_id)
workflow.connect(condition_id, output_id)
# Perform validation
validation_results = {
"structure_valid": False,
"nodes_valid": False,
"connections_valid": False,
"no_cycles": False
}
try:
# Basic structure validation
workflow.validate()
validation_results["structure_valid"] = True
# Additional validation checks would go here
# (These are conceptual - actual implementation depends on GraphBit internals)
validation_results["nodes_valid"] = True
validation_results["connections_valid"] = True
validation_results["no_cycles"] = True
print("โ
Comprehensive workflow validation passed")
print(f"Validation results: {validation_results}")
return True
except Exception as e:
print(f"โ Comprehensive workflow validation failed: {e}")
print(f"Validation results: {validation_results}")
return False
Input Validation¶
Data Format Validation¶
def create_input_validation_workflow():
"""Create workflow with robust input validation."""
workflow = graphbit.Workflow("Input Validation Workflow")
# Input validator node
input_validator = graphbit.Node.agent(
name="Input Validator",
prompt="""
Validate the following input data:
Input: {input}
Check for:
1. Required fields are present
2. Data types are correct
3. Values are within expected ranges
4. Format follows expected patterns
Return validation results with:
- is_valid: true/false
- errors: list of validation errors
- warnings: list of validation warnings
- sanitized_input: cleaned input data
""",
agent_id="input_validator"
)
# Data sanitizer
data_sanitizer = graphbit.Node.agent(
name="Data Sanitizer",
prompt="""
Sanitize and clean the validated input:
Validation Results: {validation_results}
If validation passed:
1. Remove any unsafe content
2. Normalize data formats
3. Apply standard transformations
4. Return clean data
If validation failed:
1. Attempt data recovery where possible
2. Provide fallback values for missing data
3. Flag critical issues that need attention
""",
agent_id="data_sanitizer"
)
# Build validation workflow
validator_id = workflow.add_node(input_validator)
sanitizer_id = workflow.add_node(data_sanitizer)
workflow.connect(validator_id, sanitizer_id)
return workflow
def validate_input_data(data, expected_schema=None):
"""Validate input data against expected schema."""
validation_results = {
"is_valid": True,
"errors": [],
"warnings": [],
"sanitized_data": data
}
# Basic type validation
if data is None:
validation_results["is_valid"] = False
validation_results["errors"].append("Input data is None")
return validation_results
# String validation
if isinstance(data, str):
if len(data.strip()) == 0:
validation_results["is_valid"] = False
validation_results["errors"].append("Input string is empty")
elif len(data) > 10000: # Arbitrary limit
validation_results["warnings"].append("Input string is very long")
# Sanitize string
validation_results["sanitized_data"] = data.strip()
# Dictionary validation
elif isinstance(data, dict):
if expected_schema:
for required_field in expected_schema.get("required", []):
if required_field not in data:
validation_results["is_valid"] = False
validation_results["errors"].append(f"Required field '{required_field}' missing")
# Sanitize dictionary
sanitized_dict = {}
for key, value in data.items():
if isinstance(value, str):
sanitized_dict[key] = value.strip()
else:
sanitized_dict[key] = value
validation_results["sanitized_data"] = sanitized_dict
# List validation
elif isinstance(data, list):
if len(data) == 0:
validation_results["warnings"].append("Input list is empty")
elif len(data) > 1000: # Arbitrary limit
validation_results["warnings"].append("Input list is very large")
return validation_results
def example_input_validation():
"""Example of input validation in practice."""
# Test various input types
test_inputs = [
{"text": "Hello, world!", "priority": 1},
{"text": "", "priority": "high"}, # Invalid: empty text, wrong type
None, # Invalid: null input
{"text": "Valid input", "priority": 2, "metadata": {"source": "test"}},
[] # Warning: empty list
]
schema = {
"required": ["text", "priority"],
"types": {
"text": str,
"priority": int
}
}
for i, test_input in enumerate(test_inputs):
print(f"\nValidating input {i + 1}: {test_input}")
result = validate_input_data(test_input, schema)
if result["is_valid"]:
print("โ
Validation passed")
else:
print("โ Validation failed")
if result["errors"]:
print(f"Errors: {result['errors']}")
if result["warnings"]:
print(f"Warnings: {result['warnings']}")
Configuration Validation¶
LLM Configuration Validation¶
import os
def validate_llm_configuration(config):
"""Validate LLM configuration."""
validation_result = {
"is_valid": True,
"errors": [],
"warnings": []
}
try:
# Test OpenAI configuration
if hasattr(config, 'provider') and config.provider == 'openai':
openai_config = graphbit.LlmConfig.openai(
api_key=os.getenv("OPENAI_API_KEY"),
model="gpt-4o-mini"
)
# Create client to test configuration
client = graphbit.LlmClient(openai_config)
# Test basic connectivity
try:
client.warmup()
print("โ
OpenAI configuration valid")
except Exception as e:
validation_result["is_valid"] = False
validation_result["errors"].append(f"OpenAI connection failed: {e}")
# Test Anthropic configuration
elif hasattr(config, 'provider') and config.provider == 'anthropic':
try:
anthropic_config = graphbit.LlmConfig.anthropic(
api_key=os.getenv("ANTHROPIC_API_KEY"),
model="claude-3-5-sonnet-20241022"
)
client = graphbit.LlmClient(anthropic_config)
client.warmup()
print("โ
Anthropic configuration valid")
except Exception as e:
validation_result["is_valid"] = False
validation_result["errors"].append(f"Anthropic connection failed: {e}")
# Test Ollama configuration
elif hasattr(config, 'provider') and config.provider == 'ollama':
try:
ollama_config = graphbit.LlmConfig.ollama(
model="llama3.2"
)
client = graphbit.LlmClient(ollama_config)
client.warmup()
print("โ
Ollama configuration valid")
except Exception as e:
validation_result["warnings"].append(f"Ollama connection issue: {e}")
except Exception as e:
validation_result["is_valid"] = False
validation_result["errors"].append(f"Configuration validation failed: {e}")
return validation_result
def validate_embedding_configuration():
"""Validate embedding configuration."""
validation_result = {
"is_valid": True,
"errors": [],
"warnings": []
}
try:
# Test OpenAI embeddings
openai_config = graphbit.EmbeddingConfig.openai(
api_key=os.getenv("OPENAI_API_KEY")
)
client = graphbit.EmbeddingClient(openai_config)
# Test embedding generation
test_embedding = client.embed("Test embedding")
if test_embedding and len(test_embedding) > 0:
print("โ
OpenAI embedding configuration valid")
else:
validation_result["warnings"].append("OpenAI embedding returned empty result")
# Test HuggingFace embeddings
try:
hf_config = graphbit.EmbeddingConfig.huggingface(
model="sentence-transformers/all-MiniLM-L6-v2"
)
hf_client = graphbit.EmbeddingClient(hf_config)
hf_embedding = hf_client.embed("Test embedding")
if hf_embedding and len(hf_embedding) > 0:
print("โ
HuggingFace embedding configuration valid")
else:
validation_result["warnings"].append("HuggingFace embedding returned empty result")
except Exception as e:
validation_result["warnings"].append(f"HuggingFace embeddings not available: {e}")
except Exception as e:
validation_result["is_valid"] = False
validation_result["errors"].append(f"Embedding configuration validation failed: {e}")
return validation_result
Execution Validation¶
Runtime Validation¶
def create_execution_validation_workflow():
"""Create workflow with execution validation."""
workflow = graphbit.Workflow("Execution Validation Workflow")
# Pre-execution validator
pre_validator = graphbit.Node.agent(
name="Pre-Execution Validator",
prompt="""
Validate input before processing:
Input: {input}
Check:
1. Input is processable
2. Required resources are available
3. No security concerns
4. Expected format is correct
Return validation status and any concerns.
""",
agent_id="pre_validator"
)
# Main processor
processor = graphbit.Node.agent(
name="Main Processor",
prompt="""
Process the validated input:
Validated Input: {validated_input}
Perform the main processing task and include quality metrics.
""",
agent_id="main_processor"
)
# Post-execution validator
post_validator = graphbit.Node.agent(
name="Post-Execution Validator",
prompt="""
Validate processing results:
Processing Results: {processing_results}
Check:
1. Results are complete
2. Quality meets standards
3. Format is correct
4. No errors in output
Return final validation and any needed corrections.
""",
agent_id="post_validator"
)
# Build validation workflow
pre_id = workflow.add_node(pre_validator)
proc_id = workflow.add_node(processor)
post_id = workflow.add_node(post_validator)
workflow.connect(pre_id, proc_id)
workflow.connect(proc_id, post_id)
return workflow
def validate_execution_result(result):
"""Validate workflow execution result."""
validation_report = {
"execution_successful": False,
"output_valid": False,
"quality_score": 0.0,
"issues": [],
"recommendations": []
}
# Check if execution completed
if result.is_completed():
validation_report["execution_successful"] = True
# Get output
output = result.output()
# Validate output
if output:
validation_report["output_valid"] = True
# Basic quality checks
if isinstance(output, str):
if len(output.strip()) > 0:
validation_report["quality_score"] += 0.5
if len(output) > 10: # Meaningful length
validation_report["quality_score"] += 0.3
# Check for common error indicators
error_indicators = ["error", "failed", "invalid", "exception"]
if not any(indicator in output.lower() for indicator in error_indicators):
validation_report["quality_score"] += 0.2
else:
validation_report["issues"].append("Output contains error indicators")
elif isinstance(output, dict):
if output: # Non-empty dict
validation_report["quality_score"] += 0.7
# Check for error fields
if "error" in output or "exception" in output:
validation_report["issues"].append("Output contains error fields")
else:
validation_report["quality_score"] += 0.3
else:
validation_report["issues"].append("Output is empty")
else:
validation_report["issues"].append(f"Execution failed: {result.error()}")
# Generate recommendations
if validation_report["quality_score"] < 0.5:
validation_report["recommendations"].append("Consider improving input quality")
if validation_report["issues"]:
validation_report["recommendations"].append("Review and fix identified issues")
return validation_report
Output Validation¶
Result Quality Validation¶
def create_output_validation_workflow():
"""Create workflow with comprehensive output validation."""
workflow = graphbit.Workflow("Output Validation Workflow")
# Content generator
generator = graphbit.Node.agent(
name="Content Generator",
prompt="Generate content based on: {input}",
agent_id="generator"
)
# Quality checker
quality_checker = graphbit.Node.agent(
name="Quality Checker",
prompt="""
Evaluate the quality of this generated content:
Content: {generated_content}
Rate on a scale of 1-10 for:
1. Accuracy
2. Completeness
3. Clarity
4. Relevance
5. Coherence
Provide overall quality score and specific feedback.
""",
agent_id="quality_checker"
)
# Format validator
format_validator = graphbit.Node.agent(
name="Format Validator",
prompt="""
Validate the format of this content:
Content: {quality_checked_content}
Check:
1. Proper structure
2. Correct formatting
3. Standard compliance
4. Readability
Return validation results and any format corrections needed.
""",
agent_id="format_validator"
)
# Build output validation workflow
gen_id = workflow.add_node(generator)
quality_id = workflow.add_node(quality_checker)
format_id = workflow.add_node(format_validator)
workflow.connect(gen_id, quality_id)
workflow.connect(quality_id, format_id)
return workflow
def validate_output_quality(output, criteria=None):
"""Validate output quality against specific criteria."""
if criteria is None:
criteria = {
"min_length": 10,
"max_length": 10000,
"required_elements": [],
"forbidden_elements": ["error", "exception", "failed"]
}
quality_report = {
"overall_score": 0.0,
"length_valid": False,
"content_valid": False,
"format_valid": False,
"issues": [],
"suggestions": []
}
if not output:
quality_report["issues"].append("Output is empty")
return quality_report
output_str = str(output)
# Length validation
if criteria["min_length"] <= len(output_str) <= criteria["max_length"]:
quality_report["length_valid"] = True
quality_report["overall_score"] += 0.3
else:
quality_report["issues"].append(f"Length {len(output_str)} outside range {criteria['min_length']}-{criteria['max_length']}")
# Content validation
content_score = 0.0
# Check for required elements
for element in criteria.get("required_elements", []):
if element.lower() in output_str.lower():
content_score += 0.2
else:
quality_report["issues"].append(f"Missing required element: {element}")
# Check for forbidden elements
forbidden_found = False
for element in criteria.get("forbidden_elements", []):
if element.lower() in output_str.lower():
quality_report["issues"].append(f"Contains forbidden element: {element}")
forbidden_found = True
if not forbidden_found:
content_score += 0.3
quality_report["content_valid"] = content_score > 0.0
quality_report["overall_score"] += min(content_score, 0.4)
# Format validation (basic)
if output_str.strip():
quality_report["format_valid"] = True
quality_report["overall_score"] += 0.3
# Generate suggestions
if quality_report["overall_score"] < 0.5:
quality_report["suggestions"].append("Consider regenerating output with better parameters")
if not quality_report["content_valid"]:
quality_report["suggestions"].append("Review content requirements and regenerate")
return quality_report
Validation Testing Framework¶
Comprehensive Validation Testing¶
def create_validation_test_suite():
"""Create comprehensive validation test suite."""
class ValidationTestSuite:
def __init__(self):
self.test_results = {}
def run_workflow_validation_tests(self):
"""Run workflow validation tests."""
print("๐งช Running Workflow Validation Tests")
tests = {
"basic_workflow": self.test_basic_workflow_validation,
"complex_workflow": self.test_complex_workflow_validation,
"invalid_workflow": self.test_invalid_workflow_validation
}
for test_name, test_func in tests.items():
try:
result = test_func()
self.test_results[test_name] = {"passed": result, "error": None}
status = "โ
PASSED" if result else "โ FAILED"
print(f" {test_name}: {status}")
except Exception as e:
self.test_results[test_name] = {"passed": False, "error": str(e)}
print(f" {test_name}: โ ERROR - {e}")
def test_basic_workflow_validation(self):
"""Test basic workflow validation."""
return validate_workflow_basic()
def test_complex_workflow_validation(self):
"""Test complex workflow validation."""
return validate_workflow_comprehensive()
def test_invalid_workflow_validation(self):
"""Test validation of invalid workflow."""
# Create intentionally invalid workflow
workflow = graphbit.Workflow("Invalid Test Workflow")
# Add node but don't connect it properly (this may or may not be invalid depending on GraphBit's rules)
node = graphbit.Node.agent(
name="Isolated Node",
prompt="Process: {input}",
agent_id="isolated"
)
workflow.add_node(node)
# Try to validate - this should pass since a single node workflow is valid
try:
workflow.validate()
return True # Single nodes are actually valid
except Exception:
return True # Expected to fail
def run_input_validation_tests(self):
"""Run input validation tests."""
print("๐งช Running Input Validation Tests")
test_cases = [
{"input": "Valid string", "expected": True},
{"input": "", "expected": False},
{"input": None, "expected": False},
{"input": {"text": "Valid", "priority": 1}, "expected": True},
{"input": {"text": "", "priority": "invalid"}, "expected": False}
]
passed = 0
total = len(test_cases)
for i, case in enumerate(test_cases):
result = validate_input_data(case["input"])
actual_valid = result["is_valid"]
expected_valid = case["expected"]
if actual_valid == expected_valid:
print(f" Test {i+1}: โ
PASSED")
passed += 1
else:
print(f" Test {i+1}: โ FAILED - Expected {expected_valid}, got {actual_valid}")
self.test_results["input_validation"] = {"passed": passed, "total": total}
print(f"Input validation: {passed}/{total} tests passed")
def run_configuration_validation_tests(self):
"""Run configuration validation tests."""
print("๐งช Running Configuration Validation Tests")
# Test LLM configuration
try:
config = graphbit.LlmConfig.openai(
api_key=os.getenv("OPENAI_API_KEY") or "test-key",
model="gpt-4o-mini"
)
# This is a conceptual test - actual validation depends on implementation
print(" LLM Config: โ
PASSED")
self.test_results["llm_config"] = {"passed": True}
except Exception as e:
print(f" LLM Config: โ FAILED - {e}")
self.test_results["llm_config"] = {"passed": False, "error": str(e)}
# Test embedding configuration
try:
embed_config = graphbit.EmbeddingConfig.openai(
api_key=os.getenv("OPENAI_API_KEY") or "test-key"
)
print(" Embedding Config: โ
PASSED")
self.test_results["embedding_config"] = {"passed": True}
except Exception as e:
print(f" Embedding Config: โ FAILED - {e}")
self.test_results["embedding_config"] = {"passed": False, "error": str(e)}
def run_all_tests(self):
"""Run all validation tests."""
print("๐ Starting Comprehensive Validation Test Suite")
print("=" * 50)
self.run_workflow_validation_tests()
print()
self.run_input_validation_tests()
print()
self.run_configuration_validation_tests()
print("\n" + "=" * 50)
print("๐ Test Suite Results Summary")
total_passed = 0
total_tests = 0
for test_name, result in self.test_results.items():
if isinstance(result, dict) and "passed" in result:
if isinstance(result["passed"], bool):
total_tests += 1
if result["passed"]:
total_passed += 1
status = "โ
" if result["passed"] else "โ"
else:
# For input validation which has passed/total format
total_tests += result.get("total", 0)
total_passed += result.get("passed", 0)
status = "โ
" if result["passed"] == result.get("total", 0) else "โ"
print(f"{status} {test_name}: {result}")
success_rate = (total_passed / total_tests * 100) if total_tests > 0 else 0
print(f"\nOverall Success Rate: {success_rate:.1f}% ({total_passed}/{total_tests})")
return self.test_results
return ValidationTestSuite()
def example_comprehensive_validation():
"""Example of comprehensive validation testing."""
# Initialize GraphBit
graphbit.init()
# Create test suite
test_suite = create_validation_test_suite()
# Run all tests
results = test_suite.run_all_tests()
return results
Best Practices¶
1. Validation Strategy¶
def get_validation_best_practices():
"""Get best practices for validation."""
best_practices = {
"early_validation": "Validate inputs as early as possible",
"comprehensive_checks": "Use multiple validation layers",
"clear_error_messages": "Provide actionable error messages",
"graceful_degradation": "Handle validation failures gracefully",
"performance_balance": "Balance validation thoroughness with performance",
"automated_testing": "Automate validation testing in CI/CD",
"continuous_monitoring": "Monitor validation metrics in production"
}
for practice, description in best_practices.items():
print(f"โ
{practice.replace('_', ' ').title()}: {description}")
return best_practices
2. Error Handling¶
def handle_validation_error(validation_result, context="validation"):
"""Handle validation errors appropriately."""
if validation_result.get("is_valid", True):
return True
errors = validation_result.get("errors", [])
warnings = validation_result.get("warnings", [])
# Log errors
print(f"โ {context.title()} failed:")
for error in errors:
print(f" - {error}")
# Log warnings
if warnings:
print(f"โ ๏ธ {context.title()} warnings:")
for warning in warnings:
print(f" - {warning}")
# Determine if execution should continue
critical_errors = [error for error in errors if "critical" in error.lower()]
if critical_errors:
print("โ Critical errors found, stopping execution")
return False
elif len(errors) > 0:
print("โ ๏ธ Non-critical errors found, proceeding with caution")
return True
else:
print("โ
Only warnings found, continuing execution")
return True
Usage Examples¶
Complete Validation Example¶
def example_complete_validation():
"""Complete example of validation in practice."""
# Initialize GraphBit
graphbit.init()
print("๐ Starting Complete Validation Example")
# 1. Input validation
test_input = {"text": "Hello, world!", "priority": 1}
input_validation = validate_input_data(test_input)
if not handle_validation_error(input_validation, "input validation"):
return False
# 2. Workflow validation
workflow = create_input_validation_workflow()
if not validate_workflow_basic():
print("โ Workflow validation failed")
return False
# 3. Configuration validation
try:
llm_config = graphbit.LlmConfig.openai(
api_key=os.getenv("OPENAI_API_KEY"),
model="gpt-4o-mini"
)
executor = graphbit.Executor(llm_config)
print("โ
Configuration validation passed")
except Exception as e:
print(f"โ Configuration validation failed: {e}")
return False
# 4. Execution validation
try:
result = executor.execute(workflow)
execution_validation = validate_execution_result(result)
if not execution_validation["execution_successful"]:
print("โ Execution validation failed")
return False
# 5. Output validation
output_validation = validate_output_quality(result.output())
if output_validation["overall_score"] < 0.5:
print(f"โ ๏ธ Output quality score low: {output_validation['overall_score']}")
else:
print(f"โ
Output quality good: {output_validation['overall_score']}")
print("โ
Complete validation example passed")
return True
except Exception as e:
print(f"โ Execution failed: {e}")
return False
if __name__ == "__main__":
example_complete_validation()
What's Next¶
- Learn about Reliability for building robust validated systems
- Explore Monitoring for tracking validation metrics
- Check Performance for optimizing validation overhead
- See Error Handling for comprehensive error management