# my-gradio-app/feedback/feedback_analyzer.py
# Nguyen Trong Lap
# Recreate history without binary blobs
# eeb0f9c
"""
Feedback Analyzer
Analyze feedback patterns and generate insights for improvement
"""
import json
import logging
import re
from collections import defaultdict, Counter
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, Any, List, Optional
class FeedbackAnalyzer:
    """Analyze collected feedback to identify improvement opportunities.

    The analyzer reads everything through the collector object passed to the
    constructor. It calls ``get_feedback_stats``, ``get_low_rated_responses``
    and ``get_corrections`` on it and reads its ``storage_dir`` attribute.
    """

    # Share of 1-2 star ratings above which an agent is flagged.
    _LOW_RATING_SHARE = 0.2

    # A correction reason must occur at least this often to count as a pattern.
    _MIN_PATTERN_OCCURRENCES = 2

    # Keyword searched in a (lower-cased) comment -> issue label that is reported.
    _ISSUE_KEYWORDS = {
        'incorrect': 'Thông tin không chính xác',
        'wrong': 'Câu trả lời sai',
        'unhelpful': 'Không hữu ích',
        'confusing': 'Khó hiểu',
        'incomplete': 'Thiếu thông tin',
        'too long': 'Quá dài dòng',
        'too short': 'Quá ngắn gọn',
        'rude': 'Không lịch sự',
        'generic': 'Quá chung chung',
    }

    # Correction reason -> canned improvement suggestion.
    _SUGGESTIONS = {
        'incorrect_info': 'Verify medical information against authoritative sources',
        'missing_context': 'Ask more follow-up questions to gather context',
        'tone': 'Adjust tone to be more empathetic and supportive',
        'too_generic': 'Provide more personalized and specific advice',
        'calculation_error': 'Double-check all numerical calculations',
        'outdated_info': 'Update knowledge base with latest medical guidelines',
    }

    # Sort rank of insight priorities; lower rank is reported first.
    _PRIORITY_ORDER = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3}

    def __init__(self, feedback_collector):
        """Store the collector that supplies all feedback data.

        Args:
            feedback_collector: Object providing the feedback query methods
                and the ``storage_dir`` attribute used by this class.
        """
        self.collector = feedback_collector

    @staticmethod
    def _rating_count(distribution: Any, stars: int) -> Any:
        """Return the number of ratings recorded for ``stars``.

        The bucket is looked up by integer key first and by string key second
        (keys turn into strings when a dict goes through JSON). A bucket that
        does not exist counts as zero instead of raising.
        """
        try:
            return distribution[stars]
        except (KeyError, IndexError):
            pass
        try:
            return distribution[str(stars)]
        except (KeyError, IndexError, TypeError):
            return 0

    @classmethod
    def _low_rating_count(cls, distribution: Any) -> Any:
        """Return the combined number of 1-star and 2-star ratings."""
        return cls._rating_count(distribution, 1) + cls._rating_count(distribution, 2)

    @staticmethod
    def _correction_reason(correction: Dict) -> str:
        """Return the correction's reason, or ``'other'`` when absent or empty."""
        return correction.get('correction_reason') or 'other'

    def analyze_agent_performance(self, agent_name: str) -> Dict[str, Any]:
        """Build a performance analysis for one agent.

        Args:
            agent_name: Name of the agent to analyze.

        Returns:
            Dict with ``agent_name``, ``overall_rating``, ``total_feedback``,
            ``rating_distribution``, ``strengths``, ``weaknesses``,
            ``common_issues`` and ``improvement_suggestions``. The key
            ``correction_patterns`` is present only when the collector
            returned at least one correction.
        """
        stats = self.collector.get_feedback_stats(agent_name=agent_name)
        low_rated = self.collector.get_low_rated_responses(agent_name=agent_name)
        corrections = self.collector.get_corrections(agent_name=agent_name)

        analysis = {
            'agent_name': agent_name,
            'overall_rating': stats['average_rating'],
            'total_feedback': stats['total_ratings'],
            'rating_distribution': stats['rating_distribution'],
            'strengths': [],
            'weaknesses': [],
            'common_issues': [],
            'improvement_suggestions': [],
        }

        # Strength: high average rating.
        if stats['average_rating'] >= 4.0:
            analysis['strengths'].append("High overall satisfaction")

        # Weakness: too large a share of 1-2 star ratings.
        low_count = self._low_rating_count(stats['rating_distribution'])
        if low_count > stats['total_ratings'] * self._LOW_RATING_SHARE:
            analysis['weaknesses'].append("High number of low ratings (1-2 stars)")

        if low_rated:
            analysis['common_issues'] = self._extract_common_issues(low_rated)

        if corrections:
            correction_patterns = self._analyze_corrections(corrections)
            analysis['correction_patterns'] = correction_patterns
            analysis['improvement_suggestions'].extend(
                f"Improve {pattern['category']}: {pattern['suggestion']}"
                for pattern in correction_patterns
            )

        return analysis

    def _extract_common_issues(self, low_rated: List[Dict]) -> List[Dict[str, Any]]:
        """Count known complaint keywords in the comments of low-rated responses.

        Args:
            low_rated: Feedback records; only their ``comment`` entry is read.

        Returns:
            Up to five dicts (``issue``, ``frequency``, ``percentage``), most
            frequent first. ``percentage`` is relative to ``len(low_rated)``,
            i.e. it also counts records that carry no comment.
        """
        issue_counts = Counter()
        for record in low_rated:
            comment = record.get('comment')
            # Only non-empty text comments can be searched for keywords.
            if not comment or not isinstance(comment, str):
                continue
            comment_lower = comment.lower()
            for keyword, description in self._ISSUE_KEYWORDS.items():
                if keyword in comment_lower:
                    issue_counts[description] += 1

        total = len(low_rated)
        return [
            {
                'issue': issue,
                'frequency': count,
                'percentage': round(count / total * 100, 1),
            }
            for issue, count in issue_counts.most_common(5)
        ]

    def _analyze_corrections(self, corrections: List[Dict]) -> List[Dict[str, Any]]:
        """Group corrections by reason and keep the reasons that repeat.

        Args:
            corrections: Correction records; a record without a reason is
                grouped under ``'other'``.

        Returns:
            One dict (``category``, ``count``, ``suggestion``) per reason that
            occurs at least twice.
        """
        by_reason = defaultdict(list)
        for correction in corrections:
            by_reason[self._correction_reason(correction)].append(correction)

        return [
            {
                'category': reason,
                'count': len(items),
                'suggestion': self._generate_suggestion(reason, items),
            }
            for reason, items in by_reason.items()
            if len(items) >= self._MIN_PATTERN_OCCURRENCES
        ]

    def _generate_suggestion(self, reason: str, corrections: List[Dict]) -> str:
        """Return the improvement suggestion for a correction reason.

        Args:
            reason: Correction reason to look up.
            corrections: Corrections with that reason (currently not used).

        Returns:
            The canned suggestion for a known reason, otherwise a generic
            sentence that names the reason.
        """
        return self._SUGGESTIONS.get(
            reason, f'Review and improve handling of: {reason}'
        )

    def get_trending_issues(self, days: int = 7) -> List[Dict[str, Any]]:
        """Extract common issues from recent low-rated feedback files.

        Reads every ``*.json`` file in ``<storage_dir>/ratings`` and keeps the
        records whose timestamp lies within the last ``days`` days and whose
        rating is 2 or lower. A file that cannot be read or parsed, or that has
        no usable timestamp, is logged and skipped so that a single bad file
        does not abort the analysis.

        Args:
            days: Number of days to look back from now.

        Returns:
            Issue list in the format produced by ``_extract_common_issues``.
        """
        cutoff_date = datetime.now() - timedelta(days=days)
        ratings_dir = self.collector.storage_dir / "ratings"
        recent_low_rated = []

        # Sorted so the result does not depend on directory listing order.
        for file_path in sorted(ratings_dir.glob("*.json")):
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                timestamp = datetime.fromisoformat(data.get('timestamp', ''))
            except (OSError, ValueError, TypeError, AttributeError) as exc:
                logging.getLogger(__name__).warning(
                    "Skipping unusable rating file %s: %s", file_path, exc
                )
                continue

            # The cutoff is naive local time; convert aware timestamps to the
            # same representation because naive/aware comparison raises.
            if timestamp.tzinfo is not None:
                timestamp = timestamp.astimezone().replace(tzinfo=None)

            rating = data.get('rating', 5)
            if not isinstance(rating, (int, float)):
                continue
            if timestamp >= cutoff_date and rating <= 2:
                recent_low_rated.append(data)

        return self._extract_common_issues(recent_low_rated)

    def compare_agents(self) -> Dict[str, Any]:
        """Rank all agents by their average rating.

        Returns:
            Dict with ``agents`` (ranking, best first), ``best_agent`` and
            ``worst_agent`` (``None`` when there are no agents) and the
            overall ``average_rating``.
        """
        stats = self.collector.get_feedback_stats()

        agent_rankings = [
            {
                'agent': agent,
                'average_rating': data['average'],
                'total_feedback': data['count'],
            }
            for agent, data in stats['by_agent'].items()
        ]
        agent_rankings.sort(key=lambda entry: entry['average_rating'], reverse=True)

        return {
            'agents': agent_rankings,
            'best_agent': agent_rankings[0] if agent_rankings else None,
            'worst_agent': agent_rankings[-1] if agent_rankings else None,
            'average_rating': stats['average_rating'],
        }

    def generate_improvement_report(self, agent_name: Optional[str] = None) -> str:
        """Generate a Markdown-style improvement report.

        Args:
            agent_name: Agent to report on; when empty or ``None`` a
                comparison of all agents is produced instead.

        Returns:
            The formatted report text.
        """
        if agent_name:
            return self._format_agent_report(agent_name)
        return self._format_comparison_report()

    def _format_agent_report(self, agent_name: str) -> str:
        """Format the single-agent report from ``analyze_agent_performance``."""
        analysis = self.analyze_agent_performance(agent_name)
        distribution = analysis['rating_distribution']

        # The report starts with an empty line, hence the leading "".
        lines = [
            "",
            f"# Feedback Analysis Report: {agent_name}",
            f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}",
            "## Overall Performance",
            f"- Average Rating: {analysis['overall_rating']:.1f}/5.0",
            f"- Total Feedback: {analysis['total_feedback']}",
            "## Rating Distribution",
        ]
        for stars in range(5, 0, -1):
            unit = "star" if stars == 1 else "stars"
            count = self._rating_count(distribution, stars)
            lines.append(f"- {'⭐' * stars} ({stars} {unit}): {count}")

        lines.append("## Strengths")
        lines.extend(f"- ✅ {strength}" for strength in analysis['strengths'])

        lines.extend(["", "## Weaknesses"])
        lines.extend(f"- ⚠️ {weakness}" for weakness in analysis['weaknesses'])

        if analysis['common_issues']:
            lines.extend(["", "## Common Issues"])
            lines.extend(
                f"- {issue['issue']}: {issue['frequency']} occurrences ({issue['percentage']}%)"
                for issue in analysis['common_issues']
            )

        if analysis['improvement_suggestions']:
            lines.extend(["", "## Improvement Suggestions"])
            lines.extend(
                f"{i}. {suggestion}"
                for i, suggestion in enumerate(analysis['improvement_suggestions'], 1)
            )

        return "\n".join(lines) + "\n"

    def _format_comparison_report(self) -> str:
        """Format the all-agents report from ``compare_agents``."""
        comparison = self.compare_agents()

        # The report starts with an empty line, hence the leading "".
        lines = [
            "",
            "# Overall Feedback Analysis Report",
            f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}",
            "## System-wide Performance",
            f"- Average Rating: {comparison['average_rating']:.1f}/5.0",
            "## Agent Rankings",
        ]
        lines.extend(
            f"{i}. {agent['agent']}: {agent['average_rating']:.1f}/5.0 ({agent['total_feedback']} feedback)"
            for i, agent in enumerate(comparison['agents'], 1)
        )

        best = comparison['best_agent']
        if best:
            lines.extend([
                "",
                f"🏆 Best Performing: {best['agent']} ({best['average_rating']:.1f}/5.0)",
            ])
        worst = comparison['worst_agent']
        if worst:
            lines.append(
                f"⚠️ Needs Improvement: {worst['agent']} ({worst['average_rating']:.1f}/5.0)"
            )

        return "\n".join(lines) + "\n"

    def get_actionable_insights(self, agent_name: str, limit: int = 5) -> List[Dict[str, Any]]:
        """Return the most important actionable insights for an agent.

        Up to three insights are produced: the most common low-rating issue,
        the availability of user corrections, and a dissatisfaction alert when
        more than 20% of the ratings are 1-2 stars. They are ordered by
        priority (``critical`` before ``high``) before ``limit`` is applied, so
        a small ``limit`` never drops the critical insight in favour of a
        lower-priority one.

        Args:
            agent_name: Agent to analyze.
            limit: Maximum number of insights to return; values below zero
                are treated as zero.

        Returns:
            List of insight dicts with the keys ``priority``, ``category``,
            ``issue``, ``frequency``, ``action`` and ``examples``.
        """
        analysis = self.analyze_agent_performance(agent_name)
        low_rated = self.collector.get_low_rated_responses(agent_name=agent_name, limit=20)
        corrections = self.collector.get_corrections(agent_name=agent_name, limit=20)

        insights = []

        # Insight 1: most common issue among low-rated responses.
        if analysis['common_issues']:
            top_issue = analysis['common_issues'][0]
            insights.append({
                'priority': 'high',
                'category': 'quality',
                'issue': top_issue['issue'],
                'frequency': top_issue['frequency'],
                'action': f"Review and fix responses related to: {top_issue['issue']}",
                # Records without a stored user message yield an empty example.
                'examples': [r.get('user_message', '') for r in low_rated[:3]],
            })

        # Insight 2: user corrections are available.
        if corrections:
            insights.append({
                'priority': 'high',
                'category': 'accuracy',
                'issue': 'User corrections available',
                'frequency': len(corrections),
                'action': 'Incorporate user corrections into training data',
                'examples': [self._correction_reason(c) for c in corrections[:3]],
            })

        # Insight 3: share of 1-2 star ratings, taken from the analysis that
        # was already fetched above.
        low_count = self._low_rating_count(analysis['rating_distribution'])
        low_rating_pct = low_count / max(analysis['total_feedback'], 1) * 100
        if low_rating_pct > self._LOW_RATING_SHARE * 100:
            insights.append({
                'priority': 'critical',
                'category': 'overall',
                'issue': f'{low_rating_pct:.1f}% of ratings are 1-2 stars',
                'frequency': low_count,
                'action': 'Urgent review needed - high dissatisfaction rate',
                'examples': [],
            })

        # Stable sort: insights of equal priority keep their creation order.
        insights.sort(
            key=lambda insight: self._PRIORITY_ORDER.get(
                insight['priority'], len(self._PRIORITY_ORDER)
            )
        )
        return insights[:max(limit, 0)]
def get_feedback_analyzer(feedback_collector) -> FeedbackAnalyzer:
    """Build a ``FeedbackAnalyzer`` bound to the given collector.

    Args:
        feedback_collector: Collector object handed to the analyzer's
            constructor.

    Returns:
        A newly constructed ``FeedbackAnalyzer``.
    """
    analyzer = FeedbackAnalyzer(feedback_collector)
    return analyzer