Spaces:

lapnt3
/

my-gradio-app

Runtime error

my-gradio-app / feedback /feedback_analyzer.py

Nguyen Trong Lap

Recreate history without binary blobs

eeb0f9c about 2 months ago

12.6 kB

	"""
	Feedback Analyzer
	Analyze feedback patterns and generate insights for improvement
	"""

	import json
	from datetime import datetime, timedelta
	from pathlib import Path
	from typing import Dict, Any, List, Optional
	from collections import defaultdict, Counter
	import re


	class FeedbackAnalyzer:
	    """Analyze feedback to identify improvement opportunities.

	    All data is read through the ``feedback_collector`` given to the
	    constructor. The methods below call ``get_feedback_stats``,
	    ``get_low_rated_responses`` and ``get_corrections`` on it and read its
	    ``storage_dir`` attribute (used as a path with a ``ratings`` sub-folder).
	    """

	    # Substring (matched against the lower-cased comment) -> issue label
	    # reported to the caller.
	    _ISSUE_KEYWORDS = {
	        'incorrect': 'Thông tin không chính xác',
	        'wrong': 'Câu trả lời sai',
	        'unhelpful': 'Không hữu ích',
	        'confusing': 'Khó hiểu',
	        'incomplete': 'Thiếu thông tin',
	        'too long': 'Quá dài dòng',
	        'too short': 'Quá ngắn gọn',
	        'rude': 'Không lịch sự',
	        'generic': 'Quá chung chung',
	    }

	    # Correction reason -> canned improvement suggestion.
	    _SUGGESTIONS = {
	        'incorrect_info': 'Verify medical information against authoritative sources',
	        'missing_context': 'Ask more follow-up questions to gather context',
	        'tone': 'Adjust tone to be more empathetic and supportive',
	        'too_generic': 'Provide more personalized and specific advice',
	        'calculation_error': 'Double-check all numerical calculations',
	        'outdated_info': 'Update knowledge base with latest medical guidelines',
	    }

	    # Sort rank of insight priorities; a lower rank is listed first.
	    _PRIORITY_ORDER = {'critical': 0, 'high': 1}

	    def __init__(self, feedback_collector):
	        """Store the collector that every analysis method reads from."""
	        self.collector = feedback_collector

	    def analyze_agent_performance(self, agent_name: str) -> Dict[str, Any]:
	        """
	        Comprehensive analysis of agent performance

	        Args:
	            agent_name: Name of the agent to analyze

	        Returns:
	            Dict with ``agent_name``, ``overall_rating``, ``total_feedback``,
	            ``rating_distribution``, ``strengths``, ``weaknesses``,
	            ``common_issues`` and ``improvement_suggestions``. The key
	            ``correction_patterns`` is added only when the collector returned
	            at least one correction.
	        """
	        stats = self.collector.get_feedback_stats(agent_name=agent_name)
	        low_rated = self.collector.get_low_rated_responses(agent_name=agent_name)
	        corrections = self.collector.get_corrections(agent_name=agent_name)

	        distribution = stats['rating_distribution']
	        analysis = {
	            'agent_name': agent_name,
	            'overall_rating': stats['average_rating'],
	            'total_feedback': stats['total_ratings'],
	            'rating_distribution': distribution,
	            'strengths': [],
	            'weaknesses': [],
	            'common_issues': [],
	            'improvement_suggestions': [],
	        }

	        # Strength: average of 4 stars or better.
	        if stats['average_rating'] >= 4.0:
	            analysis['strengths'].append("High overall satisfaction")

	        # Weakness: more than 20% of all ratings are 1 or 2 stars.
	        if distribution[1] + distribution[2] > stats['total_ratings'] * 0.2:
	            analysis['weaknesses'].append("High number of low ratings (1-2 stars)")

	        if low_rated:
	            analysis['common_issues'] = self._extract_common_issues(low_rated)

	        if corrections:
	            correction_patterns = self._analyze_corrections(corrections)
	            analysis['correction_patterns'] = correction_patterns
	            # One suggestion per recurring correction category.
	            analysis['improvement_suggestions'].extend(
	                f"Improve {pattern['category']}: {pattern['suggestion']}"
	                for pattern in correction_patterns
	            )

	        return analysis

	    def _extract_common_issues(self, low_rated: List[Dict]) -> List[Dict[str, Any]]:
	        """Extract common issues from low-rated responses.

	        Counts, per keyword in ``_ISSUE_KEYWORDS``, how many non-empty
	        comments contain it, and returns the five most frequent issues.
	        ``percentage`` is relative to the number of low-rated responses
	        (including those without a comment).
	        """
	        comments = [r.get('comment', '') for r in low_rated if r.get('comment')]

	        issue_counts = Counter()
	        for comment in comments:
	            comment_lower = comment.lower()
	            for keyword, description in self._ISSUE_KEYWORDS.items():
	                if keyword in comment_lower:
	                    issue_counts[description] += 1

	        # No division by zero: an empty ``low_rated`` yields no counts at all.
	        return [
	            {
	                'issue': issue,
	                'frequency': count,
	                'percentage': round(count / len(low_rated) * 100, 1),
	            }
	            for issue, count in issue_counts.most_common(5)
	        ]

	    def _analyze_corrections(self, corrections: List[Dict]) -> List[Dict[str, Any]]:
	        """Analyze user corrections to find patterns.

	        Groups corrections by ``correction_reason`` (``'other'`` when the key
	        is absent) and reports every reason that occurs at least twice.
	        """
	        by_reason = defaultdict(list)
	        for correction in corrections:
	            by_reason[correction.get('correction_reason', 'other')].append(correction)

	        return [
	            {
	                'category': reason,
	                'count': len(items),
	                'suggestion': self._generate_suggestion(reason, items),
	            }
	            for reason, items in by_reason.items()
	            if len(items) >= 2  # a single occurrence is not a pattern
	        ]

	    def _generate_suggestion(self, reason: str, corrections: List[Dict]) -> str:
	        """Generate improvement suggestion based on corrections.

	        ``corrections`` is not inspected; it is kept in the signature for
	        existing callers. Unknown reasons get a generic review message.
	        """
	        return self._SUGGESTIONS.get(reason, f'Review and improve handling of: {reason}')

	    @staticmethod
	    def _read_rating_file(file_path) -> Optional[Dict[str, Any]]:
	        """Load one rating JSON file; return None if unreadable or not a JSON object."""
	        try:
	            with open(file_path, 'r', encoding='utf-8') as f:
	                data = json.load(f)
	        except (OSError, ValueError):
	            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
	            return None
	        return data if isinstance(data, dict) else None

	    def get_trending_issues(self, days: int = 7) -> List[Dict[str, Any]]:
	        """
	        Get trending issues in recent feedback

	        Scans ``<storage_dir>/ratings/*.json`` and keeps records whose
	        ``timestamp`` lies within the last ``days`` days and whose ``rating``
	        is 2 or lower (a missing rating counts as 5). Files that cannot be
	        parsed, or whose timestamp/rating is missing or malformed, are
	        skipped so that one bad record does not abort the whole scan.

	        Args:
	            days: Number of days to analyze

	        Returns:
	            List of trending issues
	        """
	        window = timedelta(days=days)
	        naive_cutoff = datetime.now() - window

	        recent_low_rated = []
	        for file_path in (self.collector.storage_dir / "ratings").glob("*.json"):
	            data = self._read_rating_file(file_path)
	            if data is None:
	                continue

	            try:
	                timestamp = datetime.fromisoformat(data.get('timestamp', ''))
	            except (TypeError, ValueError):
	                continue

	            # Naive and aware datetimes cannot be compared; build the cutoff
	            # in the timestamp's own timezone when it carries one.
	            if timestamp.tzinfo is None:
	                cutoff = naive_cutoff
	            else:
	                cutoff = datetime.now(timestamp.tzinfo) - window

	            rating = data.get('rating', 5)
	            if not isinstance(rating, (int, float)):
	                continue

	            if timestamp >= cutoff and rating <= 2:
	                recent_low_rated.append(data)

	        return self._extract_common_issues(recent_low_rated)

	    def compare_agents(self) -> Dict[str, Any]:
	        """
	        Compare performance across all agents

	        Returns:
	            Dict with ``agents`` (ranking entries sorted by average rating,
	            best first), ``best_agent`` / ``worst_agent`` (first and last
	            ranking entry, or None when there are no agents) and the overall
	            ``average_rating``.
	        """
	        stats = self.collector.get_feedback_stats()

	        agent_rankings = sorted(
	            (
	                {
	                    'agent': agent,
	                    'average_rating': data['average'],
	                    'total_feedback': data['count'],
	                }
	                for agent, data in stats['by_agent'].items()
	            ),
	            key=lambda entry: entry['average_rating'],
	            reverse=True,
	        )

	        return {
	            'agents': agent_rankings,
	            'best_agent': agent_rankings[0] if agent_rankings else None,
	            'worst_agent': agent_rankings[-1] if agent_rankings else None,
	            'average_rating': stats['average_rating'],
	        }

	    def _format_agent_report(self, agent_name: str) -> str:
	        """Build the markdown report for a single agent."""
	        analysis = self.analyze_agent_performance(agent_name)
	        distribution = analysis['rating_distribution']

	        lines = [
	            "",
	            f"# Feedback Analysis Report: {agent_name}",
	            f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}",
	            "",
	            "## Overall Performance",
	            f"- Average Rating: {analysis['overall_rating']:.1f}/5.0",
	            f"- Total Feedback: {analysis['total_feedback']}",
	            "",
	            "## Rating Distribution",
	            f"- ⭐⭐⭐⭐⭐ (5 stars): {distribution[5]}",
	            f"- ⭐⭐⭐⭐ (4 stars): {distribution[4]}",
	            f"- ⭐⭐⭐ (3 stars): {distribution[3]}",
	            f"- ⭐⭐ (2 stars): {distribution[2]}",
	            f"- ⭐ (1 star): {distribution[1]}",
	            "",
	            "## Strengths",
	        ]
	        lines.extend(f"- ✅ {strength}" for strength in analysis['strengths'])

	        lines += ["", "## Weaknesses"]
	        lines.extend(f"- ⚠️ {weakness}" for weakness in analysis['weaknesses'])

	        if analysis['common_issues']:
	            lines += ["", "## Common Issues"]
	            lines.extend(
	                f"- {issue['issue']}: {issue['frequency']} occurrences ({issue['percentage']}%)"
	                for issue in analysis['common_issues']
	            )

	        if analysis['improvement_suggestions']:
	            lines += ["", "## Improvement Suggestions"]
	            lines.extend(
	                f"{i}. {suggestion}"
	                for i, suggestion in enumerate(analysis['improvement_suggestions'], 1)
	            )

	        return "\n".join(lines) + "\n"

	    def _format_comparison_report(self) -> str:
	        """Build the markdown report that ranks all agents."""
	        comparison = self.compare_agents()
	        agents = comparison['agents']

	        lines = [
	            "",
	            "# Overall Feedback Analysis Report",
	            f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}",
	            "",
	            "## System-wide Performance",
	            f"- Average Rating: {comparison['average_rating']:.1f}/5.0",
	            "",
	            "## Agent Rankings",
	        ]
	        lines.extend(
	            f"{i}. {agent['agent']}: {agent['average_rating']:.1f}/5.0 ({agent['total_feedback']} feedback)"
	            for i, agent in enumerate(agents, 1)
	        )

	        best = comparison['best_agent']
	        if best:
	            lines += ["", f"🏆 Best Performing: {best['agent']} ({best['average_rating']:.1f}/5.0)"]

	        # With a single agent, best and worst are the same entry; do not
	        # label the best performer as needing improvement.
	        worst = comparison['worst_agent']
	        if worst and len(agents) > 1:
	            lines.append(
	                f"⚠️ Needs Improvement: {worst['agent']} ({worst['average_rating']:.1f}/5.0)"
	            )

	        return "\n".join(lines) + "\n"

	    def generate_improvement_report(self, agent_name: Optional[str] = None) -> str:
	        """
	        Generate a comprehensive improvement report

	        Args:
	            agent_name: Specific agent; when falsy, a comparison of all
	                agents is produced instead

	        Returns:
	            Formatted markdown report
	        """
	        if agent_name:
	            return self._format_agent_report(agent_name)
	        return self._format_comparison_report()

	    def get_actionable_insights(self, agent_name: str, limit: int = 5) -> List[Dict[str, Any]]:
	        """
	        Get top actionable insights for improvement

	        Up to three insights are produced: the most common low-rating issue,
	        the availability of user corrections, and a ``critical`` insight when
	        more than 20% of ratings are 1-2 stars. They are ordered by priority
	        (critical first) before being truncated to ``limit``.

	        Args:
	            agent_name: Agent to analyze
	            limit: Number of insights to return

	        Returns:
	            List of actionable insights
	        """
	        analysis = self.analyze_agent_performance(agent_name)
	        low_rated = self.collector.get_low_rated_responses(agent_name=agent_name, limit=20)
	        corrections = self.collector.get_corrections(agent_name=agent_name, limit=20)

	        insights = []

	        # Insight 1: most common low-rating issue.
	        if analysis['common_issues']:
	            top_issue = analysis['common_issues'][0]
	            insights.append({
	                'priority': 'high',
	                'category': 'quality',
	                'issue': top_issue['issue'],
	                'frequency': top_issue['frequency'],
	                'action': f"Review and fix responses related to: {top_issue['issue']}",
	                # .get: a record without the key must not abort the analysis.
	                'examples': [r.get('user_message', '') for r in low_rated[:3]],
	            })

	        # Insight 2: correction patterns.
	        if corrections:
	            insights.append({
	                'priority': 'high',
	                'category': 'accuracy',
	                'issue': 'User corrections available',
	                'frequency': len(corrections),
	                'action': 'Incorporate user corrections into training data',
	                # Same default as _analyze_corrections uses for a missing reason.
	                'examples': [c.get('correction_reason', 'other') for c in corrections[:3]],
	            })

	        # Insight 3: share of 1-2 star ratings. The analysis already carries
	        # the stats, so the collector is not queried a second time.
	        distribution = analysis['rating_distribution']
	        low_count = distribution[1] + distribution[2]
	        low_rating_pct = low_count / max(analysis['total_feedback'], 1) * 100

	        if low_rating_pct > 20:
	            insights.append({
	                'priority': 'critical',
	                'category': 'overall',
	                'issue': f'{low_rating_pct:.1f}% of ratings are 1-2 stars',
	                'frequency': low_count,
	                'action': 'Urgent review needed - high dissatisfaction rate',
	                'examples': [],
	            })

	        # Stable sort: critical items move to the front so a small ``limit``
	        # cannot cut them off; equal priorities keep their insertion order.
	        fallback_rank = len(self._PRIORITY_ORDER)
	        insights.sort(key=lambda item: self._PRIORITY_ORDER.get(item['priority'], fallback_rank))

	        return insights[:limit]


	def get_feedback_analyzer(feedback_collector) -> FeedbackAnalyzer:
	    """Build and return a ``FeedbackAnalyzer`` wrapping ``feedback_collector``."""
	    analyzer = FeedbackAnalyzer(feedback_collector)
	    return analyzer