Files
New-planet-ai-agent/services/data_analyzer.py

157 lines
5.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Service for analysing children's task data."""
from datetime import datetime, timedelta
from typing import Dict, List, Optional
import pandas as pd
class DataAnalyzer:
    """Stateless helpers for analysing children's task-completion progress.

    Every method is a static method operating on plain dictionaries:

    * a *task* is a dict that may carry ``completed``, ``category`` and
      ``duration_minutes`` keys;
    * a *schedule* is a dict that may carry a ``tasks`` list and a ``date``.
    """

    @staticmethod
    def calculate_completion_rate(tasks: List[Dict]) -> float:
        """Return the fraction of tasks that are completed.

        Args:
            tasks: Task dicts. A task counts as completed when its
                ``completed`` value is truthy; a missing key means
                "not completed".

        Returns:
            Fraction in the range 0.0 - 1.0, or 0.0 for an empty list.
        """
        if not tasks:
            return 0.0
        completed = sum(1 for task in tasks if task.get("completed", False))
        return completed / len(tasks)

    @staticmethod
    def analyze_daily_progress(schedules: List[Dict]) -> Dict:
        """Aggregate completion figures over a list of schedules.

        Args:
            schedules: Schedule dicts, each optionally holding a ``tasks``
                list. A missing or ``None`` ``tasks`` value is treated as
                an empty list.

        Returns:
            Dict with the keys:

            * ``total_days`` - number of schedules passed in;
            * ``average_completion`` - mean of the per-schedule rates;
            * ``total_tasks`` - number of tasks across all schedules;
            * ``completed_tasks`` - number of completed tasks;
            * ``completion_rate`` - ``completed_tasks / total_tasks``
              (0.0 when there are no tasks).

            The same set of keys is returned for empty input, so callers
            can read any of them unconditionally.
        """
        if not schedules:
            return {
                "total_days": 0,
                "average_completion": 0.0,
                "total_tasks": 0,
                "completed_tasks": 0,
                "completion_rate": 0.0,
            }

        total_tasks = 0
        completed_tasks = 0
        completion_rates = []
        for schedule in schedules:
            tasks = schedule.get("tasks") or []
            total_tasks += len(tasks)
            completed_tasks += sum(
                1 for task in tasks if task.get("completed", False)
            )
            completion_rates.append(
                DataAnalyzer.calculate_completion_rate(tasks)
            )

        return {
            "total_days": len(schedules),
            "average_completion": sum(completion_rates) / len(completion_rates),
            "total_tasks": total_tasks,
            "completed_tasks": completed_tasks,
            "completion_rate": (
                completed_tasks / total_tasks if total_tasks > 0 else 0.0
            ),
        }

    @staticmethod
    def get_category_statistics(schedules: List[Dict]) -> Dict[str, Dict]:
        """Collect per-category task statistics.

        Args:
            schedules: Schedule dicts, each optionally holding a ``tasks``
                list. Tasks without a ``category`` are grouped under
                ``"unknown"``.

        Returns:
            Mapping of category name to a dict with ``total``,
            ``completed`` and ``average_duration``. The average is taken
            over the tasks of that category that carry a non-``None``
            ``duration_minutes``; it is 0.0 when none of them do.
        """
        category_stats: Dict[str, Dict] = {}
        # Durations are accumulated separately so the result dicts never
        # contain a temporary key that has to be removed afterwards.
        durations_by_category: Dict[str, List] = {}

        for schedule in schedules:
            for task in schedule.get("tasks") or []:
                category = task.get("category", "unknown")
                stats = category_stats.setdefault(
                    category,
                    {"total": 0, "completed": 0, "average_duration": 0.0},
                )
                stats["total"] += 1
                if task.get("completed", False):
                    stats["completed"] += 1
                duration = task.get("duration_minutes")
                # A None duration cannot be averaged; ignore it rather than
                # let sum() fail on it later.
                if duration is not None:
                    durations_by_category.setdefault(category, []).append(
                        duration
                    )

        for category, durations in durations_by_category.items():
            category_stats[category]["average_duration"] = (
                sum(durations) / len(durations)
            )
        return category_stats

    @staticmethod
    def get_weekly_trend(schedules: List[Dict], days: int = 7) -> List[Dict]:
        """Build a per-day completion trend for the last ``days`` days.

        The window ends today (local date from ``datetime.now()``) and
        covers ``days`` calendar days including today.

        Args:
            schedules: Schedule dicts with a ``date`` (ISO-format string,
                ``datetime`` or ``date``) and an optional ``tasks`` list.
                Schedules whose ``date`` is missing or ``None`` are
                skipped, as are schedules outside the window.
            days: Window length in days; a value below 1 yields an empty
                list.

        Returns:
            One dict per day in chronological order with ``date`` (ISO
            string), ``completion_rate``, ``total_tasks`` and
            ``completed_tasks``. Days without schedules report zeros.

        Raises:
            ValueError: If a ``date`` string is not valid ISO format.
        """
        end_date = datetime.now().date()
        start_date = end_date - timedelta(days=days - 1)

        # Group the tasks of all in-window schedules by ISO date string.
        tasks_by_day: Dict[str, List[Dict]] = {}
        for schedule in schedules:
            schedule_date = schedule.get("date")
            if schedule_date is None:
                # An undated schedule cannot be placed on the timeline.
                continue
            if isinstance(schedule_date, str):
                schedule_date = datetime.fromisoformat(schedule_date).date()
            elif isinstance(schedule_date, datetime):
                schedule_date = schedule_date.date()
            if not (start_date <= schedule_date <= end_date):
                continue
            tasks_by_day.setdefault(str(schedule_date), []).extend(
                schedule.get("tasks") or []
            )

        # Emit one entry per calendar day, including days with no data.
        trend = []
        current_date = start_date
        while current_date <= end_date:
            date_str = str(current_date)
            day_tasks = tasks_by_day.get(date_str, [])
            trend.append(
                {
                    "date": date_str,
                    "completion_rate": DataAnalyzer.calculate_completion_rate(
                        day_tasks
                    ),
                    "total_tasks": len(day_tasks),
                    "completed_tasks": sum(
                        1 for task in day_tasks if task.get("completed", False)
                    ),
                }
            )
            current_date += timedelta(days=1)
        return trend