"""Main reviewer agent using LangGraph"""
from typing import TypedDict, List, Dict, Any, Optional, Callable
from langgraph.graph import StateGraph, END
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from app.agents.tools import CodeAnalyzer, detect_language, should_review_file
from app.agents.prompts import SYSTEM_PROMPT, SUMMARY_PROMPT
from app.models import Review, Comment, PullRequest, Repository
from app.models.review import ReviewStatusEnum
from app.models.comment import SeverityEnum
from app.services import GiteaService, GitHubService, BitbucketService
from app.services.base import BaseGitService
from app.config import settings


class ReviewState(TypedDict):
"""State for the review workflow"""
review_id: int
pr_number: int
repository_id: int
status: str
files: List[Dict[str, Any]]
analyzed_files: List[str]
comments: List[Dict[str, Any]]
error: Optional[str]
    git_service: Optional[BaseGitService]
    pr_info: Optional[Dict[str, Any]]


class ReviewerAgent:
    """Agent for reviewing code using LangGraph"""

    def __init__(self, db: AsyncSession):
self.db = db
self.analyzer = CodeAnalyzer(
ollama_base_url=settings.ollama_base_url,
model=settings.ollama_model
)
self.graph = self._build_graph()
def _build_graph(self) -> StateGraph:
"""Build the LangGraph workflow"""
workflow = StateGraph(ReviewState)
# Add nodes
workflow.add_node("fetch_pr_info", self.fetch_pr_info)
workflow.add_node("fetch_files", self.fetch_files)
workflow.add_node("analyze_files", self.analyze_files)
workflow.add_node("post_comments", self.post_comments)
workflow.add_node("complete_review", self.complete_review)
# Set entry point
workflow.set_entry_point("fetch_pr_info")
# Add edges
workflow.add_edge("fetch_pr_info", "fetch_files")
workflow.add_edge("fetch_files", "analyze_files")
workflow.add_edge("analyze_files", "post_comments")
workflow.add_edge("post_comments", "complete_review")
workflow.add_edge("complete_review", END)
return workflow.compile()
def _remove_think_blocks(self, text: str) -> str:
"""Remove <think>...</think> blocks from text"""
import re
# Remove <think> blocks
text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL | re.IGNORECASE)
# Remove extra whitespace
text = re.sub(r'\n\n+', '\n\n', text)
return text.strip()
def _escape_html_in_text(self, text: str) -> str:
"""Escape HTML tags in text to prevent Markdown from hiding them
Wraps code-like content (anything with < >) in backticks.
"""
import re
# Pattern to find HTML-like tags (e.g., <CharacterItem>, <img>)
# We want to wrap them in backticks so they display correctly
def replace_tag(match):
tag = match.group(0)
            # Note: every <...> occurrence is wrapped, even if it is already inside backticks
            return f"`{tag}`"
# Find all <...> patterns and wrap them
text = re.sub(r'<[^>]+>', replace_tag, text)
return text
def _get_git_service(self, repository: Repository) -> BaseGitService:
"""Get appropriate Git service for repository"""
from app.utils import decrypt_token
from app.config import settings
# Parse repository URL to get owner and name
# Assuming URL format: https://git.example.com/owner/repo
parts = repository.url.rstrip('/').split('/')
repo_name = parts[-1].replace('.git', '')
repo_owner = parts[-2]
base_url = '/'.join(parts[:-2])
        # Pick the token: repository-specific or master
        if repository.api_token:
            # Use the repository-specific token
            try:
                decrypted_token = decrypt_token(repository.api_token)
                print(" 🔑 Using repository-specific token")
            except ValueError as e:
                raise ValueError(f"Failed to decrypt the API token for repository {repository.name}: {str(e)}")
        else:
            # Fall back to the master token for the platform
platform = repository.platform.value.lower()
if platform == "gitea":
decrypted_token = settings.master_gitea_token
elif platform == "github":
decrypted_token = settings.master_github_token
elif platform == "bitbucket":
decrypted_token = settings.master_bitbucket_token
else:
raise ValueError(f"Unsupported platform: {repository.platform}")
            if not decrypted_token:
                raise ValueError(
                    f"No API token is set for repository {repository.name} "
                    f"and no master {platform} token is configured in .env (MASTER_{platform.upper()}_TOKEN)"
                )
            print(f" 🔑 Using master {platform} token")
if repository.platform.value == "gitea":
return GiteaService(base_url, decrypted_token, repo_owner, repo_name)
elif repository.platform.value == "github":
return GitHubService(base_url, decrypted_token, repo_owner, repo_name)
elif repository.platform.value == "bitbucket":
return BitbucketService(base_url, decrypted_token, repo_owner, repo_name)
else:
raise ValueError(f"Unsupported platform: {repository.platform}")
async def fetch_pr_info(self, state: ReviewState) -> ReviewState:
"""Fetch PR information"""
# Send step event
if hasattr(self, '_stream_callback') and self._stream_callback:
await self._stream_callback({
"type": "agent_step",
"step": "fetch_pr_info",
"message": "Получение информации о PR..."
})
try:
# Update review status
result = await self.db.execute(
select(Review).where(Review.id == state["review_id"])
)
review = result.scalar_one()
review.status = ReviewStatusEnum.FETCHING
await self.db.commit()
# Get repository
result = await self.db.execute(
select(Repository).where(Repository.id == state["repository_id"])
)
repository = result.scalar_one()
# Initialize Git service
git_service = self._get_git_service(repository)
state["git_service"] = git_service
# Fetch PR info
pr_info = await git_service.get_pull_request(state["pr_number"])
print("\n" + "📋"*40)
print("ИНФОРМАЦИЯ О PR")
print("📋"*40)
print(f"\n📝 Название: {pr_info.title}")
print(f"👤 Автор: {pr_info.author}")
print(f"🔀 Ветки: {pr_info.source_branch}{pr_info.target_branch}")
print(f"📄 Описание:")
print("-" * 80)
print(pr_info.description if pr_info.description else "(без описания)")
print("-" * 80)
print("📋"*40 + "\n")
# Store PR info in state
state["pr_info"] = {
"title": pr_info.title,
"description": pr_info.description,
"author": pr_info.author,
"source_branch": pr_info.source_branch,
"target_branch": pr_info.target_branch
}
state["status"] = "pr_info_fetched"
return state
except Exception as e:
print(f"❌ ОШИБКА в fetch_pr_info: {e}")
import traceback
traceback.print_exc()
state["error"] = str(e)
state["status"] = "failed"
return state
async def fetch_files(self, state: ReviewState) -> ReviewState:
"""Fetch changed files in PR"""
# Send step event
if hasattr(self, '_stream_callback') and self._stream_callback:
await self._stream_callback({
"type": "agent_step",
"step": "fetch_files",
"message": "Загрузка измененных файлов..."
})
try:
git_service = state["git_service"]
print("\n" + "📥"*40)
print("ПОЛУЧЕНИЕ ФАЙЛОВ ИЗ PR")
print("📥"*40)
# Get changed files
files = await git_service.get_pr_files(state["pr_number"])
print(f"\n📊 Получено файлов из API: {len(files)}")
for i, f in enumerate(files, 1):
print(f"\n {i}. {f.filename}")
print(f" Status: {f.status}")
print(f" +{f.additions} -{f.deletions}")
print(f" Patch: {'ДА' if f.patch else 'НЕТ'} ({len(f.patch) if f.patch else 0} символов)")
if f.patch:
print(f" Первые 200 символов patch:")
print(f" {f.patch[:200]}...")
# Filter files that should be reviewed
reviewable_files = []
skipped_files = []
for f in files:
if should_review_file(f.filename):
reviewable_files.append({
"path": f.filename,
"status": f.status,
"additions": f.additions,
"deletions": f.deletions,
"patch": f.patch,
"language": detect_language(f.filename)
})
else:
skipped_files.append(f.filename)
print(f"\n✅ Файлов для ревью: {len(reviewable_files)}")
for rf in reviewable_files:
print(f" - {rf['path']} ({rf['language']})")
if skipped_files:
print(f"\n⏭️ Пропущено файлов: {len(skipped_files)}")
for sf in skipped_files:
print(f" - {sf}")
print("📥"*40 + "\n")
state["files"] = reviewable_files
state["status"] = "files_fetched"
# Update review
result = await self.db.execute(
select(Review).where(Review.id == state["review_id"])
)
review = result.scalar_one()
review.status = ReviewStatusEnum.ANALYZING
await self.db.commit()
return state
except Exception as e:
print(f"❌ ОШИБКА в fetch_files: {e}")
import traceback
traceback.print_exc()
state["error"] = str(e)
state["status"] = "failed"
return state
async def analyze_files(self, state: ReviewState) -> ReviewState:
"""Analyze files and generate comments"""
# Send step event
if hasattr(self, '_stream_callback') and self._stream_callback:
await self._stream_callback({
"type": "agent_step",
"step": "analyze_files",
"message": "Анализ кода с помощью AI..."
})
try:
all_comments = []
print("\n" + "🔬"*40)
print("НАЧАЛО АНАЛИЗА ФАЙЛОВ")
print("🔬"*40)
print(f"Файлов для анализа: {len(state['files'])}")
for i, file_info in enumerate(state["files"], 1):
file_path = file_info["path"]
patch = file_info.get("patch")
language = file_info.get("language", "text")
print(f"\n📂 Файл {i}/{len(state['files'])}: {file_path}")
print(f" Язык: {language}")
print(f" Размер patch: {len(patch) if patch else 0} символов")
print(f" Additions: {file_info.get('additions')}, Deletions: {file_info.get('deletions')}")
if not patch or len(patch) < 10:
print(f" ⚠️ ПРОПУСК: patch пустой или слишком маленький")
continue
# Analyze diff with PR context
pr_info = state.get("pr_info", {})
comments = await self.analyzer.analyze_diff(
file_path=file_path,
diff=patch,
language=language,
pr_title=pr_info.get("title", ""),
pr_description=pr_info.get("description", "")
)
print(f" 💬 Получено комментариев: {len(comments)}")
# Add file path to each comment
for comment in comments:
comment["file_path"] = file_path
all_comments.append(comment)
print(f"\n✅ ИТОГО комментариев: {len(all_comments)}")
print("🔬"*40 + "\n")
state["comments"] = all_comments
state["status"] = "analyzed"
# Update review
result = await self.db.execute(
select(Review).where(Review.id == state["review_id"])
)
review = result.scalar_one()
review.files_analyzed = len(state["files"])
review.status = ReviewStatusEnum.COMMENTING
await self.db.commit()
return state
except Exception as e:
print(f"❌ ОШИБКА в analyze_files: {e}")
import traceback
traceback.print_exc()
state["error"] = str(e)
state["status"] = "failed"
return state
async def post_comments(self, state: ReviewState) -> ReviewState:
"""Post comments to PR"""
# Send step event
if hasattr(self, '_stream_callback') and self._stream_callback:
await self._stream_callback({
"type": "agent_step",
"step": "post_comments",
"message": "Публикация комментариев в PR..."
})
try:
# Save comments to database
result = await self.db.execute(
select(Review).where(Review.id == state["review_id"])
)
review = result.scalar_one()
db_comments = []
for comment_data in state["comments"]:
                # Strip <think> blocks from the message
                message = comment_data.get("message", "")
                message = self._remove_think_blocks(message)
                # Escape HTML-like tags so Markdown does not swallow them
                message = self._escape_html_in_text(message)
comment = Comment(
review_id=review.id,
file_path=comment_data["file_path"],
line_number=comment_data.get("line", 1),
content=message,
severity=SeverityEnum(comment_data.get("severity", "INFO").lower()),
posted=False
)
self.db.add(comment)
db_comments.append({**comment_data, "message": message})
await self.db.commit()
# Post to Git platform
git_service = state["git_service"]
pr_info = state.get("pr_info", {})
# Generate summary
summary = await self.analyzer.generate_summary(
all_comments=db_comments,
pr_title=pr_info.get("title", ""),
pr_description=pr_info.get("description", "")
)
            # Strip <think> blocks from the summary
            summary = self._remove_think_blocks(summary)
            # Escape HTML-like tags in the summary
            summary = self._escape_html_in_text(summary)
if db_comments:
# Format comments for API
formatted_comments = [
{
"file_path": c["file_path"],
"line_number": c.get("line", 1),
"content": f"**{c.get('severity', 'INFO').upper()}**: {c.get('message', '')}"
}
for c in db_comments
]
try:
# Determine review status based on severity
has_errors = any(c.get('severity', '').upper() == 'ERROR' for c in db_comments)
event = "REQUEST_CHANGES" if has_errors else "COMMENT"
await git_service.create_review(
pr_number=state["pr_number"],
comments=formatted_comments,
body=summary,
event=event
)
# Mark comments as posted
result = await self.db.execute(
select(Comment).where(Comment.review_id == review.id)
)
comments = result.scalars().all()
for comment in comments:
comment.posted = True
await self.db.commit()
except Exception as e:
print(f"Error posting comments to Git platform: {e}")
# Continue even if posting fails
else:
# No issues found - approve PR
try:
await git_service.create_review(
pr_number=state["pr_number"],
comments=[],
body=summary,
event="APPROVE" # Approve if no issues
)
except Exception as e:
print(f"Error posting approval: {e}")
review.comments_generated = len(db_comments)
await self.db.commit()
state["status"] = "commented"
return state
except Exception as e:
state["error"] = str(e)
state["status"] = "failed"
return state
async def complete_review(self, state: ReviewState) -> ReviewState:
"""Complete the review"""
try:
result = await self.db.execute(
select(Review).where(Review.id == state["review_id"])
)
review = result.scalar_one()
if state.get("error"):
review.status = ReviewStatusEnum.FAILED
review.error_message = state["error"]
else:
review.status = ReviewStatusEnum.COMPLETED
from datetime import datetime
review.completed_at = datetime.utcnow()
await self.db.commit()
state["status"] = "completed"
return state
except Exception as e:
state["error"] = str(e)
state["status"] = "failed"
return state
async def run_review(
self,
review_id: int,
pr_number: int,
repository_id: int
) -> Dict[str, Any]:
"""Run the review workflow"""
initial_state: ReviewState = {
"review_id": review_id,
"pr_number": pr_number,
"repository_id": repository_id,
"status": "pending",
"files": [],
"analyzed_files": [],
"comments": [],
"error": None,
"git_service": None
}
final_state = await self.graph.ainvoke(initial_state)
return final_state
async def run_review_stream(
self,
review_id: int,
pr_number: int,
repository_id: int,
        on_event: Optional[Callable] = None
) -> Dict[str, Any]:
"""Run the review workflow with streaming events"""
print(f"\n{'='*80}")
print(f"🎬 Starting review stream for PR #{pr_number}")
print(f" Review ID: {review_id}")
print(f" Callback: {on_event is not None}")
print(f"{'='*80}\n")
# Store callback in instance for access in nodes
self._stream_callback = on_event
initial_state: ReviewState = {
"review_id": review_id,
"pr_number": pr_number,
"repository_id": repository_id,
"status": "pending",
"files": [],
"analyzed_files": [],
"comments": [],
"error": None,
"git_service": None
}
final_state = None
event_count = 0
callback_count = 0
# Stream through the graph
print(f"📊 Starting graph.astream() with mode=['updates']\n")
try:
async for event in self.graph.astream(
initial_state,
stream_mode=["updates"]
):
event_count += 1
print(f"\n{''*80}")
print(f"📨 STREAM Event #{event_count}")
print(f" Type: {type(event).__name__}")
print(f" Is tuple: {isinstance(event, tuple)}")
print(f" Content: {event}")
print(f"{''*80}")
# LangGraph returns events as tuple: ('updates', {node_name: node_output})
if isinstance(event, tuple) and len(event) == 2:
event_type, event_data = event[0], event[1]
print(f"✓ Tuple detected:")
print(f" [0] event_type: '{event_type}'")
print(f" [1] event_data type: {type(event_data).__name__}")
# Handle 'updates' events
if event_type == 'updates' and isinstance(event_data, dict):
print(f"✓ Updates event with dict data")
for node_name, node_state in event_data.items():
print(f"\n 🔔 Node: '{node_name}'")
print(f" State type: {type(node_state).__name__}")
if on_event:
callback_count += 1
print(f" 📤 Calling callback #{callback_count}...")
try:
await on_event({
"type": "agent_step",
"step": node_name,
"message": f"Шаг: {node_name}",
"data": {
"status": node_state.get("status") if isinstance(node_state, dict) else None
}
})
print(f" ✓ Callback executed successfully")
except Exception as e:
print(f" ❌ Callback error: {e}")
import traceback
traceback.print_exc()
else:
print(f" ⚠️ No callback set!")
# Store final state
if isinstance(node_state, dict):
final_state = node_state
else:
print(f" ⚠️ Not an 'updates' event or data is not dict")
print(f" event_type={event_type}, isinstance(event_data, dict)={isinstance(event_data, dict)}")
else:
print(f" ❌ NOT a tuple or wrong length!")
print(f" isinstance(event, tuple)={isinstance(event, tuple)}")
if isinstance(event, tuple):
print(f" len(event)={len(event)}")
except Exception as e:
print(f"❌ Error in graph streaming: {e}")
import traceback
traceback.print_exc()
print(f"✅ Graph streaming completed. Total events: {event_count}")
# Clear callback
self._stream_callback = None
return final_state or initial_state
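

# Example usage (illustrative sketch; `async_session_factory` stands in for whatever
# async SQLAlchemy session factory the application configures elsewhere):
#
#     async with async_session_factory() as session:
#         agent = ReviewerAgent(session)
#         final_state = await agent.run_review(review_id=1, pr_number=42, repository_id=7)
#         print(final_state["status"], len(final_state["comments"]))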