File size: 3,963 Bytes
a6bfba7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import html
import json
import re
from dataclasses import asdict
from typing import List

from .types import SearchResult, StackOverflowAnswer, StackOverflowComment
def format_response(results: List[SearchResult], format_type: str = "markdown") -> str:
    """Format search results as either JSON or Markdown.

    Args:
        results (List[SearchResult]): List of search results to format
        format_type (str, optional): Output format type - either "json" or
            "markdown". Any value other than "json" produces Markdown.
            Defaults to "markdown".

    Returns:
        str: Formatted string representation of the search results. In
            Markdown mode an empty ``results`` list yields the literal
            "No results found."; in JSON mode it is serialized like any
            other list.
    """
    if format_type == "json":
        class DataClassJSONEncoder(json.JSONEncoder):
            """JSON encoder that serializes dataclass instances via ``asdict``."""

            def default(self, obj):
                if hasattr(obj, "__dataclass_fields__"):
                    return asdict(obj)
                return super().default(obj)

        return json.dumps(results, cls=DataClassJSONEncoder, indent=2)

    if not results:
        return "No results found."

    # Collect fragments and join once instead of repeated string "+=".
    parts = []
    for result in results:
        question = result.question
        parts.append(f"# {question.title}\n\n")
        parts.append(
            f"**Score:** {question.score} | **Answers:** {question.answer_count}\n\n"
        )
        parts.append(f"## Question\n\n{clean_html(question.body)}\n\n")

        if result.comments and result.comments.question:
            parts.append("### Question Comments\n\n")
            parts.extend(
                f"- {clean_html(comment.body)} *(Score: {comment.score})*\n"
                for comment in result.comments.question
            )
            parts.append("\n")

        parts.append("## Answers\n\n")
        for answer in result.answers:
            accepted_mark = "✓ " if answer.is_accepted else ""
            parts.append(f"### {accepted_mark}Answer (Score: {answer.score})\n\n")
            parts.append(f"{clean_html(answer.body)}\n\n")

            # Comments for an answer are looked up by the answer's id.
            answer_comments = None
            if (
                result.comments
                and result.comments.answers
                and answer.answer_id in result.comments.answers
            ):
                answer_comments = result.comments.answers[answer.answer_id]

            if answer_comments:
                parts.append("#### Answer Comments\n\n")
                parts.extend(
                    f"- {clean_html(comment.body)} *(Score: {comment.score})*\n"
                    for comment in answer_comments
                )
                # Blank line closing the bullet list (previously the literal
                # text "/n" was emitted here instead of a newline).
                parts.append("\n")

        parts.append(f"---\n\n[View on Stack Overflow]({question.link})\n\n")

    return "".join(parts)
def clean_html(html_text: str) -> str:
    """Clean HTML tags from text while preserving code blocks.

    ``<pre><code>...</code></pre>`` and ``<code>...</code>`` spans are pulled
    out before tag stripping and re-inserted afterwards as Markdown code:
    a fenced block when the code contains a newline or is longer than 80
    characters, inline backticks otherwise. HTML character references
    (``&lt;``, ``&amp;``, ``&#39;`` ...) are decoded once in the final text,
    including inside the restored code.

    Args:
        html_text (str): HTML text to be cleaned

    Returns:
        str: Cleaned text with HTML tags removed and code blocks preserved.
            Empty or ``None`` input yields an empty string.
    """
    if not html_text:
        return ""

    code_blocks = []

    def stash_code_block(match):
        # Compare against None rather than using "or": an empty
        # <pre><code></code></pre> captures "" in group 1 and must not fall
        # through to group 2 (which is None in that case).
        code = match.group(1) if match.group(1) is not None else match.group(2)
        code_blocks.append(code)
        # NUL delimiters keep "CODE_BLOCK_1" from being a prefix of
        # "CODE_BLOCK_10" when placeholders are restored.
        return f"\x00CODE_BLOCK_{len(code_blocks) - 1}\x00"

    html_without_code = re.sub(
        r'<pre><code>(.*?)</code></pre>|<code>(.*?)</code>',
        stash_code_block,
        html_text,
        flags=re.DOTALL,
    )
    text_without_html = re.sub(r'<[^>]+>', '', html_without_code)

    def restore_code_block(match):
        index = int(match.group(1))
        if index >= len(code_blocks):
            # Not one of our placeholders; leave the text untouched.
            return match.group(0)
        code = code_blocks[index]
        if '\n' in code or len(code) > 80:
            return f"```\n{code}\n```"
        return f"`{code}`"

    # Single pass: restored code is never rescanned for placeholders.
    restored = re.sub(r'\x00CODE_BLOCK_(\d+)\x00', restore_code_block, text_without_html)

    # Decode character references last so entity-encoded "<" / ">" in the
    # source are not mistaken for tags by the stripping step above.
    return html.unescape(restored)