Spaces:
Running
Running
File size: 8,205 Bytes
768c740 7d19342 b8a6b71 7d19342 6784902 20bfd0b 7acb803 56f7cbb 7acb803 b8a6b71 8162519 7d19342 7acb803 7d19342 7acb803 20bfd0b 7d19342 eab3a5f 8162519 eab3a5f 7acb803 0fc20d7 7acb803 0fc20d7 7acb803 0fc20d7 8162519 0fc20d7 8162519 0fc20d7 7acb803 8162519 0fc20d7 7acb803 0fc20d7 7acb803 0fc20d7 8162519 7acb803 7d19342 7acb803 20bfd0b 7acb803 eab3a5f 8162519 eab3a5f 8162519 eab3a5f 7acb803 8162519 7acb803 8162519 7acb803 8162519 b5a890a 7acb803 eab3a5f e5db2cc eab3a5f 8162519 eab3a5f 8162519 eab3a5f 8162519 eab3a5f 8162519 eab3a5f 8162519 eab3a5f 8162519 eab3a5f e5db2cc 7acb803 eab3a5f 7acb803 8162519 7acb803 aac72ea 7acb803 aac72ea 7acb803 aac72ea 7acb803 aac72ea 7acb803 aac72ea 7acb803 8162519 aac72ea 7acb803 b5a890a 8162519 aac72ea 8162519 b5a890a 7acb803 aac72ea 7d19342 aac72ea 8162519 7acb803 aac72ea 7acb803 8162519 20bfd0b 7acb803 20bfd0b 8162519 7d19342 768c740 eab3a5f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
import gradio as gr
import requests
from bs4 import BeautifulSoup
from nltk import download, sent_tokenize
import google.generativeai as genai
import os
import re
import tempfile
import asyncio
# Download the NLTK tokenizer data at import time; sent_tokenize() is used
# later to split the reviewed text into sentences.
download('punkt')
download('punkt_tab')

# Configure the Gemini API from the environment. Fail fast when the key is
# missing so the app never starts half-configured.
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    raise ValueError("GEMINI_API_KEY not found in environment variables. Please set it in your environment.")
genai.configure(api_key=api_key)

# Use gemini-1.5-flash for faster text analysis. The name lives in one place
# so the constructor call and the error message cannot drift apart.
MODEL_NAME = 'gemini-1.5-flash'
try:
    model = genai.GenerativeModel(MODEL_NAME)
except Exception as e:
    print(f"Error initializing model: {e}")
    print("Available models:")
    try:
        for m in genai.list_models():
            print(m.name)
    except Exception as list_error:
        # Listing models is only a diagnostic aid; if it fails too, it must
        # not replace the original initialization error raised below.
        print(f"(could not list models: {list_error})")
    raise ValueError(
        f"Failed to initialize {MODEL_NAME}. Check available models above and update the model name."
    ) from e
# Instruction prompt for Gemini. review_blog() prepends it to the text under
# review ("PROMPT + '\n\nText to analyze:\n' + <text>"). It requests four
# checks (grammar, legal policy violations, crude/abusive language, sensitive
# topics) and pins the markdown layout of the returned report.
# NOTE(review): the Grammar section bullets every field ("- SUGGESTION:",
# "- ISSUE:") while the Legal section bullets only "CONTENT" -- confirm
# whether that difference is intentional before relying on the format.
PROMPT = """
You are an AI content reviewer. Analyze the provided text for the following:
1. *Grammar Issues*: Identify and suggest corrections for grammatical errors.
2. *Legal Policy Violations*: Flag content that may violate common legal policies (e.g., copyright infringement, defamation, incitement to violence).
3. *Crude/Abusive Language*: Detect crude, offensive, or abusive language.
4. *Sensitive Topics*: Identify content related to sensitive topics such as racism, gender bias, or other forms of discrimination.
Return the results in the following markdown format:
# Blog Review Report
## Grammar Corrections
1. [Heading of issue]
- CONTENT: [Exact line or part of text with the issue]
- SUGGESTION: [Suggested correction]
- ISSUE: [Description of the issue]
2. [Heading of next issue]
- CONTENT: [Exact line or part of text with the issue]
- SUGGESTION: [Suggested correction]
- ISSUE: [Description of the issue]
[Continue numbering for additional issues or state "None detected"]
## Legal Policy Violations
- CONTENT: [Exact line or part of text with the issue]
SUGGESTION: [Suggested action or correction]
ISSUE: [Description of the legal violation]
[Or state "None detected"]
## Crude/Abusive Language
- [List instances of crude or abusive language or "None detected"]
## Sensitive Topics
- [List instances of sensitive topics or "None detected"]
For each issue, provide the exact text, a suggested correction or action, and a concise explanation. Be precise and ensure the output strictly follows the specified format.
"""
async def fetch_url_content(url):
    """Download a web page and return its readable text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text as a single space-joined string; the literal
        "No readable content found on the page." when nothing could be
        extracted; or a message starting with "Error fetching URL:" when the
        request or the parsing fails. The coroutine does not raise for
        ordinary failures -- callers detect them by the "Error" prefix.
    """
    container_tags = ['p', 'article', 'div']
    try:
        # requests is a blocking client: run it in a worker thread so the
        # event loop (and any asyncio timeout wrapped around this call)
        # stays responsive while the request is in flight.
        response = await asyncio.to_thread(requests.get, url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Keep only the outermost matching elements. A <div> already contains
        # the text of every <p>/<div> nested inside it, so collecting all
        # matches would repeat the same text once per nesting level.
        blocks = [
            element.get_text(' ', strip=True)
            for element in soup.find_all(container_tags)
            if element.find_parent(container_tags) is None
        ]
        # Drop empty fragments so a page of empty containers falls through
        # to the "no readable content" message instead of returning spaces.
        content = ' '.join(block for block in blocks if block)
        return content if content else "No readable content found on the page."
    except Exception as e:
        # Deliberately broad: every failure is reported to the caller as an
        # "Error"-prefixed string rather than as an exception.
        return f"Error fetching URL: {str(e)}"
def _strip_markdown_fence(report):
    """Remove one wrapping ```markdown ... ``` fence from a model reply.

    Only a fence that opens the reply is treated as a wrapper, and the
    closing fence is removed only at the very end of the text, so code
    fences inside the report body are left untouched.
    """
    report = report.strip()
    opening = re.match(r'```(?:markdown|md)[ \t]*\n', report)
    if opening is None:
        return report
    return re.sub(r'\n?```[ \t]*\Z', '', report[opening.end():])


async def review_blog(text_input, url_input):
    """Review blog text (or the text behind a URL) with Gemini.

    Exactly one of the two inputs must be non-blank.

    Args:
        text_input: Blog text pasted by the user, or empty/None.
        url_input: URL of a blog page to fetch, or empty/None.

    Returns:
        A 3-tuple matching the click handler's outputs: the button label
        (always "Review Blog"), the markdown report or an "Error..." message,
        and a ``gr.update`` for the download component (visible with the
        report file path on success, hidden on any failure).
    """
    idle_label = "Review Blog"
    # Placeholder fetch_url_content() returns for pages without extractable
    # text; it is not blog content and must not be sent for review.
    no_content_message = "No readable content found on the page."

    def failure(message):
        # Every error path leaves the button idle and hides the download.
        return idle_label, message, gr.update(visible=False)

    # Normalize first so None and whitespace-only values count as "not provided".
    text_value = (text_input or "").strip()
    url_value = (url_input or "").strip()

    if text_value and url_value:
        return failure("Error: Please provide input in either the Text or URL tab, but not both.")
    if not text_value and not url_value:
        return failure("Error: No input provided.")

    async def run_review():
        # Resolve the text to analyze, fetching it when a URL was given.
        if url_value:
            content = await fetch_url_content(url_value)
            if content.startswith("Error") or content == no_content_message:
                return failure(content)
        else:
            content = text_value

        # One sentence per line for the model.
        analysis_text = "\n".join(sent_tokenize(content))

        try:
            # generate_content is a blocking call; keep it off the event loop.
            response = await asyncio.to_thread(
                model.generate_content,
                PROMPT + "\n\nText to analyze:\n" + analysis_text,
            )
            report = _strip_markdown_fence(response.text)
        except Exception as e:
            message = f"Error analyzing content with Gemini: {str(e)}. Please check your API key, network connection, or model availability."
            print("Available models:")
            try:
                names = await asyncio.to_thread(
                    lambda: [m.name for m in genai.list_models()]
                )
                for name in names:
                    print(name)
            except Exception as list_error:
                # Diagnostics only: never let this hide the real error above.
                print(f"(could not list models: {list_error})")
            return failure(message)

        # Persist the report so the File component can offer it for download.
        # delete=False is required: the file must outlive this function.
        try:
            with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False, encoding='utf-8') as temp_file:
                temp_file.write(report)
                download_path = temp_file.name
        except Exception as e:
            return failure(f"Error creating temporary file: {str(e)}")

        report = f"**Report generated, please scroll down to view.**\n\n{report}"
        return idle_label, report, gr.update(visible=True, value=download_path)

    try:
        # Bound the whole fetch + analysis pipeline to 30 seconds.
        return await asyncio.wait_for(run_review(), timeout=30)
    except asyncio.TimeoutError:
        return failure("Error: Process timed out after 30 seconds.")
    except Exception as e:
        return failure(f"Unexpected error: {str(e)}")
# Stylesheet handed to gr.Blocks(css=...). It loads the Inter font from Google
# Fonts, styles the button carrying the "review-btn" class (base, hover and
# disabled states, the disabled state showing a CSS spinner), and applies the
# font to tab buttons, inputs and textareas.
# NOTE(review): nothing in the visible code disables the button, so confirm
# that the ":disabled" rules are actually reachable.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
.gradio-container {
font-family: 'Inter', sans-serif !important;
}
.review-btn {
transition: all 0.3s ease;
font-weight: 500;
background-color: #2c3e50;
color: white;
border-radius: 8px;
padding: 10px 20px;
position: relative;
}
.review-btn:hover {
background-color: #4CAF50;
color: white;
transform: scale(1.05);
}
.review-btn:disabled {
opacity: 0.7;
cursor: not-allowed;
}
.review-btn:disabled::before {
content: '';
display: inline-block;
width: 16px;
height: 16px;
border: 2px solid #fff;
border-radius: 50%;
border-top-color: transparent;
animation: spin 1s linear infinite;
margin-right: 8px;
vertical-align: middle;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
.tab-nav button {
font-family: 'Inter', sans-serif;
font-weight: 500;
}
input, textarea {
font-family: 'Inter', sans-serif;
}
"""
# Gradio UI with Tabs: one tab per input kind (pasted text or URL), a single
# review button, the rendered markdown report and a download slot that stays
# hidden until a report file exists.
# NOTE(review): the "π" in the two headings looks like a mis-decoded emoji;
# it is kept as-is here -- confirm the intended glyph.
with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as demo:
    gr.Markdown("# π AI Blog Reviewer")
    gr.Markdown("Enter blog text or a URL to review for grammar, legal issues, crude language, and sensitive topics. The report is generated in markdown format.")

    with gr.Tabs():
        with gr.TabItem("Text"):
            text_input = gr.Textbox(lines=8, label="Blog Content", placeholder="Paste your blog text here...")
        with gr.TabItem("URL"):
            url_input = gr.Textbox(lines=1, label="Blog URL", placeholder="Enter the blog URL here...")

    status_button = gr.Button(value="Review Blog", elem_classes=["review-btn"])
    gr.Markdown("### π Review Report")
    report_output = gr.Markdown()
    download_btn = gr.File(label="Download Report", visible=False)

    # Bind the review button to process inputs. The three outputs line up
    # with the 3-tuple returned by review_blog: button label, report
    # markdown, download-file update.
    status_button.click(
        fn=review_blog,
        inputs=[text_input, url_input],
        outputs=[status_button, report_output, download_btn],
    )

demo.launch()