Spaces:
Sleeping
Sleeping
File size: 2,804 Bytes
6dfe69f 026b888 6dfe69f 026b888 6dfe69f a168dcd db269c6 3eb6cb5 db269c6 026b888 3eb6cb5 026b888 db269c6 3eb6cb5 db269c6 3eb6cb5 026b888 3eb6cb5 026b888 db269c6 6dfe69f 026b888 6dfe69f db269c6 6dfe69f db269c6 026b888 db269c6 6dfe69f 3eb6cb5 026b888 3eb6cb5 026b888 fdb3373 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from PIL import Image
import pytesseract
import numpy as np
import re
# Load Fake News Detection model
# These statements run once at module import. The tokenizer and the
# sequence-classification model are both loaded from the checkpoint named by
# MODEL_NAME and wrapped in a text-classification pipeline; predict_text()
# calls `classifier` and reads the "label" and "score" keys of its first result.
MODEL_NAME = "jy46604790/Fake-News-Bert-Detect"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
# Load summarization model
# summarize_text() calls `summarizer` and reads "summary_text" from its first
# result.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
def clean_text(text):
    """Flatten OCR output into a single ASCII-only line.

    Each run of newlines and each run of non-ASCII characters is replaced by
    one space (in that order), then leading and trailing whitespace is
    stripped. Interior runs of spaces are not collapsed.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    for pattern in (r'\n+', r'[^\x00-\x7F]+'):
        text = re.sub(pattern, ' ', text)
    return text.strip()
def ocr_image(image):
    """Run Tesseract OCR on an image and return the cleaned text.

    Args:
        image: A PIL image, any other value accepted by np.uint8 (converted
            to a PIL image first), or None.

    Returns:
        The OCR text passed through clean_text(), or "" when image is None.
    """
    if image is None:
        return ""
    # Anything that is not already a PIL image is cast to uint8 and wrapped.
    pil_image = image if isinstance(image, Image.Image) else Image.fromarray(np.uint8(image))
    return clean_text(pytesseract.image_to_string(pil_image))
def summarize_text(text):
    """Summarize text with the module-level summarization pipeline.

    Args:
        text: The text to summarize.

    Returns:
        The text unchanged when it is shorter than 50 characters, otherwise
        the "summary_text" of the pipeline's first result.
    """
    # Very short inputs are returned as-is rather than summarized.
    if len(text) < 50:
        return text
    # truncation=True clips over-long input (e.g. a full page of OCR text) to
    # the model's maximum input length instead of letting the call fail.
    summary = summarizer(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']
def predict_text(text):
    """Classify news text as real or fake and format the result for display.

    Args:
        text: The news text to classify. May be None or blank.

    Returns:
        A string with the predicted label and the classifier score as a
        percentage rounded to two decimals, or a prompt asking for input
        when text is None, empty, or whitespace-only.
    """
    # Guard against None as well as blank strings so a missing value does not
    # raise AttributeError on .strip().
    if not text or not text.strip():
        return "⚠️ Please enter some text."
    # Truncate long input before classification.
    # NOTE(review): assumes the checkpoint accepts at most 512 tokens — confirm
    # against the model card.
    result = classifier(text, truncation=True, max_length=512)[0]
    # LABEL_1 is shown as real news; any other label is shown as fake news.
    label = "🟢 Real News" if result["label"] == "LABEL_1" else "🔴 Fake News"
    confidence = round(result["score"] * 100, 2)
    return f"{label} ({confidence}% confidence)"
# This function combines OCR + summarization + prediction
def ocr_summarize_predict(image):
    """Run OCR on an image, summarize the text, and classify the summary.

    Args:
        image: The image to analyse, passed straight to ocr_image().

    Returns:
        A (text, prediction) tuple. When OCR yields no text, the first
        element is a warning message and the second is an empty string;
        otherwise the first element is the summarized text and the second
        is the result of predict_text() on that summary.
    """
    raw_text = ocr_image(image)
    if not raw_text:
        return "⚠️ No text found in image.", ""
    summarized_text = summarize_text(raw_text)
    prediction = predict_text(summarized_text)
    return summarized_text, prediction
# Build the two-tab UI: one tab classifies typed text, the other runs
# OCR -> summarization -> classification on an uploaded image.
with gr.Blocks() as app:
    gr.Markdown("📰 **Fake News Detector with OCR + Summarization + Auto Prediction**\n\nUpload an image or enter text.")
    with gr.Tab("Text Input"):
        text_input = gr.Textbox(lines=4, label="Enter News Text")
        text_output = gr.Textbox(label="Prediction")
        text_btn = gr.Button("Detect")
        text_btn.click(predict_text, inputs=text_input, outputs=text_output)
    with gr.Tab("Image Upload"):
        img_input = gr.Image(type="numpy", label="Upload News Image")
        extracted_text = gr.Textbox(label="Extracted & Summarized Text")
        prediction_output = gr.Textbox(label="Prediction")
        # Run OCR + summarize + predict whenever the image value changes;
        # there is no button on this tab.
        img_input.change(ocr_summarize_predict, inputs=img_input, outputs=[extracted_text, prediction_output])
        # NOTE: a gr.Button wired to the same handler, inputs and outputs via
        # .click() can be added here if manual triggering is preferred.
app.launch()
|