import os

# Ensure Java is installed in the Hugging Face Space (LanguageTool needs a JRE)
if not os.path.exists("/usr/bin/java"):
    print("⏳ Installing Java manually...")
    os.system("wget https://download.java.net/openjdk/jdk11/ri/openjdk-11+28_linux-x64_bin.tar.gz")
    os.system("tar -xvzf openjdk-11+28_linux-x64_bin.tar.gz")
    os.system("mv jdk-11 java")
    os.environ["JAVA_HOME"] = os.path.abspath("java")
    os.environ["PATH"] += os.pathsep + os.path.join(os.path.abspath("java"), "bin")
    print("✅ Java is installed!")

import gradio as gr
import ftfy
import language_tool_python
import re
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"  # ✅ Use GPU if available

# Load fine-tuned GPT-2 model
model_path = "cpv2280/gpt2-tinystories-generator"  # Update if needed
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Create a text-generation pipeline on the selected device
story_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)

# Load NLP tools
tool = language_tool_python.LanguageTool('en-US')  # grammar checker (requires Java)
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')  # sentence embeddings for consistency checks

def refine_story(text):
    """Refines the generated story by fixing encoding, grammar, and redundancy."""
    text = ftfy.fix_text(text)  # Fix encoding artifacts
    matches = tool.check(text)  # Check grammar
    text = language_tool_python.utils.correct(text, matches)  # Apply fixes

    # Remove redundant words/phrases
    text = re.sub(r'\b(\w+)( \1\b)+', r'\1', text)  # Collapse duplicated words ("the the" -> "the")
    text = re.sub(r'\b(\w+ and \w+)(,? \1\b)+', r'\1', text)  # Collapse repeated phrases
    return text
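
# Illustrative example (actual LanguageTool corrections may differ):
#   refine_story("She walk to to the park.")  # -> roughly "She walks to the park."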

def detect_inconsistencies(text):
    """Checks for logical inconsistencies by comparing adjacent-sentence similarities."""
    sentences = text.split(". ")  # naive sentence split
    inconsistencies = []

    # Compare each sentence with the next one
    for i in range(len(sentences) - 1):
        similarity_score = util.pytorch_cos_sim(
            sentence_model.encode(sentences[i]), sentence_model.encode(sentences[i + 1])
        )
        if similarity_score.item() < 0.3:  # If similarity is low, flag as inconsistent
            inconsistencies.append(
                f"⚠️ **Possible inconsistency detected:**\n➡ {sentences[i]}\n➡ {sentences[i + 1]}"
            )
    return "\n\n".join(inconsistencies) if inconsistencies else "✅ No major inconsistencies detected."

def story_pipeline(prompt):
    """Generates a story, refines it, and checks for inconsistencies."""
    # Generate the story
    generated = story_generator(
        prompt, max_length=200, do_sample=True, temperature=1.0, top_p=0.9, top_k=50, truncation=True
    )
    raw_story = generated[0]['generated_text']

    # Refine the generated story
    refined_story = refine_story(raw_story)

    # Detect logical inconsistencies
    inconsistencies = detect_inconsistencies(refined_story)

    return raw_story, refined_story, inconsistencies
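
# Example usage (output varies because of sampling):
#   raw, refined, issues = story_pipeline("Once upon a time, a brave little fox")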

# ✅ Gradio interface with logical-inconsistency detection
interface = gr.Interface(
    fn=story_pipeline,
    inputs=gr.Textbox(label="Enter Story Prompt", placeholder="Once upon a time..."),
    outputs=[
        gr.Textbox(label="📖 Generated Story", interactive=True),   # Raw model output
        gr.Textbox(label="✅ Refined Story", interactive=True),     # Cleaned-up output
        gr.Textbox(label="⚠️ Logical Inconsistencies", interactive=False),  # Flagged sentence pairs
    ],
    title="📖 FableWeaver AI",
    description="Generates AI-powered TinyStories using GPT-2 fine-tuned on TinyStories. Automatically refines the story and detects logical inconsistencies.",
)

# Launch the Gradio app
interface.launch(share=True)