Spaces:

cpv2280
/

gpt2-tinystories-generator

Sleeping

App Files Files Community

gpt2-tinystories-generator / app.py

cpv2280

Update app.py

9845e29 verified 5 months ago

raw

history blame contribute delete

3.86 kB


	import os

	# Ensure Java is installed in Hugging Face Space
	# (presumably required by the LanguageTool checker created further down —
	# confirm against the language_tool_python requirements).
	import subprocess  # used only by this bootstrap step

	if not os.path.exists("/usr/bin/java"):
		print("🔄 Installing Java manually...")
		jdk_archive = "openjdk-11+28_linux-x64_bin.tar.gz"
		# check=True raises CalledProcessError as soon as a step fails. The
		# previous os.system() calls ignored exit codes, so a failed download
		# still ended with the "Java is installed!" message and a JAVA_HOME
		# pointing at a directory that did not exist.
		subprocess.run(
			["wget", "https://download.java.net/openjdk/jdk11/ri/" + jdk_archive],
			check=True,
		)
		subprocess.run(["tar", "-xvzf", jdk_archive], check=True)
		subprocess.run(["mv", "jdk-11", "java"], check=True)

		# Expose the unpacked JDK to this process and to any child it spawns.
		java_home = os.path.abspath("java")
		os.environ["JAVA_HOME"] = java_home
		os.environ["PATH"] += os.pathsep + os.path.join(java_home, "bin")

	print("✅ Java is installed!")

	import language_tool_python

	# Initialize LanguageTool with the American English ('en-US') rule set.
	# NOTE(review): `tool` is assigned again further down in this file by a
	# second LanguageTool(...) call, so this instance is replaced before any
	# function uses it — confirm whether two instances are really intended.
	tool = language_tool_python.LanguageTool('en-US')


	import gradio as gr
	import ftfy
	import language_tool_python
	import re
	import torch
	from sentence_transformers import SentenceTransformer, util
	from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer


	device = "cuda" if torch.cuda.is_available() else "cpu"  # ✅ Use GPU if available

	# Load fine-tuned GPT-2 model
	model_path = "cpv2280/gpt2-tinystories-generator"  # Update if needed
	model = AutoModelForCausalLM.from_pretrained(model_path)
	tokenizer = AutoTokenizer.from_pretrained(model_path)

	# Create a text-generation pipeline.
	# `device` used to be computed and then never passed to anything, so the
	# pipeline stayed on its default device even when CUDA was available.
	# The integer form is passed (0 = first CUDA device, -1 = CPU).
	story_generator = pipeline(
		"text-generation",
		model=model,
		tokenizer=tokenizer,
		device=0 if device == "cuda" else -1,
	)

	# Load NLP tools
	# NOTE(review): 'en-UK' is not an ISO region tag (the United Kingdom is
	# 'GB', i.e. 'en-GB'), and this call also replaces the `tool` created
	# earlier with 'en-US'. Confirm which English variant is intended and how
	# the installed language_tool_python version resolves 'en-UK'.
	tool = language_tool_python.LanguageTool('en-UK')
	sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

	def refine_story(text):
		"""Refine a generated story by fixing encoding, grammar, and redundancy.

		Args:
			text: The story text to clean up.

		Returns:
			The text after ftfy encoding repair, application of the
			LanguageTool matches, and removal of immediately repeated words
			and repeated "X and Y" phrases.
		"""
		text = ftfy.fix_text(text)  # Fix encoding
		matches = tool.check(text)  # Check grammar
		text = language_tool_python.utils.correct(text, matches)  # Apply fixes

		# Collapse a word repeated back-to-back ("the the the" -> "the").
		# The trailing \b is essential: without it the back-reference also
		# matched the start of a longer word, turning "he heard" into "heard"
		# and "the then" into "then".
		text = re.sub(r'\b(\w+)(?: \1\b)+', r'\1', text)

		# Collapse a repeated "X and Y" phrase, optionally comma-separated.
		# The same trailing \b stops "Tom and Tim, Tom and Timmy" from being
		# treated as a repetition.
		text = re.sub(r'(\b\w+ and \w+\b)(?:,? \1\b)+', r'\1', text)

		return text

	def detect_inconsistencies(text, threshold=0.3):
		"""Flag adjacent sentences whose embeddings have low cosine similarity.

		The text is split on ". " and each fragment is compared with the one
		that follows it. This is a similarity heuristic, not a real logic check.

		Args:
			text: The story to inspect.
			threshold: Pairs scoring below this cosine similarity are reported.
				Defaults to 0.3.

		Returns:
			A string with one warning per flagged pair, separated by blank
			lines, or a confirmation message when nothing is flagged.
		"""
		all_clear = "✅ No major inconsistencies detected."

		# Drop whitespace-only fragments (e.g. the empty tail left when the text
		# ends with ". "); comparing a sentence with "" only produces noise.
		sentences = [fragment for fragment in text.split(". ") if fragment.strip()]
		if len(sentences) < 2:
			return all_clear

		# Encode every sentence once. Encoding inside the pairwise loop ran the
		# model twice for every interior sentence.
		embeddings = sentence_model.encode(sentences)

		inconsistencies = []
		pairs = zip(sentences, sentences[1:], embeddings, embeddings[1:])
		for current, following, current_vec, following_vec in pairs:
			similarity_score = util.pytorch_cos_sim(current_vec, following_vec)

			if similarity_score.item() < threshold:  # Low similarity -> flag the pair
				inconsistencies.append(f"⚠️ Possible inconsistency detected:\n➡ {current} \n➡ {following}")

		return "\n\n".join(inconsistencies) if inconsistencies else all_clear

	def story_pipeline(prompt):
		"""Run one prompt through generation, refinement, and the consistency check.

		Args:
			prompt: Text passed to the story generator as the prompt.

		Returns:
			A 3-tuple ``(raw_story, refined_story, inconsistencies)``: the text
			taken from the generator's first result, that text after
			``refine_story``, and the report from ``detect_inconsistencies`` run
			on the refined text.
		"""
		# Sampling settings forwarded unchanged to the text-generation pipeline.
		sampling_options = {
			"max_length": 200,
			"do_sample": True,
			"temperature": 1.0,
			"top_p": 0.9,
			"top_k": 50,
			"truncation": True,
		}
		outputs = story_generator(prompt, **sampling_options)
		draft = outputs[0]['generated_text']

		polished = refine_story(draft)
		report = detect_inconsistencies(polished)

		return draft, polished, report

	# ✅ Gradio Interface with Proper Logical Inconsistency Detection
	# One prompt box in; three text boxes out, matching the 3-tuple returned by
	# story_pipeline (raw story, refined story, inconsistency report).
	interface = gr.Interface(
		fn=story_pipeline,
		inputs=gr.Textbox(label="Enter Story Prompt", placeholder="Once upon a time..."),
		outputs=[
			gr.Textbox(label="📖 Generated Story", interactive=True),  # Interactive textbox
			gr.Textbox(label="✅ Refined Story", interactive=True),  # Refined output
			gr.Textbox(label="⚠️ Logical Inconsistencies", interactive=False),  # Read-only report
		],
		title="📖 FableWeaver AI",
		description="Generates AI-powered TinyStories using GPT-2 fine-tuned on TinyStories. Automatically refines the story and detects logical inconsistencies."
	)

	# Launch Gradio app
	# `share` is a boolean flag. The previous string "True" only behaved like
	# True because any non-empty string is truthy ("False" would have enabled
	# sharing as well).
	interface.launch(share=True)