"""Gradio app that paraphrases and grammar-corrects text sentence by sentence.

Pipeline: the input passage is segmented into sentences by a Groq-hosted LLM,
each sentence is paraphrased with a T5 model, paraphrases are filtered by
semantic similarity to the original sentence, and the first surviving
paraphrase is passed through a grammar-correction model.
"""

import os

# Must be set BEFORE TensorFlow is imported, otherwise the native (C++) log
# output emitted during import is not silenced.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import random
import warnings

import gradio as gr
import requests
import tensorflow as tf
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, T5ForConditionalGeneration, pipeline
from transformers import logging

# Set environment configurations
tf.get_logger().setLevel('ERROR')
warnings.filterwarnings("ignore")
logging.set_verbosity_error()

GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY is not set. Please add it to the Secrets in your Hugging Face Space settings.")

GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
GROQ_MODEL = "llama3-8b-8192"
# Upper bound (seconds) on a single Groq request so a stalled connection
# cannot hang the UI indefinitely.
GROQ_TIMEOUT_SECONDS = 60

# Marker the LLM is asked to append after every sentence; we split on it.
SENTENCE_DELIMITER = "1!2@3#"
_SEGMENT_INSTRUCTION = (
    f"you are to segment the sentence by adding '{SENTENCE_DELIMITER}' at the end of each "
    "sentence. Return only the segmented sentences only return the modified "
    "passage and nothing else do not add your responses"
)

# Grammarly/coedit-large is instruction-tuned: the task is given as a
# natural-language prefix in front of the text to edit.
GRAMMAR_INSTRUCTION = "Fix grammatical errors in this sentence: "

_TERMINAL_PUNCTUATION = (".", "!", "?")
# Closing quotes/brackets that may legitimately follow terminal punctuation.
_CLOSING_CHARS = "\"')]}\u201d\u2019"


def segment_into_sentences_groq(passage):
    """Split *passage* into sentences using the Groq chat-completions API.

    The model is instructed to append ``SENTENCE_DELIMITER`` after every
    sentence; the reply is split on that marker.

    Args:
        passage: The text to segment.

    Returns:
        A list of non-empty, whitespace-stripped sentence strings.

    Raises:
        ValueError: If the API answers with a non-200 status, or the reply
            does not have the expected ``choices[0].message.content`` string.
        requests.RequestException: On network failure or timeout.
    """
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": GROQ_MODEL,
        "messages": [
            {"role": "system", "content": _SEGMENT_INSTRUCTION},
            {
                "role": "user",
                "content": f"{_SEGMENT_INSTRUCTION}. \nhere is the passage:{passage}",
            },
        ],
        "temperature": 1.0,
        "max_tokens": 8192,
    }

    response = requests.post(
        GROQ_API_URL,
        json=payload,
        headers=headers,
        timeout=GROQ_TIMEOUT_SECONDS,
    )
    if response.status_code != 200:
        raise ValueError(f"Groq API error: {response.text}")

    try:
        segmented_text = response.json()["choices"][0]["message"]["content"]
    except (IndexError, KeyError, TypeError, ValueError) as err:
        raise ValueError("Unexpected response structure from Groq API.") from err
    if not isinstance(segmented_text, str):
        # e.g. ``"content": null`` -- nothing we can split.
        raise ValueError("Unexpected response structure from Groq API.")

    pieces = (piece.strip() for piece in segmented_text.split(SENTENCE_DELIMITER))
    return [piece for piece in pieces if piece]


def _join_sentences(sentences):
    """Join sentences with single spaces, ensuring each ends with punctuation.

    A period is appended only when a sentence does not already end in ``.``,
    ``!`` or ``?`` (ignoring trailing closing quotes/brackets), so questions
    and exclamations keep their original punctuation. Blank entries are
    dropped; an empty input yields an empty string.
    """
    finished = []
    for sentence in sentences:
        sentence = sentence.strip()
        if not sentence:
            continue
        if not sentence.rstrip(_CLOSING_CHARS).endswith(_TERMINAL_PUNCTUATION):
            sentence += "."
        finished.append(sentence)
    return " ".join(finished)


class TextEnhancer:
    """Paraphrase + grammar-correct text while checking semantic similarity."""

    def __init__(self):
        """Load the paraphrase, grammar and similarity models onto the device.

        Uses CUDA when ``torch.cuda.is_available()``, otherwise the CPU.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        self.paraphrase_tokenizer = AutoTokenizer.from_pretrained(
            "prithivida/parrot_paraphraser_on_T5"
        )
        self.paraphrase_model = T5ForConditionalGeneration.from_pretrained(
            "prithivida/parrot_paraphraser_on_T5"
        ).to(self.device)

        self.grammar_pipeline = pipeline(
            "text2text-generation",
            model="Grammarly/coedit-large",
            # The pipeline API takes a device index: 0 = first GPU, -1 = CPU.
            device=0 if self.device == "cuda" else -1,
        )

        self.similarity_model = SentenceTransformer(
            'paraphrase-MiniLM-L6-v2'
        ).to(self.device)

    def enhance_text(self, text, min_similarity=0.8, max_variations=3):
        """Return an enhanced version of *text*.

        Each sentence is paraphrased ``max_variations`` times; the first
        paraphrase whose cosine similarity to the original sentence is at
        least ``min_similarity`` is grammar-corrected and used. If none
        qualifies, the original sentence is kept unchanged.

        Args:
            text: The passage to enhance.
            min_similarity: Minimum cosine similarity (0-1) a paraphrase must
                reach to be accepted.
            max_variations: Number of paraphrase candidates (and beams) to
                generate per sentence.

        Returns:
            The enhanced passage, or ``""`` when *text* is empty/blank.

        Raises:
            ValueError: Propagated from ``segment_into_sentences_groq``.
        """
        if not text or not text.strip():
            # Nothing to do; avoid a pointless remote API call.
            return ""

        enhanced_sentences = []
        for sentence in segment_into_sentences_groq(text):
            if not sentence.strip():
                continue

            inputs = self.paraphrase_tokenizer(
                f"paraphrase: {sentence}",
                return_tensors="pt",
                padding=True,
                max_length=150,
                truncation=True,
            ).to(self.device)

            outputs = self.paraphrase_model.generate(
                **inputs,
                # Budget scales with the sentence's word count plus headroom.
                max_length=len(sentence.split()) + 20,
                num_return_sequences=max_variations,
                num_beams=max_variations,
                # NOTE(review): sampling is not enabled here, so temperature
                # is presumably ignored by beam search; kept to preserve the
                # existing generation settings -- confirm before removing.
                temperature=0.7,
            )
            paraphrases = [
                self.paraphrase_tokenizer.decode(output, skip_special_tokens=True)
                for output in outputs
            ]

            sentence_embedding = self.similarity_model.encode(sentence)
            paraphrase_embeddings = self.similarity_model.encode(paraphrases)
            # cos_sim returns a 1 x len(paraphrases) matrix; row 0 holds the
            # similarity of the original sentence to every candidate.
            similarities = util.cos_sim(sentence_embedding, paraphrase_embeddings)

            valid_paraphrases = [
                candidate
                for candidate, score in zip(paraphrases, similarities[0])
                if float(score) >= min_similarity
            ]

            if not valid_paraphrases:
                enhanced_sentences.append(sentence)
                continue

            corrected = self.grammar_pipeline(
                f"{GRAMMAR_INSTRUCTION}{valid_paraphrases[0]}",
                max_length=150,
                num_return_sequences=1,
            )[0]["generated_text"]
            enhanced_sentences.append(corrected)

        return _join_sentences(enhanced_sentences)


def create_interface():
    """Build the Gradio interface wrapping a single ``TextEnhancer`` instance.

    Returns:
        A ``gr.Interface`` with a text box and a similarity slider (50-100 %)
        as inputs and a text box as output.
    """
    enhancer = TextEnhancer()

    def process_text(text, similarity_threshold):
        """Run the enhancer; the slider percentage is converted to 0-1."""
        try:
            return enhancer.enhance_text(
                text,
                min_similarity=similarity_threshold / 100,
            )
        except Exception as e:
            # UI boundary: show the failure to the user instead of crashing.
            return f"Error: {str(e)}"

    interface = gr.Interface(
        fn=process_text,
        inputs=[
            gr.Textbox(
                label="Input Text",
                placeholder="Enter text to enhance...",
                lines=10,
            ),
            gr.Slider(
                minimum=50,
                maximum=100,
                value=80,
                label="Minimum Semantic Similarity (%)",
            ),
        ],
        outputs=gr.Textbox(label="Enhanced Text", lines=10),
        title="Text Enhancement System",
        description="Improve text quality while preserving original meaning",
    )
    return interface


if __name__ == "__main__":
    interface = create_interface()
    interface.launch()