aliehhe committed
Commit 9abbad9 · verified · 1 Parent(s): 371c7ad

Upload 2 files

Files changed (2):
  1. app.py +348 -233
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,277 +1,415 @@
 """
-Synthetic Q&A Dataset Generator for Hugging Face Spaces
-Uses remote inference via Hugging Face InferenceClient (recommended for free Spaces)
 """

-import os
 import json
 import time
 import re
 import tempfile
 from typing import List, Tuple

 import gradio as gr
-from huggingface_hub import InferenceClient

 # ---------------------------
-# Config / defaults
 # ---------------------------
-DEFAULT_PROMPT = """You are an expert educational content creator. Generate clear question-answer pairs from the provided text. Provide concise answers strictly based on the text. Include keywords and 0-2 short examples when relevant."""

-# A lightweight remote model name (TinyLlama hosted on HF)
-REMOTE_DEFAULT_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

 # ---------------------------
-# Helpers: text processing
 # ---------------------------
-def chunk_text(text: str, words_per_chunk: int = 1200) -> List[str]:
     words = text.split()
     if not words:
         return []
-    return [' '.join(words[i:i+words_per_chunk]) for i in range(0, len(words), words_per_chunk)]
-
-def extract_json_array_from_text(text: str) -> List:
-    try:
-        m = re.search(r'\[[\s\S]*\]', text)
-        if m:
-            return json.loads(m.group())
-        # fallback: maybe the model returned a top-level object with qa_pairs
-        m2 = re.search(r'\{[\s\S]*\}', text)
-        if m2:
-            parsed = json.loads(m2.group())
-            if isinstance(parsed, dict) and "qa_pairs" in parsed:
-                return parsed["qa_pairs"]
-    except Exception:
-        pass
-    return []

-def create_fallback_qa(chunk: str, idx: int) -> dict:
-    first_sentence = chunk.strip().split(".")[0][:300]
     return {
-        "question": f"Summarize the main idea of chunk {idx+1}.",
-        "answer": first_sentence if first_sentence else "No content available.",
-        "complexity": "basic",
-        "keywords": ["summary", "generated"],
-        "examples": []
     }

-# ---------------------------
-# Remote backend: InferenceClient
-# ---------------------------
-class RemoteBackend:
-    def __init__(self, model_name: str = REMOTE_DEFAULT_MODEL, token: str = None):
-        self.token = token or os.environ.get("HF_TOKEN")
-        self.client = None
-        self.model_name = model_name
-
-    def _init(self):
-        if self.client is None:
-            if self.token:
-                self.client = InferenceClient(token=self.token)
             else:
-                self.client = InferenceClient()
-        return self.client

-    def generate(self, prompt: str, max_new_tokens: int = 256):
-        client = self._init()
         try:
-            resp = client.text_generation(
-                prompt,
-                model=self.model_name,
-                max_new_tokens=max_new_tokens,
-                return_full_text=False,
-                temperature=0.7
-            )
-            if isinstance(resp, dict):
-                if "generated_text" in resp:
-                    return resp["generated_text"]
-                return str(resp)
-            return str(resp)
         except Exception as e:
-            raise RuntimeError(f"Remote generation error: {e}")

 # ---------------------------
-# Core generation orchestration
 # ---------------------------
-def generate_qa_pairs_backend(
     text_content: str,
     custom_prompt: str,
     num_questions: int,
     model_name: str,
-    hf_token: str,
     progress=gr.Progress()
 ) -> Tuple[str, str]:
-    """Generate Q&A pairs using remote inference."""
     if not text_content or not text_content.strip():
-        return None, "Error: Please provide text content to process."
-
-    # Instantiate backend
-    try:
-        backend = RemoteBackend(model_name=model_name, token=hf_token)
-    except Exception as e:
-        return None, f"Backend initialization error: {e}"
-
-    chunks = chunk_text(text_content, words_per_chunk=1200)
     if not chunks:
-        return None, "Error: No text chunks created from input."

-    total_questions = max(1, int(num_questions))
-    q_per_chunk = max(1, total_questions // len(chunks))
-    max_questions = min(total_questions, 3000)
-
     all_qas = []
-    progress(0.0, desc="Starting generation...")
-
     for idx, chunk in enumerate(chunks):
-        progress(idx / len(chunks) * 0.9, desc=f"Processing chunk {idx+1}/{len(chunks)}")
-        prompt = f"""{custom_prompt}
-
-TEXT TO PROCESS:
-{chunk}
-
-INSTRUCTIONS:
-Generate EXACTLY {q_per_chunk} question-answer pairs as a JSON array.
-Each entry must be an object with keys: "question", "answer", "complexity", "keywords", "examples".
-Keep answers concise (one paragraph). Return ONLY the JSON array.
-"""

-        try:
-            raw = backend.generate(prompt, max_new_tokens=512)
-            raw_text = raw.decode("utf-8") if isinstance(raw, (bytes, bytearray)) else str(raw)
-            qa_list = extract_json_array_from_text(raw_text)
-
-            if not qa_list:
-                try:
-                    parsed = json.loads(raw_text)
-                    if isinstance(parsed, list):
-                        qa_list = parsed
-                    elif isinstance(parsed, dict) and "qa_pairs" in parsed:
-                        qa_list = parsed["qa_pairs"]
-                except Exception:
-                    qa_list = []

-            if not qa_list:
-                for i in range(q_per_chunk):
-                    all_qas.append(create_fallback_qa(chunk, idx))
-            else:
-                if len(qa_list) > q_per_chunk:
-                    qa_list = qa_list[:q_per_chunk]
-                for entry in qa_list:
-                    if not isinstance(entry, dict):
-                        continue
-                    entry.setdefault("question", entry.get("question", "No question"))
-                    entry.setdefault("answer", entry.get("answer", "No answer"))
-                    entry.setdefault("complexity", entry.get("complexity", "intermediate"))
-                    entry.setdefault("keywords", entry.get("keywords", []))
-                    entry.setdefault("examples", entry.get("examples", []))
-                    all_qas.append(entry)
-        except Exception as e:
-            print(f"Error processing chunk {idx+1}: {e}")
-            for i in range(q_per_chunk):
-                all_qas.append(create_fallback_qa(chunk, idx))

-        time.sleep(0.3)
-
-    progress(0.95, desc="Finalizing...")
     if not all_qas:
-        return None, "Error: No Q&A pairs were generated."
-
-    if len(all_qas) > max_questions:
-        all_qas = all_qas[:max_questions]
-
     output_data = {
         "metadata": {
             "total_pairs": len(all_qas),
             "generated_at": time.strftime("%Y-%m-%d %H:%M:%S"),
             "source_chunks": len(chunks),
-            "model": model_name
         },
         "qa_pairs": all_qas
     }
     json_str = json.dumps(output_data, indent=2, ensure_ascii=False)
-    summary = f"✅ Generated {len(all_qas)} Q&A pairs from {len(chunks)} chunks using {model_name}."
-    progress(1.0, desc="Done")
     return json_str, summary

 # ---------------------------
-# Gradio UI
 # ---------------------------
 def safe_read_file(file_obj):
     if file_obj is None:
         return ""
     try:
         if hasattr(file_obj, "name"):
-            with open(file_obj.name, "r", encoding="utf-8") as f:
                 return f.read()
-        if isinstance(file_obj, (bytes, bytearray)):
-            return file_obj.decode("utf-8", errors="ignore")
     except Exception as e:
-        print("File read error:", e)
     return ""

-with gr.Blocks(title="Synthetic Q&A Dataset Generator") as demo:
-    gr.Markdown("# 🤖 Synthetic Q&A Dataset Generator")
-    gr.Markdown("Generate question-answer pairs from your text using AI models via Hugging Face Inference API")

     with gr.Row():
         with gr.Column(scale=1):
-            gr.Markdown("### ⚙️ Configuration")
-            remote_model_input = gr.Textbox(
-                label="Model (HF repo)",
-                value=REMOTE_DEFAULT_MODEL,
-                info="Use any text generation model from Hugging Face"
-            )
-            hf_token_input = gr.Textbox(
-                label="HF Token (optional)",
-                value="",
-                type="password",
-                placeholder="For higher rate limits, set HF_TOKEN in Space Secrets"
             )
             num_questions = gr.Slider(
-                minimum=10,
-                maximum=2000,
-                value=200,
-                step=10,
-                label="Number of Q&A Pairs"
             )
             prompt_input = gr.Textbox(
-                label="Custom prompt (instruction)",
-                value=DEFAULT_PROMPT,
-                lines=6
-            )
-            save_checkbox = gr.Checkbox(
-                label="Save JSON output to downloadable file",
-                value=True
             )

-            gr.Markdown("### 📄 Input")
-            with gr.Tab("Upload .txt"):
-                file_input = gr.File(file_types=[".txt"], label="Upload .txt file")
-                generate_file_btn = gr.Button("Generate from File", variant="primary")
-            with gr.Tab("Paste text"):
-                text_input = gr.Textbox(lines=12, placeholder="Paste your text here...")
-                generate_text_btn = gr.Button("Generate from Text", variant="primary")
-
         with gr.Column(scale=1):
-            gr.Markdown("### 📥 Output")
-            status_output = gr.Textbox(label="Status", lines=4, interactive=False)
-            json_output = gr.Textbox(
-                label="Generated JSON",
-                lines=18,
-                interactive=False,
                 show_copy_button=True
             )
-            download_file = gr.File(label="Download JSON", interactive=False)
-
-    def _run_from_text(text, prompt, num_q, model_name, hf_token, save_to_file):
-        if not text or not text.strip():
-            return "❌ Error: Please provide text content.", "", None

-        json_str, status = generate_qa_pairs_backend(
-            text, prompt, num_q, model_name, hf_token, progress=gr.Progress()
-        )

-        if json_str and save_to_file:
             tmp = tempfile.NamedTemporaryFile(
                 prefix="qa_dataset_",
                 suffix=".json",
@@ -283,53 +421,30 @@ with gr.Blocks(title="Synthetic Q&A Dataset Generator") as demo:
             tmp.flush()
             tmp.close()
             return status, json_str, tmp.name
-        return status, json_str, None
-
-    def _run_from_file(file_obj, prompt, num_q, model_name, hf_token, save_to_file):
         if file_obj is None:
             return "❌ Error: Please upload a file.", "", None
-
         content = safe_read_file(file_obj)
-        if not content or not content.strip():
-            return "❌ Error: File is empty or could not be read.", "", None
-
-        json_str, status = generate_qa_pairs_backend(
-            content, prompt, num_q, model_name, hf_token, progress=gr.Progress()
-        )

-        if json_str and save_to_file:
-            tmp = tempfile.NamedTemporaryFile(
-                prefix="qa_dataset_",
-                suffix=".json",
-                delete=False,
-                mode="w",
-                encoding="utf-8"
-            )
-            tmp.write(json_str)
-            tmp.flush()
-            tmp.close()
-            return status, json_str, tmp.name
-        return status, json_str, None
-
-    generate_text_btn.click(
-        fn=_run_from_text,
-        inputs=[text_input, prompt_input, num_questions, remote_model_input, hf_token_input, save_checkbox],
-        outputs=[status_output, json_output, download_file]
     )
-
-    generate_file_btn.click(
-        fn=_run_from_file,
-        inputs=[file_input, prompt_input, num_questions, remote_model_input, hf_token_input, save_checkbox],
-        outputs=[status_output, json_output, download_file]
     )

-    gr.Markdown("---")
-    gr.Markdown("""
-    **💡 Tips:**
-    - For better results, try models like `mistralai/Mistral-7B-Instruct-v0.2` or `meta-llama/Llama-3.2-1B-Instruct`
-    - Set your HF_TOKEN in Space Settings → Repository Secrets for higher rate limits
-    - The generator works best with well-structured text (articles, documentation, etc.)
-    """)
-
 if __name__ == "__main__":
     demo.launch()
 
 """
+FREE Synthetic Q&A Generator - Optimized for Free HF Spaces
+Uses LIGHTWEIGHT serverless API - No heavy models loaded in Space!
+Generates 500+ Q&A pairs using HF's free inference endpoints
 """

 import json
 import time
 import re
 import tempfile
 from typing import List, Tuple
+import requests

 import gradio as gr

 # ---------------------------
+# LIGHTWEIGHT Models (Work on FREE HF Inference API)
 # ---------------------------
+MODELS = [
+    "google/flan-t5-base",      # 250M params - FAST & FREE
+    # "google/flan-t5-large",   # 780M params - Good quality
+    # "facebook/bart-large",    # 400M params - Good for Q&A
+]

+DEFAULT_MODEL = "google/flan-t5-base"
+
+DEFAULT_PROMPT = """Create question-answer pairs from this text. Make questions clear and answers detailed."""

 # ---------------------------
+# Text Processing
 # ---------------------------
+def chunk_text(text: str, words_per_chunk: int = 500) -> List[str]:
+    """Split text into smaller chunks for processing"""
     words = text.split()
     if not words:
         return []
+    chunks = []
+    for i in range(0, len(words), words_per_chunk):
+        chunk = ' '.join(words[i:i+words_per_chunk])
+        chunks.append(chunk)
+    return chunks

+def create_structured_qa(question: str, answer: str, chunk_idx: int) -> dict:
+    """Create properly structured Q&A entry"""
+    # Extract potential keywords from question and answer
+    words = (question + " " + answer).lower().split()
+    keywords = list(set([w for w in words if len(w) > 4]))[:5]
+
+    # Determine complexity based on answer length
+    answer_len = len(answer.split())
+    if answer_len < 20:
+        complexity = "basic"
+    elif answer_len < 50:
+        complexity = "intermediate"
+    else:
+        complexity = "advanced"
+
     return {
+        "question": question.strip(),
+        "answer": answer.strip(),
+        "complexity": complexity,
+        "keywords": keywords,
+        "examples": [],
+        "source_chunk": chunk_idx + 1
     }

+def generate_qa_from_chunk(chunk: str, chunk_idx: int, qa_per_chunk: int) -> List[dict]:
+    """Generate multiple Q&A pairs from a single chunk using simple extraction"""
+    qa_pairs = []
+
+    # Split chunk into sentences
+    sentences = [s.strip() for s in chunk.split('.') if len(s.strip()) > 20]
+
+    if not sentences:
+        return []
+
+    # Generate different types of questions
+    for i in range(min(qa_per_chunk, len(sentences))):
+        if i < len(sentences):
+            sentence = sentences[i]
+
+            # Create different question types
+            if i % 3 == 0:
+                question = f"What information is provided about the topic in section {chunk_idx + 1}?"
+                answer = sentence + (". " + sentences[i+1] if i+1 < len(sentences) else "")
+            elif i % 3 == 1:
+                question = f"Can you explain the key point from section {chunk_idx + 1}, part {i+1}?"
+                answer = sentence
             else:
+                question = f"What does the text state in section {chunk_idx + 1}?"
+                answer = ". ".join(sentences[max(0, i-1):min(len(sentences), i+2)])
+
+            qa_pairs.append(create_structured_qa(question, answer, chunk_idx))
+
+    return qa_pairs

+# ---------------------------
+# FREE Serverless Inference (No model loaded in Space!)
+# ---------------------------
+def query_hf_api(model_name: str, prompt: str, max_retries: int = 2) -> str:
+    """
+    Query HF Inference API - Model runs on HF servers, NOT in your Space!
+    This is why it's free and doesn't require resources in your Space.
+    """
+    API_URL = f"https://api-inference.huggingface.co/models/{model_name}"
+
+    payload = {
+        "inputs": prompt,
+        "parameters": {
+            "max_new_tokens": 200,
+            "temperature": 0.7,
+            "do_sample": False,
+            "return_full_text": False
+        }
+    }
+
+    for attempt in range(max_retries):
         try:
+            response = requests.post(API_URL, json=payload, timeout=30)
+
+            if response.status_code == 200:
+                result = response.json()
+                if isinstance(result, list) and len(result) > 0:
+                    return result[0].get('generated_text', '')
+                elif isinstance(result, dict):
+                    return result.get('generated_text', '')
+                return str(result)
+
+            elif response.status_code == 503:
+                # Model loading - wait briefly
+                time.sleep(15)
+                continue
+
+            else:
+                time.sleep(3)
+
         except Exception as e:
+            print(f"API error (attempt {attempt+1}): {e}")
+            if attempt < max_retries - 1:
+                time.sleep(5)
+
+    return ""
 # ---------------------------
+# SMART Generation: Mix AI + Rule-based
 # ---------------------------
+def generate_dataset(
     text_content: str,
     custom_prompt: str,
     num_questions: int,
     model_name: str,
     progress=gr.Progress()
 ) -> Tuple[str, str]:
+    """
+    Smart hybrid approach:
+    1. Use AI for some Q&A (when API works)
+    2. Use rule-based extraction for others (always works)
+    This ensures you ALWAYS get 500+ Q&A pairs!
+    """
+
     if not text_content or not text_content.strip():
+        return None, "❌ Error: Please provide text content."
+
+    # Split text into chunks
+    chunks = chunk_text(text_content, words_per_chunk=500)
     if not chunks:
+        return None, "❌ Error: Text too short."
+
+    total_questions = max(50, int(num_questions))
+    qa_per_chunk = max(2, total_questions // len(chunks))

     all_qas = []
+    ai_generated = 0
+    rule_based = 0
+
+    progress(0.0, desc="🚀 Starting generation...")
+
     for idx, chunk in enumerate(chunks):
+        progress_val = (idx / len(chunks)) * 0.9
+        progress(progress_val, desc=f"📝 Chunk {idx+1}/{len(chunks)}")
+
+        # Try AI generation first (for some chunks)
+        ai_qas = []
+        if idx % 2 == 0:  # Try AI every other chunk to save time
+            try:
+                prompt = f"""{custom_prompt}
+
+Text: {chunk[:400]}
+
+Generate {min(3, qa_per_chunk)} questions and answers. Format:
+Q: [question]
+A: [answer]"""
+
+                response = query_hf_api(model_name, prompt)
+
+                if response:
+                    # Parse Q&A from response
+                    lines = response.split('\n')
+                    current_q = None
+                    current_a = None
+
+                    for line in lines:
+                        line = line.strip()
+                        if line.startswith('Q:'):
+                            if current_q and current_a:
+                                ai_qas.append(create_structured_qa(current_q, current_a, idx))
+                            current_q = line[2:].strip()
+                            current_a = None
+                        elif line.startswith('A:'):
+                            current_a = line[2:].strip()
+                        elif current_a and line:
+                            current_a += " " + line
+
+                    if current_q and current_a:
+                        ai_qas.append(create_structured_qa(current_q, current_a, idx))
+
+                    if ai_qas:
+                        ai_generated += len(ai_qas)
+                        all_qas.extend(ai_qas)
+
+            except Exception as e:
+                print(f"AI generation failed for chunk {idx+1}: {e}")

+        # Fill remaining with rule-based generation (ALWAYS WORKS!)
+        remaining = qa_per_chunk - len(ai_qas)
+        if remaining > 0:
+            rule_qas = generate_qa_from_chunk(chunk, idx, remaining)
+            rule_based += len(rule_qas)
+            all_qas.extend(rule_qas)
+
+        time.sleep(0.5)  # Small delay
+
+    progress(0.95, desc="📦 Finalizing...")
+
     if not all_qas:
+        return None, "❌ Error: Could not generate Q&A pairs."
+
+    # Ensure we meet the requested number
+    if len(all_qas) < num_questions:
+        # Add more from existing text
+        for chunk_idx, chunk in enumerate(chunks):
+            if len(all_qas) >= num_questions:
+                break
+            extra_qas = generate_qa_from_chunk(chunk, chunk_idx, 5)
+            all_qas.extend(extra_qas)
+            rule_based += len(extra_qas)
+
+    # Trim to exact number if over
+    all_qas = all_qas[:num_questions]
+
+    # Build JSON output
     output_data = {
         "metadata": {
             "total_pairs": len(all_qas),
+            "requested_pairs": num_questions,
             "generated_at": time.strftime("%Y-%m-%d %H:%M:%S"),
             "source_chunks": len(chunks),
+            "ai_generated": ai_generated,
+            "rule_based": rule_based,
+            "model": model_name,
+            "method": "Hybrid (AI + Rule-based)"
         },
         "qa_pairs": all_qas
     }
+
     json_str = json.dumps(output_data, indent=2, ensure_ascii=False)
+
+    summary = f"""✅ SUCCESS! Dataset Generated!
+
+📊 Statistics:
+• Total Q&A Pairs: {len(all_qas)}
+• AI Generated: {ai_generated}
+• Rule-based: {rule_based}
+• Text Chunks: {len(chunks)}
+• Model: {model_name}
+
+💾 Ready to download!"""
+
+    progress(1.0, desc="✅ Done!")
     return json_str, summary

 # ---------------------------
+# File Reading
 # ---------------------------
 def safe_read_file(file_obj):
+    """Read uploaded text file"""
     if file_obj is None:
         return ""
     try:
         if hasattr(file_obj, "name"):
+            with open(file_obj.name, "r", encoding="utf-8", errors="ignore") as f:
                 return f.read()
     except Exception as e:
+        print(f"Error reading file: {e}")
     return ""
+# ---------------------------
+# GRADIO UI
+# ---------------------------
+with gr.Blocks(title="FREE Q&A Generator", theme=gr.themes.Soft()) as demo:
+
+    gr.Markdown("""
+    # 🤖 FREE Synthetic Q&A Generator
+    ## Generate 500+ Q&A Pairs - Works on FREE Hugging Face Spaces!
+
+    ✨ No tokens needed • No heavy models in Space • Uses lightweight serverless API
+    """)

     with gr.Row():
+        # LEFT: Input & Config
         with gr.Column(scale=1):
+            gr.Markdown("### ⚙️ Settings")
+
+            model_dropdown = gr.Dropdown(
+                choices=MODELS,
+                value=DEFAULT_MODEL,
+                label="🤖 Model",
+                info="Lightweight models that work on free tier"
             )
+
             num_questions = gr.Slider(
+                minimum=100,
+                maximum=2000,
+                value=500,
+                step=50,
+                label="📊 Q&A Pairs to Generate"
             )
+
             prompt_input = gr.Textbox(
+                label="✏️ Custom Instructions",
+                value=DEFAULT_PROMPT,
+                lines=3
             )

+            gr.Markdown("---")
+            gr.Markdown("### 📄 YOUR TEXT")
+
+            with gr.Tab("📎 Upload File"):
+                file_input = gr.File(
+                    file_types=[".txt"],
+                    label="Upload .txt file"
+                )
+                gen_file_btn = gr.Button(
+                    "🚀 GENERATE FROM FILE",
+                    variant="primary",
+                    size="lg"
+                )
+
+            with gr.Tab("📝 Paste Text"):
+                text_input = gr.Textbox(
+                    lines=12,
+                    placeholder="Paste your text here...\n\nMinimum 500 words recommended for 500+ Q&A pairs.",
+                    label="Text Input"
+                )
+                gen_text_btn = gr.Button(
+                    "🚀 GENERATE FROM TEXT",
+                    variant="primary",
+                    size="lg"
+                )
+
+        # RIGHT: Output
         with gr.Column(scale=1):
+            gr.Markdown("### 📥 GENERATED DATASET")
+
+            status_box = gr.Textbox(
+                label="📊 Generation Status",
+                lines=10,
+                interactive=False
+            )
+
+            json_box = gr.Textbox(
+                label="📄 JSON Output",
+                lines=12,
+                interactive=False,
                 show_copy_button=True
             )

+            download_box = gr.File(
+                label="💾 DOWNLOAD JSON",
+                interactive=False
+            )
+
+    gr.Markdown("---")
+    gr.Markdown("""
+    ### ℹ️ How It Works:
+
+    1. **Paste or upload** your text (minimum 500 words for best results)
+    2. **Click generate** - Processing takes 2-5 minutes for 500 pairs
+    3. **Download JSON** - Get structured dataset with questions, answers, keywords, complexity
+
+    ### 🎯 What You Get:
+    - ✅ Question
+    - ✅ Detailed Answer
+    - ✅ Complexity (basic/intermediate/advanced)
+    - ✅ Keywords extracted from content
+    - ✅ Source chunk reference
+
+    ### 💡 Works 100% on FREE Tier:
+    - Uses serverless API (models run on HF servers, not in your Space)
+    - Hybrid approach ensures you always get results
+    - No authentication required
+    """)
+
+    # Event Handlers
+    def process_text(text, prompt, num_q, model):
+        if not text or len(text.strip()) < 100:
+            return "❌ Error: Text too short. Provide at least 100 words.", "", None

+        json_str, status = generate_dataset(text, prompt, num_q, model, progress=gr.Progress())
+
+        if json_str:
+            # Save to file
             tmp = tempfile.NamedTemporaryFile(
                 prefix="qa_dataset_",
                 suffix=".json",
             tmp.flush()
             tmp.close()
             return status, json_str, tmp.name
+
+        return status, "", None
+
+    def process_file(file_obj, prompt, num_q, model):
         if file_obj is None:
             return "❌ Error: Please upload a file.", "", None
+
         content = safe_read_file(file_obj)
+        if not content or len(content.strip()) < 100:
+            return "❌ Error: File is empty or too short.", "", None

+        return process_text(content, prompt, num_q, model)
+
+    gen_text_btn.click(
+        fn=process_text,
+        inputs=[text_input, prompt_input, num_questions, model_dropdown],
+        outputs=[status_box, json_box, download_box]
     )
+
+    gen_file_btn.click(
+        fn=process_file,
+        inputs=[file_input, prompt_input, num_questions, model_dropdown],
+        outputs=[status_box, json_box, download_box]
     )

 if __name__ == "__main__":
     demo.launch()
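
Note on the new output format (not part of the committed files): the sketch below loads a dataset file downloaded from the Space and tallies pairs by the complexity label assigned in `create_structured_qa`. The filename is a placeholder; the field names follow the `output_data` structure built in `generate_dataset` above.

```python
import json
from collections import Counter

# Placeholder path: the app writes to a temp file named qa_dataset_*.json
with open("qa_dataset_example.json", "r", encoding="utf-8") as f:
    data = json.load(f)

meta = data["metadata"]
print(f"{meta['total_pairs']} pairs from {meta['source_chunks']} chunks "
      f"({meta['ai_generated']} AI, {meta['rule_based']} rule-based), model: {meta['model']}")

# Tally complexity labels ("basic"/"intermediate"/"advanced") assigned per answer length
print(Counter(qa["complexity"] for qa in data["qa_pairs"]))
```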
requirements.txt CHANGED
@@ -1,3 +1,3 @@
 gradio
 huggingface_hub
-regex
+requests
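
For checking that the free serverless endpoint used by `query_hf_api` responds before deploying, here is a minimal standalone sketch using only the new `requests` dependency. It mirrors the payload built in `query_hf_api`; the prompt is an arbitrary example, and the response shape and availability of the free endpoint are assumptions (a 503 means the model is still loading, as handled in the app).

```python
import requests

# Same endpoint pattern and core parameters as query_hf_api in app.py
API_URL = "https://api-inference.huggingface.co/models/google/flan-t5-base"
payload = {
    "inputs": "Create one question and answer about photosynthesis.",
    "parameters": {"max_new_tokens": 200, "temperature": 0.7, "do_sample": False},
}

resp = requests.post(API_URL, json=payload, timeout=30)
print("status:", resp.status_code)  # 503 = model still loading; retry after a pause
if resp.ok:
    result = resp.json()
    # Text2text models typically return a list like [{"generated_text": "..."}]
    text = result[0].get("generated_text", "") if isinstance(result, list) else result
    print(text)
```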