ranamhamoud committed
Commit f317c15
1 parent: bcb8e3f

Update app.py
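
This commit replaces the streaming Storytell chat app (a gr.ChatInterface backed by TextIteratorStreamer generation and MongoDB story persistence) with a simpler two-mode Gradio demo: a generate mode and an edit mode that, for now, share the same PEFT model, switched by a checkbox. It also changes the base checkpoint from meta-llama/Llama-2-7b-chat-hf to meta-llama/Llama-2-7b-hf.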

Files changed (1): app.py (+45 -131)
app.py CHANGED
@@ -1,36 +1,16 @@
  import os
- import re
  import torch
- from threading import Thread
- from typing import Iterator
- from mongoengine import connect, Document, StringField, SequenceField
- import gradio as gr
- import spaces
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextIteratorStreamer
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
  from peft import PeftModel

  # Constants
- MAX_MAX_NEW_TOKENS = 2048
- DEFAULT_MAX_NEW_TOKENS = 930
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
+ DEFAULT_MAX_NEW_TOKENS = 930
+ import gradio as gr
+ from typing import Iterator, List, Tuple

- # # Description and License Texts
- # DESCRIPTION = """
- # # ✨Storytell AI🧑🏽‍💻
- # Welcome to the **Storytell AI** space, crafted with care by Ranam & George. Dive into the world of educational storytelling with our model. This iteration of the Llama 2 model with 7 billion parameters is fine-tuned to generate educational stories that engage and educate. Enjoy a journey of discovery and creativity—your storytelling lesson begins here! You can prompt this model to explain any computer science concept. **Please check the examples below**.
- # """
- LICENSE = """
- ---
- As a derivative work of [Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) by Meta,
- this demo is governed by the original [license](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/LICENSE.txt) and [acceptable use policy](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/USE_POLICY.md).
- """
-
- # GPU Check and add CPU warning
- if not torch.cuda.is_available():
-     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
-
- # Model and Tokenizer Configuration
- model_id = "meta-llama/Llama-2-7b-chat-hf"
+ # Model Configuration for Generating Mode
+ model_id = "meta-llama/Llama-2-7b-hf"
  bnb_config = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_use_double_quant=False,
@@ -38,117 +18,51 @@ bnb_config = BitsAndBytesConfig(
      bnb_4bit_compute_dtype=torch.bfloat16
  )
  base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", quantization_config=bnb_config)
- model = PeftModel.from_pretrained(base_model, "ranamhamoud/storytell")
+ model_generate = PeftModel.from_pretrained(base_model, "ranamhamoud/storytell")
  tokenizer = AutoTokenizer.from_pretrained(model_id)
  tokenizer.pad_token = tokenizer.eos_token

- # MongoDB Connection
- PASSWORD = os.environ.get("MONGO_PASS")
- connect(host=f"mongodb+srv://ranamhammoud11:{PASSWORD}@stories.zf5v52a.mongodb.net/")
-
- # MongoDB Document
- class Story(Document):
-     message = StringField()
-     content = StringField()
-     story_id = SequenceField(primary_key=True)
-
- # Utility function for prompts
- def make_prompt(entry):
-     return f"### Human: Don't repeat the assesments, limit to 500 words {entry} ### Assistant:"
-     # f"TELL A STORY, RELATE TO COMPUTER SCIENCE, INCLUDE ASSESMENTS. MAKE IT REALISTIC AND AROUND 800 WORDS, END THE STORY WITH "THE END.": {entry}"
-
- def process_text(text):
-     text = re.sub(r'\[.*?\]', '', text, flags=re.DOTALL)
-
-     return text
- custom_css = """
- body, input, button, textarea, label {
-     font-family: Arial, sans-serif;
-     font-size: 24px;
- }
- .gr-chat-interface .gr-chat-message-container {
-     font-size: 14px;
- }
- .gr-button {
-     font-size: 14px;
-     padding: 12px 24px;
- }
- .gr-input {
-     font-size: 14px;
- }
- """
-
- # Gradio Function
- @spaces.GPU
- def generate(
-     message: str,
-     chat_history: list[tuple[str, str]],
-     max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
-     temperature: float = 0.6,
-     top_p: float = 0.7,
-     top_k: int = 20,
-     repetition_penalty: float = 1.0,
- ) -> Iterator[str]:
-     conversation = []
-     for user, assistant in chat_history:
-         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
-     conversation.append({"role": "user", "content": make_prompt(message)})
-     enc = tokenizer(make_prompt(message), return_tensors="pt", padding=True, truncation=True)
-     input_ids = enc.input_ids.to(model.device)
-     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
-         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
-         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
-
-     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=False)
-     generate_kwargs = dict(
-         {"input_ids": input_ids},
-         streamer=streamer,
-         max_new_tokens=max_new_tokens,
-         do_sample=True,
-         top_p=top_p,
-         top_k=top_k,
-         temperature=temperature,
-         num_beams=1,
-         repetition_penalty=repetition_penalty,
-     )
-     t = Thread(target=model.generate, kwargs=generate_kwargs)
-     t.start()
-
-     outputs = []
-     for text in streamer:
-         processed_text = process_text(text)
-         outputs.append(processed_text)
-         output = "".join(outputs)
+ # Editing mode uses the same tokenizer but might use a simpler or different model setup
+ model_edit = model_generate # For simplicity, using the same model setup for editing in this example
+
+ # Helper Functions
+ def generate_text(input_text: str, chat_history: List[Tuple[str, str]], max_tokens: int = DEFAULT_MAX_NEW_TOKENS) -> Iterator[str]:
+     # Append the new message to the chat history for context
+     chat_history.append(("user", input_text))
+     # Prepare the input with the conversation context
+     context = "\n".join([f"{speaker}: {text}" for speaker, text in chat_history])
+     input_ids = tokenizer(context, return_tensors="pt").input_ids.to(model_generate.device)
+     outputs = model_generate.generate(input_ids, max_length=input_ids.shape[1] + max_tokens, do_sample=True)
+     for output in tokenizer.decode(outputs[0], skip_special_tokens=True).split():
          yield output
+     chat_history.append(("assistant", tokenizer.decode(outputs[0], skip_special_tokens=True)))

-     final_story = "".join(outputs)
-     try:
-         saved_story = Story(message=message, content=final_story).save()
-         yield f"{final_story}\n\n Story saved with ID: {saved_story.story_id}"
-     except Exception as e:
-         yield f"Failed to save story: {str(e)}"
-
- # Gradio Interface Setup
- chat_interface = gr.ChatInterface(
-     fn=generate,
-     fill_height=True,
-     stop_btn=None,
-     examples=[
-         ["Can you explain briefly to me what is the Python programming language?"],
-         ["Could you please provide an explanation about the concept of recursion?"],
-         ["Could you explain what a URL is?"]
-     ],
-     theme='shivi/calm_seafoam'
- )
-
- # Gradio Web Interface
- with gr.Blocks(css=custom_css,theme='shivi/calm_seafoam',fill_height=True) as demo:
-     # gr.Markdown(DESCRIPTION)
-     chat_interface.render()
-     gr.Markdown(LICENSE)
+ def edit_text(input_text: str, chat_history: List[Tuple[str, str]]) -> Iterator[str]:
+     context = "\n".join([f"{speaker}: {text}" for speaker, text in chat_history])
+     input_ids = tokenizer(context, return_tensors="pt").input_ids.to(model_edit.device)
+     outputs = model_edit.generate(input_ids, max_length=input_ids.shape[1] + DEFAULT_MAX_NEW_TOKENS, do_sample=True)
+     for output in tokenizer.decode(outputs[0], skip_special_tokens=True).split():
+         yield output

+ # Gradio Interface
+ def switch_mode(is_editing: bool, input_text: str, chat_history: List[Tuple[str, str]]) -> Iterator[str]:
+     if is_editing and chat_history:
+         return edit_text(input_text, chat_history)
+     elif not is_editing:
+         return generate_text(input_text, chat_history)
+     else:
+         yield "Chat history is empty, cannot edit."
+
+ with gr.Blocks() as demo:
+     with gr.Row():
+         input_text = gr.Textbox(label="Input Text")
+         is_editing = gr.Checkbox(label="Editing Mode", value=False)
+     output_text = gr.Textbox(label="Output", interactive=True)
+     chat_history = gr.State([]) # Using State to maintain chat history
+
+     generate_button = gr.Button("Generate/Edit")
+     generate_button.click(switch_mode, inputs=[is_editing, input_text, chat_history], outputs=output_text)

  # Main Execution
  if __name__ == "__main__":
-     demo.queue(max_size=20)
-     demo.launch(share=True)
+     demo.launch()
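
A note on the new control flow: because `yield` appears in the body of `switch_mode`, Python treats the whole function as a generator, so the `return edit_text(...)` and `return generate_text(...)` branches end the generator without emitting anything; only the empty-history branch ever produces output for the Textbox. A minimal sketch of a fix, assuming the names from this commit, is to delegate with `yield from`:

def switch_mode(is_editing: bool, input_text: str, chat_history: List[Tuple[str, str]]) -> Iterator[str]:
    # A bare `return <generator>` inside a generator function only stops
    # iteration; `yield from` forwards the inner generator's values to
    # the Gradio output as they are produced.
    if is_editing and chat_history:
        yield from edit_text(input_text, chat_history)
    elif not is_editing:
        yield from generate_text(input_text, chat_history)
    else:
        yield "Chat history is empty, cannot edit."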
 
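Relatedly, `generate_text` and `edit_text` stream by iterating over `tokenizer.decode(...).split()`: whitespace and line breaks are discarded, each yield replaces the Textbox contents with a single bare word, and the decoded text echoes the prompt context because the whole output sequence is decoded. A sketch of one way to stream readable text under the same setup (`generate_text_streamed` is a hypothetical name; `max_new_tokens` is the standard `generate` argument that bounds only the newly generated tokens):

def generate_text_streamed(input_text: str, chat_history: List[Tuple[str, str]], max_tokens: int = DEFAULT_MAX_NEW_TOKENS) -> Iterator[str]:
    chat_history.append(("user", input_text))
    context = "\n".join(f"{speaker}: {text}" for speaker, text in chat_history)
    input_ids = tokenizer(context, return_tensors="pt").input_ids.to(model_generate.device)
    outputs = model_generate.generate(input_ids, max_new_tokens=max_tokens, do_sample=True)
    # Decode only the newly generated tokens so the prompt context is not echoed back.
    reply = tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True)
    words = reply.split()
    for i in range(1, len(words) + 1):
        # Yield the accumulated text, since each yield replaces the Textbox value.
        yield " ".join(words[:i])
    chat_history.append(("assistant", reply))

For true token-level streaming, the `TextIteratorStreamer` plus background `Thread` pattern that this commit removes would still work here. Note also that `MAX_INPUT_TOKEN_LENGTH` remains defined but is no longer enforced anywhere in the new version.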