using gemini instead of llama to improve speed

- .gradio/flagged/dataset1.csv +4 -0
- Gemini.py +1 -1
- __pycache__/model.cpython-312.pyc +0 -0
- app.py +20 -5
- model.py +1 -1
- 新建 Text Document.txt +0 -0
.gradio/flagged/dataset1.csv
ADDED
@@ -0,0 +1,4 @@
+question,output,timestamp
+what should I have for dinner today,"Provide at most 50 answers for this question, each answer should be concluded into a word or short phrase.Avoid providing similar or repetitive answers, and try to make them as diverse as possible.The reply should be simply listing out all possible options, without any number or any other words, including but not limited to introduction, paraphrasing, and conclusion...Limit the length of your answer to at most 50 words in total and delete anything beyond this limit.
+
+what should I have for dinner today? I'm not a big fan of eating steak, but I'd rather have a quick dinner with a little less cheese.",2025-01-18 19:33:49.159655
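This CSV is the log that Gradio's flagging feature writes under its flagging directory (.gradio/flagged/ is the default location in recent Gradio releases). Each row pairs a submitted question with the generated output and a timestamp, so it is captured data rather than source; it appears to have been committed alongside the code change rather than added deliberately.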
Gemini.py
CHANGED
@@ -1,4 +1,4 @@
-
+import google.generativeai as genai
 
 # prompt_template = (
 # "Provide at most 50 answers for this question, each answer should be concluded into a word or short phrase."
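The only change in Gemini.py is the new import; the commented-out prompt_template block below it is untouched. The import assumes the google-generativeai package is available in the Space's environment, which would normally mean listing it in requirements.txt.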
__pycache__/model.cpython-312.pyc
CHANGED
Binary files a/__pycache__/model.cpython-312.pyc and b/__pycache__/model.cpython-312.pyc differ
app.py
CHANGED
@@ -2,6 +2,7 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 import os
+import google.generativeai as genai
 
 from model import LlamaModel, GPT2Model, GPTNeoXModel, DistilGPT2Model, LLaMA2Model
 
@@ -20,18 +21,32 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 
 print("Loading model...")
 #generator = LlamaModel() #can work, but super slow ~2min
-generator = GPT2Model() #can work, but not showing correct output
+#generator = GPT2Model() #can work, but not showing correct output
 #generator = GPTNeoXModel()
 #generator = DistilGPT2Model()
 #generator = LLaMA2Model()
 
+genai.configure(api_key="AIzaSyAJF6isCNu6XfGA5TBFddXu9BTfAKaPF30")
+model = genai.GenerativeModel("gemini-1.5-flash")
+
 # Function to handle input and generate output
 def gradio_interface(question):
-
+
     full_prompt = f"{prompt_template}\n\n{question}"
-
-    answer = generator.generate(full_prompt)
-
+
+    #answer = generator.generate(full_prompt)
+    answer = model.generate_content(full_prompt)
+
+    # Extract the text content
+    try:
+        # Access the first candidate's content using attributes
+        content = answer.candidates[0].content.parts[0].text
+        # Remove newline characters
+        cleaned_content = content.replace("\n", ", ").strip()
+    except (AttributeError, IndexError) as e:
+        cleaned_content = "An error occurred while processing the response."
+
+    return cleaned_content
 
 # Create a Gradio interface
 interface = gr.Interface(
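For reference, the attribute-by-attribute extraction added to gradio_interface can usually be collapsed onto the SDK's response.text shortcut. The sketch below is not part of this commit and assumes the installed google-generativeai version exposes that property (it raises when no candidate text is available); the candidates[0].content.parts[0] path in the diff is the more defensive equivalent.

# Sketch only: a tighter variant of the try/except block above, under the
# assumption that the Gemini response object exposes the .text shortcut.
def extract_answer(response):
    try:
        text = response.text  # shorthand for candidates[0].content.parts[0].text
    except (ValueError, AttributeError, IndexError):
        # .text raises ValueError if the candidate was blocked or has no parts
        return "An error occurred while processing the response."
    return text.replace("\n", ", ").strip()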
model.py
CHANGED
@@ -105,7 +105,7 @@ class DistilGPT2Model:
         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
         self.model = AutoModelForCausalLM.from_pretrained(model_name)
 
-    def generate(self, input_text, max_length=
+    def generate(self, input_text, max_length=200, temperature=0.7, top_p=0.9, top_k=50):
         """
         Generate a response using the DistilGPT-2 model.
         """
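This hunk changes only the generate signature; the method body sits outside the diff. As a rough sketch of what a DistilGPT-2 generate with these parameters typically looks like (illustrative only, not the repository's actual code):

# Illustrative sketch; the real DistilGPT2Model.generate body is not shown in this diff.
def generate(self, input_text, max_length=200, temperature=0.7, top_p=0.9, top_k=50):
    """
    Generate a response using the DistilGPT-2 model.
    """
    inputs = self.tokenizer(input_text, return_tensors="pt")
    outputs = self.model.generate(
        **inputs,
        max_length=max_length,
        do_sample=True,           # sampling is required for temperature/top_p/top_k to take effect
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        pad_token_id=self.tokenizer.eos_token_id,  # GPT-2 family models define no pad token
    )
    return self.tokenizer.decode(outputs[0], skip_special_tokens=True)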
新建 Text Document.txt
ADDED
File without changes