Spaces:
Sleeping
Sleeping
emar
committed on
Commit
•
6c79873
1
Parent(s):
4b52a00
reduce to simplicity?
Browse files
app.py
CHANGED
@@ -1,67 +1,43 @@
|
|
1 |
-
import
|
2 |
import gradio as gr
|
3 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
4 |
-
from llama_index.core import
|
|
|
|
|
|
|
5 |
from llama_index.llms.huggingface import HuggingFaceLLM
|
6 |
import torch
|
7 |
-
from pydantic import BaseModel
|
8 |
-
|
9 |
PERSIST_DIR = './storage'
|
10 |
|
11 |
# Configure the settings
|
12 |
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
13 |
|
14 |
-
# Pydantic config to avoid protected namespace warning
|
15 |
-
class Config(BaseModel):
|
16 |
-
model_config = {'protected_namespaces': ()}
|
17 |
-
|
18 |
-
# @spaces.GPU(duration=240)
|
19 |
-
def setup():
|
20 |
-
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device=DEVICE)
|
21 |
-
Settings.llm = HuggingFaceLLM(
|
22 |
-
model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
23 |
-
tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
24 |
-
context_window=2048,
|
25 |
-
max_new_tokens=256,
|
26 |
-
generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
|
27 |
-
device_map="auto",
|
28 |
-
)
|
29 |
|
30 |
-
|
31 |
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
query_engine = None
|
41 |
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
|
46 |
-
# Initialize query engine at the start
|
47 |
-
initialize_query_engine()
|
48 |
|
49 |
-
# Chatbot response function
|
50 |
@spaces.GPU
|
51 |
def chatbot_response(message, history):
|
52 |
-
if query_engine is None:
|
53 |
-
initialize_query_engine()
|
54 |
response = query_engine.query(message)
|
55 |
return str(response)
|
56 |
|
57 |
-
# Initialize Gradio interface
|
58 |
iface = gr.ChatInterface(
|
59 |
fn=chatbot_response,
|
60 |
title="UESP Lore Chatbot: CPU bound version of Phi-3-mini",
|
61 |
-
description=
|
62 |
-
"Low quality and extremely slow version of the ones you can find on the github page: "
|
63 |
-
"https://github.com/emarron/UESP-lore. I am not paying to have Llama3 on here."
|
64 |
-
),
|
65 |
examples=["Who is Zaraphus?"],
|
66 |
cache_examples=True,
|
67 |
)
|
|
|
1 |
+
import os
|
2 |
import gradio as gr
|
3 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
4 |
+
from llama_index.core import (
|
5 |
+
StorageContext,
|
6 |
+
load_index_from_storage, Settings,
|
7 |
+
)
|
8 |
from llama_index.llms.huggingface import HuggingFaceLLM
|
9 |
import torch
|
|
|
|
|
10 |
PERSIST_DIR = './storage'
|
11 |
|
12 |
# Configure the settings
|
13 |
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
+
# Embedding model used for retrieval. It is pinned to the CPU explicitly;
# the DEVICE constant defined above is not used here.
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-base-en-v1.5",
    device="cpu",
)

# Text-generation model that the query engine uses to produce answers.
Settings.llm = HuggingFaceLLM(
    model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    context_window=2048,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
    device_map="auto",
)

# Load the index persisted under PERSIST_DIR and build the module-level
# query engine once at import time. The Settings assignments above must run
# first so the loaded index picks up the configured models.
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context)
query_engine = index.as_query_engine()
|
30 |
|
|
|
|
|
31 |
|
|
|
32 |
# NOTE(review): `spaces` is not among the imports visible in this file view;
# confirm `import spaces` is present, otherwise this decorator raises
# NameError when the module is loaded.
@spaces.GPU
def chatbot_response(message, history):
    """Answer a chat message by querying the module-level ``query_engine``.

    Args:
        message: The user's latest message; passed to ``query_engine.query``.
        history: Accepted as part of the callback signature; not used here.

    Returns:
        The query result converted to ``str``.
    """
    response = query_engine.query(message)
    return str(response)
|
36 |
|
|
|
37 |
# Gradio chat UI that routes each user message through chatbot_response.
# NOTE(review): the title mentions Phi-3-mini, but the LLM configured in this
# file is TinyLlama-1.1B-Chat; confirm which one the title should name.
iface = gr.ChatInterface(
    fn=chatbot_response,
    title="UESP Lore Chatbot: CPU bound version of Phi-3-mini",
    description="Low quality and extremely slow version of the ones you can find on the github page.: https://github.com/emarron/UESP-lore I am not paying to have Llama3 on here.",
    examples=["Who is Zaraphus?"],
    # Example responses are cached instead of being recomputed on every click.
    cache_examples=True,
)
|