Spaces:
Sleeping
Sleeping
πwπ
Browse files
app.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
import gradio as gr
|
2 |
-
from datasets import load_dataset
|
3 |
|
4 |
-
# import faiss
|
5 |
import os
|
6 |
import spaces
|
7 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
@@ -31,6 +30,7 @@ data = load_dataset("not-lain/wikipedia-small-3000-embedded", subset="train")
|
|
31 |
# index dataset
|
32 |
data.add_faiss_index("embedding", device=1)
|
33 |
|
|
|
34 |
@spaces.GPU
|
35 |
def search(query: str, k: int = TOP_K):
|
36 |
embedded_query = model.encode(query)
|
@@ -68,7 +68,9 @@ def talk(message, history):
|
|
68 |
cleaned_past = item[1].split("\nRESOURCES:\n")[0]
|
69 |
chat.append({"role": "assistant", "content": cleaned_past})
|
70 |
chat.append({"role": "user", "content": message})
|
71 |
-
messages = tokenizer.apply_chat_template(
|
|
|
|
|
72 |
# Tokenize the messages string
|
73 |
model_inputs = tokenizer([messages], return_tensors="pt").to(device)
|
74 |
streamer = TextIteratorStreamer(
|
@@ -100,12 +102,14 @@ TITLE = "RAG"
|
|
100 |
|
101 |
DESCRIPTION = """
|
102 |
## Resources used to build this project
|
103 |
-
* https://huggingface.co/mixedbread-ai/mxbai-colbert-large-v1
|
104 |
-
*
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
|
|
|
|
109 |
"""
|
110 |
|
111 |
demo = gr.ChatInterface(
|
|
|
1 |
import gradio as gr
|
2 |
+
from datasets import load_dataset
|
3 |
|
|
|
4 |
import os
|
5 |
import spaces
|
6 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
|
|
30 |
# index dataset
|
31 |
data.add_faiss_index("embedding", device=1)
|
32 |
|
33 |
+
|
34 |
@spaces.GPU
|
35 |
def search(query: str, k: int = TOP_K):
|
36 |
embedded_query = model.encode(query)
|
|
|
68 |
cleaned_past = item[1].split("\nRESOURCES:\n")[0]
|
69 |
chat.append({"role": "assistant", "content": cleaned_past})
|
70 |
chat.append({"role": "user", "content": message})
|
71 |
+
messages = tokenizer.apply_chat_template(
|
72 |
+
chat, tokenize=False, add_generation_prompt=True
|
73 |
+
)
|
74 |
# Tokenize the messages string
|
75 |
model_inputs = tokenizer([messages], return_tensors="pt").to(device)
|
76 |
streamer = TextIteratorStreamer(
|
|
|
102 |
|
103 |
DESCRIPTION = """
|
104 |
## Resources used to build this project
|
105 |
+
* embedding model : https://huggingface.co/mixedbread-ai/mxbai-colbert-large-v1
|
106 |
+
* dataset : https://huggingface.co/datasets/not-lain/wikipedia-small-3000-embedded (used mxbai-colbert-large-v1 to create the embedding column )
|
107 |
+
* faiss docs : https://huggingface.co/docs/datasets/v2.18.0/en/package_reference/main_classes#datasets.Dataset.add_faiss_index
|
108 |
+
* chatbot : google/gemma-7b-it
|
109 |
+
|
110 |
+
If you want to support my work please click on the heart react button ❤️🤗
|
111 |
+
|
112 |
+
<sub><sup><sub><sup>psst, I am still open for work if please reach me out at https://not-lain.github.io/</sup></sub></sup></sub>
|
113 |
"""
|
114 |
|
115 |
demo = gr.ChatInterface(
|