# Source: Hugging Face Space page capture (Space status: Sleeping).
import chromadb
import gradio as gr
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
# Vector store: a Chroma client persisted under ./chromadb_store, plus the
# collection holding the Curly Strings documents (created if it is missing).
chroma_client = chromadb.PersistentClient(path="./chromadb_store")
collection = chroma_client.get_or_create_collection(
    name="curly_strings_knowledge",
)

# Local sentence-embedding model; used below to embed both the stored
# documents and the incoming user queries.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# Curly Strings knowledge, stored in ChromaDB as vectors.
# Each entry has a stable "id" (used as the Chroma document id) and a "text"
# body that is embedded and later handed to the LLM as retrieved context.
# The text is kept flush-left inside the triple-quoted strings so that no
# indentation leaks into the stored documents.
knowledge_base = [
    {"id": "song_list", "text": """
Here are some songs by Curly Strings:
1. **Kalakesed**
2. **Kus mu süda on ...**
3. **Vitsalaul**
4. **Viimases jaamas**
5. **Salaja**
6. **Üle ilma**
7. **Šveits**
8. **Kallimale**
9. **Üksteist peab hoidma**
10. **Suuda öelda ei**
11. **Annan käe**
12. **Tulbid ja Bonsai**
13. **Tüdruk Pika Kleidiga**
14. **Armasta mind (feat. Vaiko Eplik)**
15. **Minu, Pets, Margus ja Priit**
16. **Kauges külas**
17. **Tule ja jää**
18. **Kuutõbine**
19. **Omaenese ilus ja veas**
20. **Pulmad**
21. **Pillimeeste laul**
22. **Tehke ruumi!**
"""},
    {"id": "related_artists", "text": """
If you enjoy Curly Strings, you might also like:
- **Trad.Attack!**
- **Eesti Raadio laululapsed**
- **Körsikud**
- **Karl-Erik Taukar**
- **Dag**
- **Sadamasild**
- **Kruuv**
- **Smilers**
- **Mari Jürjens**
- **Terminaator**
"""},
    {"id": "background", "text": """
Curly Strings started in Estonia and became famous for their unique blend of folk and contemporary music.
They often perform at international festivals and are known for their emotional and poetic lyrics.
"""},
]
# Store knowledge in ChromaDB, adding only the entries that are not there yet.
# Checking per id (rather than "is the collection empty?") means entries that
# are added to knowledge_base after the first run still get indexed on the
# next start, while already-stored entries are not re-embedded.
kb_ids = [item["id"] for item in knowledge_base]
existing_data = collection.get(ids=kb_ids)
stored_ids = set(existing_data["ids"])
missing_items = [item for item in knowledge_base if item["id"] not in stored_ids]
if missing_items:
    missing_texts = [item["text"] for item in missing_items]
    collection.add(
        documents=missing_texts,
        # One batched encode call instead of one model call per document.
        embeddings=embedder.encode(missing_texts).tolist(),
        ids=[item["id"] for item in missing_items],
    )
# Load the quantized GGUF chat model through llama-cpp-python, identified by
# its repository id and the model file name inside that repository.
llm = Llama.from_pretrained(repo_id="krishna195/second_guff", filename="unsloth.Q4_K_M.gguf")
# Function to retrieve relevant knowledge from ChromaDB.
def retrieve_context(query, n_results=2):
    """Return the stored documents most similar to *query*, newline-joined.

    Args:
        query: Free-text question; it is embedded with the module-level
            ``embedder`` and used to query the module-level ``collection``.
        n_results: Maximum number of documents to request (defaults to 2,
            the value that was previously hard-coded).

    Returns:
        The matching document texts joined by newlines, or the literal
        string "No relevant data found." when no string documents come back.
    """
    query_embedding = embedder.encode(query).tolist()
    results = collection.query(query_embeddings=[query_embedding], n_results=n_results)
    # The result holds one list of documents per query embedding; flatten it.
    # The `or []` fallbacks cover a "documents" entry (or an inner group) that
    # is present but None, which `.get("documents", [])` would not catch.
    retrieved_texts = [
        doc
        for group in (results.get("documents") or [])
        for doc in (group or [])
        if isinstance(doc, str)
    ]
    return "\n".join(retrieved_texts) if retrieved_texts else "No relevant data found."
# Chatbot function with ChromaDB-backed retrieval (RAG).
def chatbot_response(user_input):
    """Answer *user_input* with the LLM, grounded on context from ChromaDB.

    Args:
        user_input: The user's question as plain text.

    Returns:
        The model's reply with surrounding whitespace removed. Blank or
        missing input short-circuits with a short prompt asking for a
        question, without touching the vector store or the model.
    """
    # Nothing to retrieve or answer for empty / whitespace-only input.
    if not user_input or not user_input.strip():
        return "Please enter a question about Curly Strings."

    context = retrieve_context(user_input)  # Relevant snippets from ChromaDB

    # The retrieved context goes into the system prompt and the user turn
    # comes last, so the model is asked to answer the user's question rather
    # than to continue after an assistant message.
    messages = [
        {
            "role": "system",
            "content": (
                "Use the knowledge retrieved to answer the user’s question.\n\n"
                f"Retrieved Context:\n{context}"
            ),
        },
        {"role": "user", "content": user_input},
    ]
    response = llm.create_chat_completion(
        messages=messages,
        temperature=0.5,
        max_tokens=500,
        top_p=0.9,
        frequency_penalty=0.8,
    )
    content = response["choices"][0]["message"]["content"]
    # Guard against a None content field before stripping.
    return (content or "").strip()
# Gradio callback
def chat_interface(user_input):
    """Gradio entry point: pass the textbox value on to ``chatbot_response``."""
    reply = chatbot_response(user_input)
    return reply
# Single-textbox web UI: the typed question goes to chat_interface and the
# returned string is shown as plain text.
demo = gr.Interface(
    title="Curly Strings Chatbot",
    description="Chat with the bot about Curly Strings, their songs, and related artists.",
    fn=chat_interface,
    inputs=gr.Textbox(placeholder="Ask me about Curly Strings..."),
    outputs="text",
)

# Start the app; this runs at module top level, exactly as the script did before.
demo.launch()