"""Curly Strings chatbot: retrieval-augmented chat served through Gradio.

A small hand-written knowledge base is embedded with a local
SentenceTransformer model and stored in a persistent ChromaDB collection.
For every question the closest documents are retrieved and handed to a
local Llama model as grounding context.
"""

import chromadb
import gradio as gr
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer

# ✅ Initialize ChromaDB
chroma_client = chromadb.PersistentClient(path="./chromadb_store")
collection = chroma_client.get_or_create_collection(name="curly_strings_knowledge")

# ✅ Load Local Embedding Model
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# ✅ Curly Strings Knowledge (Stored in ChromaDB as Vectors)
knowledge_base = [
    {
        "id": "song_list",
        "text": """
Here are some songs by Curly Strings:
1. **Kalakesed**
2. **Kus mu süda on ...**
3. **Vitsalaul**
4. **Viimases jaamas**
5. **Salaja**
6. **Üle ilma**
7. **Šveits**
8. **Kallimale**
9. **Üksteist peab hoidma**
10. **Suuda öelda ei**
11. **Annan käe**
12. **Tulbid ja Bonsai**
13. **Tüdruk Pika Kleidiga**
14. **Armasta mind (feat. Vaiko Eplik)**
15. **Minu, Pets, Margus ja Priit**
16. **Kauges külas**
17. **Tule ja jää**
18. **Kuutõbine**
19. **Omaenese ilus ja veas**
20. **Pulmad**
21. **Pillimeeste laul**
22. **Tehke ruumi!**
""",
    },
    {
        "id": "related_artists",
        "text": """
If you enjoy Curly Strings, you might also like:
- **Trad.Attack!**
- **Eesti Raadio laululapsed**
- **Körsikud**
- **Karl-Erik Taukar**
- **Dag**
- **Sadamasild**
- **Kruuv**
- **Smilers**
- **Mari Jürjens**
- **Terminaator**
""",
    },
    {
        "id": "background",
        "text": """
Curly Strings started in Estonia and became famous for their unique blend of folk and contemporary music.
They often perform at international festivals and are known for their emotional and poetic lyrics.
""",
    },
]

# Instruction given to the model ahead of the retrieved context.
SYSTEM_PROMPT = "Use the knowledge retrieved to answer the user’s question."

# Context window requested from llama.cpp. The library default is small
# enough that the retrieved documents plus a 500-token answer may not fit.
# NOTE(review): 2048 is a conservative choice; raise it if the model allows.
LLM_CONTEXT_TOKENS = 2048


def seed_knowledge_base(items: list[dict[str, str]]) -> None:
    """Add to the collection every item whose id is not stored yet.

    Only the ids of ``items`` are looked up, so the check does not load the
    whole collection, and entries appended to the knowledge base after the
    first run are still picked up.

    Args:
        items: Dicts with an ``"id"`` and a ``"text"`` key.
    """
    wanted_ids = [item["id"] for item in items]
    if not wanted_ids:
        return

    stored_ids = set(collection.get(ids=wanted_ids)["ids"])
    missing = [item for item in items if item["id"] not in stored_ids]
    if not missing:
        return

    texts = [item["text"] for item in missing]
    collection.add(
        documents=texts,
        # Encoding the list in one call yields one embedding row per text.
        embeddings=embedder.encode(texts).tolist(),
        ids=[item["id"] for item in missing],
    )


# ✅ Store Knowledge in ChromaDB (If Not Already Stored)
seed_knowledge_base(knowledge_base)

# ✅ Load Llama Model
llm = Llama.from_pretrained(
    repo_id="krishna195/second_guff",
    filename="unsloth.Q4_K_M.gguf",
    n_ctx=LLM_CONTEXT_TOKENS,
)


# ✅ Function to Retrieve Relevant Knowledge
def retrieve_context(query: str, n_results: int = 2) -> str:
    """Return the stored documents closest to ``query``, joined by newlines.

    Args:
        query: Free-text question to embed and search with.
        n_results: Maximum number of documents to request from ChromaDB.

    Returns:
        The matching documents separated by newlines, or
        ``"No relevant data found."`` when the query yields no text.
    """
    query_embedding = embedder.encode(query).tolist()
    results = collection.query(query_embeddings=[query_embedding], n_results=n_results)

    # "documents" holds one list per query embedding and may be None,
    # so fall back to an empty list before flattening.
    batches = results.get("documents") or []
    retrieved_texts = [doc for batch in batches for doc in batch if isinstance(doc, str)]
    return "\n".join(retrieved_texts) if retrieved_texts else "No relevant data found."


# ✅ Chatbot Function with ChromaDB-RAG
def chatbot_response(user_input: str) -> str:
    """Answer ``user_input`` with the Llama model, grounded on retrieved context.

    Args:
        user_input: The question typed by the user.

    Returns:
        The model's reply with surrounding whitespace stripped, or a short
        prompt asking for a question when the input is blank.
    """
    question = (user_input or "").strip()
    if not question:
        return "Please enter a question about Curly Strings."

    context = retrieve_context(question)

    # The context belongs in the system turn and the question must come
    # last; a trailing assistant message would look like a reply the model
    # had already given.
    messages = [
        {"role": "system", "content": f"{SYSTEM_PROMPT}\n\nRetrieved Context:\n{context}"},
        {"role": "user", "content": question},
    ]

    response = llm.create_chat_completion(
        messages=messages,
        temperature=0.5,
        max_tokens=500,
        top_p=0.9,
        frequency_penalty=0.8,
    )
    answer = response["choices"][0]["message"]["content"] or ""
    return answer.strip()


# ✅ Gradio Interface
def chat_interface(user_input: str) -> str:
    """Gradio callback: forward the textbox value to ``chatbot_response``."""
    return chatbot_response(user_input)


demo = gr.Interface(
    fn=chat_interface,
    inputs=gr.Textbox(placeholder="Ask me about Curly Strings..."),
    outputs="text",
    title="Curly Strings Chatbot",
    description="Chat with the bot about Curly Strings, their songs, and related artists.",
)

demo.launch()