# Spaces: Sleeping
import functools
import os
import sys

# Register pysqlite3 under the name 'sqlite3' so that any later
# `import sqlite3` resolves to it. This must stay above the chromadb import
# (presumably chromadb is the consumer of the swapped module -- confirm).
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

import chromadb
import gradio as gr
import numpy as np
import pandas as pd
from chromadb.utils import embedding_functions
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
# Load the lyrics table: each row's 'full_text' is one document, and the
# stringified row index is used as that document's id.
dfs = pd.read_csv('TSLyrics.csv')
docs = dfs['full_text'].tolist()
ids = list(map(str, dfs.index.tolist()))

# Chroma client created with default settings. NOTE: the name `client` is
# rebound further down to the InferenceClient, so the collection has to be
# obtained from this object before that happens.
client = chromadb.Client()
@functools.lru_cache(maxsize=1)
def _load_embedding_model():
    """Build the sentence-embedding model once and reuse it on later calls."""
    return SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


def text_embedding(input):
    """Embed text with the all-MiniLM-L6-v2 sentence-transformer.

    The model used to be constructed on every call, which reloaded it for
    each query; it is now created on first use and cached.

    Args:
        input: Text to embed, passed straight through to ``model.encode``
            (callers in this file pass both a list of documents and a single
            query string). The name shadows the builtin but is kept so that
            keyword callers keep working.

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for ``input``;
        callers in this file call ``.tolist()`` on it.
    """
    return _load_embedding_model().encode(input)
# Create (or reuse) the lyrics collection, then index every document together
# with an embedding computed up front by text_embedding.
collection = client.get_or_create_collection(name="TSlYRICS")
# Despite the name, `default_ef` holds the document embeddings as plain lists.
default_ef = text_embedding(docs).tolist()
collection.add(
    ids=ids,
    documents=docs,
    embeddings=default_ef,
)
def gen_context(query, n_results=25):
    """Retrieve the stored documents closest to ``query`` as one text block.

    Args:
        query: The user's question; it is embedded with ``text_embedding``.
        n_results: How many documents to request from the collection.
            Defaults to 25, the value that was previously hard-coded.

    Returns:
        The retrieved documents joined with newlines, in the order the
        collection returned them.
    """
    vector = text_embedding(query).tolist()
    results = collection.query(
        query_embeddings=vector,
        n_results=n_results,
        include=["documents"],
    )
    # Only one query is issued, so its matches are the first inner list.
    return "\n".join(str(item) for item in results['documents'][0])
# Hugging Face InferenceClient used by chat_completion for text generation.
# NOTE: this rebinds `client`, which earlier referred to the chromadb client;
# `collection` was already obtained from that earlier object.
client = InferenceClient(model="mistralai/Mixtral-8x7B-Instruct-v0.1")
def get_prompt(history, query):
    """Render earlier turns plus the new query as a Human/AI transcript.

    Args:
        history: Iterable of ``(query, response)`` pairs from earlier turns.
            ``None`` or an empty iterable means there are no earlier turns.
        query: The new user message.

    Returns:
        A string with one ``"Human: ...\\nAI: ...\\n"`` entry per earlier
        turn, followed by ``"Human: <query>\\nAI:"`` so the model continues
        from the open ``AI:`` line.
    """
    # Collect the pieces and join once instead of growing a string with +=.
    parts = [
        f"Human: {old_query}\nAI: {response}\n"
        for old_query, response in history or ()
    ]
    parts.append(f"Human: {query}\nAI:")
    return "".join(parts)
def chat_completion(query):
    """Answer ``query`` from retrieved lyrics context using the hosted model.

    Retrieves context with ``gen_context``, wraps it and the query in an
    instruction prompt, and sends that to ``client.text_generation``.

    Args:
        query: The user's question.

    Returns:
        The generated text with leading and trailing whitespace stripped.
    """
    system_prompt = """You are a helpful AI assistant that can answer questions on the Taylor Swift song lyrics. Answer based on the context provided.If you cannot find the correct answer, say I don't know. Be concise and just include the response"""
    context = gen_context(query)
    user_prompt = f"""Based on the context:{context}Answer the below query:{query}"""
    prompt = f"""<s>[INST]<<SYS>>{system_prompt}<</SYS>>{user_prompt}[/INST]"""
    # Cap the completion at 1000 new tokens.
    generated = client.text_generation(prompt=prompt, max_new_tokens=1000)
    return generated.strip()
# Single-textbox UI: the query goes to chat_completion and the answer is
# shown in a larger result box.
demo = gr.Interface(
    fn=chat_completion,
    inputs=[gr.Textbox(label="Query", lines=2)],
    outputs=[gr.Textbox(label="Result", lines=16)],
    title="Chat on Taylor Swift Lyrics",
)

# Enable request queuing, then start the app with sharing requested.
demo.queue().launch(share=True)