import gradio as gr from huggingface_hub import InferenceClient import os from pinecone import Pinecone, ServerlessSpec from sentence_transformers import SentenceTransformer """ For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference """ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta") embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2') pinecone_client = Pinecone(api_key = os.getenv('PINECONE_API_KEY')) index = pinecone_client.Index("movies") def respond( message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p, ): messages = [{"role": "system", "content": system_message}] for val in history: if val[0]: messages.append({"role": "user", "content": val[0]}) if val[1]: messages.append({"role": "assistant", "content": val[1]}) # encode user query encoded_query = embedding_model.encode(message) # retrieve most relevant movie from vector db matches = index.query( vector= encoded_query.tolist(), top_k=1, include_metadata = True ) # movie which is most similar retrieved_data = matches['matches'][0]['metadata']['title'] # Add as context to LLM messages.append({"role":"user", "content": retrieved_data}) messages.append({"role": "user", "content": message}) response = "" for message in client.chat_completion( messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p, ): token = message.choices[0].delta.content response += token yield response """ For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface """ demo = gr.ChatInterface( respond, additional_inputs=[ gr.Textbox(value="You are a movie recommender named Exodia. You are extremely reliable. You always mention your name in the beginning of conversation. You will provide me with answers from the given info. Give not more than 5 choices and make sure that answers are complete sentences.", label="System message"), gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"), gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"), gr.Slider( minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)", ), ], ) if __name__ == "__main__": demo.launch()