import gradio as gr from transformers import pipeline from transformers.utils import logging from llama_index.embeddings.huggingface import HuggingFaceEmbedding import torch from llama_index.core import VectorStoreIndex from llama_index.core import Document from llama_index.core import Settings from llama_index.llms.huggingface import ( HuggingFaceInferenceAPI, HuggingFaceLLM, ) # logging.set_verbosity_error() # llm = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6") # llm = pipeline("question-answering", model="sshleifer/distilbart-cnn-12-6") # pipe = pipeline("text-generation", model="HuggingFaceH4/zephyr-7b-beta", torch_dtype=torch.bfloat16, device_map="auto") #Settings.llm = pipeline("text2text-generation", model="facebook/blenderbot-400M-distill") Settings.llm = HuggingFaceLLM(model_name="facebook/blenderbot-400M-distill", device_map="cpu", context_window=128, tokenizer_name="facebook/blenderbot-400M-distill" ) Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5") documents = [Document(text="Indian parliament elections happened in April-May 2024. BJP Party won.")] index = VectorStoreIndex.from_documents( documents, ) query_engine = index.as_query_engine() # def get_embeddings(mytext): # embeddings = embed_model.get_text_embedding("Hello World!") # subarray = embeddings[:5] # out = [str(i) for i in subarray] # return '::'.join(out) # def summarize(mytext): # out = llm(mytext, max_length=130, min_length=30, do_sample=False) # out = llm(mytext) # return str(out[0]) # def chat_completion(question): # messages = [ # { # "role": "system", # "content": "You are a friendly chatbot.", # }, # {"role": "user", "content": question}, # ] # prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) # outputs = pipe(prompt, max_new_tokens=1024, do_sample=False, temperature=0.3, top_k=75, top_p=0.97) # return outputs[0]["generated_text"] # def generate_response(input_text): # result = pipe(input_text) # return result[0]['generated_text'] def rag(input_text, file): return query_engine.query( input_text ) iface = gr.Interface(fn=rag, inputs=[gr.Textbox(label="Question", lines=6), gr.File()], outputs=[gr.Textbox(label="Result", lines=6)], title="Answer my question", description= "CoolChatBot" ) iface.launch()