import os

import gradio as gr
import langchain
from langchain.cache import InMemoryCache
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub

HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]

# Local sentence-transformers model used to embed text.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Remote LLM served through the Hugging Face Hub inference API.
model = "bigscience/bloom"
llm = HuggingFaceHub(
    repo_id=model,
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
    model_kwargs={"max_length": 512, "do_sample": True, "temperature": 0.2},
)

# Cache LLM calls in memory so repeated identical prompts skip the API.
langchain.llm_cache = InMemoryCache()


def predict(input_file):
    # Placeholder handler: the uploaded PDF is not processed yet.
    return "ok"


input_file = gr.File(label="Upload PDF file")
output_text = gr.Textbox(label="test")

demo = gr.Interface(fn=predict, inputs=[input_file], outputs=output_text)
demo.launch()
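
# --- Sketch (not in the original): one way predict() could actually use the
# --- embeddings and llm defined above. It extracts text from the uploaded
# --- PDF, indexes it in a FAISS vector store, and answers a fixed question
# --- with a RetrievalQA chain. Assumes pypdf and faiss-cpu are installed;
# --- QUESTION and predict_with_qa are illustrative names, not from the
# --- original code. In a real script this would replace the stub predict()
# --- before the gr.Interface is built.
from langchain.chains import RetrievalQA
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from pypdf import PdfReader

QUESTION = "What is this document about?"  # hypothetical fixed query


def predict_with_qa(input_file):
    # gr.File may pass a tempfile-like object (with .name) or a plain path,
    # depending on the Gradio version; handle both.
    path = getattr(input_file, "name", input_file)
    reader = PdfReader(path)
    text = "\n".join(page.extract_text() or "" for page in reader.pages)

    # Split the document into overlapping chunks and embed them into FAISS.
    chunks = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100).split_text(text)
    store = FAISS.from_texts(chunks, embeddings)

    # Retrieve relevant chunks and let the Hub-hosted LLM answer from them.
    qa = RetrievalQA.from_chain_type(llm=llm, retriever=store.as_retriever())
    return qa.run(QUESTION)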