from gradio_client import Client

# System prompt: instructs the model to answer strictly from the supplied
# context and to refuse questions the context does not cover.
# (Text kept verbatim from the original file — it is runtime behavior.)
system_prompt = """You are a helpful assistant, you will use the provided context only to answer user questions. Read the given context before answering questions and think step by step. you could get context or question in other language than english. Answer only if the question related to the contexts, don't use your own data ..if the question isn't related to the context ,respond with "sorry..no provided context for this question". If you can not answer a user question based on provided context only , inform the user. Do not use any other information for answering user. Provide a detailed answer to the question."""

# Llama-2 chat-format delimiters.
B_INST, E_INST = "[INST]", "[/INST]"
# BUGFIX: the file previously had "<>\n" / "\n<>\n\n" — almost certainly the
# Llama-2 "<<SYS>>" / "<</SYS>>" wrappers with the tags stripped by some
# HTML-sanitizing step. Without them the model does not treat the text as a
# system prompt. NOTE(review): restored to the canonical Llama-2 markers —
# confirm against the original author's intent.
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
SYSTEM_PROMPT = B_SYS + system_prompt + E_SYS

# Per-turn instruction template; filled in with the retrieved context and
# the user's question before being sent to the model.
instruction = """
Context: {context}
User: {question}"""

# Full Llama-2 prompt skeleton: [INST] <<SYS>>...<</SYS>> instruction [/INST]
prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST

# Default HF Space endpoint. NOTE(review): this is a replica-specific URL
# ("--replicas/5c42d8wx6") and will go stale when the Space is redeployed;
# callers can now override it via the `space_url` parameter.
DEFAULT_SPACE_URL = (
    "https://huggingface-projects-llama-2-13b-chat.hf.space/--replicas/5c42d8wx6/"
)


def connect_to_llama(query, context, space_url=DEFAULT_SPACE_URL):
    """Query the hosted Llama-2-13b-chat HF Space with a context-grounded question.

    Args:
        query: The user's question (any language).
        context: Retrieved context text the model must answer from.
        space_url: Gradio Space endpoint to connect to (defaults to the
            original hard-coded replica URL, so existing callers are unchanged).

    Returns:
        The raw string response produced by the Space's ``/chat`` endpoint.
    """
    client = Client(space_url)
    # Positional arguments follow the Space's /chat signature:
    # (message, system prompt, max new tokens, temperature, top-p, top-k,
    #  repetition penalty). NOTE(review): the original inline comments
    # labelled the fourth numeric slot "Top-p", which does not match this
    # Space's parameter order — verify against the Space's API page.
    message = """
    question:"{}"
    context:"{}"
    answer:
    """.format(query, context)
    result = client.predict(
        message,          # user message textbox
        prompt_template,  # 'Optional system prompt' textbox
        4096,             # max new tokens (0–4096)
        0.1,              # temperature
        0.05,             # top-p (nucleus sampling)
        1,                # top-k
        1,                # repetition penalty (1.0–2.0)
        api_name="/chat",
    )
    return result