# NOTE: "Spaces: Sleeping" status banner from the Hugging Face page capture
# removed — it was page chrome, not part of the program.
from gradio_client import Client

# System prompt: instructs the model to answer strictly from the supplied
# context (which may be non-English) and to refuse unrelated questions.
system_prompt = """You are a helpful assistant, you will use the provided context only to answer user questions.
Read the given context before answering questions and think step by step. you could get context or question in other language than english.
Answer only if the question related to the contexts, don't use your own data ..if the question isn't related to the context ,respond with "sorry..no provided context for this question".
If you can not answer a user question based on provided context only , inform the user.
Do not use any other information for answering user. Provide a detailed answer to the question."""

# Llama-2 chat delimiters: [INST]...[/INST] wraps a turn, <<SYS>>...<</SYS>>
# wraps the system prompt inside the first instruction.
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
SYSTEM_PROMPT = B_SYS + system_prompt + E_SYS

# Per-turn template; {context} and {question} are placeholders.
# NOTE(review): connect_to_llama() below never .format()s this template — it is
# passed through with the literal placeholders; confirm that is intended.
instruction = """
Context: {context}
User: {question}"""

# Full Llama-2-style prompt handed to the Space as its "system prompt" field.
prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST
def connect_to_llama(query, context):
    """Ask the hosted Llama-2-13b chat Space a question grounded in *context*.

    Parameters
    ----------
    query : str
        The user's question.
    context : str
        Retrieved context the model should answer from.

    Returns
    -------
    The value returned by ``Client.predict`` for the ``/chat`` endpoint
    (the generated answer).
    """
    # NOTE(review): this URL pins a specific ephemeral replica
    # (--replicas/5c42d8wx6); replicas expire — prefer the stable Space
    # address "https://huggingface-projects-llama-2-13b-chat.hf.space/".
    client = Client("https://huggingface-projects-llama-2-13b-chat.hf.space/--replicas/5c42d8wx6/")

    # The message sent as the user turn; the system prompt is supplied
    # separately via prompt_template (module-level constant).
    message = """
    question:"{}"
    context:"{}"
    answer:
    """.format(query, context)

    # Positional arguments follow the Space's /chat signature. The per-arg
    # meanings below the first two are taken from the original comments;
    # NOTE(review): confirm against the Space's API page — the 0.1/0.05
    # values look like temperature/top-p but the original labels the two
    # trailing 1s as top-p and repetition penalty.
    result = client.predict(
        message,          # user message textbox
        prompt_template,  # 'Optional system prompt' textbox
        4096,             # 'Max new tokens' slider (0..4096)
        0.1,
        0.05,
        1,                # labeled 'Top-p (nucleus sampling)' (0.0..1)
        1,                # labeled 'Repetition penalty' (1.0..2.0)
        api_name="/chat",
    )
    return result