mixture-of-experts-dr-llama

Sleeping

File size: 2,191 Bytes

f44db6c
 
 
690bb75
f44db6c
63b0d2d
f44db6c
 
 
f46ac02
 
f44db6c
 
1530ac8
47c116b
db967aa
1530ac8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a6318fa
1530ac8
 
 
 
 
 
 
a6318fa
f44db6c
f46ac02
9264b57
f46ac02
 
 
 
 
 
a6318fa
1530ac8
 
f46ac02
1530ac8
 
f46ac02

import requests
import streamlit as st
import os
from huggingface_hub import InferenceClient

# URL of the Hugging Face Inference Endpoint every request in this file targets.
API_URL = "https://qe55p8afio98s0u3.us-east-1.aws.endpoints.huggingface.cloud"
# Access token taken from the API_KEY environment variable (None when unset).
API_KEY = os.environ.get("API_KEY")

# Headers used for raw HTTP calls against the endpoint.
headers = {
    "Authorization": "Bearer {}".format(API_KEY),
    "Content-Type": "application/json",
}

# Example prompt:
prompt = "Write instructions to teach anyone to write a discharge plan. List the entities, features and relationships to CCDA and FHIR objects in boldface."

def StreamLLMChatResponse(prompt):
    """Stream a text generation for *prompt* and render it live in the page.

    A client is created for ``API_URL`` using ``API_KEY`` as the token, and
    the generation is requested in streaming mode. Each non-special token is
    appended to the text produced so far, and the accumulated text is
    re-rendered (in italics) into a single Streamlit placeholder so the
    answer grows in place.

    Args:
        prompt: The prompt text sent to the endpoint.

    Returns:
        The accumulated generated text with surrounding whitespace stripped
        (an empty string when nothing was generated).
    """
    client = InferenceClient(API_URL, token=API_KEY)
    gen_kwargs = dict(
        max_new_tokens=512,
        top_k=30,
        top_p=0.9,
        temperature=0.2,
        repetition_penalty=1.02,
        stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
    )
    stop_sequences = gen_kwargs["stop_sequences"]
    stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)

    res_box = st.empty()
    report = []
    for r in stream:
        token = r.token
        if token.special:
            continue
        # Stop as soon as a token is exactly one of the stop sequences.
        if token.text in stop_sequences:
            break
        if not token.text:
            # An empty token adds nothing to the output; skip the re-render.
            continue
        report.append(token.text)
        result = "".join(report).strip()
        try:
            res_box.markdown(f'*{result}*')
        except Exception:
            # Rendering stays best-effort, but only ordinary errors are
            # absorbed; KeyboardInterrupt/SystemExit and other BaseException
            # signals are allowed to propagate.
            st.write(' ')
    return "".join(report).strip()

def query(payload):
    """POST *payload* as JSON to the endpoint, display the reply and return it.

    Args:
        payload: JSON-serializable request body.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.Timeout: If the endpoint does not answer within
            the timeout below.
    """
    # Without a timeout, requests waits forever on an unresponsive endpoint.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    # Decode the body once and reuse it for both display and return value.
    result = response.json()
    st.markdown(result)
    return result

def get_output(prompt):
    """Send *prompt* under the ``"inputs"`` key via ``query`` and return its result."""
    payload = {"inputs": prompt}
    return query(payload)

def main():
    """Render the test-bench page and stream a response when the button is pressed."""
    st.title("Medical Llama Test Bench with Inference Endpoints Llama 7B")

    # Default text pre-filled into the input box.
    default_text = "Write instructions to teach anyone to write a discharge plan. List the entities, features and relationships to CCDA and FHIR objects in boldface."
    user_text = st.text_input("Enter your example text:", value=default_text)

    # Nothing to do until the user presses the button.
    if not st.button("Run Prompt With Dr Llama"):
        return
    StreamLLMChatResponse(user_text)

# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()