QueryYourDocs / inference_main.py
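"""Minimal Streamlit app that streams chat completions from the Hugging Face
Inference API (meta-llama/Llama-3.2-1B-Instruct), with a commented-out local
inference path.

Run (standard Streamlit invocation; HF_TOKEN must be set in the environment):
    HF_TOKEN=<your token> streamlit run inference_main.py
"""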
import os

import streamlit as st
from huggingface_hub import InferenceClient

# Local-inference helpers; only used by the commented-out local path below.
from modules.inference.instruct import infer, load_model

# Hugging Face API token, read from the environment (raises KeyError if unset).
token = os.environ["HF_TOKEN"]
client = InferenceClient(model="meta-llama/Llama-3.2-1B-Instruct", token=token)
st.write("## Ask your Local LLM")
text_input = st.text_input("Query", value="Why is the sky blue?")
submit = st.button("Submit")
# Cached local-model path (disabled). st.cache_resource keeps the model in
# memory across Streamlit reruns instead of reloading it on every interaction.
# @st.cache_resource
# def load_model_cached():
#     return load_model()
#
# model = load_model_cached()
if submit:
    # Local-inference path (disabled): would query the cached model directly.
    # response = infer(model, text_input)
    # response

    # Stream a chat completion from the Hugging Face Inference API.
    output = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": text_input},
        ],
        stream=True,
        max_tokens=1024,
    )
    # Render tokens as they arrive; delta.content can be None on the final chunk.
    for chunk in output:
        st.write(chunk.choices[0].delta.content or "")
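# A minimal alternative rendering sketch (assumes Streamlit >= 1.31, which
# introduced st.write_stream): wrap the chunks in a generator and let
# st.write_stream render the response as one growing message instead of a
# separate element per chunk. If enabled, this belongs inside the `if submit:`
# block in place of the loop above.
#
# def token_stream():
#     for chunk in output:
#         yield chunk.choices[0].delta.content or ""
#
# st.write_stream(token_stream())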