Mykes committed on
Commit
43906f9
1 Parent(s): 9334c42

Upload app_stream_working.py

Browse files
Files changed (1) hide show
  1. app_stream_working.py +55 -0
app_stream_working.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from llama_cpp import Llama
3
+
4
+ # llm = Llama.from_pretrained(
5
+ # repo_id="Mykes/med_gemma7b_gguf",
6
+ # filename="*Q4_K_M.gguf",
7
+ # verbose=False,
8
+ # n_ctx=512,
9
+ # n_batch=512,
10
+ # n_threads=4
11
+ # )
12
@st.cache_resource
def load_model(
    repo_id: str = "Mykes/med_phi3-mini-4k-GGUF",
    filename: str = "*Q4_K_M.gguf",
    n_ctx: int = 256,
    n_batch: int = 256,
    n_threads: int = 4,
    verbose: bool = False,
) -> "Llama":
    """Download (if needed) and load a GGUF model through llama-cpp-python.

    The function is wrapped in ``st.cache_resource`` so that Streamlit reuses
    the loaded model across script reruns instead of reloading it on every
    widget interaction; a separate cache entry is kept per distinct argument
    combination.

    Calling ``load_model()`` with no arguments loads exactly the same model
    with the same settings as before. Pass e.g.
    ``repo_id="Mykes/med_gemma7b_gguf"`` to switch to the alternative
    repository that was previously kept around as commented-out code.

    Args:
        repo_id: Hugging Face Hub repository that hosts the GGUF file.
        filename: File name or glob pattern selecting the GGUF file inside
            the repository.
        n_ctx: Context window size, in tokens, passed to ``Llama``.
        n_batch: Prompt-processing batch size passed to ``Llama``.
        n_threads: Number of CPU threads passed to ``Llama``.
        verbose: Whether llama.cpp should print its diagnostic output.

    Returns:
        The loaded ``Llama`` instance.
    """
    return Llama.from_pretrained(
        repo_id=repo_id,
        filename=filename,
        verbose=verbose,
        n_ctx=n_ctx,
        n_batch=n_batch,
        n_threads=n_threads,
    )
24
+
25
# Shared model handle; load_model() is cached by Streamlit, so script reruns
# reuse the already-loaded model.
llm = load_model()

# Minimal question/answer template. An earlier, longer instruction-style
# prompt (with an embedded conversation history) was replaced by this one.
basic_prompt = "Q: {question}\nA:"

input_text = st.text_input('text')
model_input = basic_prompt.format(question=input_text)

# Options forwarded to the completion call.
# NOTE(review): "<end_of_turn>" looks like a leftover from the commented-out
# Gemma configuration -- confirm it is the right stop string for this model.
completion_options = {
    "max_tokens": None,  # no explicit cap on the number of generated tokens
    "stop": ["<end_of_turn>"],
    "echo": True,
    "stream": True,  # yield partial results instead of one final response
}

if input_text:
    # Single output slot that is overwritten as the answer grows.
    output_placeholder = st.empty()

    # Text accumulated from all chunks received so far.
    generated_text = ""

    for chunk in llm(model_input, **completion_options):
        piece = chunk['choices'][0]['text']
        generated_text = generated_text + piece

        # Redraw the slot with everything generated up to this point.
        output_placeholder.write(generated_text)

    # The stream is exhausted; any final post-processing would go here.
    st.write("Generation complete!")