File size: 2,028 Bytes
826e275
 
7a75a15
 
826e275
 
 
d73a8e9
826e275
6e4f775
7a75a15
7b34e37
826e275
e0bb50d
 
b598d9f
 
826e275
6e4f775
 
 
 
826e275
 
 
0b321da
826e275
 
 
 
 
 
 
b1dd47e
826e275
0e07a66
 
 
d73a8e9
826e275
 
 
 
 
b1dd47e
826e275
 
 
b1dd47e
0499581
 
 
 
 
d73a8e9
 
 
b1dd47e
d73a8e9
9c8dd72
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""
The Streamlit app for the project demo.
In the demo, the user can write a prompt
 and the model will generate a response using the grouped sampling algorithm.
"""

import streamlit as st
from torch.cuda import CudaError

from available_models import AVAILABLE_MODELS
from hanlde_form_submit import on_form_submit
from on_server_start import main as on_server_start_main

# Run the start-up routine imported from on_server_start before any widget is
# rendered. NOTE(review): what it prepares is not visible from this file.
on_server_start_main()

# Page heading shown at the top of the app.
st.title("Grouped Sampling Demo")

# The request form: the widgets below are submitted together when the user
# presses the "Generate" button.
with st.form("request_form"):
    # Which of the models listed in AVAILABLE_MODELS should generate the text.
    selected_model_name: str = st.selectbox(
        label="Select a model",
        options=AVAILABLE_MODELS,
        help="opt-iml-max-30b generates better texts but is slower",
    )

    # Requested output length, in tokens, bounded to 1-4096.
    output_length: int = st.number_input(
        label="Number of word pieces in the generated text, 1-4096 (default: 100)",
        min_value=1,
        max_value=4096,
        value=100,
        help="The length of the output text in tokens (word pieces).",
    )

    # The prompt, pre-filled with a small example and capped at 2048 characters.
    submitted_prompt: str = st.text_area(
        label="Input for the model, It is highly recommended to write an English prompt.",
        help="Enter the prompt for the model. The model will generate a response based on this prompt.",
        value="Instruction: Answer in yes or no.\n"
              "Question: Is this a prompt?\n"
              "Answer: ",
        max_chars=2048,
    )

    submitted: bool = st.form_submit_button(
        label="Generate",
        help="Generate the output text.",
        disabled=False,
    )

    if submitted:
        try:
            output = on_form_submit(
                selected_model_name,
                output_length,
                submitted_prompt,
            )
        # CudaError is handled before the generic tuple so that it gets the
        # dedicated message even though the tuple below also lists RuntimeError.
        except CudaError:
            st.error("Out of memory. Please try a smaller model, shorter prompt, or a smaller output length.")
        except (ValueError, TypeError, RuntimeError) as e:
            st.error(e)
        else:
            # Only show a result when generation succeeded; on failure `output`
            # was never assigned and referencing it would raise NameError.
            st.write(f"Generated text: {output}")


# Render the user instructions below the form. The file holds Hebrew text, so
# the encoding is pinned (assumed UTF-8) instead of relying on the platform's
# locale default, which may be unable to decode it.
with open("user_instructions_hebrew.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()
st.markdown(long_description)