File size: 5,366 Bytes
be5354c
 
 
 
 
 
 
 
 
80866c3
 
be5354c
 
f6bba37
6571183
86121b0
be5354c
86121b0
be5354c
 
 
6571183
 
 
 
 
 
 
 
 
 
be5354c
 
 
 
 
 
 
 
6571183
c6e9e12
6571183
 
4d03b08
d9e1e84
be5354c
ec2f6c7
2558343
ec2f6c7
2558343
d9e1e84
 
 
 
be5354c
 
 
 
 
 
f6bba37
be5354c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e2e2b73
80866c3
be5354c
 
 
 
 
 
 
 
b9dcff2
b6658ee
88958e1
b6658ee
 
f87c90d
b6658ee
6571183
be5354c
 
b9dcff2
be5354c
 
 
 
 
73c64c6
be5354c
 
 
 
 
 
d9e1e84
be5354c
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
from __future__ import annotations
from typing import Iterable
import gradio as gr
from gradio.themes.base import Base
from gradio.themes.utils import colors, fonts, sizes

from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Fetch the quantized model file into the working directory and load it with
# llama.cpp. hf_hub_download returns the local path of the fetched file, so
# that path is handed to Llama directly instead of repeating the filename
# (which previously had to be kept in sync by hand in two places).
model_path = hf_hub_download(repo_id="Monster/a", filename="ggml-alpaca-7b-q4.bin", local_dir=".")
llm = Llama(model_path=model_path)


# Prompt templates in the Alpaca format. Both are filled with str.format:
#   ins     -> one placeholder:  the instruction
#   ins_inp -> two placeholders: the instruction, then the extra input/context
# The layouts (preamble, blank line, "### ..." headers) mirror the published
# Alpaca prompt templates so the model sees the format it was tuned on.
ins = '''Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{}

### Response:
'''

ins_inp = '''Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
'''

# Monochrome-based theme. NOTE(review): in the visible part of this file the
# Blocks app is built with `seafoam`, not `theme` — confirm whether this is
# still needed.
_theme_font_stack = [
    gr.themes.GoogleFont("Open Sans"),
    "ui-sans-serif",
    "system-ui",
    "sans-serif",
]
theme = gr.themes.Monochrome(
    font=_theme_font_stack,
    radius_size=gr.themes.sizes.radius_sm,
    neutral_hue="slate",
    secondary_hue="blue",
    primary_hue="indigo",
)

def generate(
    instruction,
    input=None,
    temperature=0.1,
    top_p=0.95,
    top_k=40,
):
    """Stream a model response for an instruction, yielding the text so far.

    Args:
        instruction: The task description inserted into the prompt template.
        input: Optional extra context. When truthy the two-slot ``ins_inp``
            template is used; otherwise the single-slot ``ins`` template.
        temperature: Forwarded to the model call as ``temperature``.
        top_p: Forwarded to the model call as ``top_p``.
        top_k: Forwarded to the model call as ``top_k``.

    Yields:
        The accumulated response text after each streamed chunk, so every
        yielded value is a prefix-extended version of the previous one.
    """
    prompt = ins_inp.format(instruction, input) if input else ins.format(instruction)

    accumulated = ""
    chunks = llm(
        prompt,
        stop=['### Instruction:', '### End'],
        stream=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
    )
    for chunk in chunks:
        # Each streamed chunk carries its new text under choices[0]['text'].
        accumulated = accumulated + chunk['choices'][0]['text']
        yield accumulated
    


# Sample instructions shown as clickable examples in the UI; this list is
# passed to gr.Examples further down and fills the instruction textbox.
examples = [
    "Instead of making a peanut butter and jelly sandwich, what else could I combine peanut butter with in a sandwich? Give five ideas",
    "How do I make a campfire?",
    "Explain to me the difference between nuclear fission and fusion.",
    "Write an ad for sale Nikon D750."
]

def process_example(args):
    """Run ``generate`` to completion for one example and return the final text.

    Args:
        args: The example instruction, passed to ``generate`` as its
            ``instruction`` argument (all other parameters use their defaults).

    Returns:
        The last value yielded by ``generate`` (the full response text), or an
        empty string if the generator yields nothing.
    """
    # Start from "" so an empty stream returns a value instead of raising
    # UnboundLocalError on the loop variable.
    final_text = ""
    for final_text in generate(args):
        pass
    return final_text
    
# Custom CSS handed to gr.Blocks below: hides any element that carries the
# `generating` class.
# NOTE(review): presumably this suppresses Gradio's in-progress styling while
# output is streaming — confirm against the Gradio version in use.
css = ".generating {visibility: hidden}"

# Based on the gradio theming guide and borrowed from https://huggingface.co/spaces/shivi/dolly-v2-demo
class SeafoamCustom(Base):
    """Gradio theme built on ``Base`` with custom hues, fonts and overrides.

    All constructor arguments are keyword-only and are forwarded unchanged to
    ``Base.__init__``; a fixed set of style variables is then applied through
    ``Base.set``.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.emerald,
        secondary_hue: colors.Color | str = colors.blue,
        neutral_hue: colors.Color | str = colors.blue,
        spacing_size: sizes.Size | str = sizes.spacing_md,
        radius_size: sizes.Size | str = sizes.radius_md,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Quicksand"),
            "ui-sans-serif",
            "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"),
            "ui-monospace",
            "monospace",
        ),
    ):
        # Core palette / sizing / typography, passed straight to the base theme.
        base_settings = {
            "primary_hue": primary_hue,
            "secondary_hue": secondary_hue,
            "neutral_hue": neutral_hue,
            "spacing_size": spacing_size,
            "radius_size": radius_size,
            "font": font,
            "font_mono": font_mono,
        }
        super().__init__(**base_settings)

        # Per-variable overrides; values starting with "*" refer to other
        # theme variables (e.g. "*primary_300").
        style_overrides = {
            "button_primary_background_fill": "linear-gradient(90deg, *primary_300, *secondary_400)",
            "button_primary_background_fill_hover": "linear-gradient(90deg, *primary_200, *secondary_300)",
            "button_primary_text_color": "white",
            "button_primary_background_fill_dark": "linear-gradient(90deg, *primary_600, *secondary_800)",
            "block_shadow": "*shadow_drop_lg",
            "button_shadow": "*shadow_drop_lg",
            # NOTE(review): "zinc" is not a standard CSS color keyword —
            # confirm this value has the intended effect.
            "input_background_fill": "zinc",
            "input_border_color": "*secondary_300",
            "input_shadow": "*shadow_drop",
            "input_shadow_focus": "*shadow_drop_lg",
        }
        super().set(**style_overrides)


# Theme instance used by the Blocks app below.
seafoam = SeafoamCustom()


# Build the UI: a header, the instruction box, collapsible generation
# settings, a Markdown output area, the Generate button and example prompts.
with gr.Blocks(theme=seafoam, analytics_enabled=False, css=css) as demo:
    with gr.Column():
        gr.Markdown(
            """ ## Alpaca-LoRa

            7b quantized 4bit (q4_0)
            
            Type in the box below and click the button to generate answers to your most pressing questions!
            
      """
        )

        with gr.Row():
            with gr.Column(scale=3):
                instruction = gr.Textbox(lines=2, placeholder="Tell me more about alpacas.", label="Instruction", elem_id="q-input")

                with gr.Accordion("Advanced setting", open=False):
                    input = gr.components.Textbox(lines=2, label="Input", placeholder="none")
                    temperature = gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature")
                    top_p = gr.components.Slider(minimum=0, maximum=1, value=0.95, label="Top p")
                    top_k = gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k")

                with gr.Box():
                    gr.Markdown("**Output**")
                    output = gr.Markdown(elem_id="q-output")
                submit = gr.Button("Generate", variant="primary")
                gr.Examples(
                    examples=examples,
                    inputs=[instruction],
                    cache_examples=False,
                    fn=process_example,
                    outputs=[output],
                )

    # Both triggers (button click and pressing Enter in the instruction box)
    # share one input list so the optional Input text and the sampling
    # sliders are honoured either way. Previously the Enter path passed only
    # the instruction and silently fell back to generate()'s defaults.
    generate_inputs = [instruction, input, temperature, top_p, top_k]
    submit.click(generate, inputs=generate_inputs, outputs=[output])
    instruction.submit(generate, inputs=generate_inputs, outputs=[output])

# Enable the request queue with a concurrency count of 1, then start the app.
demo.queue(concurrency_count=1).launch(debug=True)