import gradio as gr
from ctransformers import AutoModelForCausalLM
class Z(object):
    def __init__(self):
        self.llm = None

    def init(self):
        pass

    def run0(self, txt0, paramTemp):
        prompt0 = txt0
        # prompt template for Wizard-Vicuna-13B
        prompt00 = f'''USER: {prompt0}
ASSISTANT:'''
        # prompt template for TheBloke/Wizard-Vicuna-13B-Uncensored-GGML
        prompt00 = f'''Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{prompt0}
### Response:'''
        # raw prompt; this last assignment wins, matching modelInfo['promptType'] == 'raw'
        prompt00 = prompt0
        response0 = self.llm(prompt00, max_new_tokens=198, temperature=paramTemp)  # temperatures previously tried: 0.5, 0.3
        return f'{response0}'
# experiment
#llm = AutoModelForCausalLM.from_pretrained('mverrilli/dolly-v2-12b-ggml', model_file='ggml-model-q5_0.bin', model_type='dolly-v2')
# experiment
#llm = AutoModelForCausalLM.from_pretrained('mverrilli/dolly-v2-7b-ggml', model_file='ggml-model-q5_0.bin', model_type='dolly-v2')
# Wizard Vicuna
# see https://github.com/melodysdreamj/WizardVicunaLM
#llm = AutoModelForCausalLM.from_pretrained('TheBloke/Wizard-Vicuna-13B-Uncensored-GGML', model_file='Wizard-Vicuna-13B-Uncensored.ggmlv3.q4_0.bin', model_type='llama')
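# modelInfo['path2'] packs the Hugging Face repo id and the model file name into
# one string separated by ':'; 'promptType' selects which template in Z.run0 is
# active ('raw' passes the user text through unchanged), and 'modelType' is the
# architecture name ctransformers expects for GGML models.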
modelInfo = {'path2':'TheBloke/hippogriff-30b-chat-GGML:hippogriff-30b.ggmlv3.q4_1.bin', 'promptType':'raw', 'modelType':'llama'}
print('[D] load LM...')
repoId, modelFile = modelInfo['path2'].split(':')
llm = AutoModelForCausalLM.from_pretrained(repoId, model_file=modelFile, model_type=modelInfo['modelType'])
print('[D] ...done')
z = Z()
z.llm = llm
z.init()
def run0(prompt, temperature):
    # thin wrapper so Gradio can call the method on the module-level Z instance
    return z.run0(prompt, temperature)
iface = gr.Interface(fn=run0, inputs=["text", gr.Slider(0.0, 1.0, value=0.41, label="temperature")], outputs="text")
iface.launch()
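# Usage note (an assumption, not part of the original script): launching with no
# arguments serves the app locally, typically at http://127.0.0.1:7860;
# iface.launch(share=True) would additionally create a temporary public URL.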