saitejad committed on
Commit
85a85fc
β€’
1 Parent(s): f00d4fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -32
app.py CHANGED
@@ -2,8 +2,6 @@ import gradio as gr
2
  import librosa
3
  import torch
4
  from transformers import AutoTokenizer, pipeline, logging
5
- from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
6
-
7
  from transformers import SpeechT5Processor, SpeechT5ForSpeechToText
8
 
9
 
@@ -52,47 +50,24 @@ def audio_to_text(audio, mic_audio=None):
52
 
53
  # Text Generation
54
 
55
- model_name_or_path = "TheBloke/Llama-2-7b-Chat-GPTQ"
56
- model_basename = "gptq_model-4bit-128g"
57
-
58
- use_triton = False
59
-
60
- llama_tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
61
-
62
- llama_model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
63
- model_basename=model_basename,
64
- use_safetensors=True,
65
- trust_remote_code=True,
66
- device="cuda:0",
67
- use_triton=use_triton,
68
- quantize_config=None)
69
 
70
  def generate(text):
71
- prompt = text
72
  system_message = "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. Give short, simple and direct answers"
73
  prompt_template=f'''[INST] <<SYS>>
74
  {system_message}
75
  <</SYS>>
76
 
77
- {prompt} [/INST]'''
78
-
79
- pipe = pipeline(
80
- "text-generation",
81
- model=model,
82
- tokenizer=tokenizer,
83
- max_new_tokens=512,
84
- temperature=0.7,
85
- top_p=0.95,
86
- repetition_penalty=1.15
87
- )
88
-
89
- return pipe(prompt_template)[0]['generated_text']
90
 
91
  def audio_text_generate(audio):
92
  audio_text = audio_to_text(audio)
93
  generated_text = generate(audio_text)
94
- response = generated_text[generated_text.index("[/INST]")+7:].strip()
95
- return audio_text, response
96
 
97
 
98
  demo = gr.Interface(fn=audio_text_generate,
 
2
  import librosa
3
  import torch
4
  from transformers import AutoTokenizer, pipeline, logging
 
 
5
  from transformers import SpeechT5Processor, SpeechT5ForSpeechToText
6
 
7
 
 
50
 
51
  # Text Generation
52
 
53
+ model_path= hf_hub_download(repo_id="TheBloke/Llama-2-7B-Chat-GGML", filename="llama-2-7b-chat.ggmlv3.q4_0.bin")
54
+ llm2 = Llama(model_path=model_path)
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  def generate(text):
 
57
  system_message = "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. Give short, simple and direct answers"
58
  prompt_template=f'''[INST] <<SYS>>
59
  {system_message}
60
  <</SYS>>
61
 
62
+ {text} [/INST]'''
63
+ chat_compl = llm2.create_completion(prompt=prompt_template, top_k=50, top_p=0.7, temperature=0.7, repeat_penalty=1.5)
64
+ return chat_compl['choices'][0]['text'].strip()
 
 
 
 
 
 
 
 
 
 
65
 
66
  def audio_text_generate(audio):
67
  audio_text = audio_to_text(audio)
68
  generated_text = generate(audio_text)
69
+ # response = generated_text[generated_text.index("[/INST]")+7:].strip()
70
+ return audio_text, generated_text
71
 
72
 
73
  demo = gr.Interface(fn=audio_text_generate,