grebelsm committed on
Commit
5e2af15
1 Parent(s): d71ca0a

Use faster model

Files changed (2)
  1. app.py +49 -33
  2. requirements.txt +1 -5
app.py CHANGED
@@ -1,44 +1,60 @@
+ import os
+ import urllib.request
  import gradio as gr
- from transformers import AutoTokenizer, AutoModelForCausalLM
- import torch
-
- model = AutoModelForCausalLM.from_pretrained(
-     "tiiuae/falcon-7b-instruct",
-     torch_dtype=torch.bfloat16,
-     trust_remote_code=True,
-     device_map="auto",
-     low_cpu_mem_usage=True,
- )
- tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")
+ from llama_cpp import Llama
 
 
- def generate_text(input_text):
-     input_ids = tokenizer.encode(input_text, return_tensors="pt")
-     attention_mask = torch.ones(input_ids.shape)
+ def download_file(file_link, filename):
+     # Checks if the file already exists before downloading
+     if not os.path.isfile(filename):
+         urllib.request.urlretrieve(file_link, filename)
+         print("File downloaded successfully.")
+     else:
+         print("File already exists.")
 
-     output = model.generate(
-         input_ids,
-         attention_mask=attention_mask,
-         max_length=200,
-         do_sample=True,
-         top_k=10,
-         num_return_sequences=1,
-         eos_token_id=tokenizer.eos_token_id,
-     )
 
-     output_text = tokenizer.decode(output[0], skip_special_tokens=True)
-     print(output_text)
+ # Downloading GGML model from HuggingFace
+ ggml_model_path = "https://huggingface.co/CRD716/ggml-vicuna-1.1-quantized/resolve/main/ggml-vicuna-7b-1.1-q4_1.bin"
+ filename = "ggml-vicuna-7b-1.1-q4_1.bin"
+
+ download_file(ggml_model_path, filename)
+
+
+ llm = Llama(model_path=filename, n_ctx=512, n_batch=126)
+
+
+ def generate_text(prompt="Who is the CEO of Apple?"):
+     output = llm(
+         prompt,
+         max_tokens=256,
+         temperature=0.1,
+         top_p=0.5,
+         echo=False,
+         stop=["#"],
+     )
+     output_text = output["choices"][0]["text"].strip()
 
      # Remove Prompt Echo from Generated Text
-     cleaned_output_text = output_text.replace(input_text, "")
+     cleaned_output_text = output_text.replace(prompt, "")
      return cleaned_output_text
 
 
- text_generation_interface = gr.Interface(
-     fn=generate_text,
-     inputs=[
-         gr.inputs.Textbox(label="Input Text"),
+ description = "Vicuna-7B"
+
+ examples = [
+     ["What is the capital of France?", "The capital of France is Paris."],
+     [
+         "Who wrote the novel 'Pride and Prejudice'?",
+         "The novel 'Pride and Prejudice' was written by Jane Austen.",
      ],
-     outputs=gr.inputs.Textbox(label="Generated Text"),
-     title="Falcon-7B Instruct",
- ).launch()
+     ["What is the square root of 64?", "The square root of 64 is 8."],
+ ]
+
+ gradio_interface = gr.Interface(
+     fn=generate_text,
+     inputs="text",
+     outputs="text",
+     examples=examples,
+     title="Vicuna-7B",
+ )
+ gradio_interface.launch()
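
For reference, the llama-cpp-python call used above returns an OpenAI-style completion dict, which is why app.py reads output["choices"][0]["text"]. Below is a minimal standalone sketch of that call outside Gradio; it is not part of the commit and assumes the quantized model file has already been downloaded to the working directory (app.py fetches it on startup):

    # Standalone sketch, not part of the commit: assumes
    # ggml-vicuna-7b-1.1-q4_1.bin is already present in the working directory.
    from llama_cpp import Llama

    llm = Llama(model_path="ggml-vicuna-7b-1.1-q4_1.bin", n_ctx=512, n_batch=126)

    output = llm(
        "Q: What is the capital of France? A:",
        max_tokens=32,
        temperature=0.1,
        top_p=0.5,
        echo=False,
        stop=["#"],
    )
    # The result is a dict shaped like {"choices": [{"text": ...}, ...], ...}.
    print(output["choices"][0]["text"].strip())
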
requirements.txt CHANGED
@@ -1,5 +1 @@
- datasets
- transformers
- accelerate
- einops
- safetensors
+ llama-cpp-python==0.1.62
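
The pinned llama-cpp-python 0.1.62 is a GGML-era release (later releases moved to the GGUF format), so it can still load the .bin file referenced in app.py. A quick sanity-check sketch, not part of the commit, for anyone running the Space locally:

    # Sanity-check sketch: confirm the pinned llama-cpp-python build is installed
    # before launching app.py locally.
    from importlib.metadata import version

    assert version("llama-cpp-python") == "0.1.62", version("llama-cpp-python")
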