Johntad110 committed
Commit ac3d1df
1 Parent(s): 2550105

add app.py and requirements.txt

Files changed (3)
  1. README.md +0 -1
  2. app.py +88 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -10,4 +10,3 @@ pinned: false
 license: apache-2.0
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,88 @@
+import gradio as gr
+import torch
+import os
+import sys
+import time
+import json
+from typing import List
+
+from transformers import (
+    LlamaTokenizer,
+    LlamaForCausalLM,
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    LlamaConfig
+)
+from peft import PeftModel
+from accelerate import disk_offload
+
+model = AutoModelForCausalLM.from_pretrained(
+    "Johntad110/llama-2-7b-amharic-tokenizer",
+    return_dict=True,
+    load_in_8bit=True,  # 8-bit quantization via bitsandbytes
+    device_map="auto",
+    low_cpu_mem_usage=True,
+    attn_implementation="sdpa"
+)
+
+tokenizer = LlamaTokenizer.from_pretrained(
+    "Johntad110/llama-2-7b-amharic-tokenizer"
+)
+
+embedding_size = model.get_input_embeddings().weight.shape[0]
+if len(tokenizer) != embedding_size:  # Amharic tokenizer grew the vocab
+    model.resize_token_embeddings(len(tokenizer))
+
+model = PeftModel.from_pretrained(model, "Johntad110/llama-2-amharic-peft")
+
+model.eval()  # Set model to evaluation mode
+
+
+def generate_text(
+    prompt: str,
+    max_new_tokens: int = None,  # None -> model's default generation length
+    seed: int = 42,
+    do_sample: bool = True,
+    min_length: int = None,
+    use_cache: bool = True,
+    top_p: float = 1.0,
+    temperature: float = 1.0,
+    top_k: int = 1,
+    repetition_penalty: float = 1.0,
+    length_penalty: float = 1.0,
+):
+    """
+    Perform text generation with user-defined parameters.
+    """
+
+    torch.cuda.manual_seed(seed)  # no-op when CUDA is unavailable
+    torch.manual_seed(seed)
+
+    batch = tokenizer(prompt, return_tensors="pt")
+    batch = {k: v.to(model.device) for k, v in batch.items()}  # follow device_map, don't hard-code "cuda"
+
+    with torch.no_grad():
+        outputs = model.generate(
+            **batch,
+            max_new_tokens=max_new_tokens,
+            do_sample=do_sample,
+            top_p=top_p,
+            temperature=temperature,
+            min_length=min_length,
+            use_cache=use_cache,
+            top_k=top_k,
+            repetition_penalty=repetition_penalty,
+            length_penalty=length_penalty,
+        )
+
+    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return output_text
+
+
+interface = gr.Interface(
+    fn=generate_text,
+    inputs=[gr.Textbox(label="Prompt")],
+    outputs="text"
+)
+
+interface.launch(debug=True)
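
Once this app is running as a Space, the single `gr.Interface` endpoint can also be called remotely. A minimal sketch using `gradio_client`; the Space id below is a hypothetical placeholder (the commit does not name the Space), and `api_name="/predict"` is the default route a one-function `gr.Interface` exposes:

# Sketch only: query the running Space from Python.
# Assumptions: `pip install gradio_client`; the Space id is hypothetical.
from gradio_client import Client

client = Client("Johntad110/llama-2-amharic")  # hypothetical Space id
result = client.predict(
    "ሰላም",  # prompt text for the single Textbox input ("hello" in Amharic)
    api_name="/predict",  # default route for a one-function gr.Interface
)
print(result)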
requirements.txt ADDED
@@ -0,0 +1,5 @@
+peft==0.10.0
+fire==0.6.0
+accelerate==0.29.3
+bitsandbytes==0.43.1
+gradio==4.27.0
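
Note that `transformers` and `torch` are not pinned here; both arrive transitively (peft 0.10.0 depends on transformers, accelerate, and torch), while `fire` is never imported by app.py. A quick sanity check of what pip actually resolved, assuming `pip install -r requirements.txt` has already run:

# Sketch only: print the resolved versions of the pinned packages
# plus the transitive dependencies app.py relies on.
from importlib.metadata import version

for pkg in ("peft", "fire", "accelerate", "bitsandbytes", "gradio",
            "transformers", "torch"):
    print(f"{pkg}=={version(pkg)}")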