adarshj322 committed on
Commit
6569da3
1 Parent(s): 6a5e8f0

model changes

Browse files
Files changed (2) hide show
  1. app.py +69 -2
  2. requirements.txt +2 -0
app.py CHANGED
@@ -1,7 +1,74 @@
1
  import gradio as gr
2
 
3
- def greet(name):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  return "Hello " + name + "!!"
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
  iface.launch()
 
1
  import gradio as gr
2
 
3
+
4
+ import wandb
5
+ import torch
6
+
7
+ from transformers import GPT2Tokenizer,GPT2LMHeadModel
8
+ from peft import PeftModel
9
+
10
+ import os
11
+
12
+ import re
13
+
14
def clean_text(text):
    """Normalize raw email text.

    Lowercases the input, replaces every non-word character with a space,
    and collapses runs of whitespace into single spaces (trimming the ends).
    """
    lowered = text.lower()
    # Swap each non-word character (anything \W matches) for a space.
    without_specials = re.sub(r'\W', ' ', lowered)
    # Collapse consecutive whitespace and strip leading/trailing spaces.
    return re.sub(r'\s+', ' ', without_specials).strip()
22
# --- One-time setup at import: fetch fine-tuned adapter from W&B, build model ---

# SECURITY: never hard-code API keys in source control. The previous revision
# embedded a W&B key here; that key should be revoked. wandb.login() reads
# WANDB_API_KEY from the environment (e.g. an HF Spaces secret).
if "WANDB_API_KEY" not in os.environ:
    raise RuntimeError(
        "WANDB_API_KEY is not set; configure it as an environment secret."
    )

wandb.login()

run = wandb.init(project="Email_subject_gen", job_type="model_loading")

# Download the fine-tuned PEFT adapter weights logged as a W&B artifact.
artifact = run.use_artifact('Email_subject_gen/final_model:v0')
artifact_dir = artifact.download()

# Base checkpoint the adapter was trained on top of.
MODEL_KEY = 'olm/olm-gpt2-dec-2022'
tokenizer = GPT2Tokenizer.from_pretrained(MODEL_KEY)
# Register the pad token used during training so padding ids line up.
tokenizer.add_special_tokens({'pad_token': '{PAD}'})

model = GPT2LMHeadModel.from_pretrained(MODEL_KEY)
# The embedding table must grow to cover the newly added pad token.
model.resize_token_embeddings(len(tokenizer))
# NOTE(review): GPT-2 configs name their dropouts resid_pdrop/attn_pdrop;
# these two attributes may be unused by the model — verify against training code.
model.config.dropout = 0.1
model.config.attention_dropout = 0.1

# Wrap the base model with the downloaded PEFT (LoRA) adapter.
model = PeftModel.from_pretrained(model, artifact_dir)
48
def generateSubject(email):
    """Generate a subject line for *email* with the fine-tuned model.

    The body is cleaned, wrapped in the "<email>...<subject>" prompt format
    the model was trained on, and the text generated after the <subject>
    marker is returned (possibly empty if the marker was truncated away).
    """
    # Build the training-format prompt. (A previous revision also called
    # clean_text(email) once and discarded the result — removed as dead code.)
    prompt = "<email>" + clean_text(email) + "<subject>"

    # Left-pad so generation continues from the end of the prompt.
    tokenizer.padding_side = 'left'
    try:
        batch = tokenizer([prompt], padding=True, truncation=True,
                          return_tensors='pt').to(model.device)
        output_ids = model.generate(
            **batch, max_new_tokens=10,
            pad_token_id=tokenizer.pad_token_id)
        decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    finally:
        # Restore the default padding side even if generation raises.
        tokenizer.padding_side = 'right'

    # Guard the split: if truncation dropped the <subject> marker, the old
    # code's seq.split('<subject>')[1] raised IndexError; return '' instead.
    subjects = [seq.split('<subject>')[1] if '<subject>' in seq else ''
                for seq in decoded]
    return subjects[0]
67
+
68
+
69
+
70
def predict(name):
    """Return the greeting "Hello <name>!!" for the given name string."""
    parts = ["Hello ", name, "!!"]
    return "".join(parts)
72
 
73
# gr.inputs.Textbox was deprecated in Gradio 3.x and removed in 4.x;
# the top-level gr.Textbox component is the supported replacement.
iface = gr.Interface(fn=predict, inputs=gr.Textbox(), outputs="text")
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ wandb
+ peft
+ transformers
+ torch