imenayadi committed
Commit 5207833 (1 parent: e265489)
app.py CHANGED
@@ -1,7 +1,235 @@
  import gradio as gr
 
- def greet(name):
-     return "Hello " + name + "!!"
 
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
- demo.launch()
 
  import gradio as gr
+ import pandas as pd
+ from key_info import extract_entities
+ from summarization_with_bart import summarize_email_conditional
+ # Note: importing llama2_response_mail_generator would download and load the Llama
+ # model a second time at import, and its generate_email_response would be shadowed
+ # by the local definition below, so it is not imported here.
+ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
+ import torch
+ import spacy
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama
+ import subprocess
 
+ # Download the spaCy English model at startup
+ subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"], check=True)
+
+ # CSS for the Gradio interface
+
+ custom_css = ''' @import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.1/css/all.min.css');
+
+ body {
+     background-color: #eef1f5; /* Light grey-blue background for a neutral, clean look */
+ }
+ label {
+     color: #34495e; /* Dark blue-grey for a professional appearance */
+     font-weight: bold;
+ }
+ textarea, input, select, button {
+     background-color: #ffffff; /* Crisp white background for input fields and buttons */
+     border: 1px solid #bdc3c7; /* Soft grey border for a subtle, refined look */
+     color: #2c3e50; /* Darker shade of blue-grey for text, enhancing readability */
+ }
+ button {
+     background-color: #3498db; /* Bright blue for buttons to stand out */
+     color: black; /* Black text on the blue buttons; switch to #ffffff for white text */
+     border-radius: 4px; /* Slightly rounded corners for a modern touch */
+     font-weight: bold; /* Bold text for emphasis */
+     font-size: 16px; /* Sizable text for easy interaction */
+ }
+ button[type="submit"], button[type="reset"], button[type="button"] {
+     font-weight: bold; /* Ensures all actionable buttons are prominent */
+     font-size: 18px; /* Larger text size for better visibility and impact */
+ }
+ .result-box {
+     background-color: #ecf0f1; /* Very light grey for result boxes, ensuring focus */
+     color: #2c3e50; /* Consistent dark blue-grey text for uniformity */
+     border: 1px solid #bdc3c7; /* Matching the input field borders for design coherence */
+ }
+ .gradio-toolbar {
+     background-color: #ffffff; /* Maintains a clean, unobtrusive toolbar appearance */
+     border-top: 2px solid #3498db; /* A pop of bright blue to delineate the toolbar */
+ }
+
+ '''
+
+ # Separate interfaces: shared models and pipelines
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+ nlp = spacy.load("en_core_web_sm")
+ ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", tokenizer="dbmdz/bert-large-cased-finetuned-conll03-english")
+
+ # Load the fine-tuned RoBERTa category classifier
+ model_path = './fine_tuned_roberta_for_category_model_'
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
+ model = AutoModelForSequenceClassification.from_pretrained(model_path)
+ model.eval()
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model.to(device)
+
+ # Load the fine-tuned RoBERTa sentiment model and tokenizer
+ model_sentiment_path = './fine_tuned_roberta_for_sentiment_analysis_2000_'
+ tokenizer_sentiment = AutoTokenizer.from_pretrained(model_sentiment_path)
+ model_sentiment = AutoModelForSequenceClassification.from_pretrained(model_sentiment_path)
+ model_sentiment.eval()
+ model_sentiment.to(device)
+
+ model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
+ model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin"  # The model is in GGML bin format
+
+ # Download the model file
+ model_path_llama = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
+
+ # Initialize the Llama model with appropriate settings for GPU
+ lcpp_llm = Llama(
+     model_path=model_path_llama,
+     n_threads=2,     # CPU cores to use
+     n_batch=512,     # Batch size for processing; adjust as per your VRAM capacity
+     n_gpu_layers=32  # Number of layers to run on GPU, dependent on your GPU's VRAM
+ )
+
+ def generate_email_response(email_prompt):
+     # Log the input received by the function
+     print("Received prompt:", email_prompt)
+
+     # Shorthand commands ("email to ...") and full email bodies currently share
+     # the same template, so no branching on the input form is needed.
+     formatted_prompt = f'''
+     Email received: "{email_prompt}"
+     Respond to this email, ensuring a professional tone, providing a concise update, and addressing any potential concerns the sender might have.
+     Response:
+     '''
+
+     # Generate the response with the Llama-2 model
+     try:
+         response = lcpp_llm(
+             prompt=formatted_prompt,
+             max_tokens=256,
+             temperature=0.5,
+             top_p=0.95,
+             repeat_penalty=1.2,
+             top_k=150,
+             echo=True
+         )
+         generated_response = response["choices"][0]["text"]
+         # Remove the echoed prompt from the output if it is included
+         if formatted_prompt in generated_response:
+             generated_response = generated_response.replace(formatted_prompt, '').strip()
+         print("Generated response:", generated_response)
+         return generated_response
+     except Exception as e:
+         print("Error in response generation:", str(e))
+         return "Failed to generate response, please check the console for errors."
+
+ def classify_sentiment(text):
+     # Encode the text using the tokenizer
+     inputs = tokenizer_sentiment(text, return_tensors="pt", truncation=True, max_length=512, padding=True)
+     input_ids = inputs['input_ids'].to(device)
+     attention_mask = inputs['attention_mask'].to(device)
+
+     # Get model predictions
+     with torch.no_grad():
+         outputs = model_sentiment(input_ids=input_ids, attention_mask=attention_mask)
+         predictions = torch.nn.functional.softmax(outputs.logits, dim=1)
+
+     # Convert predictions to probabilities and a sentiment category
+     probabilities = predictions.cpu().numpy()[0]
+     categories = ["Positive", "Neutral", "Negative"]
+     predicted_sentiment = categories[probabilities.argmax()]
+
+     # Return the predicted sentiment and the confidence
+     confidence = max(probabilities)
+     return f"Sentiment: {predicted_sentiment}, Confidence: {confidence:.2f}"
+
+ def generate_summary(email_text):
+     return summarize_email_conditional(email_text, summarizer)
+
+ def display_entities(email_text):
+     try:
+         results = extract_entities(email_text, nlp, ner_pipeline)
+
+         # Convert to DataFrames
+         data_spacy = pd.DataFrame(results['spaCy Entities'])
+         data_transformer = pd.DataFrame(results['Transformer Entities'])
+
+         return data_spacy, data_transformer, ", ".join(results['Dates'])
+     except Exception as e:
+         print(f"Error: {e}")
+         # Return empty outputs in case of error
+         return pd.DataFrame(), pd.DataFrame(), ""
+
+ def classify_email(email):
+     # Encode the email text using the tokenizer
+     inputs = tokenizer(email, return_tensors="pt", truncation=True, max_length=512, padding=True)
+     input_ids = inputs['input_ids'].to(device)
+     attention_mask = inputs['attention_mask'].to(device)
+
+     # Get model predictions
+     with torch.no_grad():
+         outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+         predictions = torch.nn.functional.softmax(outputs.logits, dim=1)
+
+     # Convert predictions to probabilities and a category
+     probabilities = predictions.cpu().numpy()[0]
+     categories = ["Urgent Requests", "Project Updates", "Client Communications", "Meeting Coordination", "Internal Announcements"]
+     predicted_category = categories[probabilities.argmax()]
+
+     # Return the predicted category and the confidence
+     confidence = max(probabilities)
+     return f"Category: {predicted_category}, Confidence: {confidence:.2f}"
+
+
+ iface_category = gr.Interface(
+     fn=classify_email,
+     inputs=gr.Textbox(lines=10, placeholder="Enter Email Content Here..."),
+     outputs="text",
+     title="Email Category Classifier",
+     description="This model classifies email text into one of five categories: Urgent Requests, Project Updates, Client Communications, Meeting Coordination, Internal Announcements."
+ )
+
+ iface_sentiment = gr.Interface(
+     fn=classify_sentiment,
+     inputs=gr.Textbox(lines=5, placeholder="Enter Email Text Here..."),
+     outputs=gr.Textbox(label="Sentiment Analysis"),
+     title="Sentiment Analysis"
+ )
+
+ iface_summary = gr.Interface(
+     fn=generate_summary,
+     inputs=[gr.Textbox(lines=5, placeholder="Enter Email Text Here...")],
+     outputs=gr.Textbox(label="Generated Summary"),
+     title="Summary Generation"
+ )
+
+ iface_ner = gr.Interface(
+     fn=display_entities,
+     inputs=gr.Textbox(lines=5, placeholder="Enter Email Text Here..."),
+     outputs=[
+         gr.Dataframe(label="spaCy Entity Recognition"),
+         gr.Dataframe(label="Transformer Entity Recognition"),
+         gr.Textbox(label="Extracted Dates")
+     ],
+     title="NER Analysis",
+     description="Performs Named Entity Recognition using spaCy and Transformer models."
+ )
+
+ iface_response = gr.Interface(
+     fn=generate_email_response,
+     inputs=gr.Textbox(lines=10, placeholder="Enter the email prompt..."),
+     outputs=gr.Textbox(label="Generated Email Response"),
+     title="Email Response Generator",
+     description="Generate email responses using the Llama-2 model."
+ )
+
+ # Use tabs to organize the interfaces
+ tabs = gr.TabbedInterface(
+     [iface_category, iface_sentiment, iface_summary, iface_ner, iface_response],
+     ["Category", "Sentiment", "Summary", "NER", "Response Generator"],
+     css=custom_css
+ )
+ tabs.launch(share=True)
 
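For a quick local sanity check, the classifier functions defined above can be exercised directly, without launching the Gradio UI. A minimal sketch, assuming the two fine-tuned RoBERTa model directories referenced above are present locally (the sample email and the printed results are illustrative only):

    sample = "Hi team, the Q3 report is attached. Please review it before Friday's meeting."
    print(classify_email(sample))      # e.g. "Category: Project Updates, Confidence: 0.87"
    print(classify_sentiment(sample))  # e.g. "Sentiment: Neutral, Confidence: 0.74"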
 
 
key_info.py ADDED
@@ -0,0 +1,27 @@
+ import spacy
+ from transformers import pipeline
+ import re
+ from dateutil.parser import parse
+
+ def extract_entities(email_text, nlp, ner_pipeline):
+     # Regex pattern for dates like "March 5th, 2024"
+     date_pattern = r'\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{1,2}(?:th|st|nd|rd)?,\s+\d{4}\b'
+
+     # Use spaCy for initial extraction
+     doc = nlp(email_text)
+     spacy_entities = [{"Text": ent.text, "Type": ent.label_} for ent in doc.ents]
+
+     # Use the transformer model for refined extraction, keeping confident predictions only
+     transformer_entities = ner_pipeline(email_text)
+     transformer_entities = [{"Text": ent['word'], "Type": ent['entity'], "Score": ent['score']} for ent in transformer_entities if ent['score'] > 0.75]
+
+     # Extract dates using the regex and normalize them to YYYY-MM-DD
+     potential_dates = re.findall(date_pattern, email_text)
+     dates = [parse(date).strftime('%Y-%m-%d') for date in potential_dates]
+
+     return {
+         "spaCy Entities": spacy_entities,
+         "Transformer Entities": transformer_entities,
+         "Dates": dates
+     }
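A short usage sketch for extract_entities, mirroring how app.py wires it up (the sample email is hypothetical; the dates output follows from the regex and dateutil normalization above):

    import spacy
    from transformers import pipeline
    from key_info import extract_entities

    nlp = spacy.load("en_core_web_sm")
    ner = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
    email = "Meeting with Acme Corp in Berlin on March 5th, 2024. Contact John Smith."
    results = extract_entities(email, nlp, ner)
    print(results["Dates"])  # ['2024-03-05']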
llama2_response_mail_generator.py ADDED
@@ -0,0 +1,58 @@
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama
+
+ model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
+ model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin"  # The model is in GGML bin format
+
+ # Download the model file
+ model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
+
+ # Initialize the Llama model with appropriate settings for GPU
+ lcpp_llm = Llama(
+     model_path=model_path,
+     n_threads=2,     # CPU cores to use
+     n_batch=512,     # Batch size for processing; adjust as per your VRAM capacity
+     n_gpu_layers=32  # Number of layers to run on GPU, dependent on your GPU's VRAM
+ )
+
+ def generate_email_response(email_prompt):
+     # Log the input received by the function
+     print("Received prompt:", email_prompt)
+
+     # Shorthand commands ("email to ...") and full email bodies currently share
+     # the same template, so no branching on the input form is needed.
+     formatted_prompt = f'''
+     Email received: "{email_prompt}"
+     Respond to this email, ensuring a professional tone, providing a concise update, and addressing any potential concerns the sender might have.
+     Response:
+     '''
+
+     # Generate the response with the Llama-2 model
+     try:
+         response = lcpp_llm(
+             prompt=formatted_prompt,
+             max_tokens=256,
+             temperature=0.5,
+             top_p=0.95,
+             repeat_penalty=1.2,
+             top_k=150,
+             echo=True
+         )
+         generated_response = response["choices"][0]["text"]
+         # Remove the echoed prompt from the output if it is included
+         if formatted_prompt in generated_response:
+             generated_response = generated_response.replace(formatted_prompt, '').strip()
+         print("Generated response:", generated_response)
+         return generated_response
+     except Exception as e:
+         print("Error in response generation:", str(e))
+         return "Failed to generate response, please check the console for errors."
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ transformers
+ datasets
+ torch
+ gradio
+ spacy
+ llama-cpp-python
+ numpy
+ huggingface_hub
+ pandas
+ python-dateutil
+ # Note: the spaCy model is fetched at startup by app.py (python -m spacy download en_core_web_sm)
summarization_with_bart.py ADDED
@@ -0,0 +1,24 @@
+ from transformers import pipeline
+
+ def summarize_email_conditional(email_text, summarizer, min_input_length=50):
+     """
+     Summarizes the email if it's longer than min_input_length.
+     Adjusts the max_length parameter based on the length of the email.
+
+     Args:
+     - email_text (str): The text of the email to summarize.
+     - summarizer: A Hugging Face summarization pipeline.
+     - min_input_length (int): Minimum length (in words) of email to apply summarization.
+
+     Returns:
+     - str: The summary of the email, or the original email if below min_input_length.
+     """
+     # Only summarize if the email is longer than min_input_length words
+     if len(email_text.split()) > min_input_length:
+         # Dynamically cap max_length at about 75% of the email's word count (floor of 12),
+         # or any ratio that suits your need
+         max_length = max(12, int(len(email_text.split()) * 0.75))
+         summary = summarizer(email_text, max_length=max_length, min_length=5, do_sample=False)
+         return summary[0]['summary_text']
+     else:
+         # Return the original email text if it's not long enough to require summarization
+         return email_text
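A usage sketch with hypothetical inputs. One quirk of the heuristic worth noting: the cap is computed from the word count while the pipeline's max_length counts model tokens, so the 75% ratio is only approximate. An 80-word email gets max_length = max(12, int(80 * 0.75)) = 60:

    from transformers import pipeline
    from summarization_with_bart import summarize_email_conditional

    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    print(summarize_email_conditional("Lunch at noon?", summarizer))  # 3 words: returned unchanged
    long_email = " ".join(["word"] * 80)  # 80 words: summarized with max_length=60
    print(summarize_email_conditional(long_email, summarizer))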