shivakerur99 committed on
Commit
ddfe7e7
1 Parent(s): e30f8a4

Upload 2 files

Files changed (2)
  1. main.py +243 -0
  2. requirements.txt +0 -0
main.py ADDED
@@ -0,0 +1,243 @@
+ import os
+ import time
+ from datetime import datetime
+
+ import nltk
+ import openai
+ import requests
+ from databases import Database
+ from fastapi import FastAPI, File, HTTPException, UploadFile
+ from fastapi.middleware.cors import CORSMiddleware
+ from nltk.corpus import stopwords
+ from nltk.tokenize import word_tokenize
+ from pydantic import BaseModel
+ from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine
+ from textblob import TextBlob
+
+ # Download the NLTK data needed by word_tokenize and stopwords
+ nltk.download('punkt')
+ nltk.download('stopwords')
+
+ # Read the OpenAI key from the environment rather than hard-coding a secret in source
+ openai.api_key = os.environ.get('OPENAI_API_KEY')
+
+ app = FastAPI()
+
+ # Deepgram credentials: read from the environment rather than hard-coding a secret
+ API_KEY = os.environ.get('DEEPGRAM_API_KEY')
+
+ # Deepgram pre-recorded transcription endpoint
+ url = 'https://api.deepgram.com/v1/listen'
+
+ # Request headers; the Content-Type must match the uploaded audio format
+ headers = {
+     'Authorization': f'Token {API_KEY}',
+     'Content-Type': 'audio/wav',
+ }
+
+ # Transcription options: speaker diarization, punctuation, and utterance grouping
+ params = {
+     'diarize': 'true',
+     'punctuate': 'true',
+     'utterances': 'true',
+ }
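+
+ # For reference, the /v1/listen response with utterances enabled is consumed
+ # below via data['results']['utterances']; each utterance carries 'speaker'
+ # and 'transcript' fields, roughly:
+ #   {"results": {"utterances": [{"speaker": 0, "transcript": "Hello there."}]}}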
+
+ # Set up CORS so the API accepts requests from any origin
+ origins = ["*"]
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=origins,
+     allow_credentials=True,
+     allow_methods=["GET", "POST", "PUT", "DELETE"],
+     allow_headers=["*"],
+ )
+
+ # SQLAlchemy engine and metadata for a local SQLite database
+ DATABASE_URL = "sqlite:///./test.db"
+ engine = create_engine(DATABASE_URL)
+ metadata = MetaData()
+
+ # Schema for the documents table
+ documents = Table(
+     "documents",
+     metadata,
+     Column("id", Integer, primary_key=True),
+     Column("filename", String),
+     Column("upload_date", String),
+     Column("content", String),
+ )
+
+ # Create the documents table in the database
+ metadata.create_all(engine)
+
+ # Pydantic model returned by the upload endpoint
+ class Document(BaseModel):
+     filename: str
+     upload_date: str
+     content: str
+
+ # Initialize the async database connection pool
+ database = Database(DATABASE_URL)
+
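+ # The `databases` pool must be connected before any query runs; without these
+ # hooks the insert in /upload/ would fail on the first request.
+ @app.on_event("startup")
+ async def startup():
+     await database.connect()
+
+ @app.on_event("shutdown")
+ async def shutdown():
+     await database.disconnect()
+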
+ def analyze_sentiment(text):
+     # Map TextBlob polarity to a coarse positive/negative/neutral label
+     blob = TextBlob(text)
+     sentiment = blob.sentiment.polarity
+     if sentiment > 0:
+         return "positive"
+     elif sentiment < 0:
+         return "negative"
+     else:
+         return "neutral"
+
+ def analyze_conversation_sentiment(conversation):
+     sentiment_analysis = {}
+     for line in conversation:
+         # Split on the first colon only, so the dialogue may itself contain colons
+         speaker, _, dialogue = line.strip().partition(':')
+         sentiment = analyze_sentiment(dialogue)
+         sentiment_analysis[line] = sentiment
+     return sentiment_analysis
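+
+ # Example (sketch): analyze_sentiment("I love this plan") returns "positive";
+ # analyze_sentiment("This is terrible") returns "negative".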
+
+ def parse_conversation(content):
+     # Split the transcript into one entry per line
+     return content.strip().split('\n')
+
+ def extract_active_words(text):
+     # Keep alphanumeric tokens that are not English stopwords
+     tokens = word_tokenize(text)
+     stop_words = set(stopwords.words('english'))
+     active_words = [word for word in tokens if word.isalnum() and word.lower() not in stop_words]
+     return active_words
+
+ def generate_description(speaker, sentiment, active_words):
+     # Ask the model for sentiment-based psychological insights about the speaker,
+     # without echoing the sentiment/active words or summarizing the conversation
+     prompt = (
+         f"{speaker}: Sentiment: {sentiment}\n"
+         f"Active Words: {', '.join(active_words)}\n"
+         "Description: Based on the sentiment, give psychological insights about the "
+         "speaker derived from the conversation. Do not mention the sentiment or the "
+         "active words, and do not summarize the conversation or list keywords."
+     )
+     response = openai.Completion.create(
+         engine="gpt-3.5-turbo-instruct",
+         prompt=prompt,
+         temperature=0.7,
+         max_tokens=len(speaker) + 50  # Scales loosely with the speaker label length
+     )
+     return response.choices[0].text.strip()
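+
+ # Note: openai.Completion.create is the legacy (openai<1.0) SDK interface that
+ # this file is written against. A rough equivalent on openai>=1.0 (a sketch,
+ # not used here) would be:
+ #   client = openai.OpenAI()
+ #   resp = client.completions.create(model="gpt-3.5-turbo-instruct",
+ #                                    prompt=prompt, temperature=0.7, max_tokens=60)
+ #   text = resp.choices[0].text.strip()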
+
+ # Endpoint for uploading text (.txt) or audio (.mp3/.wav) files
+
+ @app.post("/upload/")
+ async def upload_text_file(file: UploadFile = File(...)):
+     # Accept only .txt transcripts or .mp3/.wav audio
+     if not file.filename.lower().endswith(('.txt', '.mp3', '.wav')):
+         raise HTTPException(status_code=400, detail="Only text (.txt) or audio (.mp3, .wav) files are allowed.")
+
+     # Path used to save uploaded audio in the current directory
+     file_path = os.path.join(os.getcwd(), file.filename)
+
+     if file.filename.lower().endswith('.txt'):
+         # Read the file asynchronously and drop blank lines
+         raw_bytes = await file.read()
+         text = raw_bytes.decode('utf-8')
+         content = '\n'.join(line for line in text.splitlines() if line.strip())
+
+     else:  # .mp3 or .wav
+         # Save the uploaded audio file in the current directory
+         with open(file_path, 'wb') as audio_file:
+             audio_file.write(await file.read())
+
+         # Match the Content-Type header to the actual audio format
+         request_headers = dict(headers)
+         request_headers['Content-Type'] = 'audio/mpeg' if file.filename.lower().endswith('.mp3') else 'audio/wav'
+
+         # Send the audio to Deepgram for diarized transcription
+         with open(file_path, 'rb') as audio_data:
+             response = requests.post(url, headers=request_headers, params=params, data=audio_data)
+
+         if not response.ok:
+             raise HTTPException(status_code=502, detail=f"Deepgram error: {response.status_code} - {response.reason}")
+
+         # Concatenate per-utterance transcripts, tagged with the speaker id
+         data = response.json()
+         full_transcript = ""
+         for utterance in data['results']['utterances']:
+             full_transcript += f"[Speaker: {utterance['speaker']}] {utterance['transcript']}\n"
+         content = full_transcript
+
+     # Create the document object
+     doc = Document(filename=file.filename, upload_date=str(datetime.now()), content=content)
+
+     # Insert the document data into the database
+     async with database.transaction():
+         query = documents.insert().values(
+             filename=doc.filename,
+             upload_date=doc.upload_date,
+             content=doc.content
+         )
+         await database.execute(query)
+
+     return doc
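+
+ # Example client call (a sketch, assuming the server runs on localhost:8000):
+ #   import requests
+ #   with open('call.wav', 'rb') as f:
+ #       r = requests.post('http://localhost:8000/upload/',
+ #                         files={'file': ('call.wav', f, 'audio/wav')})
+ #   print(r.json())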
+
+ class DataInput(BaseModel):
+     responseData: str
+
+ @app.post("/doc/")
+ async def process_data(data: DataInput):
+     # Parse the uploaded transcript into individual lines
+     content = data.responseData
+     conversation = parse_conversation(content)
+     sentiments_with_active_words = []
+
+     # NOTE: The commented-out block below scored sentiment with OpenAI instead
+     # of TextBlob. It is kept for reference, but it issues one API request per
+     # sentence, which is too slow and costly for large files.
+     # for sentence in conversation:
+     #     result = openai.Completion.create(
+     #         engine="gpt-3.5-turbo-instruct",
+     #         prompt=sentence + " sentiment:",
+     #         temperature=0,
+     #         max_tokens=1,
+     #         n=1,
+     #         stop=None,
+     #     )
+     #     sentiment = result['choices'][0]['text'].strip()
+     #     time.sleep(20)
+     #     active_words = extract_active_words(sentence)
+
+     # Sentiment analysis with TextBlob, which is fast and, in practice,
+     # comparable in quality to the OpenAI approach above
+     sentiment_analysis = analyze_conversation_sentiment(conversation)
+     for line, sentiment in sentiment_analysis.items():
+         active_words = extract_active_words(line)
+         sentiments_with_active_words.append((sentiment, active_words))
+
+     # Generate a description per sentence, throttling between OpenAI calls
+     descriptions = []
+     for sentence, (sentiment, active_words) in zip(conversation, sentiments_with_active_words):
+         speaker = sentence.split(":")[0]
+         time.sleep(20)  # Throttle between OpenAI calls; adjust to your rate limits
+         description = generate_description(speaker, sentiment, active_words)
+         descriptions.append(description)
+
+     # Interleave each sentence with its generated description
+     output = []
+     for i, (sentence, description) in enumerate(zip(conversation, descriptions)):
+         output.append(f"Sentence {i+1}: {sentence}\n")
+         output.append(f"Description: {description}\n")
+
+     return output
+
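+ # Example end-to-end usage (a sketch, assuming a local server):
+ #   uvicorn main:app --reload
+ #   curl -F "file=@meeting.txt" http://localhost:8000/upload/
+ #   curl -X POST http://localhost:8000/doc/ -H "Content-Type: application/json" \
+ #        -d '{"responseData": "Alice: I am thrilled.\nBob: I am not so sure."}'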
requirements.txt ADDED
Binary file (1.22 kB)