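# NOTE: the text "Spaces: / Running / Running" that stood here appears to be
# residue from the web page this file was copied from; it is not program text.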
import datetime | |
from urllib.request import Request, urlopen | |
from pypdf import PdfReader | |
from io import StringIO | |
import io | |
import pandas as pd | |
import os | |
import torch | |
from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
from transformers import pipeline | |
from openai import OpenAI | |
from groq import Groq | |
import time | |
from openai import OpenAI | |
# NOTE(review): a commented-out OpenAI client setup containing a literal API
# key used to live here. The key has been removed from the source; treat it as
# leaked and rotate it. Credentials belong in environment variables.

# Descriptor values accepted by the pipeline: the first column of
# Descriptor.xlsx, which is read without a header row.
desc = pd.read_excel('Descriptor.xlsx', header=None)
desc_list = desc.iloc[:, 0].to_list()
def callAzure(prompt, text, max_chars=7000):
    """Send ``prompt`` followed by ``text`` to the Azure-hosted chat endpoint.

    The prompt and text are joined with a single space and the combined
    message is cut to ``max_chars`` characters before being sent as one
    user message.

    Args:
        prompt: Instruction text placed first in the message.
        text: Document text appended after the prompt.
        max_chars: Maximum length of the combined message; anything beyond
            this is silently dropped. Defaults to 7000.

    Returns:
        The content of the first choice in the completion response.
    """
    # The endpoint and key can be overridden through the environment so that
    # deployments do not need a code edit.
    # NOTE(review): the literal fallbacks are kept only so existing
    # deployments keep working. The key is exposed in source control and
    # should be rotated, after which the fallback should be deleted.
    url = os.getenv(
        "AZURE_LLAMA_ENDPOINT",
        "https://Meta-Llama-3-70B-Instruct-fkqip-serverless.eastus2.inference.ai.azure.com",
    )
    api_key = os.getenv("AZURE_LLAMA_API_KEY", "o5yaLhTIvg0s5zuYVInBpyneEZO8oonY")
    client = OpenAI(base_url=url, api_key=api_key)

    # Truncate the whole message (prompt included), as the original did.
    msg = "{} {}".format(prompt, text)[:max_chars]

    response = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": msg,
            }
        ],
        model="azureai",
    )
    return response.choices[0].message.content
def call(prompt, text, model=None):
    """Send ``prompt`` followed by ``text`` to Groq's chat API and return the reply.

    The suffix " Answer only the summary, no instructions" is appended to the
    prompt before it is joined to ``text`` with a single space.

    Args:
        prompt: Instruction text for the model.
        text: Document text appended after the prompt.
        model: Groq model id. When omitted, the ``GROQ_MODEL`` environment
            variable is used, falling back to a built-in default.

    Returns:
        The content of the first choice in the completion response.
    """
    # The original referenced an undefined global ``model`` and therefore
    # raised NameError on every call; resolve the model id explicitly instead.
    if model is None:
        # NOTE(review): confirm the default id against Groq's current model list.
        model = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")

    # The Groq API key is read from the environment variable named "key".
    client = Groq(api_key=os.getenv("key"))
    prompt = prompt + " Answer only the summary, no instructions"
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": "{} {}".format(prompt, text),
            }
        ],
        model=model,
    )
    return chat_completion.choices[0].message.content
def filter(input_json):
    """Validate an announcement record and, if accepted, return its PDF text.

    NOTE: this function shadows the builtin ``filter``; the name is kept
    because other code in this file calls it.

    Checks are applied in this order, and the first failure wins:
      1. ``FileURL`` is None or the string "null" (any case)  -> "File_URL"
      2. ``symbol`` is "null" or not in symbol.xlsx column 0  -> "symbol"
      3. ``TypeofAnnouncement`` is not an accepted type       -> "Annoucement"
      4. ``Descriptor`` is not in the module-level desc_list  -> "Desc"

    Args:
        input_json: Mapping with the keys ``FileURL``, ``symbol``,
            ``TypeofAnnouncement`` and ``Descriptor``.

    Returns:
        ``[0, reason]`` when the record is rejected, otherwise
        ``[1, document]`` where ``document`` is the concatenated text of every
        page of the downloaded PDF.

    Raises:
        KeyError: if a required key is missing from ``input_json``.
        Whatever ``urlopen`` / ``PdfReader`` raise on download or parse errors.
    """
    file_url = input_json['FileURL']
    # Cheapest check first: it needs no spreadsheet, so reject before any I/O.
    if file_url is None or file_url.lower() == 'null':
        return [0, "File_URL"]

    sym = pd.read_excel('symbol.xlsx', header=None)
    sym_list = sym.iloc[:, 0].to_list()
    if input_json['symbol'] == 'null' or input_json['symbol'] not in sym_list:
        return [0, "symbol"]
    if input_json['TypeofAnnouncement'] not in ['General_Announcements', 'Outcome', 'General']:
        # The misspelling is intentional to keep: this string is returned to callers.
        return [0, "Annoucement"]
    if input_json['Descriptor'] not in desc_list:
        return [0, "Desc"]

    # Only the part after the last "Pname=" is used; it is appended to the
    # fixed attachment base URL.
    url = 'https://www.bseindia.com/xml-data/corpfiling/AttachLive/' + file_url.split('Pname=')[-1]
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(req) as response:
        pdf_bytes = response.read()

    reader = PdfReader(io.BytesIO(pdf_bytes))
    # ``or ''`` guards against a page for which no text could be extracted.
    document = ''.join(page.extract_text() or '' for page in reader.pages)
    return [1, document]
def _strip_preamble(answer):
    """Return ``answer`` without its first line (the model's lead-in).

    Replies often look like "Here is the summary:\\n\\n<text>". Everything up
    to the first newline is dropped, plus one immediately following newline.
    A reply with no newline is returned unchanged, and a missing reply
    (``None`` or empty) yields an empty string.
    """
    if not answer:
        return ""
    _, newline, rest = answer.partition("\n")
    if not newline:
        return answer
    # Skip the blank separator line if present, but never eat a content
    # character when the lead-in is followed by a single newline.
    return rest[1:] if rest.startswith("\n") else rest


def summary(input_json):
    """Build the summary record for one announcement.

    The record is validated and its PDF text fetched via ``filter``. If it is
    rejected, the rejection reason string is returned. Otherwise the text is
    sent to ``callAzure`` with the descriptor-specific prompts from
    DescriptorPrompt.xlsx (column 1 = short prompt, column 2 = long prompt).

    Args:
        input_json: Mapping with the keys used by ``filter`` plus ``newsdate``.

    Returns:
        A ``str`` rejection reason when ``filter`` rejects the record,
        otherwise a ``dict`` with the keys "Link to BSE website",
        "Date of time of receiving data from BSE", "Stock Ticker",
        "Short Summary", "Long summary", "Tag", "Headline",
        "Date and time of data delivery from Skylark" and "Sentiment".
    """
    filtering_results = filter(input_json)
    if filtering_results[0] == 0:
        return filtering_results[1]
    long_text = filtering_results[1]

    # Read the prompt sheet only for accepted records.
    prompts = pd.read_excel('DescriptorPrompt.xlsx')
    prompt_short = prompts.iloc[:, 1].to_list()
    prompt_long = prompts.iloc[:, 2].to_list()
    # NOTE(review): this sheet is read WITH a header row while Descriptor.xlsx
    # is read without one; confirm that row i here matches desc_list[i].
    desc_idx = desc_list.index(input_json['Descriptor'])

    output = {}
    output["Link to BSE website"] = (
        'https://www.bseindia.com/xml-data/corpfiling/AttachLive/'
        + input_json['FileURL'].split('Pname=')[-1]
    )
    output["Date of time of receiving data from BSE"] = input_json["newsdate"] + "Z"
    output["Stock Ticker"] = input_json['symbol']

    output['Short Summary'] = _strip_preamble(callAzure(prompt_short[desc_idx], long_text))
    output['Long summary'] = _strip_preamble(callAzure(prompt_long[desc_idx], long_text))

    # Tag, headline and sentiment are derived from the short summary, not
    # from the full document.
    short_summary = output['Short Summary']
    output['Tag'] = _strip_preamble(
        callAzure("1 word Financial SEO tag for this news article", short_summary)
    )
    output['Headline'] = _strip_preamble(
        callAzure("Give a single headline for this News Article", short_summary)
    )

    # Take an aware "now" directly in IST. The previous
    # utcnow().astimezone(...) treated the naive UTC value as local time and
    # was only correct on hosts whose local zone is UTC.
    ist = datetime.timezone(datetime.timedelta(hours=5, minutes=30))
    ist_now = datetime.datetime.now(ist)
    # NOTE(review): the trailing "Z" is kept for output compatibility even
    # though the value is IST, not UTC.
    output['Date and time of data delivery from Skylark'] = (
        ist_now.strftime("%Y-%m-%d") + "T" + ist_now.strftime("%X") + "Z"
    )

    # The prompt no longer ends with a literal "{}" placeholder, which used to
    # be sent to the model verbatim. The sentiment reply is stored as returned.
    sentiment_prompt = (
        "Answer in one word the sentiment of this News out of Positive, Negative or Neutral"
    )
    output['Sentiment'] = callAzure(sentiment_prompt, short_summary)

    # Infographic generation (DALL-E image from the headline) is currently disabled.
    return output