Spaces:
Sleeping
Sleeping
import datetime | |
from urllib.request import Request, urlopen | |
from pypdf import PdfReader | |
from io import StringIO | |
import io | |
import pandas as pd | |
import os | |
import torch | |
from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
from transformers import pipeline | |
from openai import OpenAI | |
from groq import Groq | |
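# NOTE: never hard-code API keys in source. The key previously committed here
# must be treated as compromised and revoked; load keys from the environment.
# openai_key = os.getenv("OPENAI_API_KEY")
# client = OpenAI(api_key=openai_key)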
# --- Module-level setup: executed once, at import time -----------------------

# First column of the header-less descriptor workbook; filter() accepts an
# announcement only when its 'Descriptor' value appears in this list.
desc = pd.read_excel("Descriptor.xlsx", header=None)
desc_list = desc.iloc[:, 0].to_list()

# Chat model name passed to the Groq client in call().
model = "llama3-70b-8192"

# Hugging Face text-classification pipeline built from a financial-news
# sentiment checkpoint.
pipe = pipeline(
    "text-classification",
    model="mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis",
)
def call(prompt, text):
    """Send ``prompt`` and ``text`` to the Groq chat API and return the reply.

    The two arguments are joined with a single space and sent as one
    ``user`` message to the module-level ``model``.

    Args:
        prompt: Instruction placed at the start of the message.
        text: Content the instruction applies to.

    Returns:
        The message content of the first choice in the completion response.
    """
    # The API key is read from the environment variable named "key".
    groq_client = Groq(api_key=os.getenv("key"))
    user_message = {"role": "user", "content": f"{prompt} {text}"}
    response = groq_client.chat.completions.create(
        messages=[user_message],
        model=model,
    )
    first_choice = response.choices[0]
    return first_choice.message.content
def _extract_pdf_text(url):
    """Download the PDF at ``url`` and return the text of all pages, concatenated."""
    # A browser-like User-Agent is sent with the request
    # (presumably the server rejects urllib's default agent -- confirm).
    request = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # The context manager closes the HTTP response once the body is read.
    with urlopen(request) as response:
        pdf_bytes = response.read()
    reader = PdfReader(io.BytesIO(pdf_bytes))
    # Treat a page that yields no text as empty rather than failing the join.
    return ''.join(page.extract_text() or '' for page in reader.pages)


def filter(input_json):  # NOTE: shadows the builtin; name kept for existing callers.
    """Validate an announcement record and fetch the text of its PDF attachment.

    Args:
        input_json: Mapping with the keys 'FileURL', 'symbol',
            'TypeofAnnouncement' and 'Descriptor'.

    Returns:
        ``[1, document_text]`` when every check passes, otherwise
        ``[0, reason]`` where ``reason`` names the first failed check:
        "File_URL", "symbol", "Annoucement" (spelling kept as-is because it
        is a runtime value callers may compare against) or "Desc".

    Raises:
        KeyError: If a required key is missing from a dict ``input_json``.
        Errors raised by ``urlopen`` or ``PdfReader`` propagate unchanged.
    """
    # The list of accepted symbols is re-read from disk on every call.
    sym = pd.read_excel('symbol.xlsx', header=None)
    sym_list = sym.iloc[:, 0].to_list()

    file_url = input_json['FileURL']
    if file_url is None or file_url.lower() == 'null':
        return [0, "File_URL"]

    symbol = input_json['symbol']
    if symbol == 'null' or symbol not in sym_list:
        return [0, "symbol"]

    if input_json['TypeofAnnouncement'] not in ['General_Announcements', 'Outcome', 'General']:
        return [0, "Annoucement"]

    if input_json['Descriptor'] not in desc_list:
        return [0, "Desc"]

    # Only the part of FileURL after the last 'Pname=' is used; it is appended
    # to the fixed BSE attachment path.
    attachment_name = file_url.split('Pname=')[-1]
    url = 'https://www.bseindia.com/xml-data/corpfiling/AttachLive/' + attachment_name
    return [1, _extract_pdf_text(url)]
def summary(input_json):
    """Build mobile/web summaries and metadata for one announcement.

    The record is validated with ``filter()``. If accepted, the attachment
    text is summarised through ``call()`` using the short and long prompts
    stored for the record's descriptor, and a tag, headline, sentiment and
    an IST timestamp are added.

    Args:
        input_json: Announcement record; see ``filter()`` for required keys.

    Returns:
        ``0`` when ``filter()`` rejects the record, otherwise a dict with the
        keys 'mobile', 'web', 'tag', 'headline', 'Time', 'Date' and
        'sentiment'.
    """
    # Validate before looking up the descriptor: filter() rejects descriptors
    # that are not in desc_list, so the index() call below cannot raise
    # ValueError and rejected records reach the ``return 0`` path.
    filtering_results = filter(input_json)
    if filtering_results[0] == 0:
        return 0
    long_text = filtering_results[1]

    # Column 1 holds the short (mobile) prompts, column 2 the long (web) ones.
    prompts = pd.read_excel('DescriptorPrompt.xlsx')
    short_prompts = prompts.iloc[:, 1].to_list()
    long_prompts = prompts.iloc[:, 2].to_list()
    # NOTE(review): Descriptor.xlsx is read with header=None while this sheet
    # uses the default header row -- confirm the two stay row-aligned.
    descriptor_index = desc_list.index(input_json['Descriptor'])

    output = {}
    output['mobile'] = call(short_prompts[descriptor_index], long_text)
    output['web'] = call(long_prompts[descriptor_index], long_text)
    output['tag'] = call("1 word Financial SEO tag for this news article", output['mobile'])
    output['headline'] = call("Headline for this News Article", output['web'])

    # Take an aware "now" directly in IST (UTC+05:30). Calling astimezone() on
    # a naive utcnow() would interpret it as host-local time and be wrong on
    # any machine whose clock is not set to UTC.
    ist = datetime.timezone(datetime.timedelta(hours=5, minutes=30))
    ist_now = datetime.datetime.now(ist)
    output['Time'] = ist_now.strftime("%I:%M %p")
    month_name = ist_now.strftime("%B")
    output['Date'] = f"{ist_now.day} {month_name}, {ist_now.year}"

    # call() appends the text itself, so the prompt carries no "{}" placeholder
    # (the original one was never filled and was sent to the model literally).
    sentiment_prompt = (
        "Answer in one word the sentiment of this News out of "
        "Positive, Negative or Neutral"
    )
    output['sentiment'] = call(sentiment_prompt, output['web'])

    # Image generation for the headline is currently disabled.
    return output