# Spaces: Running
import google.generativeai as genai | |
import datetime | |
from urllib.request import Request, urlopen | |
from pypdf import PdfReader | |
from io import StringIO | |
import io | |
import pandas as pd | |
import os | |
import torch | |
from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
from transformers import pipeline | |
from openai import OpenAI | |
# SECURITY: the OpenAI API key was previously hard-coded on this line. A key
# that has been committed to source must be treated as leaked and revoked.
# It is now read from the environment, the same way the Gemini key is read
# with os.getenv elsewhere in this file.
openai_key = os.getenv("OPENAI_API_KEY")

# Build the client only when a key is configured, so importing this module
# does not fail on deployments that never call OpenAI. `client` is None in
# that case.
client = OpenAI(api_key=openai_key) if openai_key else None

# Financial-news sentiment classifier. pipeline() resolves both the tokenizer
# and the model from the checkpoint name, so they are not loaded separately.
pipe = pipeline(
    "text-classification",
    model="mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis",
)
def filter(input_json):
    """Validate an announcement and, if accepted, return the text of its PDF.

    Checks are applied in this order, stopping at the first failure:

    1. ``FileURL`` is present (not ``None`` and not the string "null",
       case-insensitive).
    2. ``symbol`` is not "null" and appears in the first column of
       ``symbol.xlsx``.
    3. ``TypeofAnnouncement`` is one of the accepted announcement types.
    4. ``Descriptor`` appears in the first column of ``Descriptor.xlsx``.

    Each spreadsheet is read only when the check that needs it is reached,
    so announcements rejected earlier cost no disk I/O.

    NOTE: this function shadows the builtin ``filter``; the name is kept
    because callers in this file use it.

    Args:
        input_json: Mapping with the keys ``FileURL``, ``symbol``,
            ``TypeofAnnouncement`` and ``Descriptor``.

    Returns:
        ``[0, reason]`` when a check fails, where ``reason`` is one of
        ``"File_URL"``, ``"symbol"``, ``"Annoucement"`` or ``"Desc"``
        (the spelling of ``"Annoucement"`` is preserved for compatibility);
        otherwise ``[1, text]`` where ``text`` is the concatenated extracted
        text of every page of the downloaded PDF.

    Raises:
        KeyError: If a required key is missing from ``input_json``.
        urllib.error.URLError: If the PDF cannot be downloaded.
    """
    file_url = input_json['FileURL']
    if file_url is None or file_url.lower() == 'null':
        return [0, "File_URL"]

    symbol = input_json['symbol']
    if symbol == 'null':
        return [0, "symbol"]
    known_symbols = pd.read_excel('symbol.xlsx', header=None).iloc[:, 0].to_list()
    if symbol not in known_symbols:
        return [0, "symbol"]

    if input_json['TypeofAnnouncement'] not in ['General_Announcements', 'Outcome', 'General']:
        return [0, "Annoucement"]

    known_descriptors = pd.read_excel('Descriptor.xlsx', header=None).iloc[:, 0].to_list()
    if input_json['Descriptor'] not in known_descriptors:
        return [0, "Desc"]

    # Only the part after 'Pname=' (the attachment file name) is appended to
    # the fixed BSE attachment base URL.
    url = ('https://www.bseindia.com/xml-data/corpfiling/AttachLive/'
           + file_url.split('Pname=')[-1])
    # A browser-like User-Agent is sent explicitly; presumably the server
    # rejects urllib's default one (TODO confirm).
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # The context manager closes the HTTP response even if read() fails.
    with urlopen(req) as response:
        pdf_bytes = response.read()

    reader = PdfReader(io.BytesIO(pdf_bytes))
    document = ''.join(page.extract_text() for page in reader.pages)
    return [1, document]
def summary(input_json):
    """Build news-style summaries and metadata for one announcement.

    The announcement is first validated and its PDF text fetched by
    ``filter``. The text is then sent to the Gemini ``gemini-pro`` model to
    produce two summaries, a tag, a headline and a sentiment label.

    The Gemini API key is read from the environment variable ``key``.

    Args:
        input_json: Announcement mapping, passed unchanged to ``filter``.

    Returns:
        ``0`` if ``filter`` rejects the announcement. Otherwise a dict with:

        * ``mobile``    - summary requested at 60 words
        * ``web``       - summary requested at 128 words
        * ``tag``       - one-word SEO tag generated from the mobile summary
        * ``headline``  - headline generated from the web summary
        * ``Time``      - current time in IST, formatted ``HH:MM AM/PM``
        * ``Date``      - current date in IST, formatted ``D Month, YYYY``
        * ``sentiment`` - model's answer for Positive / Negative / Neutral
    """
    genai.configure(api_key=os.getenv("key"))
    model = genai.GenerativeModel('gemini-pro')

    filtering_results = filter(input_json)
    if filtering_results[0] == 0:
        return 0
    long_text = filtering_results[1]

    output = {}

    mobile = model.generate_content(
        "Summarize this Financial letter in 60 words to be used as a news article. {}".format(long_text)
    )
    output['mobile'] = mobile.text

    web = model.generate_content(
        "Summarize this Financial letter in 128 words to be used as a news article. {}".format(long_text)
    )
    output['web'] = web.text

    tag = model.generate_content(
        "1 word Financial SEO tag for this news article {}".format(mobile.text)
    )
    output['tag'] = tag.text

    headline = model.generate_content(
        "Headline for this News Article {}".format(web.text)
    )
    output['headline'] = headline.text

    # Ask for the current time directly in IST (UTC+05:30) as an aware
    # datetime. The previous utcnow().astimezone(...) produced a naive UTC
    # value that astimezone() then interpreted as *local* time, giving a
    # wrong result on any host whose local timezone is not UTC.
    ist = datetime.timezone(datetime.timedelta(hours=5, minutes=30))
    ist_now = datetime.datetime.now(ist)
    output['Time'] = ist_now.strftime("%I:%M %p")
    month_name = ist_now.strftime("%B")
    output['Date'] = f"{ist_now.day} {month_name}, {ist_now.year}"

    senti = model.generate_content(
        "Answer in one word the sentiment of this News out of Positive, Negative or Neutral {}".format(web.text)
    )
    output['sentiment'] = senti.text

    return output