Spaces:

tensorgirl
/

FinTech-Azure

Running

App Files Files Community

OmkarGhugarkar committed on May 10

Commit

35efc3d

•

1 Parent(s): 9a85adb

First Commit

Browse files

Files changed (11) hide show

Descriptor.xlsx +0 -0
DescriptorPrompt.xlsx +0 -0
Dockerfile +23 -0
README.md +5 -4
calling_script.py +46 -0
index.html +0 -19
main.py +27 -0
requirements.txt +13 -0
style.css +0 -28
symbol.xlsx +0 -0
utils.py +108 -0

Descriptor.xlsx ADDED Viewed

Binary file (13.3 kB). View file

DescriptorPrompt.xlsx ADDED Viewed

Binary file (13.3 kB). View file

Dockerfile ADDED Viewed

	@@ -0,0 +1,23 @@

+# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+FROM python:3.9
+WORKDIR /code
+# Copy only requirements.txt first so the dependency install below happens
+# before the application sources are added to the image.
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+#Added from here
+# Create an unprivileged user (uid 1000) and run everything after this as that user.
+RUN useradd -m -u 1000 user
+USER user
+# Point HOME at the new user's home directory and put its local bin directory on PATH.
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+# Copy the whole build context into the app directory, owned by the unprivileged user.
+COPY --chown=user . $HOME/app
+#COPY . .
+# Start uvicorn serving the `app` object from main.py on all interfaces, port 7860.
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,10 +1,11 @@
 ---
-title: FinTech Groq
-emoji: 😻
-colorFrom: gray
 colorTo: indigo
-sdk: static
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: FinTech
+emoji: 🌖
+colorFrom: green
 colorTo: indigo
+sdk: docker
 pinned: false
+license: apache-2.0
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

calling_script.py ADDED Viewed

	@@ -0,0 +1,46 @@

+from pydantic import BaseModel
+import requests
+import json
+class Item(BaseModel):  # JSON payload posted to the /Summarize/ endpoint; defaults hold one sample announcement
+    FileURL: str = "https://www.bseindia.com/stockinfo/AnnPdfOpen.aspx?Pname=d141ef4f-7856-4236-8f6f-efe09592df40.pdf"
+    memo: str = "Please find attached RTA Certificate u/r 74(5) of SEBI (DP) Regulations 2018 for QE March 2024"
+    TypeofAnnouncement: str = "General_Announcements"
+    Descriptor: str = "Certificate under Reg. 74 (5) of SEBI (DP) Regulations 2018"
+    caption: str = "Compliances-Certificate under Reg. 74 (5) of SEBI (DP) Regulations 2018"
+    newsdate: str = "2024-04-08T13:05:27"
+    symbol: str = "null"  # the literal "null" is rejected by the symbol check in utils.filter
+url = "http://jwttoken.cmots.com/cotovia/api/BSEAnnouncement"
+header = {"Content-Type":"application/json",
+"Authorization":"Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1bmlxdWVfbmFtZSI6ImNvdG92aWEiLCJyb2xlIjoiQWRtaW4iLCJuYmYiOjE3MTIxNDgzMzMsImV4cCI6MTcxMzAxMjMzMywiaWF0IjoxNzEyMTQ4MzMzLCJpc3MiOiJodHRwOi8vbG9jYWxob3N0OjUwMTkxIiwiYXVkIjoiaHR0cDovL2xvY2FsaG9zdDo1MDE5MSJ9.kvy4kv29zl0OkmpNXe5hZS2cHdCXF7OrShOFnxzyQfU"}
+output = requests.get(url,headers=header)
+data = json.loads(output.text)
+sample = data['data'][0]
+input_data = Item(
+    FileURL = sample['FileURL'] or "",
+    memo = sample['memo'] or "",
+    TypeofAnnouncement = sample['TypeofAnnouncement'] or "",
+    Descriptor = sample['Descriptor'] or "",
+    caption = sample['caption'] or "",
+    newsdate = sample['newsdate'] or "",
+    symbol = sample['symbol'] or ""
+)
+url = "https://tensorgirl-fintech.hf.space/Summarize/"
+response = requests.post(url, json = input_data.dict())
+print(response.text)
+'''
+The response would be 0 if the json doesn't pass the filter.
+Else it will return data in the form of dictionary who's keys would be as follows:
+1. mobile - For 280 words summary
+2. web - For 680 words summary
+3. tag - Single Tag
+4. headline - It will give the headline
+5. date-time - It will give the time and date when the summary was created
+'''

index.html DELETED Viewed

@@ -1,19 +0,0 @@
-<!doctype html>
-<html>
-	<head>
-		<meta charset="utf-8" />
-		<meta name="viewport" content="width=device-width" />
-		<title>My static Space</title>
-		<link rel="stylesheet" href="style.css" />
-	</head>
-	<body>
-		<div class="card">
-			<h1>Welcome to your static Space!</h1>
-			<p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
-			<p>
-				Also don't forget to check the
-				<a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
-			</p>
-		</div>
-	</body>
-</html>

main.py ADDED Viewed

	@@ -0,0 +1,27 @@

+from fastapi import FastAPI
+from pydantic import BaseModel, validator
+from utils import summary
+import os
+from huggingface_hub import login
+os.environ['HF_HOME'] = '/hug/cache/'
+os.environ['TRANSFORMERS_CACHE'] = '/blabla/cache/'
+class Item(BaseModel):
+    FileURL: str = "https://www.bseindia.com/stockinfo/AnnPdfOpen.aspx?Pname=d141ef4f-7856-4236-8f6f-efe09592df40.pdf"
+    memo: str  = "Please find attached RTA Certificate u/r 74(5) of SEBI (DP) Regulations 2018 for QE March 2024"
+    TypeofAnnouncement: str   = "General_Announcements"
+    Descriptor: str   = "Certificate under Reg. 74 (5) of SEBI (DP) Regulations 2018"
+    caption: str  = "Compliances-Certificate under Reg. 74 (5) of SEBI (DP) Regulations 2018"
+    newsdate: str  = "2024-04-08T13:05:27"
+    symbol: str = "EDELWEISS"
+app = FastAPI()
+@app.get("/")
+async def root():
+ return {"Summarize":"Version 1.5 'Images Added'"}
+@app.post("/Summarize/")
+def read_user(input_json: Item):
+    return summary(input_json.dict())

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+groq
+requests
+pypdf
+pandas
+datetime
+fastapi
+pydantic
+uvicorn
+openpyxl
+huggingface_hub
+torch
+transformers
+openai

style.css DELETED Viewed

@@ -1,28 +0,0 @@
-body {
-	padding: 2rem;
-	font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
-}
-h1 {
-	font-size: 16px;
-	margin-top: 0;
-}
-p {
-	color: rgb(107, 114, 128);
-	font-size: 15px;
-	margin-bottom: 10px;
-	margin-top: 5px;
-}
-.card {
-	max-width: 620px;
-	margin: 0 auto;
-	padding: 16px;
-	border: 1px solid lightgray;
-	border-radius: 16px;
-}
-.card p:last-child {
-	margin-bottom: 0;
-}

symbol.xlsx ADDED Viewed

Binary file (117 kB). View file

utils.py ADDED Viewed

	@@ -0,0 +1,108 @@

+import datetime
+from urllib.request import Request, urlopen
+from pypdf import PdfReader
+from io import StringIO
+import io
+import pandas as pd
+import os
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from transformers import pipeline
+from openai import OpenAI
+from groq import Groq
+#openai_key = os.getenv("OPENAI_API_KEY")  # key redacted: never commit API keys, load them from the environment
+#client = OpenAI(api_key = openai_key)
+desc = pd.read_excel('Descriptor.xlsx',header = None)  # read without a header row, so the first row is data
+desc_list  = desc.iloc[:,0].to_list()  # first column: the Descriptor values accepted by filter()
+model = "llama3-70b-8192"  # model name passed to the Groq chat completion in call()
+pipe = pipeline("text-classification", model="mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis")  # NOTE(review): built at import time but not referenced by the functions visible in this file
+def call(prompt, text):
+  client = Groq(api_key=os.getenv("key"),)
+  chat_completion = client.chat.completions.create(
+  messages=[
+      {
+          "role": "user",
+          "content": "{} {}".format(prompt, text),
+      }
+  ],
+  model=model,
+  )
+  return chat_completion.choices[0].message.content
+def filter(input_json):
+  sym = pd.read_excel('symbol.xlsx',header = None)
+  sym_list  = sym.iloc[:,0].to_list()
+  if input_json['FileURL']==None or input_json['FileURL'].lower()=='null':
+    return [0,"File_URL"]
+  if input_json['symbol']== 'null' or input_json['symbol'] not in sym_list:
+    return [0,"symbol"]
+  if input_json['TypeofAnnouncement'] not in ['General_Announcements','Outcome','General']:
+    return [0,"Annoucement"]
+  if input_json['Descriptor'] not in desc_list:
+    return [0,"Desc"]
+  url = 'https://www.bseindia.com/xml-data/corpfiling/AttachLive/'+ input_json['FileURL'].split('Pname=')[-1]
+  req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
+  html = urlopen(req)
+  cont = html.read()
+  reader = PdfReader(io.BytesIO(cont))
+  content = ''
+  for i in range(len(reader.pages)):
+    content+= reader.pages[i].extract_text()
+  document = content
+  return [1, document]
+def summary(input_json):
+    prompt = pd.read_excel('DescriptorPrompt.xlsx')
+    promptShort  = prompt.iloc[:,1].to_list()
+    promptLong  = prompt.iloc[:,2].to_list()
+    id = desc_list.index(input_json['Descriptor'])
+    output = {}
+    filtering_results = filter(input_json)
+    if filtering_results[0] == 0:
+        return 0
+    #return filtering_results[1]
+    long_text = filtering_results[1]
+    output['mobile'] = call(promptShort[id], long_text)
+    output['web'] = call(promptLong[id], long_text)
+    prompt = "1 word Financial SEO tag for this news article"
+    output['tag'] = call(prompt, output['mobile'])
+    prompt = "Headline for this News Article"
+    output['headline'] = call(prompt, output['web'])
+    utc_now = datetime.datetime.utcnow()
+    ist_now = utc_now.astimezone(datetime.timezone(datetime.timedelta(hours=5, minutes=30)))
+    output['Time'] = ist_now.strftime("%I:%M %p")
+    month_name = ist_now.strftime("%B")
+    output['Date'] = f"{ist_now.day} {month_name}, {ist_now.year}"
+    prompt = "Answer in one word the sentiment of this News out of Positive, Negative or Neutral {}"
+    output['sentiment'] = call(prompt, output['web'])
+    # response = client.images.generate(
+    # model="dall-e-3",
+    # prompt=headline.text,
+    # size="1024x1024",
+    # quality="standard",
+    # n=1
+    # )
+    # output["image_url"] = response.data[0].url
+    return output