OmkarGhugarkar committed on
Commit
35efc3d
1 Parent(s): 9a85adb

First Commit

Browse files
Files changed (11) hide show
  1. Descriptor.xlsx +0 -0
  2. DescriptorPrompt.xlsx +0 -0
  3. Dockerfile +23 -0
  4. README.md +5 -4
  5. calling_script.py +46 -0
  6. index.html +0 -19
  7. main.py +27 -0
  8. requirements.txt +13 -0
  9. style.css +0 -28
  10. symbol.xlsx +0 -0
  11. utils.py +108 -0
Descriptor.xlsx ADDED
Binary file (13.3 kB). View file
 
DescriptorPrompt.xlsx ADDED
Binary file (13.3 kB). View file
 
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+ # you will also find guides on how best to write your Dockerfile
3
+
4
+ FROM python:3.9
5
+
6
+ WORKDIR /code
7
+
8
+ COPY ./requirements.txt /code/requirements.txt
9
+
10
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
11
+
12
+ # Create a non-root user (UID 1000) and switch to it; the app is copied into that user's home directory below
13
+ RUN useradd -m -u 1000 user
14
+ USER user
15
+ ENV HOME=/home/user \
16
+ PATH=/home/user/.local/bin:$PATH
17
+
18
+ WORKDIR $HOME/app
19
+
20
+ COPY --chown=user . $HOME/app
21
+ #COPY . .
22
+
23
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,11 @@
1
  ---
2
- title: FinTech Groq
3
- emoji: 😻
4
- colorFrom: gray
5
  colorTo: indigo
6
- sdk: static
7
  pinned: false
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: FinTech
3
+ emoji: 🌖
4
+ colorFrom: green
5
  colorTo: indigo
6
+ sdk: docker
7
  pinned: false
8
+ license: apache-2.0
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
calling_script.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ import requests
3
+ import json
4
+
5
class Item(BaseModel):
    """Payload describing a single announcement sent to the Summarize endpoint.

    Every field is a string; the defaults below are sample values.
    """

    # Link to the announcement document.
    FileURL: str = "https://www.bseindia.com/stockinfo/AnnPdfOpen.aspx?Pname=d141ef4f-7856-4236-8f6f-efe09592df40.pdf"
    # Free-text note accompanying the announcement.
    memo: str = "Please find attached RTA Certificate u/r 74(5) of SEBI (DP) Regulations 2018 for QE March 2024"
    # Announcement category.
    TypeofAnnouncement: str = "General_Announcements"
    # Descriptor label of the announcement.
    Descriptor: str = "Certificate under Reg. 74 (5) of SEBI (DP) Regulations 2018"
    # Caption shown for the announcement.
    caption: str = "Compliances-Certificate under Reg. 74 (5) of SEBI (DP) Regulations 2018"
    # Timestamp of the announcement, kept as a string.
    newsdate: str = "2024-04-08T13:05:27"
    # Ticker symbol; the literal string "null" is the sample default here.
    symbol: str = "null"
13
+
14
import os

# Timeouts (seconds) so a stalled server cannot hang the script forever.
FEED_TIMEOUT = 30
SUMMARY_TIMEOUT = 300

# Feed that lists the announcements; it requires a bearer token.
url = "http://jwttoken.cmots.com/cotovia/api/BSEAnnouncement"

# The bearer token must not live in source control: read it from the
# environment and stop with a clear message when it is missing.
token = os.environ.get("CMOTS_API_TOKEN")
if not token:
    raise SystemExit(
        "Set the CMOTS_API_TOKEN environment variable to a valid bearer token."
    )

header = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {token}",
}

output = requests.get(url, headers=header, timeout=FEED_TIMEOUT)
# Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
output.raise_for_status()
data = json.loads(output.text)

announcements = data['data']
if not announcements:
    raise SystemExit("The announcement feed returned no entries.")

# Only the first announcement is used as the sample request.
sample = announcements[0]

# Missing (falsy) values are replaced by empty strings so that the model's
# `str` fields always receive a string.
input_data = Item(
    FileURL=sample['FileURL'] or "",
    memo=sample['memo'] or "",
    TypeofAnnouncement=sample['TypeofAnnouncement'] or "",
    Descriptor=sample['Descriptor'] or "",
    caption=sample['caption'] or "",
    newsdate=sample['newsdate'] or "",
    symbol=sample['symbol'] or "",
)

url = "https://tensorgirl-fintech.hf.space/Summarize/"

response = requests.post(url, json=input_data.dict(), timeout=SUMMARY_TIMEOUT)
print(response.text)

# The response is 0 if the JSON does not pass the filter.
# Otherwise it is a dictionary whose keys are as follows:
#   1. mobile    - For 280 words summary
#   2. web       - For 680 words summary
#   3. tag       - Single tag
#   4. headline  - The headline
#   5. Time      - Time at which the summary was created
#   6. Date      - Date on which the summary was created
#   7. sentiment - One-word sentiment of the news
index.html DELETED
@@ -1,19 +0,0 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
main.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel, validator
3
+ from utils import summary
4
+ import os
5
+ from huggingface_hub import login
6
+
7
# Cache directory settings exported through environment variables.
# NOTE(review): these are assigned after `utils` and `huggingface_hub` have
# already been imported at the top of this file - confirm the libraries still
# honour them when set this late.
os.environ.update({
    'HF_HOME': '/hug/cache/',
    'TRANSFORMERS_CACHE': '/blabla/cache/',
})
9
+
10
class Item(BaseModel):
    """Request body accepted by the ``/Summarize/`` endpoint.

    Every field is a string; the defaults below are sample values.
    """

    # Link to the announcement document.
    FileURL: str = "https://www.bseindia.com/stockinfo/AnnPdfOpen.aspx?Pname=d141ef4f-7856-4236-8f6f-efe09592df40.pdf"
    # Free-text note accompanying the announcement.
    memo: str = "Please find attached RTA Certificate u/r 74(5) of SEBI (DP) Regulations 2018 for QE March 2024"
    # Announcement category.
    TypeofAnnouncement: str = "General_Announcements"
    # Descriptor label of the announcement.
    Descriptor: str = "Certificate under Reg. 74 (5) of SEBI (DP) Regulations 2018"
    # Caption shown for the announcement.
    caption: str = "Compliances-Certificate under Reg. 74 (5) of SEBI (DP) Regulations 2018"
    # Timestamp of the announcement, kept as a string.
    newsdate: str = "2024-04-08T13:05:27"
    # Ticker symbol of the company.
    symbol: str = "EDELWEISS"
18
+
19
+ app = FastAPI()
20
+
21
@app.get("/")
async def root():
    """Return the service's version banner as a one-key dictionary."""
    banner = "Version 1.5 'Images Added'"
    return {"Summarize": banner}
24
+
25
@app.post("/Summarize/")
def read_user(input_json: Item):
    """Summarize the posted announcement.

    The validated request body is converted to a plain dictionary and handed
    to ``summary``; whatever ``summary`` returns is sent back to the client.
    """
    payload = input_json.dict()
    return summary(payload)
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# `datetime` is part of the Python standard library and is intentionally not
# listed here; the PyPI distribution of that name is an unrelated package.
groq
requests
pypdf
pandas
fastapi
pydantic
uvicorn
openpyxl
huggingface_hub
torch
transformers
openai
style.css DELETED
@@ -1,28 +0,0 @@
1
- body {
2
- padding: 2rem;
3
- font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
4
- }
5
-
6
- h1 {
7
- font-size: 16px;
8
- margin-top: 0;
9
- }
10
-
11
- p {
12
- color: rgb(107, 114, 128);
13
- font-size: 15px;
14
- margin-bottom: 10px;
15
- margin-top: 5px;
16
- }
17
-
18
- .card {
19
- max-width: 620px;
20
- margin: 0 auto;
21
- padding: 16px;
22
- border: 1px solid lightgray;
23
- border-radius: 16px;
24
- }
25
-
26
- .card p:last-child {
27
- margin-bottom: 0;
28
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
symbol.xlsx ADDED
Binary file (117 kB). View file
 
utils.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ from urllib.request import Request, urlopen
3
+ from pypdf import PdfReader
4
+ from io import StringIO
5
+ import io
6
+ import pandas as pd
7
+ import os
8
+ import torch
9
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
+ from transformers import pipeline
11
+ from openai import OpenAI
12
+ from groq import Groq
13
+
14
# API keys are deliberately not kept in this file; the Groq key is read from
# the environment inside call().

# Allowed descriptors: first column of Descriptor.xlsx, read without a header
# row so that the first spreadsheet row is part of the list.
desc = pd.read_excel('Descriptor.xlsx', header=None)
desc_list = desc.iloc[:, 0].to_list()

# Name of the chat model passed to the Groq API.
model = "llama3-70b-8192"

# Text-classification pipeline created at import time.
# NOTE(review): `pipe` is not referenced by the functions visible in this
# file - confirm it is still needed before keeping this at import time.
pipe = pipeline("text-classification", model="mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis")
21
+
22
def call(prompt, text):
    """Send ``prompt`` followed by ``text`` to the Groq chat API.

    The two arguments are joined with a single space into one user message
    and submitted to the model named by the module-level ``model``. The API
    key is taken from the ``key`` environment variable.

    Returns the text content of the first completion choice.
    """
    groq_client = Groq(api_key=os.getenv("key"))
    user_message = {"role": "user", "content": f"{prompt} {text}"}
    completion = groq_client.chat.completions.create(
        messages=[user_message],
        model=model,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
36
+
37
def filter(input_json):
    """Validate an announcement and, when it passes, fetch its PDF text.

    The checks run in this order and stop at the first failure:

    1. ``FileURL`` is present, non-empty and not the string ``'null'``
       (compared case-insensitively).
    2. ``symbol`` is not ``'null'`` and appears in the first column of
       ``symbol.xlsx``.
    3. ``TypeofAnnouncement`` is one of ``'General_Announcements'``,
       ``'Outcome'`` or ``'General'``.
    4. ``Descriptor`` appears in the module-level ``desc_list``.

    Args:
        input_json: Dictionary with at least the keys ``FileURL``,
            ``symbol``, ``TypeofAnnouncement`` and ``Descriptor``.

    Returns:
        ``[0, reason]`` when a check fails, where ``reason`` names the failed
        check, or ``[1, document]`` where ``document`` is the text extracted
        from every page of the downloaded PDF.

    Errors raised while downloading or parsing the PDF are not caught here.
    """
    file_url = input_json['FileURL']
    # An empty URL is rejected like a missing one: it would otherwise lead to
    # a download request for the bare attachment directory.
    if not file_url or file_url.lower() == 'null':
        return [0, "File_URL"]

    # The spreadsheet is only read once the cheap URL check has passed.
    sym = pd.read_excel('symbol.xlsx', header=None)
    sym_list = sym.iloc[:, 0].to_list()

    symbol = input_json['symbol']
    if symbol == 'null' or symbol not in sym_list:
        return [0, "symbol"]
    if input_json['TypeofAnnouncement'] not in ['General_Announcements', 'Outcome', 'General']:
        # The spelling of this reason string is kept as-is for callers.
        return [0, "Annoucement"]
    if input_json['Descriptor'] not in desc_list:
        return [0, "Desc"]

    # The attachment is addressed by the file name that follows 'Pname=' in
    # the announcement URL.
    pdf_name = file_url.split('Pname=')[-1]
    url = 'https://www.bseindia.com/xml-data/corpfiling/AttachLive/' + pdf_name
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # The context manager closes the HTTP response even if reading fails.
    with urlopen(req) as response:
        cont = response.read()

    reader = PdfReader(io.BytesIO(cont))
    # Pages without extractable text contribute an empty string.
    document = ''.join(page.extract_text() or '' for page in reader.pages)

    return [1, document]
62
+
63
def summary(input_json):
    """Build the summary bundle for one announcement.

    The announcement is first passed through ``filter``. If it is rejected
    the function returns ``0``. Otherwise the extracted document text is sent
    to the language model several times to fill the result dictionary.

    Args:
        input_json: Dictionary with the announcement fields expected by
            ``filter`` (``FileURL``, ``symbol``, ``TypeofAnnouncement``,
            ``Descriptor``).

    Returns:
        ``0`` when the announcement does not pass the filter, otherwise a
        dictionary with the keys ``mobile``, ``web``, ``tag``, ``headline``,
        ``Time``, ``Date`` and ``sentiment``.
    """
    # Filter before anything else: the descriptor lookup below raises
    # ValueError for descriptors that are not in desc_list, and the filter is
    # what rejects those announcements.
    filtering_results = filter(input_json)
    if filtering_results[0] == 0:
        return 0

    long_text = filtering_results[1]

    # Columns 1 and 2 of DescriptorPrompt.xlsx hold the short and long prompt.
    # NOTE(review): this sheet is read with a header row while Descriptor.xlsx
    # is read without one - confirm the row positions line up as intended.
    prompts = pd.read_excel('DescriptorPrompt.xlsx')
    prompt_short = prompts.iloc[:, 1].to_list()
    prompt_long = prompts.iloc[:, 2].to_list()
    row = desc_list.index(input_json['Descriptor'])

    output = {}
    output['mobile'] = call(prompt_short[row], long_text)
    output['web'] = call(prompt_long[row], long_text)

    # The tag is derived from the short summary, the headline from the long one.
    output['tag'] = call("1 word Financial SEO tag for this news article", output['mobile'])
    output['headline'] = call("Headline for this News Article", output['web'])

    # Take the current time directly in IST (UTC+05:30). Converting a naive
    # utcnow() value with astimezone() would treat it as host-local time and
    # give a wrong result on hosts whose local zone is not UTC.
    ist = datetime.timezone(datetime.timedelta(hours=5, minutes=30))
    ist_now = datetime.datetime.now(ist)
    output['Time'] = ist_now.strftime("%I:%M %p")

    month_name = ist_now.strftime("%B")
    output['Date'] = f"{ist_now.day} {month_name}, {ist_now.year}"

    # call() appends the text after the prompt, so no placeholder is needed.
    sentiment_prompt = "Answer in one word the sentiment of this News out of Positive, Negative or Neutral"
    output['sentiment'] = call(sentiment_prompt, output['web'])

    return output