tensorgirl committed on
Commit
95bc4d4
1 Parent(s): dc0a72b

Upload 10 files

Browse files
Files changed (10) hide show
  1. .gitattributes +35 -35
  2. Descriptor.xlsx +0 -0
  3. DescriptorPrompt.xlsx +0 -0
  4. Dockerfile +23 -0
  5. README.md +11 -10
  6. calling_script.py +46 -0
  7. main.py +27 -0
  8. requirements.txt +13 -0
  9. symbol.xlsx +0 -0
  10. utils.py +156 -0
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Descriptor.xlsx ADDED
Binary file (13.3 kB). View file
 
DescriptorPrompt.xlsx ADDED
Binary file (17.6 kB). View file
 
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+ # you will also find guides on how best to write your Dockerfile
3
+
4
+ FROM python:3.9
5
+
6
+ WORKDIR /code
7
+
8
+ COPY ./requirements.txt /code/requirements.txt
9
+
10
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
11
+
12
+ #Added from here
13
+ RUN useradd -m -u 1000 user
14
+ USER user
15
+ ENV HOME=/home/user \
16
+ PATH=/home/user/.local/bin:$PATH
17
+
18
+ WORKDIR $HOME/app
19
+
20
+ COPY --chown=user . $HOME/app
21
+ #COPY . .
22
+
23
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,11 @@
1
- ---
2
- title: FinTech Mistral
3
- emoji:
4
- colorFrom: green
5
- colorTo: yellow
6
- sdk: static
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
1
+ ---
2
+ title: FinTech
3
+ emoji: 🌖
4
+ colorFrom: green
5
+ colorTo: indigo
6
+ sdk: docker
7
+ pinned: false
8
+ license: apache-2.0
9
+ ---
10
+
11
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
calling_script.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ import requests
3
+ import json
4
+
5
class Item(BaseModel):
    # Announcement payload posted to the /Summarize/ endpoint.
    # Every field is a string; the defaults are a sample announcement.
    FileURL: str = (
        "https://www.bseindia.com/stockinfo/AnnPdfOpen.aspx"
        "?Pname=d141ef4f-7856-4236-8f6f-efe09592df40.pdf"
    )
    memo: str = (
        "Please find attached RTA Certificate u/r 74(5) of SEBI (DP) "
        "Regulations 2018 for QE March 2024"
    )
    TypeofAnnouncement: str = "General_Announcements"
    Descriptor: str = "Certificate under Reg. 74 (5) of SEBI (DP) Regulations 2018"
    caption: str = (
        "Compliances-Certificate under Reg. 74 (5) of SEBI (DP) "
        "Regulations 2018"
    )
    newsdate: str = "2024-04-08T13:05:27"
    symbol: str = "null"
13
+
14
# Step 1: fetch the latest announcements from the upstream BSE announcement API.
url = "http://jwttoken.cmots.com/cotovia/api/BSEAnnouncement"

# NOTE(review): this bearer token is committed in source and its JWT `exp`
# claim appears to be mid-April 2024 -- rotate it and load it from
# configuration rather than the script.
header = {"Content-Type":"application/json",
          "Authorization":"Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1bmlxdWVfbmFtZSI6ImNvdG92aWEiLCJyb2xlIjoiQWRtaW4iLCJuYmYiOjE3MTIxNDgzMzMsImV4cCI6MTcxMzAxMjMzMywiaWF0IjoxNzEyMTQ4MzMzLCJpc3MiOiJodHRwOi8vbG9jYWxob3N0OjUwMTkxIiwiYXVkIjoiaHR0cDovL2xvY2FsaG9zdDo1MDE5MSJ9.kvy4kv29zl0OkmpNXe5hZS2cHdCXF7OrShOFnxzyQfU"}

# A timeout keeps the script from hanging forever on an unresponsive host, and
# raise_for_status() surfaces HTTP errors (e.g. 401 for a rejected token)
# instead of failing later while parsing an error page as JSON.
output = requests.get(url, headers=header, timeout=30)
output.raise_for_status()
data = json.loads(output.text)

announcements = data['data']
if not announcements:
    # Fail with a readable message instead of an IndexError on an empty feed.
    raise SystemExit("The announcement API returned no entries to summarize")
sample = announcements[0]

# Step 2: build the request payload; null/empty upstream values become "".
input_data = Item(
    FileURL = sample['FileURL'] or "",
    memo = sample['memo'] or "",
    TypeofAnnouncement = sample['TypeofAnnouncement'] or "",
    Descriptor = sample['Descriptor'] or "",
    caption = sample['caption'] or "",
    newsdate = sample['newsdate'] or "",
    symbol = sample['symbol'] or ""
    )

# Step 3: post the payload to the summarization service and show the raw reply.
url = "https://tensorgirl-fintech.hf.space/Summarize/"

# Generous timeout: the service makes several model calls per request.
response = requests.post(url, json = input_data.dict(), timeout=300)
print(response.text)
38
+ '''
39
+ The response would be 0 if the json doesn't pass the filter.
40
+ Otherwise it returns a dictionary whose keys are as follows:
41
+ 1. mobile - For 280 words summary
42
+ 2. web - For 680 words summary
43
+ 3. tag - Single Tag
44
+ 4. headline - It will give the headline
45
+ 5. date-time - It will give the time and date when the summary was created
46
+ '''
main.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel, validator
3
+ from utils import summary
4
+ import os
5
+ from huggingface_hub import login
6
+
7
# Point the Hugging Face / transformers cache locations at fixed directories.
os.environ.update(
    {
        'HF_HOME': '/hug/cache/',
        'TRANSFORMERS_CACHE': '/blabla/cache/',
    }
)
9
+
10
class Item(BaseModel):
    # Request body accepted by the /Summarize/ endpoint.
    # Every field is a string; the defaults are a sample announcement.
    FileURL: str = (
        "https://www.bseindia.com/stockinfo/AnnPdfOpen.aspx"
        "?Pname=d141ef4f-7856-4236-8f6f-efe09592df40.pdf"
    )
    memo: str = (
        "Please find attached RTA Certificate u/r 74(5) of SEBI (DP) "
        "Regulations 2018 for QE March 2024"
    )
    TypeofAnnouncement: str = "General_Announcements"
    Descriptor: str = "Certificate under Reg. 74 (5) of SEBI (DP) Regulations 2018"
    caption: str = (
        "Compliances-Certificate under Reg. 74 (5) of SEBI (DP) "
        "Regulations 2018"
    )
    newsdate: str = "2024-04-08T13:05:27"
    symbol: str = "EDELWEISS"
18
+
19
app = FastAPI()


@app.get("/")
async def root():
    # Landing route: reports the service name and version banner.
    banner = {"Summarize": "Version 1.5 'Images Added'"}
    return banner


@app.post("/Summarize/")
def read_user(input_json: Item):
    # Convert the validated request body to a plain dict and pass it to
    # utils.summary, whose return value becomes the response.
    payload = input_json.dict()
    return summary(payload)
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ groq
2
+ requests
3
+ pypdf
4
+ pandas
5
+ datetime
6
+ fastapi
7
+ pydantic
8
+ uvicorn
9
+ openpyxl
10
+ huggingface_hub
11
+ torch
12
+ transformers
13
+ openai
symbol.xlsx ADDED
Binary file (117 kB). View file
 
utils.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ from urllib.request import Request, urlopen
3
+ from pypdf import PdfReader
4
+ from io import StringIO
5
+ import io
6
+ import pandas as pd
7
+ import os
8
+ import torch
9
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
+ from transformers import pipeline
11
+ from openai import OpenAI
12
+ from groq import Groq
13
+ import time
14
+ from openai import OpenAI
15
+
16
+ #openai_key = "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"
17
+ #client = OpenAI(api_key = openai_key)
18
# Load the header-less Descriptor.xlsx sheet once at import time; its first
# column is the list of descriptors that filter() and summary() look up.
desc = pd.read_excel('Descriptor.xlsx', header=None)
desc_list = desc.iloc[:, 0].tolist()
20
+
21
def callAzure(prompt, text, max_chars=7000):
    """Send ``prompt`` followed by ``text`` to the Azure-hosted chat endpoint.

    The two strings are joined with a single space, truncated to
    ``max_chars`` characters, and sent as one user message.

    Args:
        prompt: Instruction placed at the start of the message.
        text: Content the instruction applies to.
        max_chars: Maximum length of the combined message; longer messages
            are cut off at this many characters (default 7000).

    Returns:
        The content of the first choice in the completion response.
    """
    # SECURITY: the endpoint key below is committed in source. It is kept only
    # as a fallback so existing deployments keep working; rotate it and supply
    # the value through the environment instead.
    url = os.getenv(
        "AZURE_MISTRAL_ENDPOINT",
        "https://Mistral-large-tmhcg-serverless.eastus2.inference.ai.azure.com",
    )
    api_key = os.getenv("AZURE_MISTRAL_API_KEY", "LB0ha1R4k3pNpHl68P3VtUZ3sMLr3wT7")
    client = OpenAI(base_url=url, api_key=api_key)

    # Truncate the combined message to the configured character budget.
    msg = "{} {}".format(prompt, text)[:max_chars]

    response = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": msg,
            }
        ],
        model="azureai",
    )

    return response.choices[0].message.content
40
+
41
def call(prompt, text, model=None):
    """Ask a Groq-hosted chat model to answer ``prompt`` about ``text``.

    The suffix " Answer only the summary, no instructions" is appended to the
    prompt, and the prompt and text are sent as a single user message.

    Args:
        prompt: Instruction for the model.
        text: Content the instruction applies to.
        model: Name of the Groq model to use. When omitted, a module-level
            ``model`` variable is used if one exists.

    Returns:
        The content of the first choice in the completion response.

    Raises:
        ValueError: If no model name is available. (Previously this function
            always failed with NameError because ``model`` was never defined.)
    """
    if model is None:
        # Preserve the original lookup: fall back to a module-level `model`.
        model = globals().get("model")
    if not model:
        raise ValueError("call() needs a Groq model name; pass model=...")

    # The Groq API key is read from the environment variable named "key".
    client = Groq(api_key=os.getenv("key"),)

    prompt = prompt + " Answer only the summary, no instructions"
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": "{} {}".format(prompt, text),
            }
        ],
        model=model,
    )

    return chat_completion.choices[0].message.content
56
+
57
def filter(input_json):
    """Check whether an announcement qualifies and, if so, fetch its PDF text.

    Note: this function intentionally keeps its name even though it shadows
    the builtin ``filter``, because other code in this module calls it.

    Args:
        input_json: Dict with at least the keys 'FileURL', 'symbol',
            'TypeofAnnouncement' and 'Descriptor'.

    Returns:
        ``[0, reason]`` when the announcement is rejected, where ``reason`` is
        one of "File_URL", "symbol", "Annoucement" or "Desc"; otherwise
        ``[1, document]`` where ``document`` is the concatenated text of every
        page of the announcement PDF.
    """
    file_url = input_json['FileURL']
    if file_url is None or file_url.lower() == 'null':
        return [0, "File_URL"]

    symbol = input_json['symbol']
    if symbol == 'null':
        return [0, "symbol"]
    # Only read the symbol sheet once the cheaper checks have passed.
    sym_list = pd.read_excel('symbol.xlsx', header=None).iloc[:, 0].to_list()
    if symbol not in sym_list:
        return [0, "symbol"]

    if input_json['TypeofAnnouncement'] not in ['General_Announcements', 'Outcome', 'General']:
        return [0, "Annoucement"]
    if input_json['Descriptor'] not in desc_list:
        return [0, "Desc"]

    # The attachment is fetched from the AttachLive path using the file name
    # that follows 'Pname=' in the announcement URL.
    url = 'https://www.bseindia.com/xml-data/corpfiling/AttachLive/' + file_url.split('Pname=')[-1]
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # Context manager guarantees the HTTP response is closed.
    with urlopen(req) as html:
        cont = html.read()

    reader = PdfReader(io.BytesIO(cont))
    # Join page texts in one pass; treat a page without text as empty.
    document = ''.join((page.extract_text() or '') for page in reader.pages)

    return [1, document]
82
+
83
def _drop_leading_line(answer):
    """Remove the model's lead-in line from ``answer``.

    Everything up to the first newline, plus the single character after it,
    is removed. An answer without a newline is returned unchanged.
    """
    try:
        newline_at = answer.index("\n")
    except ValueError:
        return answer
    return answer[newline_at + 2:]


def summary(input_json):
    """Build the summary record for one announcement.

    Args:
        input_json: Dict with the keys 'FileURL', 'symbol',
            'TypeofAnnouncement', 'Descriptor' and 'newsdate'.

    Returns:
        The rejection reason string produced by ``filter`` when the
        announcement does not qualify. Otherwise a dict with the keys
        "Link to BSE website", "Date of time of receiving data from BSE",
        "Stock Ticker", "Short Summary", "Long summary", "Tag", "Headline",
        "Date and time of data delivery from Skylark" and "Sentiment".
    """
    filtering_results = filter(input_json)
    if filtering_results[0] == 0:
        return filtering_results[1]

    # Columns 1 and 2 of the prompt sheet hold the short and long prompts,
    # indexed by the descriptor's position in desc_list.
    prompts = pd.read_excel('DescriptorPrompt.xlsx')
    promptShort = prompts.iloc[:, 1].to_list()
    promptLong = prompts.iloc[:, 2].to_list()

    prompt_idx = desc_list.index(input_json['Descriptor'])
    long_text = filtering_results[1]

    output = {}

    url = 'https://www.bseindia.com/xml-data/corpfiling/AttachLive/' + input_json['FileURL'].split('Pname=')[-1]
    output["Link to BSE website"] = url
    output["Date of time of receiving data from BSE"] = input_json["newsdate"] + "Z"
    output["Stock Ticker"] = input_json['symbol']

    output['Short Summary'] = _drop_leading_line(
        callAzure(promptShort[prompt_idx], long_text)
    )
    output['Long summary'] = _drop_leading_line(
        callAzure(promptLong[prompt_idx], long_text)
    )

    # Tag and headline are derived from the short summary, not the full text.
    prompt = "1 word Financial SEO tag for this news article"
    output['Tag'] = _drop_leading_line(callAzure(prompt, output['Short Summary']))

    prompt = "Give a single headline for this News Article"
    output['Headline'] = _drop_leading_line(callAzure(prompt, output['Short Summary']))

    # Take the current time directly as an aware IST (UTC+05:30) datetime.
    # The previous naive utcnow().astimezone(...) treated the UTC value as
    # local time, which is only right when the host itself runs in UTC.
    ist = datetime.timezone(datetime.timedelta(hours=5, minutes=30))
    ist_now = datetime.datetime.now(ist)
    # NOTE(review): the value is IST wall-clock time but carries a "Z" suffix;
    # kept as-is so the output format does not change for consumers.
    output['Date and time of data delivery from Skylark'] = (
        ist_now.strftime("%Y-%m-%d") + "T" + ist_now.strftime("%X") + "Z"
    )

    # The "{}" in this prompt is never substituted; it reaches the model
    # literally. Left unchanged to keep the prompt text identical.
    prompt = "Answer in one word the sentiment of this News out of Positive, Negative or Neutral {}"
    output['Sentiment'] = callAzure(prompt, output['Short Summary'])

    return output