# DataAIDemo/pages/core_risk.py
import re
import string

import boto3  # S3 access; the bucket below is reserved for future use on this page
import emoji
import nltk
import streamlit as st
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from numpy.random import seed
from streamlit import session_state
from torch import cuda
from transformers import pipeline

seed(1)  # fix the NumPy RNG for reproducibility

bucket = 'data-ai-dev2'  # S3 bucket name (not used directly in this page)
device = 'cuda' if cuda.is_available() else 'cpu'

# Fetch the NLTK resources used below (safe to re-run; cached after the first download).
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('stopwords')

stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()
stop_words = stopwords.words('english')

# Local fine-tuned text-classification model and its tokenizer.
model = 'C:/Users/Meet/Downloads/core_risk/models/'
tokenizer = 'C:/Users/Meet/Downloads/core_risk/tokenizer/'
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer,
                      truncation=True, max_length=512)
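# Quick sanity check of the pipeline's output shape (the label names here are
# hypothetical; the real ones come from the fine-tuned model's config):
#   classifier("some text", top_k=2)
#   -> [{'label': 'LABEL_0', 'score': 0.91}, {'label': 'LABEL_3', 'score': 0.05}]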
def pre_processing_str_esg(df_col):
    """Heavier cleaning for long texts: lowercase, then strip punctuation,
    URLs, stopwords, digits, special symbols, and emoji."""
    df_col = df_col.lower()

    # Remove punctuation characters.
    def remove_punctuation(text):
        return "".join(ch for ch in text if ch not in string.punctuation)

    df_col = remove_punctuation(df_col)
    df_col = re.sub(r"http\S+", " ", df_col)

    # Drop English stopwords.
    def remove_stopwords(text):
        return " ".join(word for word in str(text).split() if word not in stop_words)

    df_col = remove_stopwords(df_col)
    df_col = re.sub('[%s]' % re.escape(string.punctuation), ' ', df_col)
    df_col = df_col.replace("¶", "")
    df_col = df_col.replace("§", "")
    df_col = df_col.replace('“', ' ')
    df_col = df_col.replace('”', ' ')
    df_col = df_col.replace('-', ' ')
    REPLACE_BY_SPACE_RE = re.compile(r'[/(){}\[\]\|@,;]')
    BAD_SYMBOLS_RE = re.compile('[^0-9a-z #+_]')
    df_col = REPLACE_BY_SPACE_RE.sub(' ', df_col)
    df_col = BAD_SYMBOLS_RE.sub(' ', df_col)
    df_col = re.sub('[0-9]+', ' ', df_col)
    df_col = re.sub(' +', ' ', df_col)  # collapse runs of spaces

    # Strip emoji and pictographic symbols by Unicode range.
    def remove_emoji(text):
        emoji_pattern = re.compile("["
                                   u"\U0001F600-\U0001F64F"  # emoticons
                                   u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                                   u"\U0001F680-\U0001F6FF"  # transport & map symbols
                                   u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                                   u"\U00002702-\U000027B0"
                                   u"\U000024C2-\U0001F251"
                                   "]+", flags=re.UNICODE)
        return emoji_pattern.sub(r'', text)

    df_col = remove_emoji(df_col)
    return df_col
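# Example (illustrative input): pre_processing_str_esg("Check https://t.co/x ESG §4 scores 2024! 😀")
# returns roughly "check esg scores": the URL, stopwords, digits, symbols, and emoji are stripped.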
def pre_processing_str(df_col):
    """Light cleaning for short texts (e.g. tweets); texts of 70+ words
    get the full ESG pipeline above instead."""
    if len(df_col.split()) >= 70:
        return pre_processing_str_esg(df_col)
    else:
        df_col = df_col.replace('#', '')
        df_col = df_col.replace('!', '')
        df_col = re.sub(r"http\S+", " ", df_col)
        df_col = re.sub('[0-9]+', ' ', df_col)
        df_col = re.sub(' +', ' ', df_col)  # collapse runs of spaces
        df_col = emoji.replace_emoji(df_col)  # strip emoji
        df_col = re.sub(r"(?:\@|https?\://)\S+", "", df_col)  # @mentions and URLs
        df_col = re.sub(r"[^\x20-\x7E]+", "", df_col)  # non-printable / non-ASCII
        df_col = df_col.strip()
        return df_col
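# Example (illustrative input): pre_processing_str("@user Check this!! 😀 #AI https://t.co/x")
# returns roughly "Check this AI": hashtags, mentions, URLs, and emoji are stripped,
# and case is preserved on the short-text path.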
# Note kept from the earlier Flask API deployment of these steps: the module name
# must match the file name and the app object its variable name, i.e.
# application.py containing application = Flask(__name__).
def process(text):
    """Clean the input and return the top-2 class predictions."""
    text = pre_processing_str(text)
    try:
        if len(text) != 0:
            results = classifier(text, top_k=2)
        else:
            results = 'No Text'
        return {'output_16': results}
    except Exception:
        return {'output_16': 'something went wrong'}
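# Example shape of the returned payload (label names are hypothetical; the real
# ones depend on what the fine-tuned model was trained on):
#   process("Markets fell after the breach")
#   -> {'output_16': [{'label': '...', 'score': 0.87}, {'label': '...', 'score': 0.06}]}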
st.set_page_config(page_title="core_risk", page_icon="📈")

if 'topic_class' not in session_state:
    session_state['topic_class'] = ""

st.title("Topic Classifier")
text = st.text_area(label="Please write the text below",
                    placeholder="What does the tweet say?")

def classify(text):
    # Button callback: run the pipeline and stash the result in session state.
    session_state['topic_class'] = process(text)

st.text_area("result", value=str(session_state['topic_class']))  # cast dict to str for display
st.button("Classify", on_click=classify, args=[text])
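# Launch locally with: streamlit run pages/core_risk.py
# (in a multipage app, run the top-level entry script instead; this page is
# then picked up automatically from the pages/ directory).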