RohitGuptaAI committed on
Commit
860860a
·
1 Parent(s): 14b8594

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -61
app.py CHANGED
@@ -9,58 +9,9 @@ import torch.nn.functional as F
9
  import gradio as gr
10
  import torch
11
 
12
- def check_by_url(txt_url):
13
- #txt_url = "https://www.c-sharpcorner.com/article/how-to-add-multimedia-content-with-html/default.txt"
14
- parsed_url = urlparse(txt_url)
15
- url = f"{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path.rsplit('/', 1)[0]}/"
16
- print(url)
17
-
18
- new_data =[]
19
- page = urlopen(url=url).read().decode("utf-8")
20
- soup = BeautifulSoup(page, 'html.parser')
21
- title = soup.find('title').get_text()
22
-
23
- css_class_to_remove = "dp-highlighter" # Replace with the CSS class you want to remove
24
- #Find <div> tags with the specified CSS class and remove their content
25
- div_tags = soup.find_all(['code', 'pre'])
26
- for div_tag in div_tags:
27
- div_tag.clear()
28
-
29
- div_tags = soup.find_all('div', class_=css_class_to_remove)
30
- for div_tag in div_tags:
31
- div_tag.clear()
32
-
33
- # Fetch content of remaining tags
34
- content_with_style = ""
35
- p_tags_with_style = soup.find_all('p', style=True)
36
- for p_tag in p_tags_with_style:
37
- p_content = re.sub(r'\n', '', p_tag.get_text())
38
- content_with_style += p_content
39
-
40
- # Fetch content of <p> tags without style
41
- content_without_style = ""
42
- p_tags_without_style = soup.find_all('p', style=False)
43
- for p_tag in p_tags_without_style:
44
- p_content = re.sub(r'\n', '', p_tag.get_text())
45
- content_without_style += p_content
46
-
47
- # Replace Unicode characters in the content and remove duplicates
48
- normalized_content_with_style = re.sub(r'\s+', ' ', content_with_style) # Remove extra spaces
49
- normalized_content_with_style = normalized_content_with_style.replace('\r', '') # Replace '\r' characters
50
- normalized_content_with_style = unicodedata.normalize('NFKD', normalized_content_with_style)
51
- normalized_content_with_style = unidecode.unidecode(normalized_content_with_style)
52
-
53
- normalized_content_without_style = re.sub(r'\s+', ' ', content_without_style) # Remove extra spaces
54
- normalized_content_without_style = normalized_content_without_style.replace('\r', '') # Replace '\r' characters
55
- normalized_content_without_style = unicodedata.normalize('NFKD', normalized_content_without_style)
56
- normalized_content_without_style = unidecode.unidecode(normalized_content_without_style)
57
-
58
- normalized_content_with_style += normalized_content_without_style
59
- new_data = {"title": title, "content": normalized_content_with_style}
60
-
61
  model = DistilBertForSequenceClassification.from_pretrained(".")
62
  tokenizer = DistilBertTokenizer.from_pretrained(".")
63
-
64
  test_encodings = tokenizer.encode_plus(
65
  title,
66
  truncation=True,
@@ -83,8 +34,9 @@ def check_by_url(txt_url):
83
  predicted_labels = torch.argmax(outputs.logits, dim=1)
84
  label_mapping = {1: "SFW", 0: "NSFW"} # 1:True 0:false
85
  predicted_label_title = label_mapping[predicted_labels.item()]
86
-
87
 
 
88
  test_encodings = tokenizer.encode_plus(
89
  normalized_content_with_style,
90
  truncation=True,
@@ -107,25 +59,26 @@ def check_by_url(txt_url):
107
  label_mapping = {1: "SFW", 0: "NSFW"} # 1:True 0:false
108
  predicted_label_content = label_mapping[predicted_labels.item()]
109
 
110
- return predicted_label_title, confidence_score_title, predicted_label_content, confidence_scores_content, new_data
111
 
112
- def predict_2( url):
113
- predicted_label_title, confidence_score_title,predicted_label_content, confidence_scores_content, new_data = check_by_url(url)
114
- return predicted_label_title, confidence_score_title, predicted_label_content, confidence_scores_content, new_data
 
115
 
116
  demo = gr.Interface(
117
  fn=predict_2,
118
- inputs= [
119
- gr.inputs.Textbox(label="Enter URL"),
120
-
121
  ],
122
- outputs= [
123
-
124
  gr.outputs.Textbox(label="Title_prediction"),
125
  gr.outputs.Textbox(label="Title_confidence_score"),
126
  gr.outputs.Textbox(label="Content_prediction"),
127
  gr.outputs.Textbox(label="content_confidence_score"),
128
- gr.outputs.Textbox(label="new_data").style(show_copy_button=True)
129
  ],
 
130
  )
131
  demo.launch()
 
9
  import gradio as gr
10
  import torch
11
 
12
+ def check_by_title(title):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  model = DistilBertForSequenceClassification.from_pretrained(".")
14
  tokenizer = DistilBertTokenizer.from_pretrained(".")
 
15
  test_encodings = tokenizer.encode_plus(
16
  title,
17
  truncation=True,
 
34
  predicted_labels = torch.argmax(outputs.logits, dim=1)
35
  label_mapping = {1: "SFW", 0: "NSFW"} # 1:True 0:false
36
  predicted_label_title = label_mapping[predicted_labels.item()]
37
+ return predicted_label_title, confidence_score_title
38
 
39
+ def check_by_content(normalized_content_with_style):
40
  test_encodings = tokenizer.encode_plus(
41
  normalized_content_with_style,
42
  truncation=True,
 
59
  label_mapping = {1: "SFW", 0: "NSFW"} # 1:True 0:false
60
  predicted_label_content = label_mapping[predicted_labels.item()]
61
 
62
+ return predicted_label_content, confidence_scores_content
63
 
64
+ def predict_2(title, normalized_content_with_style):
65
+ predicted_label_title, confidence_score_title = check_by_title(title)
66
+ predicted_label_content, confidence_scores_content = check_by_content(normalized_content_with_style)
67
+ return predicted_label_title, confidence_score_title, predicted_label_content, confidence_scores_content
68
 
69
  demo = gr.Interface(
70
  fn=predict_2,
71
+ inputs=[
72
+ gr.inputs.Textbox(label="Title", placeholder="Enter title"),
73
+ gr.inputs.Textbox(label="Content", placeholder="enter Content"),
74
  ],
75
+ outputs= [
 
76
  gr.outputs.Textbox(label="Title_prediction"),
77
  gr.outputs.Textbox(label="Title_confidence_score"),
78
  gr.outputs.Textbox(label="Content_prediction"),
79
  gr.outputs.Textbox(label="content_confidence_score"),
80
+ #gr.outputs.Textbox(label="Description").style(show_copy_button=True)
81
  ],
82
+
83
  )
84
  demo.launch()