File size: 2,909 Bytes
7d81008
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6108c6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import pandas as pd
import numpy as np
import streamlit as st

from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

# Hugging Face checkpoint used for extractive question answering.
model_name = "deepset/roberta-base-squad2"

# Load the model and tokenizer exactly once. The names `model` and `tokenizer`
# stay available at module level for any code that wants direct access.
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Build the question-answering pipeline from the objects loaded above.
# Passing the instances (rather than the checkpoint name again) avoids
# loading a second copy of the same weights and tokenizer.
nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)


# Lowercase keywords that mark an extracted answer as a possible crime.
# Each word is listed once, in order of first appearance; elsewhere in this
# file the list is converted to a set for matching, so repeats add nothing.
suspicious_words = [
    "robbery", "crime", "exchange", "extortion", "threat", "suspicious", "fraud", "laundering",
    "illegal", "contraband", "smuggling", "burglary", "assault", "hijacking", "kidnapping", "ransom",
    "hostage", "terrorism", "homicide", "murder", "manslaughter", "weapon", "gun", "explosive", "bomb", "knives",
    "threaten", "blackmail", "intimidate", "menace", "harassment", "stalking", "kidnap", "abduction", "guns", "bombs",
    "abuse", "trafficking", "prostitution", "pimping", "drug", "narcotic", "cocaine", "heroin", "methamphetamine",
    "amphetamine", "opiate", "meth", "gang", "gangster", "mafia", "racket", "extort", "embezzle", "corruption",
    "bribe", "scam", "forgery", "counterfeit", "fraudulent", "cybercrime", "hacker", "phishing", "identity", "theft",
    # NOTE(review): "credit card" contains a space, so it can never equal a
    # single whitespace-separated token of an answer; kept for compatibility.
    "credit card", "ponzi", "scheme", "pyramid", "money", "swindle", "deception",
    "conspiracy", "plot", "coercion", "corrupt", "criminal", "felony", "misdemeanor", "felon", "fugitive",
    "wanted", "arson", "arsonist", "arsony", "stolen", "steal", "loot", "heist", "launder", "hitman", "racketeer",
    "hijack", "smuggle", "terrorist", "kidnapper", "perpetrator", "ringleader", "prowler", "vigilante", "sabotage",
    "saboteur", "suicide", "discreet", "hide", "action", "profile", "alert", "vigilant", "clandestine", "riot", "arms", "deal",
]


# Questions put to the QA model for every sentence: what, where and when.
q = [
    "What event is going to take place?",
    "Where is it going to happen",
    "What time is it going to happen?",
]

# Per-question scratch buffers, one slot for each entry of `q`:
#   a        - extracted answer text
#   QA_input - the {'question', 'context'} payload sent to the model
#   res      - the raw result returned by the model
a = [""] * len(q)
QA_input = [dict() for _ in q]
res = [dict() for _ in q]

# Read the sentences to analyse from the 'sentences' column of the workbook.
df = pd.read_excel('senti.xlsx')

# Empty spreadsheet cells come back as NaN (a float), which cannot be used as
# a text context, so drop them and make every remaining entry a string.
parsed_column = df['sentences'].dropna().astype(str).to_list()

# Loop-invariant: build the lookup set once instead of once per sentence.
sus = set(suspicious_words)

# Characters trimmed from both ends of each answer token so that tokens such
# as "robbery," or "(bomb)" still match the bare keyword.
edge_punctuation = '.,;:!?"\'()[]{}'

# Echo the input to stdout for debugging.
print(parsed_column)
for sentence in parsed_column:
    # A blank sentence has nothing to extract; skip it rather than query the model.
    if not sentence.strip():
        continue

    # Ask every question about the current sentence and keep the answer text.
    for i, question in enumerate(q):
        QA_input[i] = {
            'question': question,
            'context': sentence
        }
        res[i] = nlp(QA_input[i])
        a[i] = res[i]['answer']

    # Lowercased, punctuation-trimmed words of the "what event" answer.
    a1 = a[0].lower()
    a1s = {word.strip(edge_punctuation) for word in a1.split()}
    # Keywords shared between the answer and the suspicious-word list.
    cw = a1s & sus

    if not cw:
        st.write("No crime detected")
        continue

    st.write("The crime detected is: ", a[0])

    if a[1]:
        st.write("The location of crime detected is: ", a[1])
    else:
        st.write("No location detected")

    if a[2]:
        st.write("The time of crime detected is: ", a[2])
    else:
        st.write("No time detected")