import pandas as pd
import numpy as np
import streamlit as st

from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

# Hugging Face Hub checkpoint used for extractive question answering.
model_name = "deepset/roberta-base-squad2"

# Load the model weights and tokenizer exactly once.
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Build the prediction pipeline from the objects loaded above rather than
# from the checkpoint name, so the same weights are not instantiated a
# second time inside pipeline().
nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)


# Lower-case watch-list of crime-related terms. Every term appears exactly
# once, in order of first appearance, so the list can be turned into a set
# (or counted) without surprises.
# NOTE: "credit card" is the only multi-word entry; it can never equal a
# single whitespace-delimited token, so it only matches when the consumer
# compares whole phrases.
suspicious_words = [
    "robbery", "crime", "exchange", "extortion", "threat", "suspicious", "fraud", "laundering",
    "illegal", "contraband", "smuggling", "burglary", "assault", "hijacking", "kidnapping", "ransom",
    "hostage", "terrorism", "homicide", "murder", "manslaughter", "weapon", "gun", "explosive", "bomb", "knives",
    "threaten", "blackmail", "intimidate", "menace", "harassment", "stalking", "kidnap", "abduction", "guns", "bombs",
    "abuse", "trafficking", "prostitution", "pimping", "drug", "narcotic", "cocaine", "heroin", "methamphetamine",
    "amphetamine", "opiate", "meth", "gang", "gangster", "mafia", "racket", "extort", "embezzle", "corruption",
    "bribe", "scam", "forgery", "counterfeit", "fraudulent", "cybercrime", "hacker", "phishing", "identity", "theft",
    "credit card", "ponzi", "scheme", "pyramid", "money", "swindle", "deception",
    "conspiracy", "plot", "coercion", "corrupt", "criminal", "felony", "misdemeanor", "felon", "fugitive",
    "wanted", "arson", "arsonist", "arsony", "stolen", "steal", "loot", "heist", "launder", "hitman", "racketeer",
    "hijack", "smuggle", "terrorist", "kidnapper", "perpetrator", "ringleader", "prowler", "vigilante", "sabotage",
    "saboteur", "suicide", "discreet", "hide", "action", "profile", "alert", "vigilant", "clandestine", "riot", "arms", "deal",
]


# The three questions asked of each sentence: what happens, where, and when.
q = [
    "What event is going to take place?",
    "Where is it going to happen",
    "What time is it going to happen?",
]

# One blank answer slot per question.
a = [""] * 3

# Three independent empty dicts each, indexed in step with the questions.
QA_input = [dict() for _ in range(3)]
res = [dict() for _ in range(3)]

# Split the watch-list once, before the loop. Single words are compared with
# the answer's tokens; multi-word entries need a substring search because
# they can never equal one whitespace-delimited token.
_sus_tokens = {term for term in suspicious_words if " " not in term}
_sus_phrases = [term for term in suspicious_words if " " in term]
# Punctuation trimmed from token edges so "robbery." still matches "robbery".
_EDGE_PUNCT = ".,;:!?\"'()[]"

df = pd.read_excel('senti.xlsx')

# Blank spreadsheet cells come back as NaN, which is not a usable context
# string for the QA pipeline; drop them and any whitespace-only rows.
parsed_column = [
    text
    for text in df['sentences'].dropna().astype(str).to_list()
    if text.strip()
]

# Debug aid: echo the sentences that will be analysed to stdout.
print(parsed_column)

for sentence in parsed_column:
    # Ask every question in `q` against the current sentence, keeping only
    # the extracted answer text. `q` holds exactly three questions
    # (event, location, time), in that order.
    event, location, time_of_event = [
        nlp({'question': question, 'context': sentence})['answer']
        for question in q
    ]

    # A sentence is flagged when the "event" answer contains any watch-list
    # term, matched case-insensitively.
    event_lc = event.lower()
    event_tokens = {token.strip(_EDGE_PUNCT) for token in event_lc.split()}
    is_suspicious = bool(event_tokens & _sus_tokens) or any(
        phrase in event_lc for phrase in _sus_phrases
    )

    if not is_suspicious:
        st.write("No crime detected")
        continue

    st.write("The crime detected is: ", event)

    if location:
        st.write("The location of crime detected is: ", location)
    else:
        st.write("No location detected")

    if time_of_event:
        st.write("The time of crime detected is: ", time_of_event)
    else:
        st.write("No time detected")