import csv
import tempfile

import pandas as pd
import spacy
import gradio as gr
from nrclex import NRCLex
from transformers import pipeline
from rake_nltk import Rake

# Initialize models once at startup.
# The nlptown model returns a star-rating label ('1 star' .. '5 stars').
emotion_pipeline = pipeline('sentiment-analysis',
                            model='nlptown/bert-base-multilingual-uncased-sentiment')
nlp = spacy.load('en_core_web_sm')
rake = Rake()


def process_csv(file):
    # Gradio may pass either a file path (newer versions) or a tempfile-like
    # object with a .name attribute (older versions); handle both.
    path = file if isinstance(file, str) else file.name

    with open(path, newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        fieldnames = reader.fieldnames
        rows = list(reader)  # materialize rows so they can be iterated again below

    emotions, sentiments, entities, keywords = [], [], [], []
    for row in rows:
        text = row['Content']  # Replace 'Content' with the correct column name
        emotions.append(NRCLex(text).affect_frequencies)  # NRC emotion frequencies
        sentiments.append(analyze_emotion(text))
        entities.append(analyze_entities(text))
        keywords.append(extract_keywords(text))  # Extract keywords for each text

    out_fields = fieldnames + list(emotions[0].keys()) + ['sentiment', 'entities', 'keywords']

    output = []
    for row, emotion_scores, sentiment, entity, keyword in zip(rows, emotions, sentiments, entities, keywords):
        row.update(emotion_scores)  # add per-emotion frequency columns
        row.update({'sentiment': sentiment, 'entities': entity, 'keywords': keyword})
        # Keep only the declared columns, filling any gaps with empty strings.
        output.append({field: row.get(field, '') for field in out_fields})

    # The File output expects a path, so write the result to a temporary CSV.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as tmp:
        out_path = tmp.name
    pd.DataFrame(output, columns=out_fields).to_csv(out_path, index=False)
    return out_path


def analyze_emotion(text):
    """Return the sentiment label predicted by the transformer pipeline."""
    result = emotion_pipeline(text)[0]
    return result['label']


def analyze_entities(text):
    """Return (entity text, entity label) pairs found by spaCy."""
    doc = nlp(text)
    return [(ent.text, ent.label_) for ent in doc.ents]


def extract_keywords(text):
    """Return RAKE-ranked keyword phrases for the text."""
    rake.extract_keywords_from_text(text)
    return rake.get_ranked_phrases()


iface = gr.Interface(fn=process_csv,
                     inputs=gr.File(label='Input CSV'),
                     outputs=gr.File(label='Annotated CSV'))
iface.launch()
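
# One-time setup note (a sketch, assuming a standard NLTK/spaCy install): rake_nltk
# relies on the NLTK 'stopwords' and 'punkt' corpora, and spaCy needs the
# 'en_core_web_sm' model, so something like the following may be required before
# the first run:
#
#   python -m spacy download en_core_web_sm
#   python -c "import nltk; nltk.download('stopwords'); nltk.download('punkt')"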