Spaces:

svummidi
/

slackdemo

Running on CPU Upgrade

File size: 4,272 Bytes

a31ba66

from enum import Enum
from typing import List, Optional

from llama_index import ServiceContext
from llama_index.llms import OpenAI
from llama_index.llms.base import LLM
from llama_index.llms.openai_utils import to_openai_function
from pydantic import BaseModel, ValidationError


class SentimentEnum(str, Enum):
    """Enum for predicted overall sentiment of a discussion thread"""

    # NOTE(review): pydantic may surface this docstring as the enum's schema
    # description (and therefore in the function spec sent to the LLM) --
    # confirm before rewording it.
    #
    # Every member's value is its own name; the ``str`` mixin makes members
    # compare equal to the corresponding plain strings.
    POSITIVE = "POSITIVE"
    NEGATIVE = "NEGATIVE"
    NEUTRAL = "NEUTRAL"
    MIXED = "MIXED"
    UNKNOWN = "UNKNOWN"


class DiscussionStatusEnum(str, Enum):
    """Enum for representing the predicted status of the discussion thread"""

    # NOTE(review): pydantic may surface this docstring as the enum's schema
    # description (and therefore in the function spec sent to the LLM) --
    # confirm before rewording it.
    #
    # Every member's value is its own name; the ``str`` mixin makes members
    # compare equal to the corresponding plain strings.
    ON_GOING = "ON_GOING"
    RESOLVED_OR_CLOSED = "RESOLVED_OR_CLOSED"
    UNKNOWN = "UNKNOWN"


class ThreadMetadata(BaseModel):
    """ Metadata of a discussion thread for topics and sentiment. Topics must be under 50 characters. """
    # This model is handed to ``to_openai_function`` and later used to parse
    # the function-call arguments (see ``MetadataExtractor.extract_metadata``).
    # NOTE(review): the docstring above presumably becomes the function
    # description the LLM sees, and the field names its parameter names --
    # confirm before renaming or rewording anything here.
    # The 50-character topic limit is only stated in the docstring; nothing in
    # this class validates it.

    # Topics discussed in a positive light.
    list_of_positive_topics: List[str]
    # Topics discussed in a negative light.
    list_of_negative_topics: List[str]
    # Overall sentiment of the thread; None is an accepted value.
    overall_sentiment: Optional[SentimentEnum]
    # Predicted thread status; the CSV script in this file writes "UNKNOWN"
    # when this is None.
    discussion_status_enum: Optional[DiscussionStatusEnum]

class MetadataExtractor:
    """
    Extracts ThreadMetadata from a thread summary by offering the LLM a
    function spec built from the ThreadMetadata model and parsing the
    arguments of the function call it returns.
    """

    def __init__(self, llm: LLM):
        """
        :param llm: LLM used for the extraction; its ``complete`` call is given
            a ``functions`` keyword argument
        """
        self.llm = llm

    def extract_metadata(self, thread_summary: str) -> Optional[ThreadMetadata]:
        """
        Extracts the metadata from the thread summary
        :param thread_summary: summary text of the thread
        :return: metadata of the thread, or None when the response carries no
            function call or its arguments do not validate as ThreadMetadata
        """
        api_spec = to_openai_function(ThreadMetadata)
        response = self.llm.complete(
            "Analyze the thread summary: " + thread_summary,
            functions=[api_spec],
        )
        # Only ``functions`` is passed, so the model is offered the function
        # but nothing here forces it to call it; a plain-text answer would
        # have no "function_call" entry and must not crash the caller.
        function_call = (response.additional_kwargs or {}).get("function_call")
        if not function_call or "arguments" not in function_call:
            print("No function call found in the LLM response for the thread summary")
            return None

        function_call_resp = function_call["arguments"]
        try:
            return ThreadMetadata.parse_raw(function_call_resp)
        except ValidationError:
            print(f"Error while parsing the detected thread metadata: {function_call_resp}")
            return None


# Status strings the script is willing to write into the new column.
_KNOWN_STATUSES = ("RESOLVED_OR_CLOSED", "ON_GOING", "UNKNOWN")


class MissingColumnError(ValueError):
    """Raised when the requested column header is absent from the input CSV."""


def _predicted_status(metadata) -> str:
    """
    Maps extracted metadata to the text written into the new CSV column
    :param metadata: object exposing ``discussion_status_enum`` (such as
        ThreadMetadata), or None when extraction failed
    :return: the status value when it is one of the known statuses,
        otherwise "UNKNOWN"
    """
    status = None if metadata is None else metadata.discussion_status_enum
    if status is not None and status.value in _KNOWN_STATUSES:
        return status.value
    return "UNKNOWN"


def add_predicted_status_column(input_csv, output_csv, column_to_read,
                                new_column_header, extract_metadata):
    """
    Copies a CSV file, appending a column with the predicted discussion status
    :param input_csv: path of the CSV file to read
    :param output_csv: path of the CSV file to write
    :param column_to_read: header of the column holding the thread summary
    :param new_column_header: header of the appended column
    :param extract_metadata: callable taking the summary text and returning an
        object with a ``discussion_status_enum`` attribute, or None
    :raises MissingColumnError: when ``column_to_read`` is not a header of the
        input file (an empty file has no headers and is reported the same way)
    """
    import csv

    with open(input_csv, mode='r', newline='', encoding='utf-8') as infile:
        csvreader = csv.reader(infile)
        # Default to no headers so an empty file is reported as a missing
        # column instead of escaping as StopIteration.
        headers = next(csvreader, [])
        if column_to_read not in headers:
            raise MissingColumnError(f"Column '{column_to_read}' not found in the CSV file.")
        index_to_read = headers.index(column_to_read)
        rows = list(csvreader)

    column_count = len(headers)
    headers.append(new_column_header)

    for row in rows:
        # Pad short or blank rows so the summary lookup cannot raise
        # IndexError and the new value lands under the new header.
        if len(row) < column_count:
            row.extend([""] * (column_count - len(row)))
        summary = row[index_to_read]
        # A blank summary has nothing to analyze; skip the extractor call.
        metadata = extract_metadata(summary) if summary.strip() else None
        row.append(_predicted_status(metadata))

    with open(output_csv, mode='w', newline='', encoding='utf-8') as outfile:
        csvwriter = csv.writer(outfile)
        csvwriter.writerow(headers)
        csvwriter.writerows(rows)


def main() -> None:
    """
    Adds a "Predicted Status" column to the platform-engg CSV using
    gpt-3.5-turbo to classify each row's "Summary" text.
    """
    import sys

    gpt_turbo = OpenAI(temperature=0, model="gpt-3.5-turbo")
    metadata_extractor = MetadataExtractor(gpt_turbo)

    # Input CSV file name
    input_csv = "csv/platform-engg.csv"
    # Output CSV file name
    output_csv = "csv/platform-engg-updated.csv"
    # Column header to read from
    column_to_read = "Summary"
    new_column_header = "Predicted Status"

    try:
        add_predicted_status_column(
            input_csv,
            output_csv,
            column_to_read,
            new_column_header,
            metadata_extractor.extract_metadata,
        )
    except MissingColumnError as err:
        print(f"Error: {err}")
        sys.exit(1)

    print(f"Successfully added a new column '{new_column_header}' to the '{output_csv}' file.")


if __name__ == "__main__":
    main()