File size: 2,955 Bytes
91532c2
24b744c
91532c2
478e016
91532c2
478e016
91532c2
 
05d3d0d
882f683
24b744c
08357ff
91532c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24b744c
 
 
 
 
91532c2
 
 
 
 
 
 
 
 
 
 
 
 
882f683
05d3d0d
478e016
 
 
ff2e27d
91532c2
478e016
 
 
24b744c
 
478e016
91532c2
 
 
c6287f2
478e016
05d3d0d
91532c2
 
 
 
 
 
478e016
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
from pathlib import Path

import gradio as gr
from llama_index import VectorStoreIndex, StorageContext, download_loader, load_index_from_storage

# Logical dataset names; each is expected to exist on disk as either
# ./raw/<name>.csv or ./raw/<name>.pdf (see index_file below).
dataFiles = ["OnCall", "RetroMay", "RetroApril", "RetroMarch", "Snowflake", "Datadog", "Databricks", "SplunkProducts",
             "SplunkEnterprise"]

# In-memory map of dataset name -> loaded vector index, populated by loadData()
# and read by chatbot() on every query.
cache = {}


def index_file(filePath, index_root):
    """
    Build a vector index for one dataset and persist it to disk.

    Looks for ./raw/<filePath>.csv first, then ./raw/<filePath>.pdf, loads the
    documents with the matching llama_index loader, indexes them, and persists
    the storage context under <index_root>/<filePath>.

    :param filePath: dataset name (file stem under ./raw, no extension)
    :param index_root: directory under which the persisted index is saved
    :return: the newly built VectorStoreIndex
    :raises FileNotFoundError: if neither a CSV nor a PDF source file exists
    """
    csv_file = f'./raw/{filePath}.csv'
    pdf_file = f'./raw/{filePath}.pdf'
    documents = None
    storage_context = StorageContext.from_defaults()
    if os.path.exists(csv_file):
        PandasCSVReader = download_loader("PandasCSVReader")
        loader = PandasCSVReader()
        documents = loader.load_data(file=csv_file)
        print(f"Loading from CSV {csv_file}")
    elif os.path.exists(pdf_file):
        PDFReader = download_loader("PDFReader")
        loader = PDFReader()
        # PDFReader requires a pathlib.Path, not a plain string.
        documents = loader.load_data(file=Path(pdf_file))
        # PyMuPDFReader = download_loader("PyMuPDFReader")
        # loader = PyMuPDFReader()
        # documents = loader.load(file_path=Path(pdf_file), metadata=False)
        print(f"Loading from PDF {pdf_file}")
    if documents is None:
        # Fail loudly here rather than letting from_documents() blow up on
        # None with an opaque library error.
        raise FileNotFoundError(
            f"No source data for '{filePath}': expected {csv_file} or {pdf_file}")
    index = VectorStoreIndex.from_documents(documents=documents, storage_context=storage_context)
    save_location = f"{index_root}/{filePath}"
    # exist_ok avoids the exists()/makedirs() race of the original code.
    os.makedirs(save_location, exist_ok=True)
    storage_context.persist(save_location)
    return index


def loadData():
    """
    Fill the module-level cache with one vector index per entry in dataFiles.

    For each dataset, reuse a previously persisted index under ./index_v2 if
    one exists on disk; otherwise build (and persist) a fresh one via
    index_file().
    """
    index_root = "./index_v2"
    for name in dataFiles:
        persist_dir = f'{index_root}/{name}'
        if os.path.exists(persist_dir):
            print("Loading from existing index " + persist_dir)
            ctx = StorageContext.from_defaults(persist_dir=persist_dir)
            idx = load_index_from_storage(ctx)
        else:
            print("Creating index " + persist_dir)
            idx = index_file(name, index_root)
        cache[name] = idx


def chatbot(indexName, input_text):
    """
    Answer a user question against the selected dataset.

    :param indexName: key into the module-level cache (a dataFiles entry)
    :param input_text: the user's natural-language question
    :return: the query engine's response text
    """
    selected_index = cache[indexName]
    engine = selected_index.as_query_engine()
    result = engine.query(input_text)
    return result.response


# Build or load all indices up front so queries are fast.
loadData()

iface = gr.Interface(fn=chatbot,
                     inputs=[
                         # BUG FIX: the original default value "Agreement" is not a
                         # member of dataFiles, so the initial selection would raise
                         # KeyError inside chatbot(). Default to the first real choice.
                         gr.Dropdown(dataFiles,
                                     type="value", value=dataFiles[0], label="Select Pulse Data"),
                         gr.Textbox(lines=7, label="Ask any question", placeholder='What is the summary?')],
                     outputs="text",
                     title="NLP Demo for Chat Interface")
# Require basic auth only when a password is configured in the environment.
if 'LOGIN_PASS' in os.environ:
    iface.launch(auth=('axiamatic', os.environ['LOGIN_PASS']),
                 auth_message='For access, please check my Slack profile or contact me in Slack.',
                 share=False)
else:
    iface.launch(share=False)