File size: 5,330 Bytes
180ada1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
056867b
180ada1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import gradio as gr
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import Chroma
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain_groq import ChatGroq
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_loaders import WebBaseLoader
import os

# Function to process text and create ConversationalRetrievalChain
def process_text_and_create_chain(text):
    """Split raw text into chunks, embed them into a Chroma vector store,
    and build a ConversationalRetrievalChain backed by a Groq LLM.

    Args:
        text: Full document text to index.

    Returns:
        A ConversationalRetrievalChain with conversation buffer memory that
        retrieves from the embedded chunks and returns source documents.

    Raises:
        RuntimeError: If the GROQ_API_KEY environment variable is not set.
    """
    # Chunk the text so each piece fits comfortably in the embedding model;
    # overlap preserves context across chunk boundaries.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    texts = text_splitter.split_text(text)
    # Tag each chunk with a synthetic source id so retrieved chunks are traceable.
    metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]

    # CPU-friendly BGE embeddings; normalized vectors so similarity is cosine.
    model_name = "BAAI/bge-small-en"
    model_kwargs = {"device": "cpu"}
    encode_kwargs = {"normalize_embeddings": True}
    hf = HuggingFaceBgeEmbeddings(
        model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
    )

    db = Chroma.from_texts(texts, hf, metadatas=metadatas)

    # Memory keys must match what ConversationalRetrievalChain expects
    # ("chat_history" input, "answer" output).
    message_history = ChatMessageHistory()
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

    # SECURITY FIX: the Groq API key was hard-coded in source (a leaked,
    # revocable secret). Read it from the environment instead so credentials
    # never live in version control.
    groq_api_key = os.environ.get("GROQ_API_KEY")
    if not groq_api_key:
        raise RuntimeError("GROQ_API_KEY environment variable is not set.")
    llm_groq = ChatGroq(
        groq_api_key=groq_api_key,
        model_name='mixtral-8x7b-32768'
    )

    # "stuff" chain type: all retrieved chunks are stuffed into one prompt.
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm_groq,
        chain_type="stuff",
        retriever=db.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )

    return chain

# Initialize global variables
# Active ConversationalRetrievalChain; None until a PDF or link has been
# processed. Written by handle_pdf_upload / handle_link_input, read by
# handle_query.
global_chain = None

# Function to handle PDF upload
def handle_pdf_upload(file):
    """Extract text from an uploaded PDF and build the global retrieval chain.

    Args:
        file: Gradio file object (exposes the temp path as ``.name``) or None.

    Returns:
        Tuple of (status message, chat-section visibility update,
        upload-section visibility update) for the Gradio outputs.
    """
    if file is None:
        return "No file uploaded. Please upload a PDF file.", gr.update(visible=False), gr.update(visible=True)

    if not file.name.lower().endswith('.pdf'):
        return "Error: Please upload a PDF file.", gr.update(visible=False), gr.update(visible=True)

    try:
        print(f"Processing file: {file.name}")
        pdf_reader = PyPDF2.PdfReader(file.name)
        pdf_text = ""
        for page in pdf_reader.pages:
            # FIX: extract_text() returns None for image-only pages, which
            # crashed the "+=" with a TypeError; coerce None to "".
            pdf_text += page.extract_text() or ""

        # A scanned/image-only PDF yields no text; building a chain over an
        # empty corpus would be useless, so report it explicitly.
        if not pdf_text.strip():
            return "Error: No extractable text found in the PDF.", gr.update(visible=False), gr.update(visible=True)

        global global_chain
        global_chain = process_text_and_create_chain(pdf_text)
        return "PDF processed successfully.", gr.update(visible=True), gr.update(visible=False)
    except Exception as e:
        print(f"Error processing PDF: {str(e)}")
        return f"Error processing PDF: {str(e)}", gr.update(visible=False), gr.update(visible=True)

# Function to handle link input
def handle_link_input(link):
    """Fetch a web page and build the global retrieval chain from its text.

    Args:
        link: URL string entered by the user.

    Returns:
        Tuple of (status message, chat-section visibility update,
        link-section visibility update) for the Gradio outputs.
    """
    # Guard against an empty/whitespace link instead of letting the loader
    # fail with an opaque exception.
    if not link or not link.strip():
        return "Error: Please enter a link.", gr.update(visible=False), gr.update(visible=True)

    try:
        loader = WebBaseLoader(link)
        documents = loader.load()
        # FIX: the original comprehension shadowed its own result variable
        # (`doc = ... for doc in data`); use distinct names.
        page_text = "\n".join(d.page_content for d in documents)

        global global_chain
        global_chain = process_text_and_create_chain(page_text)
        return "Link processed successfully.", gr.update(visible=True), gr.update(visible=False)
    except Exception as e:
        print(f"Error processing link: {str(e)}")
        return f"Error processing link: {str(e)}", gr.update(visible=False), gr.update(visible=True)

# Function to handle user query
def handle_query(query, chatbot):
    """Answer a user question with the active chain and append the exchange.

    Args:
        query: The user's question.
        chatbot: Current chat history as a list of (user, bot) message pairs.

    Returns:
        New chat history list with this exchange appended.
    """
    # FIX: gr.Chatbot entries are (user_message, bot_message) pairs. The
    # original appended labels like ("Bot", msg) / ("You", query), which made
    # the label render as the user message and the query render as a bot reply.
    if global_chain is None:
        return chatbot + [(query, "Please provide input first.")]
    try:
        result = global_chain({"question": query})
        return chatbot + [(query, result['answer'])]
    except Exception as e:
        print(f"Error processing query: {str(e)}")
        return chatbot + [(query, f"Error: {str(e)}")]

# Function to toggle input method
def toggle_input_method(input_method):
    """Show exactly one input section for the selected radio choice.

    Returns visibility updates for (upload_section, text_input_section):
    "Upload PDF" shows the upload row, "Paste Link" shows the link row,
    anything else hides both.
    """
    show_upload = input_method == "Upload PDF"
    show_link = input_method == "Paste Link"
    return gr.update(visible=show_upload), gr.update(visible=show_link)

# Gradio interface
# Top-level UI wiring: the input sections and chat section start hidden and
# are revealed by the callbacks' gr.update(...) return values.
with gr.Blocks() as demo:
    gr.Markdown("# Chat-With-Context")
    
    with gr.Row():
        # Radio choice drives which of the two input rows below is visible.
        input_method = gr.Radio(["Upload PDF", "Paste Link"], label="Choose Input Method", interactive=True)
    
    with gr.Row(visible=False) as upload_section:
        pdf_input = gr.File(label="Upload PDF")
        upload_button = gr.Button("Process PDF")

    with gr.Row(visible=False) as text_input_section:
        text_input = gr.Textbox(label="Paste Link")
        submit_text_button = gr.Button("Process Link")

    # Read-only status line shared by both input paths.
    input_status = gr.Textbox(label="Status", interactive=False)

    with gr.Row(visible=False) as chat_section:
        chatbot = gr.Chatbot(label="Chat")
        query_input = gr.Textbox(label="Write Your Question", placeholder="Message Chat-With-Context")
        send_button = gr.Button("Send")

    # Event wiring: callbacks return (status, chat visibility, input-row
    # visibility) so a successful process swaps the input row for the chat.
    input_method.change(toggle_input_method, inputs=input_method, outputs=[upload_section, text_input_section])
    upload_button.click(fn=handle_pdf_upload, inputs=pdf_input, outputs=[input_status, chat_section, upload_section])
    submit_text_button.click(fn=handle_link_input, inputs=text_input, outputs=[input_status, chat_section, text_input_section])
    send_button.click(fn=handle_query, inputs=[query_input, chatbot], outputs=chatbot)



# NOTE(review): share=True creates a publicly reachable tunnel URL — confirm
# this exposure is intended before deploying.
demo.launch(share=True)