File size: 7,295 Bytes
17b93cd
 
 
4ea9e07
17b93cd
 
4ea9e07
 
 
 
17b93cd
 
 
 
 
 
 
 
 
 
4ea9e07
17b93cd
 
 
4ea9e07
17b93cd
 
 
4ea9e07
17b93cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ea9e07
17b93cd
 
4ea9e07
17b93cd
 
4ea9e07
17b93cd
 
4ea9e07
17b93cd
 
 
 
 
4ea9e07
17b93cd
 
 
 
4ea9e07
17b93cd
 
 
4ea9e07
17b93cd
 
 
4ea9e07
17b93cd
4ea9e07
17b93cd
 
 
4ea9e07
17b93cd
 
4ea9e07
17b93cd
 
 
 
4ea9e07
17b93cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ea9e07
 
 
 
 
 
 
 
 
17b93cd
 
 
4ea9e07
17b93cd
 
4ea9e07
 
 
17b93cd
4ea9e07
17b93cd
 
4ea9e07
 
17b93cd
4ea9e07
 
 
 
 
17b93cd
4ea9e07
17b93cd
 
 
 
 
 
 
 
4ea9e07
 
 
 
 
 
 
 
 
 
 
 
 
 
17b93cd
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import gradio as gr
import PyPDF2
import io
import time
import os
from together import Together
import whisper

# Load the Whisper "base" checkpoint once at import time; transcribe_audio()
# reuses this module-level instance for every request.
whisper_model = whisper.load_model("base")

def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF into one string.

    Args:
        pdf_file: The PDF as raw ``bytes``, as a binary file-like object
            exposing ``read()``, or as a filesystem path (``str`` or
            ``os.PathLike``).

    Returns:
        The page texts in order, each followed by a blank line. A page that
        yields no text contributes a ``[Page N - No extractable text found]``
        placeholder. If no page yields any text at all, a message starting
        with ``"No text could be extracted"`` is returned instead. Any
        exception is reported as a string starting with
        ``"Error extracting text from PDF"`` rather than raised, because
        callers detect failures by these prefixes.
    """
    try:
        if isinstance(pdf_file, (str, os.PathLike)):
            with open(pdf_file, "rb") as handle:
                pdf_content = handle.read()
        elif hasattr(pdf_file, "read"):
            can_seek = hasattr(pdf_file, "seek")
            # Rewind first so a stream that was already consumed is still read
            # in full, and rewind again afterwards so the caller can reuse it.
            if can_seek:
                pdf_file.seek(0)
            pdf_content = pdf_file.read()
            if can_seek:
                pdf_file.seek(0)
        else:
            pdf_content = pdf_file

        pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_content))

        parts = []
        found_text = False
        for page_number, page in enumerate(pdf_reader.pages, start=1):
            page_text = page.extract_text()
            if page_text and page_text.strip():
                found_text = True
                parts.append(page_text + "\n\n")
            else:
                parts.append(f"[Page {page_number} - No extractable text found]\n\n")

        # Placeholders alone must not count as content: a fully scanned PDF
        # has pages but no text and has to be reported as such.
        if not found_text:
            return "No text could be extracted from the PDF. The document may be scanned or image-based."
        return "".join(parts)
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"

def format_chat_history(history):
    """Return the conversation as a new list of ``(user, bot)`` tuples.

    Each entry of ``history`` must unpack into exactly two items; the pair is
    re-packed as a tuple, so list-style pairs come back as tuples.
    """
    return [(question_text, answer_text) for question_text, answer_text in history]

def chat_with_pdf(api_key, pdf_text, user_question, history):
    """Answer a question about the extracted PDF text via Together's chat API.

    Args:
        api_key: Together API key; surrounding whitespace is ignored.
        pdf_text: Text extracted from the PDF. Empty text, or one of the
            failure messages produced by ``extract_text_from_pdf``, is
            rejected.
        user_question: The question to put to the model.
        history: Earlier turns as ``(user_message, bot_message)`` pairs.
            ``None`` is treated as an empty conversation. The argument is
            never mutated.

    Returns:
        ``(updated_history, updated_history)``. The updated history is the
        given history plus one ``(user_question, reply)`` pair, where the
        reply is either the model's answer or a message starting with
        ``"Error: "``. Validation problems and API exceptions are reported
        this way instead of being raised.
    """
    history = list(history) if history else []

    def reply(message):
        # Both return slots are wired to the same chatbot component, so they
        # must carry the same list; a stale second value could otherwise
        # overwrite the message that was just appended.
        updated = history + [(user_question, message)]
        return updated, updated

    if not (api_key or "").strip():
        return reply("Error: Please enter your Together API key.")

    pdf_text = pdf_text or ""
    # Match only the exact failure messages so that a document whose text
    # happens to begin with "Error" or "No text" is still accepted.
    failure_prefixes = ("Error extracting text from PDF", "No text could be extracted")
    if not pdf_text.strip() or pdf_text.startswith(failure_prefixes):
        return reply("Error: Please upload a valid PDF file with extractable text first.")

    if not (user_question or "").strip():
        return reply("Error: Please enter a question.")

    try:
        client = Together(api_key=api_key.strip())

        # Keep the prompt bounded: for long documents send only the first and
        # last halves of the character budget, with a marker in between.
        max_context_length = 10000
        if len(pdf_text) > max_context_length:
            half_length = max_context_length // 2
            pdf_context = pdf_text[:half_length] + "\n\n[...Content truncated due to length...]\n\n" + pdf_text[-half_length:]
        else:
            pdf_context = pdf_text

        system_message = f"""You are an intelligent assistant designed to read, understand, and extract information from PDF documents. 
PDF CONTENT:
{pdf_context}
Answer the user's questions only based on the PDF content above. If the answer cannot be found in the PDF, politely state that the information is not available in the provided document."""

        messages = [
            {"role": "system", "content": system_message},
        ]

        for h_user, h_bot in history:
            # Incomplete turns and this function's own error notices are not
            # part of the real conversation, so they are not replayed.
            if not h_user or not h_bot:
                continue
            if isinstance(h_bot, str) and h_bot.startswith("Error:"):
                continue
            messages.append({"role": "user", "content": h_user})
            messages.append({"role": "assistant", "content": h_bot})

        messages.append({"role": "user", "content": user_question})

        response = client.chat.completions.create(
            model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
            messages=messages,
            max_tokens=5000,
            temperature=0.7,
        )

        return reply(response.choices[0].message.content)
    except Exception as e:
        return reply(f"Error: {str(e)}")

def process_pdf(pdf_file, api_key_input):
    """Extract text from an uploaded PDF and build a status line for the UI.

    Args:
        pdf_file: The uploaded PDF (``None`` when nothing was uploaded). It is
            passed unchanged to ``extract_text_from_pdf``.
        api_key_input: Unused; kept so the signature matches the inputs the
            UI passes to this callback.

    Returns:
        ``(status_message, pdf_text, chat_history)``. ``chat_history`` is
        always an empty list, so processing a PDF resets the conversation.
        ``pdf_text`` is the extracted text on success and ``""`` on any
        failure.
    """
    if pdf_file is None:
        return "Please upload a PDF file.", "", []
    try:
        # Raw bytes carry no file name, so fall back to a generic label.
        if isinstance(pdf_file, (str, os.PathLike)):
            file_name = os.path.basename(os.fspath(pdf_file))
        elif hasattr(pdf_file, 'name'):
            file_name = os.path.basename(pdf_file.name)
        else:
            file_name = "Uploaded PDF"

        pdf_text = extract_text_from_pdf(pdf_file)
        if pdf_text.startswith("Error extracting text from PDF"):
            return f"❌ {pdf_text}", "", []
        if not pdf_text.strip() or pdf_text.startswith("No text could be extracted"):
            return f"⚠️ {pdf_text}", "", []

        word_count = len(pdf_text.split())
        status_message = f"✅ Successfully processed PDF: {file_name} ({word_count} words extracted)"
        return status_message, pdf_text, []
    except Exception as e:
        return f"❌ Error processing PDF: {str(e)}", "", []

def validate_api_key(api_key):
    """Run a local plausibility check on a Together API key.

    Only the presence and length of the key are checked; nothing is sent to
    the server.

    Args:
        api_key: The key as typed by the user; may be ``None`` or empty.

    Returns:
        A one-line status message: a required-key notice for ``None``, empty
        or whitespace-only input, a too-short notice for fewer than 10
        characters after stripping, and a success notice otherwise.
    """
    key = api_key.strip() if api_key else ""
    if not key:
        return "❌ API Key is required"
    if len(key) < 10:
        return "❌ API Key appears to be too short"
    return "✓ API Key format looks valid (not verified with server)"

def transcribe_audio(audio):
    """Transcribe a recording with the module-level Whisper model.

    Returns ``""`` when ``audio`` is ``None``, the ``'text'`` entry of the
    transcription result on success, and a message starting with
    ``"Error transcribing audio: "`` if anything raises.
    """
    if audio is not None:
        try:
            transcription = whisper_model.transcribe(audio)
            return transcription['text']
        except Exception as exc:
            return f"Error transcribing audio: {exc!s}"
    return ""

# Build the UI: the left column holds the API key, PDF upload and processing
# status; the right column holds the chat and the voice-input controls.
with gr.Blocks(title="ChatPDF with Together AI") as app:
    gr.Markdown("# 📄 ChatPDF with Together AI")
    gr.Markdown("Upload a PDF and chat with it using the Llama-3.3-70B model.")

    with gr.Row():
        with gr.Column(scale=1):
            api_key_input = gr.Textbox(label="Together API Key", placeholder="Enter your Together API key here...", type="password")
            api_key_status = gr.Textbox(label="API Key Status", interactive=False)
            # type="binary" hands the upload to process_pdf as raw bytes.
            pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"], type="binary")
            process_button = gr.Button("Process PDF")
            status_message = gr.Textbox(label="Status", interactive=False)
            # Hidden textbox that carries the full extracted text between callbacks.
            pdf_text = gr.Textbox(visible=False)
            with gr.Accordion("PDF Content Preview", open=False):
                pdf_preview = gr.Textbox(label="Extracted Text Preview", interactive=False, max_lines=10, show_copy_button=True)

        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat with PDF", height=500, show_copy_button=True)
            question = gr.Textbox(label="Ask a question about the PDF", placeholder="What is the main topic of this document?", lines=2)
            with gr.Row():
                # NOTE(review): `source=` is the Gradio 3.x spelling; newer
                # releases expect `sources=[...]` — confirm against the
                # installed Gradio version.
                voice_input = gr.Audio(label="🎤 Record your question", source="microphone", type="filepath")
                transcribe_button = gr.Button("🎤 Transcribe Audio")
            submit_button = gr.Button("Submit Question")

    def update_preview(text):
        """Return at most the first 500 characters of ``text`` for the preview box.

        Empty values and texts starting with "Error" or "No text" are passed
        through unchanged; longer texts get a truncation notice appended.
        """
        if not text or text.startswith(("Error", "No text")):
            return text
        preview = text[:500]
        if len(text) > 500:
            preview += "...\n[Text truncated for preview. Full text will be used for chat.]"
        return preview

    # Re-check the key format on every edit of the key field.
    api_key_input.change(fn=validate_api_key, inputs=[api_key_input], outputs=[api_key_status])

    # Processing a PDF refreshes the status, the hidden text and the chat,
    # then updates the preview from the hidden text.
    process_button.click(fn=process_pdf, inputs=[pdf_file, api_key_input], outputs=[status_message, pdf_text, chatbot])\
        .then(fn=update_preview, inputs=[pdf_text], outputs=[pdf_preview])

    # chat_with_pdf returns two values, so two outputs are listed; both are
    # the chatbot. The question box is cleared after each request.
    submit_button.click(fn=chat_with_pdf, inputs=[api_key_input, pdf_text, question, chatbot], outputs=[chatbot, chatbot])\
        .then(fn=lambda: "", outputs=question)

    question.submit(fn=chat_with_pdf, inputs=[api_key_input, pdf_text, question, chatbot], outputs=[chatbot, chatbot])\
        .then(fn=lambda: "", outputs=question)

    # Voice flow: the transcription is written into the question box and then
    # submitted immediately, without a separate click on "Submit Question".
    transcribe_button.click(fn=transcribe_audio, inputs=[voice_input], outputs=[question])\
        .then(fn=chat_with_pdf, inputs=[api_key_input, pdf_text, question, chatbot], outputs=[chatbot, chatbot])\
        .then(fn=lambda: "", outputs=question)

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a public share link — confirm
    # that exposing the app beyond localhost is intended.
    app.launch(share=True)