# Provenance (web page header): author ysharma (HF staff); commit 617839a "Update app.py" (verified).
import gradio as gr
import anthropic
import base64
from typing import List, Dict, Any
# Default document content.
# Sent as the "Sample Document" when citations are enabled but no user
# document ends up in the request, and echoed in the document textbox
# placeholder so the user can see what the fallback text is.
DEFAULT_DOC = "The grass is pink and soil is green. The sky is red while the sun looks blue."
def read_pdf_as_base64(file_path: str) -> str:
    """Load the file at ``file_path`` and return its bytes as base64 text.

    The file is opened in binary mode, read in full, base64-encoded and the
    resulting bytes decoded as UTF-8 so the value can be embedded in JSON.
    """
    with open(file_path, 'rb') as pdf_handle:
        raw_bytes = pdf_handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')
def user_message(
    user_input: str,
    history: list,
    enable_citations: bool,
    doc_type: str,
    text_content: str,
    pdf_file: str,
    api_key: str
) -> tuple:
    """Record the user's turn in the chat history and clear the input box.

    Logs the input and the current citation settings to stdout, appends a
    ``{"role": "user", ...}`` entry to ``history`` (mutating it in place) and
    returns ``("", history)``: the empty string for the message textbox and
    the updated history for the chatbot.

    ``text_content``, ``pdf_file`` and ``api_key`` are accepted so the
    signature lines up with the event inputs, but they are not used here.
    """
    # Logging
    log_lines = (
        "\n----------- User Message -------------",
        f"User Input: {user_input}",
        f"Citations Enabled: {enable_citations}",
        f"Document Type: {doc_type}",
    )
    for line in log_lines:
        print(line)

    new_turn = {"role": "user", "content": user_input}
    history.append(new_turn)
    return "", history
def _citable_document(source: Dict[str, str], title: str) -> Dict[str, Any]:
    """Wrap ``source`` in a document content block with citations enabled."""
    return {
        "type": "document",
        "source": source,
        "title": title,
        "citations": {"enabled": True},
    }


def _is_display_only(msg: Dict[str, Any]) -> bool:
    """Tell whether an assistant entry is a titled side bubble (e.g. citations)."""
    metadata = msg.get("metadata")
    if isinstance(metadata, dict):
        # NOTE(review): some Gradio versions appear to attach a metadata dict
        # with an empty title to ordinary turns; only a real title marks a
        # bubble that must stay out of the model context - confirm against
        # the installed Gradio version.
        return bool(metadata.get("title"))
    return metadata is not None


def format_message_history(
    history: list,
    enable_citations: bool,
    doc_type: str,
    text_content: str,
    pdf_files: Any
) -> List[Dict]:
    """Convert Gradio chat history to Anthropic message format.

    Args:
        history: Chat entries as dicts with ``role`` and ``content`` keys.
            Must be non-empty; the last entry is treated as the question
            being asked now.
        enable_citations: When true, document blocks are placed ahead of the
            question in the final user message.
        doc_type: ``"plain_text"``, ``"pdf"`` or ``"combined"``; selects which
            of the document inputs are considered.
        text_content: Plain-text document. ``None``, empty and
            whitespace-only values are all treated as "no text supplied".
        pdf_files: A single PDF path, a list of paths, or a falsy value when
            nothing was uploaded.

    Returns:
        The list of messages: earlier user/assistant turns with string
        content, followed by one user message whose content is a list of
        blocks (zero or more documents, then the question text).
    """
    formatted_messages = []

    # Add previous messages
    for msg in history[:-1]:
        role = msg["role"]
        if role == "user":
            formatted_messages.append({
                "role": "user",
                "content": msg["content"]
            })
        elif role == "assistant" and not _is_display_only(msg):
            formatted_messages.append({
                "role": "assistant",
                "content": msg["content"]
            })

    # Prepare the latest message
    latest_message = {
        "role": "user",
        "content": []
    }
    blocks = latest_message["content"]

    # Add documents if citations are enabled
    if enable_citations:
        # Handle plain text input; guard against a None textbox value
        document_text = (text_content or "").strip()
        if doc_type in ("plain_text", "combined") and document_text:
            blocks.append(_citable_document(
                {
                    "type": "text",
                    "media_type": "text/plain",
                    "data": document_text,
                },
                "User Text Document",
            ))

        # Handle PDF input
        if doc_type in ("pdf", "combined") and pdf_files:
            if isinstance(pdf_files, str):
                pdf_files = [pdf_files]  # Convert single path to list
            # Add each PDF as a separate document
            for number, pdf_path in enumerate(pdf_files, start=1):
                try:
                    pdf_base64 = read_pdf_as_base64(pdf_path)
                except Exception as e:
                    # Best effort: an unreadable upload is skipped so the
                    # remaining documents and the question still go out.
                    print(f"Error processing PDF {number}: {str(e)}")
                    continue
                blocks.append(_citable_document(
                    {
                        "type": "base64",
                        "media_type": "application/pdf",
                        "data": pdf_base64,
                    },
                    f"User PDF Document {number}",
                ))

        # If no documents were added and citations are enabled, use default document
        if not blocks:
            blocks.append(_citable_document(
                {
                    "type": "text",
                    "media_type": "text/plain",
                    "data": DEFAULT_DOC,
                },
                "Sample Document",
            ))

    # Add the user's question
    blocks.append({
        "type": "text",
        "text": history[-1]["content"]
    })
    formatted_messages.append(latest_message)
    return formatted_messages
def bot_response(
    history: list,
    enable_citations: bool,
    doc_type: str,
    text_content: str,
    pdf_file: str,
    api_key: str
) -> List[Dict[str, Any]]:
    """Send the conversation to Claude and append the reply to ``history``.

    Args:
        history: Chat entries; the last one is the user's pending question.
            Mutated in place and also returned.
        enable_citations: When true, documents are attached to the request
            and any cited passages are shown in a separate titled entry.
        doc_type: Which document inputs to use (forwarded to
            ``format_message_history``).
        text_content: Plain-text document (forwarded).
        pdf_file: Uploaded PDF path(s) (forwarded).
        api_key: Anthropic API key. ``None``, empty and whitespace-only
            values produce a prompt for the key instead of an API call.

    Returns:
        ``history`` with one assistant entry appended (the reply, a request
        for the API key, or an error message), plus an optional second entry
        listing the distinct cited passages.

    No exception raised while building or sending the request propagates;
    each is reported as an assistant message instead.
    """
    # Pasted keys often carry stray whitespace; a blank key is "no key".
    api_key = (api_key or "").strip()
    if not api_key:
        history.append({
            "role": "assistant",
            "content": "Please provide your Anthropic API key to continue."
        })
        return history

    try:
        # Initialize client with provided API key
        client = anthropic.Anthropic(api_key=api_key)
        messages = format_message_history(history, enable_citations, doc_type, text_content, pdf_file)
        response = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1024,
            messages=messages
        )

        text_parts = []
        # A dict keeps insertion order, giving first-seen order with
        # constant-time de-duplication of repeated passages.
        cited_passages = {}

        # Process each content block
        for block in response.content:
            if block.type != "text":
                continue
            text_parts.append(block.text)
            if enable_citations and getattr(block, "citations", None):
                for citation in block.citations:
                    cited_passages.setdefault(citation.cited_text, None)

        # Add main response
        history.append({
            "role": "assistant",
            "content": "".join(text_parts)
        })

        # Add citations if any were found and citations are enabled
        if enable_citations and cited_passages:
            history.append({
                "role": "assistant",
                "content": "\n".join(f"• {cite}" for cite in cited_passages),
                # The title makes this a separate titled entry, which
                # format_message_history leaves out of later requests.
                "metadata": {"title": "📚 Citations"}
            })
        return history
    except Exception as e:
        print(f"Error in bot_response: {str(e)}")
        error_message = str(e)
        # Check the HTTP status carried by the SDK's status errors rather
        # than searching the message text, which can contain "401" for
        # unrelated reasons (ids, counts).
        if getattr(e, "status_code", None) == 401:
            error_message = "Invalid API key. Please check your Anthropic API key and try again."
        history.append({
            "role": "assistant",
            "content": f"I apologize, but I encountered an error: {error_message}"
        })
        return history
def update_document_inputs(enable_citations: bool, doc_type: str = "plain_text"):
    """Compute which document controls should be shown.

    The document-type radio follows the citations toggle directly. The text
    box is shown for the ``"plain_text"`` and ``"combined"`` types and the
    PDF uploader for ``"pdf"`` and ``"combined"``, in both cases only while
    citations are enabled. Returns a mapping from each of the three
    components to a replacement carrying the new ``visible`` flag.
    """
    show_text = enable_citations and doc_type in ("plain_text", "combined")
    show_pdf = enable_citations and doc_type in ("pdf", "combined")
    return {
        doc_type_radio: gr.Radio(visible=enable_citations),
        text_input: gr.Textbox(visible=show_text),
        pdf_input: gr.File(visible=show_pdf),
    }
# Build the UI: a chat column (scale 4) next to a settings column (scale 1),
# then wire the settings and the message box to the handlers defined above.
with gr.Blocks(theme="ocean", fill_height=True) as demo:
    gr.Markdown("# Chat with Anthropic Claude's Citations")
    with gr.Row(scale=1):
        # Chat column: conversation display plus the message entry box.
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(
                type="messages",
                bubble_full_width=False,
                show_label=False,
                scale=1
            )
            msg = gr.Textbox(
                placeholder="Enter your message here...",
                show_label=False,
                container=False
            )
        # Settings column: API key, citations toggle and document inputs.
        with gr.Column(scale=1):
            api_key = gr.Textbox(
                type="password",
                label="Anthropic API Key",
                placeholder="Enter your API key",
                info="Your API key will not be stored",
                interactive=True,
            )
            enable_citations = gr.Checkbox(
                label="Enable Citations",
                value=True,
                info="Toggle citation functionality"
            )
            doc_type_radio = gr.Radio(
                choices=["plain_text", "pdf", "combined"],
                value="plain_text",
                label="Document Type",
                info="Choose the type of document(s) to reference"
            )
            text_input = gr.Textbox(
                label="Document Content",
                placeholder=f"Enter your document text here.\nDefault text will be picked if citations are enabled and you don't provide the documents. Default document is --{DEFAULT_DOC}",
                lines=10,
                info="Enter the text you want to reference"
            )
            # Starts hidden because the initial document type is "plain_text";
            # update_document_inputs reveals it for "pdf" and "combined".
            pdf_input = gr.File(
                label="Upload PDF",
                file_count="multiple",
                file_types=[".pdf"],
                type="filepath",
                visible=False
            )
            # Clear button targeting the message box, the chat, and both
            # document inputs (the API key and settings are not listed).
            clear = gr.ClearButton([msg, chatbot, text_input, pdf_input])
    # Update input visibility based on settings.
    # Both the checkbox and the radio call the same handler with the same
    # inputs and outputs, so changing either one refreshes all three controls.
    enable_citations.change(
        update_document_inputs,
        inputs=[enable_citations, doc_type_radio],
        outputs=[doc_type_radio, text_input, pdf_input]
    )
    doc_type_radio.change(
        update_document_inputs,
        inputs=[enable_citations, doc_type_radio],
        outputs=[doc_type_radio, text_input, pdf_input]
    )
    # Handle message submission.
    # Step 1 (user_message) appends the user's turn and empties the textbox;
    # step 2 (bot_response) is chained with .then() and writes the updated
    # history back to the chatbot.
    msg.submit(
        user_message,
        [msg, chatbot, enable_citations, doc_type_radio, text_input, pdf_input, api_key],
        [msg, chatbot],
        queue=False
    ).then(
        bot_response,
        [chatbot, enable_citations, doc_type_radio, text_input, pdf_input, api_key],
        chatbot
    )
# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(debug=True)