import urllib.request import fitz import re import numpy as np import tensorflow_hub as hub from openai import OpenAI import gradio as gr import os import shutil from pathlib import Path from tempfile import NamedTemporaryFile from sklearn.neighbors import NearestNeighbors import anthropic # client = OpenAI( # base_url='https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1/v1/', # api_key=os.getenv('openai_key') # ) client = anthropic.Anthropic() from util import pdf_to_text, text_to_chunks, SemanticSearch recommender = SemanticSearch() def load_recommender(path, start_page=1): global recommender texts = pdf_to_text(path, start_page=start_page) chunks = text_to_chunks(texts, start_page=start_page) recommender.fit(chunks) return 'Corpus Loaded.' # def openai_generate_text(prompt, model = "gpt-3.5-turbo-16k-0613"): # model="mistralai/Mixtral-8x7B-Instruct-v0.1" # max_tokens=1024 # message = clinet.chat.completions.create( # model=model, # messages=[ # {"role": "user", "content": prompt} # ], # max_tokens=max_tokens, # ).choices[0].message.content # return message def claude_generate_text(prompt, model = "claude-3-haiku-20240307"): message = client.messages.create( model=model, max_tokens=1000, temperature=0.0, # system="Respond only in mandarin", messages=[ {"role": "user", "content": prompt} ] ) return message.content[0].text def generate_answer(question): topn_chunks = recommender(question) prompt = 'search results:\n\n' for c in topn_chunks: prompt += c + '\n\n' prompt += "Instructions: Compose a comprehensive reply to the query using the search results given. "\ "Cite each reference using [ Page Number] notation. "\ "Only answer what is asked. The answer should be short and concise. "\ "If asked in Chinese, respond in Chinese; if asked in English, respond"\ "in English \n\nQuery: " prompt += f"{question}\nAnswer:" answer = claude_generate_text(prompt) return answer def question_answer(chat_history, file, question): suffix = Path(file.name).suffix with NamedTemporaryFile(delete=False, suffix=suffix) as tmp: shutil.copyfile(file.name, tmp.name) tmp_path = Path(tmp.name) load_recommender(str(tmp_path)) answer = generate_answer(question) chat_history.append([question, answer]) return chat_history title = 'PDF GPT ' description = """ PDF GPT """ with gr.Blocks(css="""#chatbot { font-size: 14px; min-height: 1200; }""") as demo: gr.Markdown(f'