import urllib.request import fitz import re import numpy as np import tensorflow_hub as hub import openai import gradio as gr import os import shutil from pathlib import Path from tempfile import NamedTemporaryFile from sklearn.neighbors import NearestNeighbors import huggingface_hub openai.base_url = "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1/v1/" openai.api_key = huggingface_hub.get_token() from util import pdf_to_text, text_to_chunks, SemanticSearch recommender = SemanticSearch() def load_recommender(path, start_page=1): global recommender texts = pdf_to_text(path, start_page=start_page) chunks = text_to_chunks(texts, start_page=start_page) recommender.fit(chunks) return 'Corpus Loaded.' def generate_text(prompt, model = "gpt-3.5-turbo-16k-0613"): model="mistralai/Mixtral-8x7B-Instruct-v0.1" temperature=0.7 max_tokens=256 top_p=1 frequency_penalty=0 presence_penalty=0 message = openai.ChatCompletion.create( model=model, messages=[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "assistant", "content": "Here is some initial assistant message."}, {"role": "user", "content": prompt} ], temperature=.3, max_tokens=max_tokens, top_p=top_p, frequency_penalty=frequency_penalty, presence_penalty=presence_penalty, ).choices[0].message['content'] return message def generate_answer(question): topn_chunks = recommender(question) prompt = 'search results:\n\n' for c in topn_chunks: prompt += c + '\n\n' prompt += "Instructions: Compose a comprehensive reply to the query using the search results given. "\ "Cite each reference using [ Page Number] notation. "\ "Only answer what is asked. The answer should be short and concise. \n\nQuery: " prompt += f"{question}\nAnswer:" answer = generate_text(prompt) return answer def question_anwser(chat_history, file, question): suffix = Path(file.name).suffix with NamedTemporaryFile(delete=False, suffix=suffix) as tmp: shutil.copyfile(file.name, tmp.name) tmp_path = Path(tmp.name) load_recommender(str(tmp_path)) answer = generate_answer(question) chat_history.append([question, answer]) return chat_history title = 'PDF GPT ' description = """ PDF GPT """ with gr.Blocks(css="""#chatbot { font-size: 14px; min-height: 1200; }""") as demo: gr.Markdown(f'