# epub-chatbot / app.py
# (Hugging Face Space file header: "Create app.py" by foozy, commit 638c7af, verified)
import ebooklib
import gradio as gr
from bs4 import BeautifulSoup
from ebooklib import epub
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
# === Process the EPUB file ===
def load_epub(epub_path):
    """Extract the plain text of every document item in an EPUB file.

    Args:
        epub_path: Filesystem path to the ``.epub`` file.

    Returns:
        The text of all document (XHTML) items, joined with newlines.
    """
    book = epub.read_epub(epub_path)
    text = []
    for item in book.get_items():
        # Fix: ITEM_DOCUMENT is defined on the top-level ``ebooklib``
        # package, not on the ``epub`` submodule; ``epub.ITEM_DOCUMENT``
        # raises AttributeError at runtime.
        if item.get_type() == ebooklib.ITEM_DOCUMENT:
            # Strip the XHTML markup, keeping only the readable text.
            soup = BeautifulSoup(item.get_content(), "html.parser")
            text.append(soup.get_text())
    return "\n".join(text)
# === Split the text into chunks ===
def split_text(text):
    """Break *text* into overlapping ~500-character chunks for embedding."""
    chunker = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    )
    return chunker.split_text(text)
# === Build the vector database ===
def create_vectorstore(texts):
    """Embed the given text chunks and index them in a Chroma store."""
    embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")
    store = Chroma.from_texts(texts, embedding_model)
    return store
# === LLM (GPT-4 or open-source) ===
def load_llm():
    """Return the language model used to answer questions.

    Fix: ``gpt-4`` is a chat-completions model. The completion-style
    ``OpenAI`` wrapper calls the legacy completions endpoint, which does
    not support chat-only models, so ``ChatOpenAI`` is required here.
    Swap in an open-source chat model if desired.
    """
    return ChatOpenAI(model_name="gpt-4")
# === Build the Q&A chain ===
def create_qa_chain(llm, vectorstore):
    """Wire *llm* and the store's retriever into a "stuff"-type RetrievalQA chain."""
    retriever = vectorstore.as_retriever()
    return RetrievalQA.from_chain_type(llm, chain_type="stuff", retriever=retriever)
# === Chatbot function ===
def chatbot(epub_file, question):
    """Answer *question* from the uploaded EPUB file.

    Builds the full pipeline (extract -> chunk -> embed -> retrieve -> LLM)
    on every call, then runs the question through the RetrievalQA chain.

    Args:
        epub_file: The Gradio file upload (``None`` when nothing was uploaded).
        question: The user's question about the book.

    Returns:
        The model's answer, or a user-facing hint when input is missing.
    """
    # Guard clauses: Gradio passes None when no file was uploaded, and an
    # empty/whitespace question would otherwise be sent straight to the LLM.
    if epub_file is None:
        return "Bitte lade zuerst eine EPUB-Datei hoch."
    if not question or not question.strip():
        return "Bitte stelle eine Frage."
    # NOTE(review): assumes the upload object exposes a ``.name`` path
    # (tempfile-style wrapper); newer Gradio versions may pass a plain
    # path string — confirm against the installed Gradio version.
    text = load_epub(epub_file.name)
    texts = split_text(text)
    vectorstore = create_vectorstore(texts)
    llm = load_llm()
    qa_chain = create_qa_chain(llm, vectorstore)
    return qa_chain.run(question)
# === Gradio UI ===
# Lay out the upload/question/answer widgets and hook the button to the
# chatbot pipeline. ``demo`` stays module-level so Spaces can find it.
with gr.Blocks() as demo:
    gr.Markdown("## 📖 E-Book Chatbot mit LangChain")
    upload_box = gr.File(label="Lade eine EPUB-Datei hoch")
    query_box = gr.Textbox(label="Stelle eine Frage zu deinem Buch")
    answer_box = gr.Textbox(label="Antwort", interactive=False)
    ask_button = gr.Button("Frage stellen")
    ask_button.click(
        chatbot,
        inputs=[upload_box, query_box],
        outputs=answer_box,
    )

# === Start the app ===
if __name__ == "__main__":
    demo.launch()