I14YChatbotLight

Runtime error

App Files Files Community

fplevit committed on Feb 11

Commit

ae2616f

•

1 Parent(s): 29cdd97

Create app.py

Browse files

Files changed (1) hide show

app.py +107 -0

app.py ADDED Viewed

	@@ -0,0 +1,107 @@

+from langchain_community.document_loaders import WebBaseLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_openai import OpenAIEmbeddings,ChatOpenAI
+from langchain_community.vectorstores.faiss import FAISS
+from langchain.chains import LLMChain
+from dotenv import find_dotenv, load_dotenv
+from langchain_core.prompts.chat import (
+    ChatPromptTemplate,
+    SystemMessagePromptTemplate,
+    HumanMessagePromptTemplate,
+)
+import gradio as gr
+# Load environment variables from the .env file located by find_dotenv().
+# NOTE(review): presumably this is where the OpenAI API key comes from - confirm.
+load_dotenv(find_dotenv())
+# Embedding client; passed to FAISS.from_documents further down to build the index.
+embeddings = OpenAIEmbeddings()
+import requests
+from bs4 import BeautifulSoup
+from urllib.parse import urlparse, urljoin
+def extract_subdomain_urls(subdomain):
+    """Return the links on a page that stay on that page's host.
+
+    Fetches ``subdomain``, collects the ``href`` of every ``<a>`` tag,
+    resolves each one to an absolute URL and keeps those whose host equals
+    the host of ``subdomain``.
+
+    Args:
+        subdomain: Absolute URL of the page to scan for links.
+
+    Returns:
+        A list of absolute URLs in first-seen order, with ``#fragment``
+        parts removed and without duplicates.
+
+    Raises:
+        requests.RequestException: If the page cannot be fetched, the
+            request times out, or the server answers with an error status.
+    """
+    # A timeout keeps start-up from hanging forever on an unresponsive host.
+    response = requests.get(subdomain, timeout=30)
+    # Fail loudly rather than harvesting links from an error page.
+    response.raise_for_status()
+    soup = BeautifulSoup(response.text, "html.parser")
+    host = urlparse(subdomain).netloc
+    # Insertion-ordered dict used as an ordered set for de-duplication.
+    seen = {}
+    for anchor in soup.find_all("a"):
+        href = anchor.get("href")
+        if not href:
+            continue
+        # Resolve against the URL of the fetched page, not just the site
+        # root, so relative links such as "kapitel/" keep their directory.
+        url = urlparse(urljoin(response.url, href))
+        if url.netloc != host:
+            continue
+        # Drop the fragment: "page#a" and "page#b" are the same document.
+        seen.setdefault(url._replace(fragment="").geturl(), None)
+    return list(seen)
+# Collect the same-host links found on the handbook start page
+subdomain = "https://i14y-ch.github.io/handbook/de/"
+urls = extract_subdomain_urls(subdomain)
+# Load each linked page as a whole document. load() is used rather than
+# load_and_split(): the latter already chunks with a default splitter, so the
+# explicit splitter below would re-split chunks instead of full pages.
+loader = WebBaseLoader(urls)
+loader.requests_per_second = 1
+pages = loader.load()
+# Cut the pages into 1000-character chunks (100 overlap) so that a few of
+# them fit into a single prompt
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+docs = text_splitter.split_documents(pages)
+# Embed the chunks in a FAISS vector index
+db = FAISS.from_documents(docs, embeddings)
+#Define the function that creates a chat prompt given a user query
+def get_response_from_query(query):
+  """Answer a question about I14Y using the closest handbook chunks.
+
+  The four chunks most similar to ``query`` are looked up in the FAISS index,
+  joined into one context string and sent to the chat model together with
+  the question. The original author sized the chunk length (1000) and k (4)
+  with the 4097-token limit of gpt-3.5-turbo in mind.
+
+  Args:
+    query: The question text entered by the user.
+
+  Returns:
+    The answer text produced by the chain.
+  """
+  # Retrieve the context before any model object is created
+  matches = db.similarity_search(query, k=4)
+  context = " ".join(match.page_content for match in matches)
+  chat = ChatOpenAI(temperature=0)
+  # Instructions for the assistant; {docs} is filled with the retrieved context
+  template = """
+        The I14Y interoperability platform is the central directory of data, electronic interfaces and authority services in Switzerland.
+        You are a helpful assistant that answers questions about I14Y based on the platform handbook, of which {docs} is an extract.
+        Given a question from a user, you create a final answer based on the information in {docs}.
+        Whenever you have this information, you must cite the relevant section title of the handbook that you used in your answer.
+        If you don't have enough information to answer the question, politely state that you don't know. Do not make up answers.
+        If you don't understand the question, ask the user to reformulate it.
+        If the question is not about the I14Y interoperability platform, say that you only answer question about I14Y.
+        Ensure your answers are detailed, concise, and relevant, providing step-by-step instructions if needed.
+        You are very polite and always greet the user with "Grüezi".
+        At the end of your answer, ask politely the user if they need any further information.
+        Do not include references to platforms other than I14Y in your answers such as for example Geocat.
+        Answer in the language in which the question was asked.
+        I14Y stands for Interoperability. The user may call I14Y "IOP" but you should not use this name in your answer.
+      """
+  # System message first, then the user's question
+  chat_prompt = ChatPromptTemplate.from_messages(
+    [
+      SystemMessagePromptTemplate.from_template(template),
+      HumanMessagePromptTemplate.from_template("Answer the following question: {question}"),
+    ]
+  )
+  chain = LLMChain(llm=chat, prompt=chat_prompt)
+  result = chain.invoke({'question': query, 'docs': context})
+  return result['text']
+# Single-page UI: a question box, an answer box and a submit button.
+with gr.Blocks(title="I14Y Chatbot", theme=gr.themes.Default(font=gr.themes.GoogleFont("Roboto"), primary_hue="red", secondary_hue="pink")) as demo:
+    # Named question_box/answer_box so the builtin input() is not shadowed at module level.
+    question_box = gr.Textbox(label="Frage mich etwas über die I14Y")
+    answer_box = gr.Textbox(label="Antwort")
+    search_btn = gr.Button("Frage stellen")
+    # On click, pass the question text to get_response_from_query and show its return value.
+    search_btn.click(fn=get_response_from_query, inputs=question_box, outputs=answer_box, api_name="Frage stellen")
+demo.launch(share=False)