fplevit committed on
Commit
29cdd97
1 Parent(s): c50be4a

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -107
app.py DELETED
@@ -1,107 +0,0 @@
1
- from langchain_community.document_loaders import WebBaseLoader
2
- from langchain.text_splitter import RecursiveCharacterTextSplitter
3
- from langchain_openai import OpenAIEmbeddings,ChatOpenAI
4
- from langchain_community.vectorstores.faiss import FAISS
5
-
6
- from langchain.chains import LLMChain
7
- from dotenv import find_dotenv, load_dotenv
8
-
9
- from langchain_core.prompts.chat import (
10
- ChatPromptTemplate,
11
- SystemMessagePromptTemplate,
12
- HumanMessagePromptTemplate,
13
- )
14
- import gradio as gr
15
-
16
# Populate the process environment from the .env file located by find_dotenv()
# before any OpenAI client object is constructed.
load_dotenv(dotenv_path=find_dotenv())

# Embedding model used further down to build the FAISS index.
embeddings = OpenAIEmbeddings()
18
-
19
- import requests
20
- from bs4 import BeautifulSoup
21
- from urllib.parse import urlparse, urljoin
22
-
23
def extract_subdomain_urls(subdomain):
    """Return the same-host links found on the page at *subdomain*.

    The page is downloaded, every ``<a href=...>`` is resolved to an absolute
    URL, and only URLs whose host matches the host of *subdomain* are kept.

    Args:
        subdomain: Absolute URL of the page to scan (despite the name, this is
            a full page URL, not just a host name).

    Returns:
        A list of absolute URLs in first-seen order, without ``#fragment``
        parts and without duplicates.

    Raises:
        requests.RequestException: If the page cannot be fetched, the request
            times out, or the server answers with an HTTP error status.
    """
    # A timeout keeps a stalled server from hanging the caller indefinitely.
    response = requests.get(subdomain, timeout=30)
    # Fail loudly on 4xx/5xx instead of scraping links from an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    host = urlparse(subdomain).netloc

    seen = set()
    subdomain_urls = []
    for anchor in soup.find_all("a"):
        href = anchor.get("href")
        if not href:
            continue

        # Resolve against the URL of the page itself (after redirects) so that
        # relative links such as "intro/" keep the page's directory instead of
        # being attached to the site root.
        parsed = urlparse(urljoin(response.url, href))
        if parsed.netloc != host:
            continue

        # "#section" variants point at the same document; drop the fragment so
        # each page is listed once.
        url = parsed._replace(fragment="").geturl()
        if url in seen:
            continue
        seen.add(url)
        subdomain_urls.append(url)

    return subdomain_urls
40
-
41
# Collect every same-host page linked from the handbook's /de/ landing page.
subdomain = "https://i14y-ch.github.io/handbook/de/"
urls = extract_subdomain_urls(subdomain)

# Download the handbook pages with LangChain's WebBaseLoader.
loader = WebBaseLoader(urls)
loader.requests_per_second = 1
# load() instead of load_and_split(): the latter already chunks the pages with
# a default splitter, so the documents would be split twice and the chunk
# boundaries would no longer be governed by the splitter configured below.
pages = loader.load()

# Split into ~1000-character chunks (100 overlap) to keep the prompt small.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = text_splitter.split_documents(pages)

# Embed the chunks into a FAISS vector store used for similarity search.
db = FAISS.from_documents(docs, embeddings)
56
-
57
# System prompt for the I14Y assistant. {docs} is replaced by the retrieved
# handbook extract; it appears exactly once so the extract is not sent twice.
_SYSTEM_TEMPLATE = """
The I14Y interoperability platform is the central directory of data, electronic interfaces and authority services in Switzerland.
You are a helpful assistant that answers questions about I14Y based on the platform handbook, of which an extract is given at the end of this message.
Given a question from a user, you create a final answer based on the information in that extract.
Whenever you have this information, you must cite the relevant section title of the handbook that you used in your answer.
If you don't have enough information to answer the question, politely state that you don't know. Do not make up answers.
If you don't understand the question, ask the user to reformulate it.
If the question is not about the I14Y interoperability platform, say that you only answer question about I14Y.
Ensure your answers are detailed, concise, and relevant, providing step-by-step instructions if needed.
You are very polite and always greet the user with "Grüezi".
At the end of your answer, ask politely the user if they need any further information.
Do not include references to platforms other than I14Y in your answers such as for example Geocat.
Answer in the language in which the question was asked.
I14Y stands for Interoperability. The user may call I14Y "IOP" but you should not use this name in your answer.

Handbook extract:
{docs}
"""

# Reply shown when the user submits an empty question.
_EMPTY_QUESTION_REPLY = "Bitte gib zuerst eine Frage ein."


def get_response_from_query(query):
    """Answer a user question from the most similar handbook chunks.

    The four chunks closest to *query* are fetched from the FAISS index and
    passed to the chat model as context. With 1000-character chunks this keeps
    the prompt well inside the model's context window (the original author
    cited 4097 tokens for gpt-3.5-turbo).

    Args:
        query: The question typed by the user.

    Returns:
        The model's answer as a string, or a short prompt to enter a question
        when *query* is empty or only whitespace.
    """
    # Skip the embedding and LLM round trips when there is nothing to answer.
    if not query or not query.strip():
        return _EMPTY_QUESTION_REPLY

    # Named "matches" so the module-level "docs" list is not shadowed.
    matches = db.similarity_search(query, k=4)
    docs_page_content = " ".join(match.page_content for match in matches)

    # temperature=0 asks the model for its most likely answer.
    chat = ChatOpenAI(temperature=0)

    system_message_prompt = SystemMessagePromptTemplate.from_template(_SYSTEM_TEMPLATE)
    human_message_prompt = HumanMessagePromptTemplate.from_template(
        "Answer the following question: {question}"
    )
    chat_prompt = ChatPromptTemplate.from_messages(
        [system_message_prompt, human_message_prompt]
    )

    chain = LLMChain(llm=chat, prompt=chat_prompt)
    result = chain.invoke({"question": query, "docs": docs_page_content})
    return result["text"]
98
-
99
-
100
# Default Gradio theme with the Roboto web font and red/pink accent colours.
chat_theme = gr.themes.Default(
    font=gr.themes.GoogleFont("Roboto"),
    primary_hue="red",
    secondary_hue="pink",
)

# Single-turn UI: one question box, one answer box, one submit button.
with gr.Blocks(title="I14Y Chatbot", theme=chat_theme) as demo:
    # Named *_box rather than input/output so the builtin input() is not
    # shadowed at module level.
    question_box = gr.Textbox(label="Frage mich etwas über die I14Y")
    answer_box = gr.Textbox(label="Antwort")
    search_btn = gr.Button("Frage stellen")
    search_btn.click(
        fn=get_response_from_query,
        inputs=question_box,
        outputs=answer_box,
        api_name="Frage stellen",
    )

# share=False: do not request a public Gradio share link.
demo.launch(share=False)