AleenDG committed on
Commit
fbfc245
·
verified ·
1 Parent(s): d462c0b

first commit

Files changed (1) hide show
  1. app.py +62 -0
app.py CHANGED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader
3
+ from langchain_community.vectorstores import Chroma
4
+ from langchain_community import embeddings
5
+ from langchain_community.chat_models import ChatOllama
6
+ from langchain_core.runnables import RunnablePassthrough
7
+ from langchain_core.output_parsers import StrOutputParser
8
+ from langchain_core.prompts import ChatPromptTemplate
9
+ from langchain.output_parsers import PydanticOutputParser
10
+ from langchain.text_splitter import CharacterTextSplitter
11
+
12
def _parse_urls(urls):
    """Return the non-blank lines of *urls*, stripped of surrounding whitespace."""
    return [line.strip() for line in (urls or "").splitlines() if line.strip()]


def process_input(urls, question):
    """Answer *question* using the web pages listed in *urls* as context.

    Each URL is fetched with ``WebBaseLoader``, the resulting documents are
    split into token-counted chunks, embedded with the ``nomic-embed-text``
    Ollama model into a Chroma collection, and the retrieved chunks are fed
    together with the question to the local ``llama2`` chat model.

    Args:
        urls: Newline-separated URLs. Blank lines and surrounding whitespace
            (including ``\\r`` from Windows line endings) are ignored.
        question: The question to ask about the loaded pages.

    Returns:
        The model's answer as a string, or a short prompt asking for a URL
        when *urls* contains no usable entry.
    """
    import uuid  # local import: only needed to name the per-request collection

    # Validate before creating any model/loader so empty input is cheap.
    urls_list = _parse_urls(urls)
    if not urls_list:
        return "Please enter at least one URL."

    model_local = ChatOllama(model="llama2")

    # WebBaseLoader.load() returns a list per URL; flatten into one list.
    docs_list = [doc for url in urls_list for doc in WebBaseLoader(url).load()]

    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=7500, chunk_overlap=100
    )
    doc_splits = text_splitter.split_documents(docs_list)

    # Use a collection name unique to this call and drop it afterwards, so
    # chunks indexed for one request are not retrievable by another request.
    # NOTE(review): with a fixed name the in-process Chroma client appears to
    # reuse the same collection across calls -- confirm against the installed
    # chromadb version.
    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        collection_name=f"rag-chroma-{uuid.uuid4().hex}",
        embedding=embeddings.ollama.OllamaEmbeddings(model="nomic-embed-text"),
    )
    try:
        retriever = vectorstore.as_retriever()

        after_rag_template = (
            "Answer the question based only on the following context:\n"
            "{context}\n"
            "Question: {question}\n"
        )
        after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)
        after_rag_chain = (
            {"context": retriever, "question": RunnablePassthrough()}
            | after_rag_prompt
            | model_local
            | StrOutputParser()
        )
        return after_rag_chain.invoke(question)
    finally:
        vectorstore.delete_collection()
41
+
42
import pyttsx3

# Module-level text-to-speech engine used by speak().
# No driver name is passed so pyttsx3 selects the default driver for the
# current platform; the previous hard-coded 'sapi5' driver is Windows-only.
engine = pyttsx3.init()
voices = engine.getProperty('voices')
# Select the first installed voice when there is one; with no voices
# installed, keep the engine's default instead of raising IndexError.
if voices:
    engine.setProperty('voice', voices[0].id)
47
+
48
+
49
def speak(audio):
    """Speak *audio* through the module-level ``engine``.

    The text is queued with ``engine.say`` and then ``engine.runAndWait``
    is called to process the queued utterance.
    """
    tts = engine
    tts.say(audio)
    tts.runAndWait()
52
+
53
+
54
+ # Define Gradio interface
55
# Two text inputs (URLs, question) wired to process_input, with a plain-text
# output; the interface is launched as soon as it has been built.
_url_box = gr.Textbox(label="Enter URLs separated by new lines")
_question_box = gr.Textbox(label="Question")

iface = gr.Interface(
    fn=process_input,
    inputs=[_url_box, _question_box],
    outputs="text",
    title="Document Query with Ollama",
    description="Enter URLs and a question to query the documents.",
)

iface.launch()