HeRksTAn commited on
Commit
ddcdb82
β€’
1 Parent(s): a02420e
Files changed (5) hide show
  1. Dockerfile +11 -0
  2. __pycache__/app.cpython-311.pyc +0 -0
  3. app.py +110 -0
  4. chainlit.md +14 -0
  5. requirements.txt +10 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9
2
+ RUN useradd -m -u 1000 user
3
+ USER user
4
+ ENV HOME=/home/user \
5
+ PATH=/home/user/.local/bin:$PATH
6
+ WORKDIR $HOME/app
7
+ COPY --chown=user . $HOME/app
8
+ COPY ./requirements.txt ~/app/requirements.txt
9
+ RUN pip install -r requirements.txt
10
+ COPY . .
11
+ CMD ["chainlit", "run", "app.py", "--port", "7860"]
__pycache__/app.cpython-311.pyc ADDED
Binary file (4.35 kB). View file
 
app.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Chainlit RAG app: answers car questions from a PDF manual via Pinecone."""

# Imports grouped stdlib / third-party; duplicate `langchain_openai` imports
# merged and commented-out superseded imports removed.
import os
from operator import itemgetter

import chainlit as cl
import tiktoken
from dotenv import load_dotenv
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.runnable.config import RunnableConfig
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import Pinecone
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Pull OPENAI/PINECONE credentials, `pdfurl`, and `index` from a .env file.
load_dotenv()

# Prompt template for the RAG chain; {context} and {question} are filled by
# the runnable pipeline built in main().
RAG_PROMPT = """

CONTEXT:
{context}

QUERY:
{question}

You are a car specialist and can only provide your answers from the context.

Don't tell in your response that you are getting it from the context.

"""

# NOTE(review): currently unused in this file — kept for backward
# compatibility in case an external caller reads it; verify before removing.
init_settings = {
    "model": "gpt-3.5-turbo",
    "temperature": 0,
    "max_tokens": 500,
    "top_p": 1,
    "frequency_penalty": 0,
    "presence_penalty": 0,
}
# Tokenizer for gpt-3.5-turbo, resolved once at import time instead of on
# every call — encoding_for_model involves a registry lookup each time.
_GPT35_ENCODING = tiktoken.encoding_for_model("gpt-3.5-turbo")


def tiktoken_len(text):
    """Return the number of gpt-3.5-turbo tokens in *text*.

    Used as the ``length_function`` for RecursiveCharacterTextSplitter so
    chunk sizes are measured in model tokens rather than characters.
    """
    return len(_GPT35_ENCODING.encode(text))
# --- RAG pipeline, built at module import time ---------------------------
# NOTE(review): everything below runs on import (PDF download, OpenAI
# embedding calls, Pinecone writes); consider deferring it to a function.

# Load the car manual PDF from the URL in the `pdfurl` env var.
# NOTE(review): os.environ.get returns None if unset — presumably the env
# var is always configured; verify deployment config.
car_manual = PyMuPDFLoader(os.environ.get('pdfurl'))

car_manual_data = car_manual.load()

# Split into ~400-token chunks with 50-token overlap, token-counted via
# tiktoken_len so chunks fit the embedding/LLM context budget.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 400,
    chunk_overlap = 50,
    length_function = tiktoken_len)

car_manual_chunks = text_splitter.split_documents(car_manual_data)

# Embed every chunk and upsert into the Pinecone index named by `index`;
# re-runs re-embed and re-upsert the whole manual.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
vector_store = Pinecone.from_documents(car_manual_chunks, embedding_model, index_name=os.environ.get('index'))
retriever = vector_store.as_retriever()

# Prompt and chat model shared by every session's chain (see main()).
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

model = ChatOpenAI(model="gpt-3.5-turbo")
@cl.on_chat_start
async def main():
    """Wire the retrieval-augmented QA chain and stash it in the session.

    The heavy lifting (PDF load, chunking, Pinecone indexing) already ran at
    module import; here we only compose the pre-built retriever, prompt, and
    model into a runnable chain, one instance per chat session.
    """
    # "question" feeds both the retriever (to fetch context) and the prompt.
    mecanic_qa_chain = (
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | rag_prompt | model | StrOutputParser()
    )

    cl.user_session.set("runnable", mecanic_qa_chain)
@cl.on_message
async def on_message(message: cl.Message):
    """Stream the RAG chain's answer token-by-token for each user message."""
    runnable = cl.user_session.get("runnable")
    msg = cl.Message(content="")

    async for chunk in runnable.astream(
        {"question": message.content},
        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    ):
        await msg.stream_token(chunk)

    # Finalize the streamed message — without send() the Chainlit UI never
    # marks the message as complete.
    await msg.send()
chainlit.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Welcome to Chainlit! πŸš€πŸ€–
2
+
3
+ Hi there, Developer! πŸ‘‹ We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
4
+
5
+ ## Useful Links πŸ”—
6
+
7
+ - **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) πŸ“š
8
+ - **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! πŸ’¬
9
+
10
+ We can't wait to see what you create with Chainlit! Happy coding! πŸ’»πŸ˜Š
11
+
12
+ ## Welcome screen
13
+
14
+ To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ chainlit==0.7.700
2
+ cohere==4.37
3
+ openai
4
+ tiktoken
5
+ python-dotenv==1.0.0
6
+ langchain
7
+ langchain-community
8
+ langchain-openai
9
+ pymupdf
10
+ pinecone-client