Maxjohn12 committed on
Commit
0f0eea2
·
verified ·
1 Parent(s): 4f63140

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +2 -13
  2. env +0 -0
  3. rag_deep.py +148 -0
  4. requirements.txt +5 -0
README.md CHANGED
@@ -1,13 +1,2 @@
1
- ---
2
- title: SmartDocAI
3
- emoji: 🃏
4
- colorFrom: indigo
5
- colorTo: blue
6
- sdk: streamlit
7
- sdk_version: 1.44.1
8
- app_file: app.py
9
- pinned: false
10
- short_description: SmartDoc - AI
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # SmartDoc-AI
2
+ AI-Powered Document Assistant
 
 
 
 
 
 
 
 
 
 
 
env ADDED
File without changes
rag_deep.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain_community.document_loaders import PDFPlumberLoader
3
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
4
+ from langchain_core.vectorstores import InMemoryVectorStore
5
+ from langchain_ollama import OllamaEmbeddings
6
+ from langchain_core.prompts import ChatPromptTemplate
7
+ from langchain_ollama.llms import OllamaLLM
8
+
9
# Dark-theme stylesheet for the app. It is kept in a named constant so the
# rendering call below stays a one-liner. The markup is passed through
# st.markdown with unsafe_allow_html=True so the <style> tag is emitted as HTML.
_CUSTOM_THEME_CSS = """
<style>
.stApp {
background-color: #121826; /* Deep Navy Blue */
color: #EAEAEA; /* Soft White */
}

/* Chat Input Styling */
.stChatInput input {
background-color: #1A2238 !important; /* Dark Blue */
color: #F5F5F5 !important; /* Light Gray */
border: 1px solid #3E4C72 !important; /* Muted Blue */
}

/* User Message Styling */
.stChatMessage[data-testid="stChatMessage"]:nth-child(odd) {
background-color: #1F2A44 !important; /* Dark Blue Gray */
border: 1px solid #4A5C89 !important; /* Subtle Blue */
color: #D1D5DB !important; /* Soft White */
border-radius: 10px;
padding: 15px;
margin: 10px 0;
}

/* Assistant Message Styling */
.stChatMessage[data-testid="stChatMessage"]:nth-child(even) {
background-color: #253350 !important; /* Rich Deep Blue */
border: 1px solid #5C6FA9 !important; /* Light Blue Accent */
color: #F3F4F6 !important; /* Soft White */
border-radius: 10px;
padding: 15px;
margin: 10px 0;
}

/* Avatar Styling */
.stChatMessage .avatar {
background-color: #4CAF50 !important; /* Vibrant Green */
color: #FFFFFF !important; /* White */
}

/* Text Color Fix */
.stChatMessage p, .stChatMessage div {
color: #EAEAEA !important; /* Soft White */
}

.stFileUploader {
background-color: #1A2238;
border: 1px solid #4A5C89;
border-radius: 5px;
padding: 15px;
}

h1, h2, h3 {
color: #4CAF50 !important; /* Green Accent */
}
</style>

"""

st.markdown(_CUSTOM_THEME_CSS, unsafe_allow_html=True)
67
+
68
# Prompt skeleton filled in by generate_answer(); it exposes exactly two
# placeholders: {user_query} and {document_context}.
PROMPT_TEMPLATE = """
You are an expert research assistant. Use the provided context to answer the query.
If unsure, state that you don't know. Be concise and factual (max 3 sentences).

Query: {user_query}
Context: {document_context}
Answer:
"""

# Directory prefix that uploaded PDFs are written under (note trailing slash).
PDF_STORAGE_PATH = 'document_store/pdfs/'

# One Ollama model tag is used for both embedding and text generation.
_OLLAMA_MODEL_TAG = "deepseek-r1:1.5b"

EMBEDDING_MODEL = OllamaEmbeddings(model=_OLLAMA_MODEL_TAG)
DOCUMENT_VECTOR_DB = InMemoryVectorStore(EMBEDDING_MODEL)
LANGUAGE_MODEL = OllamaLLM(model=_OLLAMA_MODEL_TAG)
80
+
81
+
82
def save_uploaded_file(uploaded_file, storage_dir=None):
    """Write an uploaded file to disk and return the path it was saved at.

    Args:
        uploaded_file: Object exposing ``name`` (the client-supplied file
            name) and ``getbuffer()`` (the raw bytes), as used below.
        storage_dir: Directory to write into. Defaults to the module-level
            ``PDF_STORAGE_PATH`` when omitted.

    Returns:
        The path of the written file as a string.

    Raises:
        ValueError: If the supplied name contains no usable file-name part.
    """
    import os

    if storage_dir is None:
        storage_dir = PDF_STORAGE_PATH

    # The name comes from the client, so keep only its final component;
    # otherwise "../x.pdf" or an absolute path could escape storage_dir.
    # Backslashes are normalised first so Windows-style paths are also cut.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    # The storage directory may not exist yet on a fresh checkout.
    os.makedirs(storage_dir, exist_ok=True)

    file_path = os.path.join(storage_dir, safe_name)
    with open(file_path, "wb") as file:
        file.write(uploaded_file.getbuffer())
    return file_path
87
+
88
def load_pdf_documents(file_path):
    """Load the PDF at ``file_path`` and return the loader's result.

    The file is read with ``PDFPlumberLoader``; whatever its ``load()``
    returns (presumably a list of documents) is passed back unchanged.
    """
    return PDFPlumberLoader(file_path).load()
91
+
92
def chunk_documents(raw_documents):
    """Split ``raw_documents`` into overlapping chunks.

    Uses a ``RecursiveCharacterTextSplitter`` configured for chunks of 1000
    with an overlap of 200, with ``add_start_index`` enabled. Returns the
    result of ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        add_start_index=True,
        chunk_overlap=200,
        chunk_size=1000,
    )
    chunks = splitter.split_documents(raw_documents)
    return chunks
99
+
100
def index_documents(document_chunks):
    """Add ``document_chunks`` to the module-level vector store.

    Args:
        document_chunks: Chunks to embed and store in ``DOCUMENT_VECTOR_DB``.

    Returns:
        None. An empty or ``None`` input is a no-op.
    """
    # Nothing to embed: skip the call so the embedding backend is never
    # asked to process an empty batch.
    if not document_chunks:
        return
    DOCUMENT_VECTOR_DB.add_documents(document_chunks)
102
+
103
def find_related_documents(query):
    """Return the vector store's similarity-search hits for ``query``.

    The search runs against ``DOCUMENT_VECTOR_DB`` with its default settings.
    """
    matches = DOCUMENT_VECTOR_DB.similarity_search(query)
    return matches
105
+
106
def generate_answer(user_query, context_documents):
    """Answer ``user_query`` using the text of ``context_documents``.

    The ``page_content`` of every context document is joined with blank
    lines, substituted into ``PROMPT_TEMPLATE`` together with the query, and
    the resulting prompt is piped into ``LANGUAGE_MODEL``. Returns whatever
    the chain's ``invoke`` returns.
    """
    joined_context = "\n\n".join(doc.page_content for doc in context_documents)
    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    chain = prompt | LANGUAGE_MODEL
    template_values = {
        "user_query": user_query,
        "document_context": joined_context,
    }
    return chain.invoke(template_values)
111
+
112
+
113
+ # UI Configuration
114
+
115
+
116
# Page header.
st.title("📘 SmartDoc AI")
st.markdown("### AI-Powered Document Assistant")
st.markdown("---")

# File Upload Section
uploaded_pdf = st.file_uploader(
    "Upload Research Document (PDF)",
    type="pdf",
    help="Select a PDF document for analysis",
    accept_multiple_files=False,
)

if uploaded_pdf:
    # Persist the upload, extract its text, split it and index the chunks.
    saved_path = save_uploaded_file(uploaded_pdf)
    raw_docs = load_pdf_documents(saved_path)
    processed_chunks = chunk_documents(raw_docs)

    if not processed_chunks:
        # No chunks means nothing to search (e.g. no text could be
        # extracted); say so instead of reporting success and then
        # answering without any document context.
        st.warning("No text could be extracted from this PDF. Please upload a different document.")
    else:
        index_documents(processed_chunks)

        st.success("✅ Document processed successfully! Ask your questions below.")

        user_input = st.chat_input("Enter your question about the document...")

        if user_input:
            with st.chat_message("user"):
                st.write(user_input)

            # Retrieve the most similar chunks, then ask the model.
            with st.spinner("Analyzing document..."):
                relevant_docs = find_related_documents(user_input)
                ai_response = generate_answer(user_input, relevant_docs)

            with st.chat_message("assistant", avatar="🤖"):
                st.write(ai_response)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ langchain_core
3
+ langchain_community
4
+ langchain_ollama
5
+ pdfplumber