surfiniaburger committed
Commit 679d006 · 1 Parent(s): 032080e
app.py CHANGED
@@ -260,10 +260,10 @@ if __name__ == "__main__":
         else:
             print("⚠️ Connected Mode disabled: ADK components not initialized.")
 
-        if STORY_LLM:
-            story_mode_ui = create_story_mode_ui()
-            interface_list.append(story_mode_ui)
-            tab_titles.append("Farmer's Story Mode")
+        # Add the new Document Analysis UI
+        document_analysis_ui = create_document_analysis_ui()
+        interface_list.append(document_analysis_ui)
+        tab_titles.append("Document Analysis")
         else:
             print("⚠️ Farmer's Story Mode disabled: Story LLM not initialized.")
     else:
config.py CHANGED
@@ -6,7 +6,7 @@ ADAPTER_PATH = "surfiniaburger/maize-health-diagnosis-adapter"
 EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 
 # RAG and Knowledge Base
-KNOWLEDGE_BASE_PATH = "knowledge_base.md"
+KNOWLEDGE_BASE_PATH = "knowledge_base_data"
 FAISS_INDEX_PATH = "faiss_index"
 
 # Model Parameters
document_analyzer.py ADDED
@@ -0,0 +1,104 @@
+import os
+import pandas as pd
+from PyPDF2 import PdfReader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import FAISS
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain.chains.question_answering import load_qa_chain
+from langchain_community.llms import HuggingFaceHub
+from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
+import config
+
+def analyze_pdf(file_path):
+    """
+    Analyzes a PDF file and builds a question-answering chain over it.
+
+    Args:
+        file_path: The path to the PDF file.
+
+    Returns:
+        A tuple of (Langchain QA chain, FAISS vector store), or (None, None) on failure.
+    """
+    try:
+        pdf_reader = PdfReader(file_path)
+        text = ""
+        for page in pdf_reader.pages:
+            text += page.extract_text()
+
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=1000,
+            chunk_overlap=200,
+            length_function=len
+        )
+        chunks = text_splitter.split_text(text=text)
+
+        embeddings = HuggingFaceEmbeddings(model_name=config.EMBEDDING_MODEL_NAME)
+        vector_store = FAISS.from_texts(chunks, embedding=embeddings)
+
+        llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.5, "max_length": 512})
+        chain = load_qa_chain(llm=llm, chain_type="stuff")
+
+        return chain, vector_store
+
+    except Exception as e:
+        print(f"Error analyzing PDF: {e}")
+        return None, None
+
+def query_pdf(chain, vector_store, query):
+    """
+    Queries the PDF using the QA chain.
+
+    Args:
+        chain: The Langchain QA chain object.
+        vector_store: The FAISS vector store.
+        query: The question to ask the PDF.
+
+    Returns:
+        The answer to the query.
+    """
+    try:
+        docs = vector_store.similarity_search(query=query, k=3)
+        answer = chain.run(input_documents=docs, question=query)
+        return answer
+    except Exception as e:
+        print(f"Error querying PDF: {e}")
+        return "Sorry, I couldn't find an answer to your question in the PDF."
+
+def analyze_spreadsheet(file_path):
+    """
+    Analyzes a spreadsheet file and returns a question-answering agent.
+
+    Args:
+        file_path: The path to the spreadsheet file (CSV).
+
+    Returns:
+        A Langchain pandas dataframe agent, or None on failure.
+    """
+    try:
+        df = pd.read_csv(file_path)
+        llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.5, "max_length": 512})
+        agent = create_pandas_dataframe_agent(llm, df, verbose=True)
+        return agent
+    except Exception as e:
+        print(f"Error analyzing spreadsheet: {e}")
+        return None
+
+def query_spreadsheet(agent, query):
+    """
+    Queries the spreadsheet using the agent.
+
+    Args:
+        agent: The Langchain agent object.
+        query: The question to ask the spreadsheet.
+
+    Returns:
+        The answer to the query.
+    """
+    try:
+        answer = agent.run(query)
+        return answer
+    except Exception as e:
+        print(f"Error querying spreadsheet: {e}")
+        return "Sorry, I couldn't find an answer to your question in the spreadsheet."
knowledge_base.py CHANGED
@@ -15,18 +15,30 @@ def get_retriever():
     try:
         embeddings = HuggingFaceEmbeddings(model_name=config.EMBEDDING_MODEL_NAME)
 
+        # Force rebuild of the FAISS index
         if os.path.exists(config.FAISS_INDEX_PATH):
-            print(f" Loading existing FAISS index from {config.FAISS_INDEX_PATH}...")
-            db = FAISS.load_local(config.FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
-        else:
-            print(f"⚠️ FAISS index not found. Building a new one from {config.KNOWLEDGE_BASE_PATH}...")
-            loader = TextLoader(config.KNOWLEDGE_BASE_PATH)
-            documents = loader.load()
-            text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
-            docs = text_splitter.split_documents(documents)
-            db = FAISS.from_documents(docs, embeddings)
-            db.save_local(config.FAISS_INDEX_PATH)
-            print(f"✅ New FAISS index built and saved to {config.FAISS_INDEX_PATH}.")
+            print(f"🗑️ Deleting existing FAISS index from {config.FAISS_INDEX_PATH}...")
+            import shutil
+            shutil.rmtree(config.FAISS_INDEX_PATH)
+
+        print(f"⚠️ Building a new FAISS index from all files in {config.KNOWLEDGE_BASE_PATH}...")
+
+        documents = []
+        data_path = config.KNOWLEDGE_BASE_PATH
+        for file_name in os.listdir(data_path):
+            file_path = os.path.join(data_path, file_name)
+            if os.path.isfile(file_path) and file_name.endswith('.txt'):
+                print(f"  - Loading {file_name}...")
+                loader = TextLoader(file_path)
+                documents.extend(loader.load())
+
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+        docs = text_splitter.split_documents(documents)
+
+        print("\n✨ Creating new FAISS index...")
+        db = FAISS.from_documents(docs, embeddings)
+        db.save_local(config.FAISS_INDEX_PATH)
+        print(f"✅ New FAISS index built and saved to {config.FAISS_INDEX_PATH}.")
 
         retriever = db.as_retriever(search_kwargs={"k": 1})
         print("✅ RAG knowledge base and retriever created successfully!")
knowledge_base_data/comic_relief.txt ADDED
@@ -0,0 +1,2 @@
+"Wetin My Eye See So?"
+"Ah! Oga/Madam farmer, this one pass my power o. I don look this picture soteh my eye dey turn. E be like say this thing no be corn at all o, or maybe na some new style of corn wey dem just invent for another planet. Abeg, you fit try another picture? Make my brain no go knock before you come back. No vex!"
knowledge_base_data/healthy_maize_remedy.txt ADDED
@@ -0,0 +1,7 @@
+"So your farm don produce beta corn? Oya, make we see different ways you fit enjoy am with your family. Corn no be for roasting alone!"
+"Better Ways to Enjoy Your Healthy Corn (Maize)"
+"Agbado (Roasted Corn) and Ube (Pear):Dis one na classic street food for Naija. Just roast your fresh corn over charcoal fire until e brown small. Eat am with soft pear wey you don roast small or boil inside hot water. Di sweetness of di corn and di creamy pear na match made in heaven!"
+"Boiled Corn: Simple and sweet. Just remove di husk (di green leaf), put di corn inside pot with water and small salt. Cook am until di corn soft. You fit chop am like dat or with coconut. E dey very sweet and filling."
+"Pap (Akamu or Ogi): For dis one, you go need dry corn. Soak di corn for water for like two or three days until e soft. Grind am well well into a paste. Use clean cloth or sieve to separate di smooth paste from di chaff (di rough part). Allow di smooth paste to siddon and ferment small for one day. To prepare am, just mix small of di paste with cold water, then pour hot water on top and stir fast fast until e thick. Enjoy am with akara, moin moin, or milk and sugar."
+"Tuwo Masara:This na like swallow for northern people. You go grind dry corn into a fine powder (corn flour). Put water for pot and make e boil. Mix small of di corn flour with cold water to make a paste, then pour am inside di boiling water and stir well. As e de thick, de add more of di dry flour small small and de turn am with turning stick until e strong like semo or eba. Serve am with any soup like Miyan Kuka or Miyan Taushe."
+"Egusi and Corn Soup:You fit add fresh corn to your egusi soup! When you don fry your egusi finish and add your meat and fish, just cut fresh corn from di cob and pour am inside di soup. Allow am to cook for like 10-15 minutes. Di sweetness of di corn go make your egusi soup taste different and special."
knowledge_base_data/maize_phosphorus_deficiency_remedy.txt ADDED
@@ -0,0 +1,10 @@
+"How You Fit Solve Phosphorus Problem for Your Corn (Maize)"
+"If your corn leaves de turn purple or dark green, especially when di plant still small, e fit be say phosphorus no reach am. Phosphorus be like power food for di plant root and for making seed."
+"Wetin You Go Do Sharp Sharp (Short-Term Solution)"
+"Bone Meal: Go market, buy bone meal. Na ground-up animal bone and e full with phosphorus. Sprinkle small quantity around di base of your corn plant and mix am small with di soil. No let am touch di plant stem direct."
+"Fish Fertilizer (Fish Tea): If you fit get fish head or bones, soak dem inside water for some days. Di water go turn to strong fertilizer. Mix one cup of this fish tea with ten cups of plain water, and use am water your corn one time in a week."
+"Wetin You Go Do for Future Planting (Long-Term Solution)"
+"Chicken Manure (Fowl Yansh): Before you plant next time, make sure you add well-decayed chicken manure to your soil. Fowl yansh get plenty phosphorus. No use fresh one, e dey too strong and e go burn your plant. Make sure e don dry well well."
+"Plant Legumes: Plant beans (cowpea) or groundnut for di land before you plant corn again. Dis plants de help make di soil rich and e go help free up phosphorus for di next crop."
+"Check Your Soil pH: Sometimes, di phosphorus dey inside di soil but di soil too strong (acidic) for di plant to chop am. You fit add small wood ash to di soil before you plant. E go help balance di soil and make di phosphorus available for di corn."
+"Remember, small small na im dem de take chop hot soup. Start with small quantity of fertilizer, watch your plant, and add more if you need am."
miscellaneous/offline.md ADDED
@@ -0,0 +1,690 @@
+import streamlit as st
+import os
+import pandas as pd
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+import subprocess
+import numpy as np
+
+from database import check_if_indexed
+from create_index import create_initial_index as build_secure_index
+from search import search as secure_search
+from ingest_document import ingest_pdf
+
+st.title("Aura-Mind: Your Offline AI Farming Companion")
+
+# --- Knowledge Base Management ---
+with st.sidebar:
+    st.header("Knowledge Base")
+    if st.button("Rebuild Initial Knowledge Base"):
+        with st.spinner("Deleting old base and building new one..."):
+            docs = {
+                "Healthy Maize Plant": "For a Healthy Maize Plant, ensure proper watering and sunlight. No special remedy is needed. Continue good farming practices.",
+                "Maize Phosphorus Deficiency": "Phosphorus deficiency in maize is characterized by stunted growth and purplish discoloration of leaves. To remedy this, apply a phosphorus-rich fertilizer like DAP (Di-Ammonium Phosphate) or bone meal to the soil. Follow package instructions for application rates."
+            }
+            build_secure_index(docs)
+        st.success("Initial knowledge base rebuilt!")
+
+    st.markdown("---")
+    st.subheader("Add Your Own Knowledge")
+    uploaded_pdf = st.file_uploader("Upload a PDF document", type="pdf")
+    if uploaded_pdf is not None:
+        # Save the uploaded file temporarily to pass its path
+        temp_file_path = os.path.join(".", uploaded_pdf.name)
+        with open(temp_file_path, "wb") as f:
+            f.write(uploaded_pdf.getbuffer())
+
+        with st.spinner(f"Ingesting '{uploaded_pdf.name}'... This may take a while for large documents."):
+            ingest_pdf(temp_file_path, uploaded_pdf.name)
+
+        st.success(f"Successfully added '{uploaded_pdf.name}' to your knowledge base!")
+        # Clean up the temporary file
+        os.remove(temp_file_path)
+
+
+# Check if the index exists. If not, offer to build it.
+if not check_if_indexed():
+    st.warning("Local knowledge base not found. Please build it from the sidebar to enable recommendations.")
+    if st.button("Build Local Knowledge Base"):
+        document_files = ["healthy_maize_remedy.txt", "maize_phosphorus_deficiency_remedy.txt", "comic_relief.txt"]
+        documents_content = {}
+        for file_path in document_files:
+            try:
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    # create_initial_index expects a {name: content} mapping
+                    documents_content[file_path] = f.read()
+            except FileNotFoundError:
+                st.error(f"Required file not found: {file_path}")
+
+        with st.spinner("Building secure index... This may take a moment."):
+            build_secure_index(documents_content)
+        st.success("Secure knowledge base built successfully!")
+        st.rerun()
+
+# --- Performance Tracking Setup ---
+# Initialize session state for storing performance metrics if it doesn't exist.
+if 'vlm_performance_data' not in st.session_state:
+    st.session_state.vlm_performance_data = []
+if 'tts_performance_data' not in st.session_state:
+    st.session_state.tts_performance_data = []
+
+# Audio input
+audio_file = st.audio_input("Record your audio message")
+
+# Image input: upload only (no webcam)
+uploaded_image = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
+
+# Save files if provided
+audio_path = None
+image_path = None
+
+# Persist the recorded audio so the "Run Model" gate below has a real path.
+if audio_file:
+    audio_path = "user_audio.wav"
+    with open(audio_path, "wb") as f:
+        f.write(audio_file.getbuffer())
+
+if uploaded_image:
+    image_path = "user_image.png"
+    with open(image_path, "wb") as f:
+        f.write(uploaded_image.getbuffer())
+    st.image(uploaded_image)
+
+# Model inference
+if st.button("Run Model") and audio_path and image_path:
+    import mlx.core as mx
+    import gc
+    from mlx_vlm import load, generate
+    from mlx_vlm.prompt_utils import apply_chat_template
+
+    model_path = "./finetuned_model_for_conversion"
+    try:
+        model, processor = load(model_path)
+        config = model.config
+
+        prompt = "Classify the condition of the maize plant. Choose from Healthy Maize Plant, Maize Phosphorus Deficiency."
+        formatted_prompt = apply_chat_template(
+            processor, config, prompt,
+            num_images=1,
+        )
+
+        output = generate(
+            model,
+            processor,
+            formatted_prompt,
+            image=[image_path],
+            max_tokens=20,
+            verbose=True
+        )
+
+        # --- Capture VLM Performance ---
+        vlm_stats = {
+            "Prompt Tokens": output.prompt_tokens,
+            "Generation Tokens": output.generation_tokens,
+            "Prompt TPS": output.prompt_tps,
+            "Generation TPS": output.generation_tps,
+            "Peak Memory (GB)": output.peak_memory
+        }
+        st.session_state.vlm_performance_data.append(vlm_stats)
+
+        st.markdown("### Diagnosis")
+        st.write(output.text)
+
+        query = output.text.strip()
+        search_results = secure_search(query, k=3)
+
+        tts_text = query  # Default to VLM output if no remedy is found
+
+        if search_results:
+            st.markdown("### Recommended Actions")
+            for result in search_results:
+                if result['type'] == 'text':
+                    st.markdown(result['content'])
+                    st.caption(f"Source: Text from page {result['page']}")
+                    if tts_text == query:
+                        tts_text = result['content']  # speak the first text remedy found
+                elif result['type'] == 'image':
+                    st.image(result['content'], caption=f"Source: Image from page {result['page']}")
+        else:
+            st.warning("No relevant information found in your local knowledge base.")
+
+        # --- Memory Cleanup ---
+        # Explicitly delete the large vision model and processor to free up
+        # memory before loading the TTS model. This is crucial on systems
+        # with limited RAM to prevent crashes.
+        with st.spinner("Clearing vision model from memory..."):
+            del model
+            del processor
+            gc.collect()
+
+        # --- Text-to-Speech Generation ---
+        st.markdown("### Generated Speech")
+        try:
+            # Get the absolute path to the project directory for robust pathing
+            project_root = os.path.dirname(os.path.abspath(__file__))
+            # In the Docker container, the TTS virtual environment is at a fixed path.
+            tts_env_python = "/app/venv_tts/bin/python"
+            tts_script = os.path.join(project_root, "tts_service", "run_tts_service.py")
+
+            # IMPORTANT: Replace with the actual path to your downloaded model.
+            # Make model path absolute to avoid ambiguity in the subprocess.
+            tts_model_path = os.path.join(project_root, "orpheus-3b-pidgin-voice-v1")
+
+            # Check if the model path exists
+            if not os.path.exists(tts_env_python):
+                st.error("TTS virtual environment not found. Please run the setup instructions in Step 3.")
+            elif not os.path.exists(tts_model_path):
+                st.error(f"TTS model not found at path: {tts_model_path}")
+                st.info("Please run `python3 download_model.py` to download the TTS model.")
+            else:
+                # Make output path absolute to ensure we know where to find it.
+                speech_output_path = os.path.join(project_root, "generated_speech.wav")
+
+                # Sanitize the text for the TTS model by replacing newlines with spaces.
+                # This prevents errors with models that can't handle multi-line input.
+                sanitized_tts_text = tts_text.replace('\n', ' ')
+
+                # --- Call the TTS script in the separate environment ---
+                command = [
+                    tts_env_python,
+                    tts_script,
+                    "--text", sanitized_tts_text,
+                    "--model-path", tts_model_path,
+                    "--output-path", speech_output_path
+                ]
+                with st.spinner("Generating speech..."):
+                    result = subprocess.run(command, capture_output=True, text=True, check=False)
+
+                # --- Capture TTS Performance ---
+                # Extract performance metrics from the TTS script's stdout,
+                # e.g. a line like "Generation Speed: 123.45 tokens/sec".
+                tts_log = result.stdout
+                if tts_log:
+                    try:
+                        speed_line = [line for line in tts_log.split('\n') if "Generation Speed" in line]
+                        if speed_line:
+                            tts_speed = float(speed_line[0].split(':')[1].strip().split()[0])
+                            st.session_state.tts_performance_data.append({"Generation Speed (tokens/sec)": tts_speed})
+                    except (IndexError, ValueError) as e:
+                        st.warning(f"Could not parse TTS performance data: {e}")
+
+                if result.returncode == 0:
+                    # The TTS script appends `_000` to the filename. We need to account for that.
+                    base, ext = os.path.splitext(speech_output_path)
+                    actual_speech_path = f"{base}_000{ext}"
+
+                    # Check if the file was actually created before trying to open it.
+                    if os.path.exists(actual_speech_path):
+                        # Read the generated audio file into a bytes object
+                        # to prevent race conditions with Streamlit's file handling.
+                        with open(actual_speech_path, "rb") as speech_file:
+                            audio_bytes = speech_file.read()
+                        st.audio(audio_bytes, format="audio/wav")
+                        st.success("Speech generated successfully!")
+                        if result.stdout:
+                            with st.expander("See TTS Log"):
+                                st.code(result.stdout)
+                    else:
+                        st.error("Generated speech file not found. The TTS script might have failed silently.")
+                        st.code(f"Expected file at: {actual_speech_path}")
+                        st.code(f"TTS Service stdout:\n{result.stdout}")
+                        st.code(f"TTS Service stderr:\n{result.stderr}")
+                else:
+                    st.error("An error occurred during speech generation.")
+                    st.code(f"TTS Service Error:\n{result.stderr}")
+
+        except Exception as e:
+            st.error(f"An error occurred during speech generation: {e}")
+    except FileNotFoundError:
+        st.error(f"Error: Model not found at path '{model_path}'.")
+    except Exception as e:
+        st.error(f"An error occurred: {e}")
+else:
+    st.info("Please record audio and provide an image to run the model.")
+
+# --- Performance Dashboard ---
+st.sidebar.title("On-Device Performance Dashboard")
+
+if st.session_state.vlm_performance_data:
+    st.sidebar.markdown("### Vision & Language Model (VLM) Performance")
+    vlm_df = pd.DataFrame(st.session_state.vlm_performance_data)
+    st.sidebar.dataframe(vlm_df)
+
+    st.sidebar.markdown("**VLM Performance Over Time**")
+    st.sidebar.line_chart(vlm_df[["Prompt TPS", "Generation TPS"]])
+    st.sidebar.line_chart(vlm_df[["Peak Memory (GB)"]])
+
+if st.session_state.tts_performance_data:
+    st.sidebar.markdown("### Text-to-Speech (TTS) Performance")
+    tts_df = pd.DataFrame(st.session_state.tts_performance_data)
+    st.sidebar.dataframe(tts_df)
+
+    st.sidebar.markdown("**TTS Performance Over Time**")
+    st.sidebar.line_chart(tts_df)
+
+if st.sidebar.button("Clear Performance Data"):
+    st.session_state.vlm_performance_data = []
+    st.session_state.tts_performance_data = []
+    st.rerun()
+
+--------------------------------------------------
+
+# create_index.py
+
+import numpy as np
+import faiss
+from sentence_transformers import SentenceTransformer
+import os
+
+from database import init_db, get_db_connection, INDEX_FILE, DB_FILE, delete_database_and_index
+from security import encrypt_data
+
+# Use a CLIP model that can handle both text and images
+MODEL_NAME = 'clip-ViT-B-32'
+
+def create_initial_index(documents_dict):
+    """
+    Creates an initial encrypted, persistent index from a dictionary of text documents.
+    This will delete any existing database to ensure a clean start.
+    """
+    print("Performing a clean rebuild of the knowledge base...")
+    delete_database_and_index()
+    init_db()
+
+    conn = get_db_connection()
+    cursor = conn.cursor()
+    model = SentenceTransformer(MODEL_NAME)
+
+    all_chunks = []
+    all_embeddings = []
+
+    for name, content in documents_dict.items():
+        # Add document to documents table
+        cursor.execute("INSERT INTO documents (name) VALUES (?)", (name,))
+        doc_id = cursor.lastrowid
+
+        # For initial docs, we treat the whole content as one chunk
+        chunk_text = content
+        all_chunks.append((doc_id, 'text', encrypt_data(chunk_text.encode('utf-8')), 1))
+
+        # Create text embedding
+        text_embedding = model.encode([chunk_text])
+        all_embeddings.append(text_embedding)
+
+    # Batch insert chunks
+    cursor.executemany(
+        "INSERT INTO chunks (doc_id, content_type, encrypted_content, page_num) VALUES (?, ?, ?, ?)",
+        all_chunks
+    )
+    conn.commit()
+    conn.close()
+
+    if not all_embeddings:
+        print("No content to index.")
+        return
+
+    # Create and save the FAISS index
+    embeddings_np = np.vstack(all_embeddings).astype('float32')
+    dimension = embeddings_np.shape[1]
+    index = faiss.IndexFlatL2(dimension)
+    index.add(embeddings_np)
+    faiss.write_index(index, INDEX_FILE)
+
+    print(f"Initial encrypted index created with {len(all_chunks)} chunks.")
+    print(f"Database: {DB_FILE}, FAISS Index: {INDEX_FILE}")
+
+
+if __name__ == '__main__':
+    document_files = ["healthy_maize_remedy.txt", "maize_phosphorus_deficiency_remedy.txt", "comic_relief.txt"]
+    documents_content = {}
+    for file_path in document_files:
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                # create_initial_index expects a {name: content} mapping
+                documents_content[file_path] = f.read()
+        except FileNotFoundError:
+            print(f"Warning: File not found, skipping: {file_path}")
+
+    create_initial_index(documents_content)
+--------------------------------------------------
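One detail worth noting about the index above: clip-ViT-B-32 embeds text and images into a shared vector space, which is why a single IndexFlatL2 can later serve both kinds of chunks. A quick sanity check, assuming sentence-transformers is installed (512 is the standard CLIP ViT-B/32 embedding width):

    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer('clip-ViT-B-32')
    # Text and image embeddings share this dimensionality.
    print(model.encode(["purple maize leaves"]).shape)  # expected: (1, 512)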
+
+# database.py
+
+import sqlite3
+import os
+
+DB_FILE = "auramind_local.db"
+INDEX_FILE = "auramind_faiss.index"
+
+def init_db():
+    """
+    Initializes a more robust database schema for multimodal data.
+    - 'documents' table tracks the source files.
+    - 'chunks' table stores the individual encrypted text/image chunks.
+    """
+    conn = sqlite3.connect(DB_FILE)
+    cursor = conn.cursor()
+
+    # Table to track the source documents (e.g., 'healthy_maize.txt', 'user_guide.pdf')
+    cursor.execute('''
+        CREATE TABLE IF NOT EXISTS documents (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            name TEXT NOT NULL UNIQUE
+        )
+    ''')
+
+    # Table to store each chunk of content (text or image)
+    # The faiss_id will correspond to the row number in the FAISS index
+    cursor.execute('''
+        CREATE TABLE IF NOT EXISTS chunks (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            doc_id INTEGER,
+            content_type TEXT NOT NULL, -- 'text' or 'image'
+            encrypted_content BLOB NOT NULL,
+            page_num INTEGER,
+            FOREIGN KEY (doc_id) REFERENCES documents (id)
+        )
+    ''')
+    conn.commit()
+    conn.close()
+
+def get_db_connection():
+    """Establishes a connection to the database."""
+    conn = sqlite3.connect(DB_FILE)
+    conn.row_factory = sqlite3.Row
+    return conn
+
+def check_if_indexed():
+    """Checks if the initial database and index file exist."""
+    # A basic check. A more robust check might query the db for content.
+    return os.path.exists(DB_FILE) and os.path.exists(INDEX_FILE)
+
+def delete_database_and_index():
+    """Deletes existing db and index files for a clean rebuild."""
+    if os.path.exists(DB_FILE):
+        os.remove(DB_FILE)
+        print(f"Removed old database: {DB_FILE}")
+    if os.path.exists(INDEX_FILE):
+        os.remove(INDEX_FILE)
+        print(f"Removed old index: {INDEX_FILE}")
+
+---------------------------------------------------------
470
+
471
+
472
+ # search.py
473
+
474
+ import faiss
475
+ import numpy as np
476
+ from sentence_transformers import SentenceTransformer
477
+ from PIL import Image
478
+ import io
479
+
480
+ from database import get_db_connection, INDEX_FILE, check_if_indexed
481
+ from security import decrypt_data
482
+
483
+ MODEL_NAME = 'clip-ViT-B-32'
484
+
485
+ def search(query, k=1):
486
+ """
487
+ Searches the multimodal FAISS index. The query can be text, and the result can be text or an image.
488
+ """
489
+ if not check_if_indexed():
490
+ return []
491
+
492
+ model = SentenceTransformer(MODEL_NAME)
493
+ index = faiss.read_index(INDEX_FILE)
494
+
495
+ # Create an embedding for the text query
496
+ query_embedding = model.encode([query]).astype('float32')
497
+ distances, indices = index.search(query_embedding, k)
498
+
499
+ results = []
500
+ conn = get_db_connection()
501
+ for i, faiss_id in enumerate(indices[0]):
502
+ if faiss_id != -1:
503
+ # The faiss_id is the row number, which corresponds to the chunk's primary key 'id'
504
+ sql_id = int(faiss_id) + 1
505
+
506
+ chunk_record = conn.execute('SELECT * FROM chunks WHERE id = ?', (sql_id,)).fetchone()
507
+
508
+ if chunk_record:
509
+ content_type = chunk_record['content_type']
510
+ decrypted_content_bytes = decrypt_data(chunk_record['encrypted_content'])
511
+
512
+ # Prepare content based on its type
513
+ if content_type == 'text':
514
+ content = decrypted_content_bytes.decode('utf-8')
515
+ elif content_type == 'image':
516
+ content = Image.open(io.BytesIO(decrypted_content_bytes))
517
+
518
+ results.append({
519
+ 'distance': distances[0][i],
520
+ 'content': content,
521
+ 'type': content_type,
522
+ 'page': chunk_record['page_num']
523
+ })
524
+ conn.close()
525
+ return results
526
+
527
+ -----------------------------------------------------------
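A minimal smoke test for the retrieval path, assuming the index has already been built by create_index.py (the query string mirrors the label text the VLM emits):

    from search import search

    for hit in search("Maize Phosphorus Deficiency", k=1):
        # Text chunks come back as str, image chunks as PIL images.
        print(hit['type'], hit['distance'], str(hit['content'])[:80])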
+
+# security.py
+
+import os
+from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
+from cryptography.hazmat.primitives import padding
+from cryptography.hazmat.backends import default_backend
+import base64
+
+# In a real mobile app, this key would be securely managed by
+# the Android Keystore or iOS Keychain. For this skeleton, we'll
+# use an environment variable for demonstration.
+SECRET_KEY = os.environ.get("AURA_MIND_SECRET_KEY", "a_default_secret_key_32_bytes_!!").encode()
+
+if len(SECRET_KEY) != 32:
+    raise ValueError("SECRET_KEY must be 32 bytes long for AES-256.")
+
+def encrypt_data(data: bytes) -> bytes:
+    """Encrypts data using AES-CBC."""
+    iv = os.urandom(16)
+    padder = padding.PKCS7(algorithms.AES.block_size).padder()
+    padded_data = padder.update(data) + padder.finalize()
+
+    cipher = Cipher(algorithms.AES(SECRET_KEY), modes.CBC(iv), backend=default_backend())
+    encryptor = cipher.encryptor()
+    encrypted_data = encryptor.update(padded_data) + encryptor.finalize()
+    return iv + encrypted_data
+
+def decrypt_data(encrypted_data_with_iv: bytes) -> bytes:
+    """Decrypts data using AES-CBC."""
+    iv = encrypted_data_with_iv[:16]
+    encrypted_data = encrypted_data_with_iv[16:]
+
+    cipher = Cipher(algorithms.AES(SECRET_KEY), modes.CBC(iv), backend=default_backend())
+    decryptor = cipher.decryptor()
+    padded_data = decryptor.update(encrypted_data) + decryptor.finalize()
+
+    unpadder = padding.PKCS7(algorithms.AES.block_size).unpadder()
+    data = unpadder.update(padded_data) + unpadder.finalize()
+    return data
+
+--------------------------------------------------
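A quick round-trip check of the AES-CBC helpers above (this uses the default demo key; real deployments should set AURA_MIND_SECRET_KEY to a proper 32-byte secret):

    from security import encrypt_data, decrypt_data

    blob = encrypt_data(b"phosphorus remedy")
    # The first 16 bytes are a fresh random IV, so ciphertexts differ on every call.
    assert decrypt_data(blob) == b"phosphorus remedy"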
+# ingest_document.py
+
+import faiss
+from sentence_transformers import SentenceTransformer
+import fitz  # PyMuPDF
+from PIL import Image
+import io
+import numpy as np
+import os
+
+from database import get_db_connection, INDEX_FILE
+from security import encrypt_data
+
+MODEL_NAME = 'clip-ViT-B-32'
+
+def ingest_pdf(file_path, file_name):
+    """Parses a PDF, encrypts its content (text+images), and adds it to the database and FAISS index."""
+    print(f"Starting ingestion for: {file_name}")
+    model = SentenceTransformer(MODEL_NAME)
+    conn = get_db_connection()
+    cursor = conn.cursor()
+
+    # Add document to documents table, or get its ID if it exists
+    try:
+        cursor.execute("INSERT INTO documents (name) VALUES (?)", (file_name,))
+        doc_id = cursor.lastrowid
+    except conn.IntegrityError:
+        print("Document already exists in DB. Skipping doc table insert.")
+        doc_id = cursor.execute("SELECT id FROM documents WHERE name=?", (file_name,)).fetchone()['id']
+
+    doc = fitz.open(file_path)
+    new_embeddings = []
+
+    # Load existing FAISS index or create a new one
+    if os.path.exists(INDEX_FILE):
+        index = faiss.read_index(INDEX_FILE)
+    else:
+        # Get dimension from the model if index is new
+        dimension = model.encode(["test"]).shape[1]
+        index = faiss.IndexFlatL2(dimension)
+
+    for page_num, page in enumerate(doc):
+        # 1. Process Text
+        text = page.get_text()
+        if text.strip():
+            encrypted_text = encrypt_data(text.encode('utf-8'))
+            cursor.execute(
+                "INSERT INTO chunks (doc_id, content_type, encrypted_content, page_num) VALUES (?, ?, ?, ?)",
+                (doc_id, 'text', encrypted_text, page_num + 1)
+            )
+            text_embedding = model.encode([text])
+            new_embeddings.append(text_embedding)
+
+        # 2. Process Images
+        image_list = page.get_images(full=True)
+        for img_index, img in enumerate(image_list):
+            xref = img[0]
+            base_image = doc.extract_image(xref)
+            image_bytes = base_image["image"]
+
+            encrypted_image = encrypt_data(image_bytes)
+            cursor.execute(
+                "INSERT INTO chunks (doc_id, content_type, encrypted_content, page_num) VALUES (?, ?, ?, ?)",
+                (doc_id, 'image', encrypted_image, page_num + 1)
+            )
+            pil_image = Image.open(io.BytesIO(image_bytes))
+            image_embedding = model.encode(pil_image)
+            new_embeddings.append(image_embedding.reshape(1, -1))
+
+    conn.commit()
+    conn.close()
+
+    if new_embeddings:
+        # Add new embeddings to the FAISS index
+        embeddings_np = np.vstack(new_embeddings).astype('float32')
+        index.add(embeddings_np)
+        faiss.write_index(index, INDEX_FILE)
+        print(f"Successfully ingested {file_name} and added {len(new_embeddings)} new chunks to the knowledge base.")
+    else:
+        print(f"No new content found to ingest in {file_name}.")
+
+--------------------------------------------------------
+
+maize_phosphorus_deficiency_remedy.txt
+
+"How You Fit Solve Phosphorus Problem for Your Corn (Maize)"
+"If your corn leaves de turn purple or dark green, especially when di plant still small, e fit be say phosphorus no reach am. Phosphorus be like power food for di plant root and for making seed."
+"Wetin You Go Do Sharp Sharp (Short-Term Solution)"
+"Bone Meal: Go market, buy bone meal. Na ground-up animal bone and e full with phosphorus. Sprinkle small quantity around di base of your corn plant and mix am small with di soil. No let am touch di plant stem direct."
+"Fish Fertilizer (Fish Tea): If you fit get fish head or bones, soak dem inside water for some days. Di water go turn to strong fertilizer. Mix one cup of this fish tea with ten cups of plain water, and use am water your corn one time in a week."
+"Wetin You Go Do for Future Planting (Long-Term Solution)"
+"Chicken Manure (Fowl Yansh): Before you plant next time, make sure you add well-decayed chicken manure to your soil. Fowl yansh get plenty phosphorus. No use fresh one, e dey too strong and e go burn your plant. Make sure e don dry well well."
+"Plant Legumes: Plant beans (cowpea) or groundnut for di land before you plant corn again. Dis plants de help make di soil rich and e go help free up phosphorus for di next crop."
+"Check Your Soil pH: Sometimes, di phosphorus dey inside di soil but di soil too strong (acidic) for di plant to chop am. You fit add small wood ash to di soil before you plant. E go help balance di soil and make di phosphorus available for di corn."
+"Remember, small small na im dem de take chop hot soup. Start with small quantity of fertilizer, watch your plant, and add more if you need am."
+
+----------------------------------------------------
+
+healthy_maize_remedy.txt
+
+"So your farm don produce beta corn? Oya, make we see different ways you fit enjoy am with your family. Corn no be for roasting alone!"
+"Better Ways to Enjoy Your Healthy Corn (Maize)"
+"Agbado (Roasted Corn) and Ube (Pear):Dis one na classic street food for Naija. Just roast your fresh corn over charcoal fire until e brown small. Eat am with soft pear wey you don roast small or boil inside hot water. Di sweetness of di corn and di creamy pear na match made in heaven!"
+"Boiled Corn: Simple and sweet. Just remove di husk (di green leaf), put di corn inside pot with water and small salt. Cook am until di corn soft. You fit chop am like dat or with coconut. E dey very sweet and filling."
+"Pap (Akamu or Ogi): For dis one, you go need dry corn. Soak di corn for water for like two or three days until e soft. Grind am well well into a paste. Use clean cloth or sieve to separate di smooth paste from di chaff (di rough part). Allow di smooth paste to siddon and ferment small for one day. To prepare am, just mix small of di paste with cold water, then pour hot water on top and stir fast fast until e thick. Enjoy am with akara, moin moin, or milk and sugar."
+"Tuwo Masara:This na like swallow for northern people. You go grind dry corn into a fine powder (corn flour). Put water for pot and make e boil. Mix small of di corn flour with cold water to make a paste, then pour am inside di boiling water and stir well. As e de thick, de add more of di dry flour small small and de turn am with turning stick until e strong like semo or eba. Serve am with any soup like Miyan Kuka or Miyan Taushe."
+"Egusi and Corn Soup:You fit add fresh corn to your egusi soup! When you don fry your egusi finish and add your meat and fish, just cut fresh corn from di cob and pour am inside di soup. Allow am to cook for like 10-15 minutes. Di sweetness of di corn go make your egusi soup taste different and special."
+
+----------------------------------------------------
+
+comic_relief.txt
+
+"Wetin My Eye See So?"
+"Ah! Oga/Madam farmer, this one pass my power o. I don look this picture soteh my eye dey turn. E be like say this thing no be corn at all o, or maybe na some new style of corn wey dem just invent for another planet. Abeg, you fit try another picture? Make my brain no go knock before you come back. No vex!"
+
+---------------------------------------------------