Commit 679d006
Parent(s): 032080e

Files changed:
- app.py +4 -4
- config.py +1 -1
- document_analyzer.py +104 -0
- knowledge_base.py +23 -11
- knowledge_base_data/comic_relief.txt +2 -0
- knowledge_base_data/healthy_maize_remedy.txt +7 -0
- knowledge_base_data/maize_phosphorus_deficiency_remedy.txt +10 -0
- miscellaneous/offline.md +690 -0
app.py CHANGED
@@ -260,10 +260,10 @@ if __name__ == "__main__":
             else:
                 print("⚠️ Connected Mode disabled: ADK components not initialized.")
 
-
-
-
-
+            # Add the new Document Analysis UI
+            document_analysis_ui = create_document_analysis_ui()
+            interface_list.append(document_analysis_ui)
+            tab_titles.append("Document Analysis")
         else:
             print("⚠️ Farmer's Story Mode disabled: Story LLM not initialized.")
     else:
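The new tab assumes a create_document_analysis_ui() factory that returns a component to append to interface_list; that factory is not part of this commit. A rough, hypothetical sketch of such a factory, assuming the app builds its tabs with Gradio and wires in the helpers from document_analyzer.py (names and layout here are illustrative only, not the repository's actual implementation):

# hypothetical sketch only; not the repository's actual create_document_analysis_ui
import gradio as gr
from document_analyzer import analyze_pdf, query_pdf

def create_document_analysis_ui():
    with gr.Blocks() as document_analysis_ui:
        pdf_file = gr.File(label="Upload a PDF", file_types=[".pdf"], type="filepath")
        question = gr.Textbox(label="Ask a question about the document")
        answer = gr.Textbox(label="Answer")

        def _answer(pdf_path, q):
            # analyze_pdf returns (chain, vector_store); both are None on failure
            chain, store = analyze_pdf(pdf_path)
            if chain is None:
                return "Could not analyze the PDF."
            return query_pdf(chain, store, q)

        question.submit(_answer, inputs=[pdf_file, question], outputs=answer)
    return document_analysis_ui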
config.py CHANGED
@@ -6,7 +6,7 @@ ADAPTER_PATH = "surfiniaburger/maize-health-diagnosis-adapter"
 EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 
 # RAG and Knowledge Base
-KNOWLEDGE_BASE_PATH = "
+KNOWLEDGE_BASE_PATH = "knowledge_base_data"
 FAISS_INDEX_PATH = "faiss_index"
 
 # Model Parameters
document_analyzer.py ADDED
@@ -0,0 +1,104 @@
import os
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.chains.question_answering import load_qa_chain
from langchain_community.llms import HuggingFaceHub
import config

def analyze_pdf(file_path):
    """
    Analyzes a PDF file and returns a question-answering chain.

    Args:
        file_path: The path to the PDF file.

    Returns:
        A Langchain QA chain object.
    """
    try:
        pdf_reader = PdfReader(file_path)
        text = ""
        for page in pdf_reader.pages:
            text += page.extract_text()

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len
        )
        chunks = text_splitter.split_text(text=text)

        embeddings = HuggingFaceEmbeddings(model_name=config.EMBEDDING_MODEL_NAME)
        vector_store = FAISS.from_texts(chunks, embedding=embeddings)

        llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
        chain = load_qa_chain(llm=llm, chain_type="stuff")

        return chain, vector_store

    except Exception as e:
        print(f"Error analyzing PDF: {e}")
        return None, None

def query_pdf(chain, vector_store, query):
    """
    Queries the PDF using the QA chain.

    Args:
        chain: The Langchain QA chain object.
        vector_store: The FAISS vector store.
        query: The question to ask the PDF.

    Returns:
        The answer to the query.
    """
    try:
        docs = vector_store.similarity_search(query=query, k=3)
        answer = chain.run(input_documents=docs, question=query)
        return answer
    except Exception as e:
        print(f"Error querying PDF: {e}")
        return "Sorry, I couldn't find an answer to your question in the PDF."

import pandas as pd
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain_community.llms import HuggingFaceHub

def analyze_spreadsheet(file_path):
    """
    Analyzes a spreadsheet file and returns a question-answering agent.

    Args:
        file_path: The path to the spreadsheet file.

    Returns:
        A Langchain agent object.
    """
    try:
        df = pd.read_csv(file_path)
        llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
        agent = create_pandas_dataframe_agent(llm, df, verbose=True)
        return agent
    except Exception as e:
        print(f"Error analyzing spreadsheet: {e}")
        return None

def query_spreadsheet(agent, query):
    """
    Queries the spreadsheet using the agent.

    Args:
        agent: The Langchain agent object.
        query: The question to ask the spreadsheet.

    Returns:
        The answer to the query.
    """
    try:
        answer = agent.run(query)
        return answer
    except Exception as e:
        print(f"Error querying spreadsheet: {e}")
        return "Sorry, I couldn't find an answer to your question in the spreadsheet."
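A minimal usage sketch for the new module (not part of the commit): the file paths and question strings are placeholders, and HuggingFaceHub expects a Hugging Face API token to be available in the environment.

# sketch: exercising document_analyzer.py directly (hypothetical file paths)
from document_analyzer import analyze_pdf, query_pdf, analyze_spreadsheet, query_spreadsheet

# PDF question answering: analyze_pdf returns (chain, vector_store)
chain, vector_store = analyze_pdf("example_report.pdf")
if chain is not None:
    print(query_pdf(chain, vector_store, "What fertilizer is recommended?"))

# Spreadsheet question answering via a pandas dataframe agent (CSV input)
agent = analyze_spreadsheet("example_yields.csv")
if agent is not None:
    print(query_spreadsheet(agent, "What is the average yield?"))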
knowledge_base.py CHANGED
@@ -15,18 +15,30 @@ def get_retriever():
     try:
         embeddings = HuggingFaceEmbeddings(model_name=config.EMBEDDING_MODEL_NAME)
 
+        # Force rebuild of the FAISS index
         if os.path.exists(config.FAISS_INDEX_PATH):
-            print(f"
-
-
-
-
-
-
-
-
-
-
+            print(f"🗑️ Deleting existing FAISS index from {config.FAISS_INDEX_PATH}...")
+            import shutil
+            shutil.rmtree(config.FAISS_INDEX_PATH)
+
+        print(f"⚠️ Building a new FAISS index from all files in {config.KNOWLEDGE_BASE_PATH}...")
+
+        documents = []
+        data_path = config.KNOWLEDGE_BASE_PATH
+        for file_name in os.listdir(data_path):
+            file_path = os.path.join(data_path, file_name)
+            if os.path.isfile(file_path) and file_name.endswith('.txt'):
+                print(f"  - Loading {file_name}...")
+                loader = TextLoader(file_path)
+                documents.extend(loader.load())
+
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+        docs = text_splitter.split_documents(documents)
+
+        print("\n✨ Creating new FAISS index...")
+        db = FAISS.from_documents(docs, embeddings)
+        db.save_local(config.FAISS_INDEX_PATH)
+        print(f"✅ New FAISS index built and saved to {config.FAISS_INDEX_PATH}.")
 
         retriever = db.as_retriever(search_kwargs={"k": 1})
         print("✅ RAG knowledge base and retriever created successfully!")
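With this change, get_retriever() always rebuilds the FAISS index from every .txt file under KNOWLEDGE_BASE_PATH and returns a top-1 retriever. A minimal sketch of querying it, assuming get_retriever() returns the retriever object (only the hunk is shown above); older LangChain versions would call get_relevant_documents() instead of invoke():

# sketch: querying the rebuilt knowledge base
from knowledge_base import get_retriever

retriever = get_retriever()
if retriever is not None:
    # search_kwargs={"k": 1} above means one best-matching chunk is returned
    docs = retriever.invoke("How do I treat phosphorus deficiency in maize?")
    for doc in docs:
        print(doc.metadata.get("source"), doc.page_content[:200])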
knowledge_base_data/comic_relief.txt ADDED
@@ -0,0 +1,2 @@
"Wetin My Eye See So?"
"Ah! Oga/Madam farmer, this one pass my power o. I don look this picture soteh my eye dey turn. E be like say this thing no be corn at all o, or maybe na some new style of corn wey dem just invent for another planet. Abeg, you fit try another picture? Make my brain no go knock before you come back. No vex!"
knowledge_base_data/healthy_maize_remedy.txt ADDED
@@ -0,0 +1,7 @@
"So your farm don produce beta corn? Oya, make we see different ways you fit enjoy am with your family. Corn no be for roasting alone!"
"Better Ways to Enjoy Your Healthy Corn (Maize)"
"Agbado (Roasted Corn) and Ube (Pear):Dis one na classic street food for Naija. Just roast your fresh corn over charcoal fire until e brown small. Eat am with soft pear wey you don roast small or boil inside hot water. Di sweetness of di corn and di creamy pear na match made in heaven!"
"Boiled Corn: Simple and sweet. Just remove di husk (di green leaf), put di corn inside pot with water and small salt. Cook am until di corn soft. You fit chop am like dat or with coconut. E dey very sweet and filling."
"Pap (Akamu or Ogi): For dis one, you go need dry corn. Soak di corn for water for like two or three days until e soft. Grind am well well into a paste. Use clean cloth or sieve to separate di smooth paste from di chaff (di rough part). Allow di smooth paste to siddon and ferment small for one day. To prepare am, just mix small of di paste with cold water, then pour hot water on top and stir fast fast until e thick. Enjoy am with akara, moin moin, or milk and sugar."
"Tuwo Masara:This na like swallow for northern people. You go grind dry corn into a fine powder (corn flour). Put water for pot and make e boil. Mix small of di corn flour with cold water to make a paste, then pour am inside di boiling water and stir well. As e de thick, de add more of di dry flour small small and de turn am with turning stick until e strong like semo or eba. Serve am with any soup like Miyan Kuka or Miyan Taushe."
"Egusi and Corn Soup:You fit add fresh corn to your egusi soup! When you don fry your egusi finish and add your meat and fish, just cut fresh corn from di cob and pour am inside di soup. Allow am to cook for like 10-15 minutes. Di sweetness of di corn go make your egusi soup taste different and special."
knowledge_base_data/maize_phosphorus_deficiency_remedy.txt ADDED
@@ -0,0 +1,10 @@
"How You Fit Solve Phosphorus Problem for Your Corn (Maize)"
"If your corn leaves de turn purple or dark green, especially when di plant still small, e fit be say phosphorus no reach am. Phosphorus be like power food for di plant root and for making seed."
"Wetin You Go Do Sharp Sharp (Short-Term Solution)"
"Bone Meal: Go market, buy bone meal. Na ground-up animal bone and e full with phosphorus. Sprinkle small quantity around di base of your corn plant and mix am small with di soil. No let am touch di plant stem direct."
"Fish Fertilizer (Fish Tea): If you fit get fish head or bones, soak dem inside water for some days. Di water go turn to strong fertilizer. Mix one cup of this fish tea with ten cups of plain water, and use am water your corn one time in a week."
"Wetin You Go Do for Future Planting (Long-Term Solution)"
"Chicken Manure (Fowl Yansh): Before you plant next time, make sure you add well-decayed chicken manure to your soil. Fowl yansh get plenty phosphorus. No use fresh one, e dey too strong and e go burn your plant. Make sure e don dry well well."
"Plant Legumes: Plant beans (cowpea) or groundnut for di land before you plant corn again. Dis plants de help make di soil rich and e go help free up phosphorus for di next crop."
"Check Your Soil pH: Sometimes, di phosphorus dey inside di soil but di soil too strong (acidic) for di plant to chop am. You fit add small wood ash to di soil before you plant. E go help balance di soil and make di phosphorus available for di corn."
"Remember, small small na im dem de take chop hot soup. Start with small quantity of fertilizer, watch your plant, and add more if you need am."
miscellaneous/offline.md ADDED
@@ -0,0 +1,690 @@
import streamlit as st
import os
import pandas as pd

os.environ["TOKENIZERS_PARALLELISM"] = "false"
import subprocess
import numpy as np

from database import check_if_indexed
from create_index import create_initial_index as build_secure_index
from search import search as secure_search
from ingest_document import ingest_pdf
import streamlit as st

st.title("Aura-Mind: Your Offline AI Farming Companion")

# --- Knowledge Base Management ---
with st.sidebar:
    st.header("Knowledge Base")
    if st.button("Rebuild Initial Knowledge Base"):
        with st.spinner("Deleting old base and building new one..."):
            docs = {
                "Healthy Maize Plant": "For a Healthy Maize Plant, ensure proper watering and sunlight. No special remedy is needed. Continue good farming practices.",
                "Maize Phosphorus Deficiency": "Phosphorus deficiency in maize is characterized by stunted growth and purplish discoloration of leaves. To remedy this, apply a phosphorus-rich fertilizer like DAP (Di-Ammonium Phosphate) or bone meal to the soil. Follow package instructions for application rates."
            }
            create_initial_index(docs)
            st.success("Initial knowledge base rebuilt!")

    st.markdown("---")
    st.subheader("Add Your Own Knowledge")
    uploaded_pdf = st.file_uploader("Upload a PDF document", type="pdf")
    if uploaded_pdf is not None:
        # Save the uploaded file temporarily to pass its path
        temp_file_path = os.path.join(".", uploaded_pdf.name)
        with open(temp_file_path, "wb") as f:
            f.write(uploaded_pdf.getbuffer())

        with st.spinner(f"Ingesting '{uploaded_pdf.name}'... This may take a while for large documents."):
            ingest_pdf(temp_file_path, uploaded_pdf.name)

        st.success(f"Successfully added '{uploaded_pdf.name}' to your knowledge base!")
        # Clean up the temporary file
        os.remove(temp_file_path)


# Check if the index exists. If not, offer to build it.
if not check_if_indexed():
    st.warning("Local knowledge base not found. Please build it from the sidebar to enable recommendations.")
    if st.button("Build Local Knowledge Base"):
        document_files = ["healthy_maize_remedy.txt", "maize_phosphorus_deficiency_remedy.txt", "comic_relief.txt"]
        documents_content = []
        for file_path in document_files:
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    documents_content.append(f.read())
            except FileNotFoundError:
                st.error(f"Required file not found: {file_path}")

        with st.spinner("Building secure index... This may take a moment."):
            build_secure_index(documents_content)
        st.success("Secure knowledge base built successfully!")
        st.rerun()

# --- Performance Tracking Setup ---
# Initialize session state for storing performance metrics if it doesn't exist.
if 'vlm_performance_data' not in st.session_state:
    st.session_state.vlm_performance_data = []
if 'tts_performance_data' not in st.session_state:
    st.session_state.tts_performance_data = []

# Audio input
audio_file = st.audio_input("Record your audio message")

# Image input: upload only (no webcam)
uploaded_image = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])

# Save files if provided
image_path = None

if uploaded_image:
    image_path = "user_image.png"
    with open(image_path, "wb") as f:
        f.write(uploaded_image.getbuffer())
    st.image(uploaded_image)

# Model inference
if st.button("Run Model") and audio_path and image_path:
    import mlx.core as mx
    import gc
    from mlx_vlm import load, generate
    from mlx_vlm.prompt_utils import apply_chat_template

    model_path = "./finetuned_model_for_conversion"
    try:
        model, processor = load(model_path)
        config = model.config

        prompt = "Classify the condition of the maize plant. Choose from Healthy Maize Plant, Maize Phosphorus Deficiency."
        formatted_prompt = apply_chat_template(
            processor, config, prompt,
            num_images=1,
        )

        output = generate(
            model,
            processor,
            formatted_prompt,
            image=[image_path],
            max_tokens=20,
            verbose=True
        )

        # --- Capture VLM Performance ---
        vlm_stats = {
            "Prompt Tokens": output.prompt_tokens,
            "Generation Tokens": output.generation_tokens,
            "Prompt TPS": output.prompt_tps,
            "Generation TPS": output.generation_tps,
            "Peak Memory (GB)": output.peak_memory
        }
        st.session_state.vlm_performance_data.append(vlm_stats)

        st.markdown("### Diagnosis")
        st.write(output.text)

        query = output.text.strip()
        search_results = secure_search(query, k=3)

        rag_text_for_display = None
        tts_text = query  # Default to VLM output if no remedy is found

        if search_results:
            for result in search_results:
                st.markdown("### Recommended Actions")
                if result['type'] == 'text':
                    st.markdown(result['content'])
                    st.caption(f"Source: Text from page {result['page']}")
                elif result['type'] == 'image':
                    st.image(result['content'], caption=f"Source: Image from page {result['page']}")
        else:
            st.warning("No relevant information found in your local knowledge base.")

        # --- Memory Cleanup ---
        # Explicitly delete the large vision model and processor to free up
        # memory before loading the TTS model. This is crucial on systems
        # with limited RAM to prevent crashes.
        with st.spinner("Clearing vision model from memory..."):
            del model
            del processor
            gc.collect()

        # --- Text-to-Speech Generation ---
        st.markdown("### Generated Speech")
        try:
            # Get the absolute path to the project directory for robust pathing
            project_root = os.path.dirname(os.path.abspath(__file__))
            # In the Docker container, the TTS virtual environment is at a fixed path.
            tts_env_python = "/app/venv_tts/bin/python"
            tts_script = os.path.join(project_root, "tts_service", "run_tts_service.py")

            # IMPORTANT: Replace with the actual path to your downloaded model
            # Make model path absolute to avoid ambiguity in the subprocess.
            tts_model_path = os.path.join(project_root, "orpheus-3b-pidgin-voice-v1")

            # Check if the model path exists
            if not os.path.exists(tts_env_python):
                st.error("TTS virtual environment not found. Please run the setup instructions in Step 3.")
            elif not os.path.exists(tts_model_path):
                st.error(f"TTS model not found at path: {tts_model_path}")
                st.info("Please run `python3 download_model.py` to download the TTS model.")
            else:
                # Make output path absolute to ensure we know where to find it.
                speech_output_path = os.path.join(project_root, "generated_speech.wav")

                # Sanitize the text for the TTS model by replacing newlines with spaces.
                # This prevents errors with models that can't handle multi-line input.
                sanitized_tts_text = tts_text.replace('\n', ' ')

                # --- Call the TTS script in the separate environment ---
                command = [
                    tts_env_python,
                    tts_script,
                    "--text", sanitized_tts_text,
                    "--model-path", tts_model_path,
                    "--output-path", speech_output_path
                ]
                with st.spinner("Generating speech..."):
                    result = subprocess.run(command, capture_output=True, text=True, check=False)

                # --- Capture TTS Performance ---
                # Extract performance metrics from the TTS script's stdout.
                tts_log = result.stdout
                if tts_log:
                    try:
                        # Example of parsing: "Generation Speed: 123.45 tokens/sec"
                        speed_line = [line for line in tts_log.split('\n') if "Generation Speed" in line]
                        if speed_line:
                            tts_speed = float(speed_line[0].split(':')[1].strip().split()[0])
                            st.session_state.tts_performance_data.append({"Generation Speed (tokens/sec)": tts_speed})
                    except (IndexError, ValueError) as e:
                        st.warning(f"Could not parse TTS performance data: {e}")

                if result.returncode == 0:
                    # The TTS script appends `_000` to the filename. We need to account for that.
                    base, ext = os.path.splitext(speech_output_path)
                    actual_speech_path = f"{base}_000{ext}"

                    # Check if the file was actually created before trying to open it.
                    if os.path.exists(actual_speech_path):
                        # Read the generated audio file into a bytes object
                        # to prevent race conditions with Streamlit's file handling.
                        with open(actual_speech_path, "rb") as audio_file:
                            audio_bytes = audio_file.read()
                        st.audio(audio_bytes, format="audio/wav")
                        st.success("Speech generated successfully!")
                        if result.stdout:
                            with st.expander("See TTS Log"):
                                st.code(result.stdout)
                    else:
                        st.error("Generated speech file not found. The TTS script might have failed silently.")
                        st.code(f"Expected file at: {actual_speech_path}")
                        st.code(f"TTS Service stdout:\n{result.stdout}")
                        st.code(f"TTS Service stderr:\n{result.stderr}")
                else:
                    st.error("An error occurred during speech generation.")
                    st.code(f"TTS Service Error:\n{result.stderr}")

        except Exception as e:
            st.error(f"An error occurred during speech generation: {e}")
    except FileNotFoundError:
        st.error(f"Error: Model not found at path '{model_path}'.")
    except Exception as e:
        st.error(f"An error occurred: {e}")
else:
    st.info("Please record audio and provide an image to run the model.")

# --- Performance Dashboard ---
st.sidebar.title("On-Device Performance Dashboard")

if st.session_state.vlm_performance_data:
    st.sidebar.markdown("### Vision & Language Model (VLM) Performance")
    vlm_df = pd.DataFrame(st.session_state.vlm_performance_data)
    st.sidebar.dataframe(vlm_df)

    st.sidebar.markdown("**VLM Performance Over Time**")
    st.sidebar.line_chart(vlm_df[["Prompt TPS", "Generation TPS"]])
    st.sidebar.line_chart(vlm_df[["Peak Memory (GB)"]])

if st.session_state.tts_performance_data:
    st.sidebar.markdown("### Text-to-Speech (TTS) Performance")
    tts_df = pd.DataFrame(st.session_state.tts_performance_data)
    st.sidebar.dataframe(tts_df)

    st.sidebar.markdown("**TTS Performance Over Time**")
    st.sidebar.line_chart(tts_df)

if st.sidebar.button("Clear Performance Data"):
    st.session_state.vlm_performance_data = []
    st.session_state.tts_performance_data = []
    st.rerun()


--------------------------------------------------

# create_index.py

import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
import os

from database import init_db, get_db_connection, INDEX_FILE, DB_FILE, delete_database_and_index
from security import encrypt_data

# Use a CLIP model that can handle both text and images
MODEL_NAME = 'clip-ViT-B-32'

def create_initial_index(documents_dict):
    """
    Creates an initial encrypted, persistent index from a dictionary of text documents.
    This will delete any existing database to ensure a clean start.
    """
    print("Performing a clean rebuild of the knowledge base...")
    delete_database_and_index()
    init_db()

    conn = get_db_connection()
    cursor = conn.cursor()
    model = SentenceTransformer(MODEL_NAME)

    all_chunks = []
    all_embeddings = []

    for name, content in documents_dict.items():
        # Add document to documents table
        cursor.execute("INSERT INTO documents (name) VALUES (?)", (name,))
        doc_id = cursor.lastrowid

        # For initial docs, we treat the whole content as one chunk
        chunk_text = content
        all_chunks.append((doc_id, 'text', encrypt_data(chunk_text.encode('utf-8')), 1))

        # Create text embedding
        text_embedding = model.encode([chunk_text])
        all_embeddings.append(text_embedding)

    # Batch insert chunks
    cursor.executemany(
        "INSERT INTO chunks (doc_id, content_type, encrypted_content, page_num) VALUES (?, ?, ?, ?)",
        all_chunks
    )
    conn.commit()
    conn.close()

    if not all_embeddings:
        print("No content to index.")
        return

    # Create and save the FAISS index
    embeddings_np = np.vstack(all_embeddings).astype('float32')
    dimension = embeddings_np.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings_np)
    faiss.write_index(index, INDEX_FILE)

    print(f"Initial encrypted index created with {len(all_chunks)} chunks.")
    print(f"Database: {DB_FILE}, FAISS Index: {INDEX_FILE}")


if __name__ == '__main__':
    document_files = ["healthy_maize_remedy.txt", "maize_phosphorus_deficiency_remedy.txt", "comic_relief.txt"]
    documents_content = []
    for file_path in document_files:
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                documents_content.append(f.read())
        except FileNotFoundError:
            print(f"Warning: File not found, skipping: {file_path}")

    create_initial_index(documents_content)
--------------------------------------------------
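Note that create_initial_index() iterates documents_dict.items(), so it expects a mapping of document name to text, which is exactly what the Streamlit sidebar's "Rebuild Initial Knowledge Base" button passes. A minimal sketch of building the initial encrypted index from the bundled remedy files, assuming they sit in the working directory:

# sketch: building the initial encrypted index from the remedy files
from create_index import create_initial_index

document_files = ["healthy_maize_remedy.txt", "maize_phosphorus_deficiency_remedy.txt", "comic_relief.txt"]
documents = {}
for file_path in document_files:
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            documents[file_path] = f.read()   # key: document name, value: full text
    except FileNotFoundError:
        print(f"Warning: File not found, skipping: {file_path}")

create_initial_index(documents)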

# database.py

import sqlite3
import os

DB_FILE = "auramind_local.db"
INDEX_FILE = "auramind_faiss.index"

def init_db():
    """
    Initializes a more robust database schema for multimodal data.
    - 'documents' table tracks the source files.
    - 'chunks' table stores the individual encrypted text/image chunks.
    """
    conn = sqlite3.connect(DB_FILE)
    cursor = conn.cursor()

    # Table to track the source documents (e.g., 'healthy_maize.txt', 'user_guide.pdf')
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS documents (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT NOT NULL UNIQUE
        )
    ''')

    # Table to store each chunk of content (text or image)
    # The faiss_id will correspond to the row number in the FAISS index
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS chunks (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            doc_id INTEGER,
            content_type TEXT NOT NULL, -- 'text' or 'image'
            encrypted_content BLOB NOT NULL,
            page_num INTEGER,
            FOREIGN KEY (doc_id) REFERENCES documents (id)
        )
    ''')
    conn.commit()
    conn.close()

def get_db_connection():
    """Establishes a connection to the database."""
    conn = sqlite3.connect(DB_FILE)
    conn.row_factory = sqlite3.Row
    return conn

def check_if_indexed():
    """Checks if the initial database and index file exist."""
    # A basic check. A more robust check might query the db for content.
    return os.path.exists(DB_FILE) and os.path.exists(INDEX_FILE)

def delete_database_and_index():
    """Deletes existing db and index files for a clean rebuild."""
    if os.path.exists(DB_FILE):
        os.remove(DB_FILE)
        print(f"Removed old database: {DB_FILE}")
    if os.path.exists(INDEX_FILE):
        os.remove(INDEX_FILE)
        print(f"Removed old index: {INDEX_FILE}")

---------------------------------------------------------

# search.py

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from PIL import Image
import io

from database import get_db_connection, INDEX_FILE, check_if_indexed
from security import decrypt_data

MODEL_NAME = 'clip-ViT-B-32'

def search(query, k=1):
    """
    Searches the multimodal FAISS index. The query can be text, and the result can be text or an image.
    """
    if not check_if_indexed():
        return []

    model = SentenceTransformer(MODEL_NAME)
    index = faiss.read_index(INDEX_FILE)

    # Create an embedding for the text query
    query_embedding = model.encode([query]).astype('float32')
    distances, indices = index.search(query_embedding, k)

    results = []
    conn = get_db_connection()
    for i, faiss_id in enumerate(indices[0]):
        if faiss_id != -1:
            # The faiss_id is the row number, which corresponds to the chunk's primary key 'id'
            sql_id = int(faiss_id) + 1

            chunk_record = conn.execute('SELECT * FROM chunks WHERE id = ?', (sql_id,)).fetchone()

            if chunk_record:
                content_type = chunk_record['content_type']
                decrypted_content_bytes = decrypt_data(chunk_record['encrypted_content'])

                # Prepare content based on its type
                if content_type == 'text':
                    content = decrypted_content_bytes.decode('utf-8')
                elif content_type == 'image':
                    content = Image.open(io.BytesIO(decrypted_content_bytes))

                results.append({
                    'distance': distances[0][i],
                    'content': content,
                    'type': content_type,
                    'page': chunk_record['page_num']
                })
    conn.close()
    return results

-----------------------------------------------------------

import os
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.backends import default_backend
import base64

# In a real mobile app, this key would be securely managed by
# the Android Keystore or iOS Keychain. For this skeleton, we'll
# use an environment variable for demonstration.
SECRET_KEY = os.environ.get("AURA_MIND_SECRET_KEY", "a_default_secret_key_32_bytes_!!").encode()

if len(SECRET_KEY) != 32:
    raise ValueError("SECRET_KEY must be 32 bytes long for AES-256.")

def encrypt_data(data: bytes) -> bytes:
    """Encrypts data using AES-CBC."""
    iv = os.urandom(16)
    padder = padding.PKCS7(algorithms.AES.block_size).padder()
    padded_data = padder.update(data) + padder.finalize()

    cipher = Cipher(algorithms.AES(SECRET_KEY), modes.CBC(iv), backend=default_backend())
    encryptor = cipher.encryptor()
    encrypted_data = encryptor.update(padded_data) + encryptor.finalize()
    return iv + encrypted_data

def decrypt_data(encrypted_data_with_iv: bytes) -> bytes:
    """Decrypts data using AES-CBC."""
    iv = encrypted_data_with_iv[:16]
    encrypted_data = encrypted_data_with_iv[16:]

    cipher = Cipher(algorithms.AES(SECRET_KEY), modes.CBC(iv), backend=default_backend())
    decryptor = cipher.decryptor()
    padded_data = decryptor.update(encrypted_data) + decryptor.finalize()

    unpadder = padding.PKCS7(algorithms.AES.block_size).unpadder()
    data = unpadder.update(padded_data) + unpadder.finalize()
    return data

--------------------------------------------------
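This block (presumably security.py, given the `from security import ...` statements elsewhere) prepends the random 16-byte IV to the AES-CBC ciphertext, so decryption simply splits it back off. A minimal round-trip sketch, assuming AURA_MIND_SECRET_KEY is unset so the 32-byte default key is used:

# sketch: AES-CBC round trip with the helpers above
from security import encrypt_data, decrypt_data

plaintext = "Phosphorus deficiency remedy: apply bone meal.".encode('utf-8')
blob = encrypt_data(plaintext)           # random 16-byte IV + PKCS7-padded ciphertext
print(len(blob))                         # 16 (IV) + plaintext length rounded up to a 16-byte block
assert decrypt_data(blob) == plaintext   # the IV is split off and the padding removed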
# ingest_document.py

import faiss
from sentence_transformers import SentenceTransformer
import fitz  # PyMuPDF
from PIL import Image
import io
import numpy as np
import os

from database import get_db_connection, INDEX_FILE
from security import encrypt_data

MODEL_NAME = 'clip-ViT-B-32'

def ingest_pdf(file_path, file_name):
    """Parses a PDF, encrypts its content (text+images), and adds it to the database and FAISS index."""
    print(f"Starting ingestion for: {file_name}")
    model = SentenceTransformer(MODEL_NAME)
    conn = get_db_connection()
    cursor = conn.cursor()

    # Add document to documents table, or get its ID if it exists
    try:
        cursor.execute("INSERT INTO documents (name) VALUES (?)", (file_name,))
        doc_id = cursor.lastrowid
    except conn.IntegrityError:
        print("Document already exists in DB. Skipping doc table insert.")
        doc_id = cursor.execute("SELECT id FROM documents WHERE name=?", (file_name,)).fetchone()['id']

    doc = fitz.open(file_path)
    new_embeddings = []

    # Load existing FAISS index or create a new one
    if os.path.exists(INDEX_FILE):
        index = faiss.read_index(INDEX_FILE)
    else:
        # Get dimension from the model if index is new
        dimension = model.encode(["test"]).shape[1]
        index = faiss.IndexFlatL2(dimension)

    for page_num, page in enumerate(doc):
        # 1. Process Text
        text = page.get_text()
        if text.strip():
            encrypted_text = encrypt_data(text.encode('utf-8'))
            cursor.execute(
                "INSERT INTO chunks (doc_id, content_type, encrypted_content, page_num) VALUES (?, ?, ?, ?)",
                (doc_id, 'text', encrypted_text, page_num + 1)
            )
            text_embedding = model.encode([text])
            new_embeddings.append(text_embedding)

        # 2. Process Images
        image_list = page.get_images(full=True)
        for img_index, img in enumerate(image_list):
            xref = img[0]
            base_image = doc.extract_image(xref)
            image_bytes = base_image["image"]

            encrypted_image = encrypt_data(image_bytes)
            cursor.execute(
                "INSERT INTO chunks (doc_id, content_type, encrypted_content, page_num) VALUES (?, ?, ?, ?)",
                (doc_id, 'image', encrypted_image, page_num + 1)
            )
            pil_image = Image.open(io.BytesIO(image_bytes))
            image_embedding = model.encode(pil_image)
            new_embeddings.append(image_embedding.reshape(1, -1))

    conn.commit()
    conn.close()

    if new_embeddings:
        # Add new embeddings to the FAISS index
        embeddings_np = np.vstack(new_embeddings).astype('float32')
        index.add(embeddings_np)
        faiss.write_index(index, INDEX_FILE)
        print(f"Successfully ingested {file_name} and added {len(new_embeddings)} new chunks to the knowledge base.")
    else:
        print(f"No new content found to ingest in {file_name}.")


--------------------------------------------------------
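ingest_pdf() appends embeddings to the FAISS index in the same order it inserts rows into the chunks table, which is what lets search() map a FAISS row back to a SQLite primary key with sql_id = faiss_id + 1 (FAISS ids start at 0, AUTOINCREMENT ids at 1). A minimal end-to-end sketch with a hypothetical PDF path:

# sketch: ingest a PDF, then query the encrypted multimodal index
from ingest_document import ingest_pdf
from search import search

ingest_pdf("maize_guide.pdf", "maize_guide.pdf")   # hypothetical local file

for result in search("purple leaves on young maize", k=3):
    if result['type'] == 'text':
        print(f"page {result['page']}: {result['content'][:120]}...")
    else:
        result['content'].show()   # image chunks come back as PIL images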
maize_phosphorus_deficiency_remedy.txt

"How You Fit Solve Phosphorus Problem for Your Corn (Maize)"
"If your corn leaves de turn purple or dark green, especially when di plant still small, e fit be say phosphorus no reach am. Phosphorus be like power food for di plant root and for making seed."
"Wetin You Go Do Sharp Sharp (Short-Term Solution)"
"Bone Meal: Go market, buy bone meal. Na ground-up animal bone and e full with phosphorus. Sprinkle small quantity around di base of your corn plant and mix am small with di soil. No let am touch di plant stem direct."
"Fish Fertilizer (Fish Tea): If you fit get fish head or bones, soak dem inside water for some days. Di water go turn to strong fertilizer. Mix one cup of this fish tea with ten cups of plain water, and use am water your corn one time in a week."
"Wetin You Go Do for Future Planting (Long-Term Solution)"
"Chicken Manure (Fowl Yansh): Before you plant next time, make sure you add well-decayed chicken manure to your soil. Fowl yansh get plenty phosphorus. No use fresh one, e dey too strong and e go burn your plant. Make sure e don dry well well."
"Plant Legumes: Plant beans (cowpea) or groundnut for di land before you plant corn again. Dis plants de help make di soil rich and e go help free up phosphorus for di next crop."
"Check Your Soil pH: Sometimes, di phosphorus dey inside di soil but di soil too strong (acidic) for di plant to chop am. You fit add small wood ash to di soil before you plant. E go help balance di soil and make di phosphorus available for di corn."
"Remember, small small na im dem de take chop hot soup. Start with small quantity of fertilizer, watch your plant, and add more if you need am."

----------------------------------------------------

healthy_maize_remedy.txt

"So your farm don produce beta corn? Oya, make we see different ways you fit enjoy am with your family. Corn no be for roasting alone!"
"Better Ways to Enjoy Your Healthy Corn (Maize)"
"Agbado (Roasted Corn) and Ube (Pear):Dis one na classic street food for Naija. Just roast your fresh corn over charcoal fire until e brown small. Eat am with soft pear wey you don roast small or boil inside hot water. Di sweetness of di corn and di creamy pear na match made in heaven!"
"Boiled Corn: Simple and sweet. Just remove di husk (di green leaf), put di corn inside pot with water and small salt. Cook am until di corn soft. You fit chop am like dat or with coconut. E dey very sweet and filling."
"Pap (Akamu or Ogi): For dis one, you go need dry corn. Soak di corn for water for like two or three days until e soft. Grind am well well into a paste. Use clean cloth or sieve to separate di smooth paste from di chaff (di rough part). Allow di smooth paste to siddon and ferment small for one day. To prepare am, just mix small of di paste with cold water, then pour hot water on top and stir fast fast until e thick. Enjoy am with akara, moin moin, or milk and sugar."
"Tuwo Masara:This na like swallow for northern people. You go grind dry corn into a fine powder (corn flour). Put water for pot and make e boil. Mix small of di corn flour with cold water to make a paste, then pour am inside di boiling water and stir well. As e de thick, de add more of di dry flour small small and de turn am with turning stick until e strong like semo or eba. Serve am with any soup like Miyan Kuka or Miyan Taushe."
"Egusi and Corn Soup:You fit add fresh corn to your egusi soup! When you don fry your egusi finish and add your meat and fish, just cut fresh corn from di cob and pour am inside di soup. Allow am to cook for like 10-15 minutes. Di sweetness of di corn go make your egusi soup taste different and special."
----------------------------------------------------

comic_relief.txt

"Wetin My Eye See So?"
"Ah! Oga/Madam farmer, this one pass my power o. I don look this picture soteh my eye dey turn. E be like say this thing no be corn at all o, or maybe na some new style of corn wey dem just invent for another planet. Abeg, you fit try another picture? Make my brain no go knock before you come back. No vex!"

---------------------------------------------------