Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
import os
from typing import TypedDict

import chromadb
import pandas as pd
import streamlit as st
from fpdf import FPDF
from groq import Groq
from langgraph.graph import StateGraph
|
9 |
+
|
10 |
+
# The Groq credential is taken from the environment only; it is never hard-coded.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# A missing or empty key makes the app unusable, so report it and halt the script run.
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    st.error("Please set GROQ_API_KEY environment variable.")
    st.stop()

# Groq client used by every LLM call in this file.
try:
    client = Groq(api_key=GROQ_API_KEY)
except Exception as groq_error:
    st.error(f"Error initializing Groq API: {groq_error}")
    st.stop()

# On-disk ChromaDB store and the collection reserved for DNA analysis data.
try:
    chroma_client = chromadb.PersistentClient(path="./chromadb_store")
    collection = chroma_client.get_or_create_collection(name="dna_analysis")
except Exception as chroma_error:
    st.error(f"Error initializing ChromaDB: {chroma_error}")
    st.stop()
|
31 |
+
|
32 |
+
def load_and_preprocess(file):
    """Load an uploaded genomic data file into a pandas DataFrame.

    The reader is chosen from the file name's extension, compared
    case-insensitively so that names such as ``DATA.CSV`` are accepted:
    ``.csv`` is read as comma-separated, ``.xlsx`` as an Excel workbook and
    ``.txt`` as tab-separated text.

    Args:
        file: A file-like object exposing a ``name`` attribute (for example
            the object returned by ``st.file_uploader``).

    Returns:
        The parsed DataFrame, or ``None`` when the extension is not
        supported or reading the file raised (the error is shown via
        ``st.error`` in that case).
    """
    try:
        # Normalise once so the extension checks ignore letter case.
        filename = file.name.lower()
        if filename.endswith(".csv"):
            return pd.read_csv(file)
        if filename.endswith(".xlsx"):
            return pd.read_excel(file)
        if filename.endswith(".txt"):
            return pd.read_csv(file, delimiter="\t")
    except Exception as e:
        st.error(f"Error loading file: {e}")
    return None
|
47 |
+
|
48 |
+
def query_llm(category, data, model="llama-3.3-70b-versatile"):
    """Ask the Groq chat model to analyze DNA data under one category.

    Args:
        category: Label of the analysis category; interpolated into the prompt.
        data: The DNA data to analyze; interpolated into the prompt as text.
        model: Groq model identifier to query. Defaults to the model this
            app has always used, so existing two-argument calls are unchanged.

    Returns:
        The message content of the first completion choice, or the fixed
        string ``"Error occurred during analysis."`` when the request raised
        (the error is also shown via ``st.error``).
    """
    try:
        prompt = f"Analyze the following DNA data under the category {category}: {data}"
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=model,
            stream=False,
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        st.error(f"Error querying LLM: {e}")
        return "Error occurred during analysis."
|
66 |
+
|
67 |
+
# Define Graph for LangGraph
|
68 |
+
class DNAAnalysisState:
    """Mutable holder for one DNA dataset and the insights produced for it.

    Used by ``analyze_dna_matching``. The LangGraph pipeline below uses the
    dict-based ``DNAGraphState`` schema instead, because LangGraph derives
    its state fields from the schema's type annotations.
    """

    def __init__(self, data, results=None):
        self.data = data
        self.results = results or {}


class DNAGraphState(TypedDict):
    """State schema of the LangGraph analysis pipeline."""

    # JSON text of the uploaded dataset.
    data: str
    # Maps category name -> LLM insight text.
    results: dict


graph = StateGraph(DNAGraphState)


def _insight_update(state, category):
    """Query the LLM for *category* and return the resulting state update."""
    insights = query_llm(category, state["data"])
    # "results" declares no reducer, so a node's value replaces the previous
    # one; return the full merged mapping to keep earlier categories.
    merged = dict(state.get("results") or {})
    merged[category] = insights
    return {"results": merged}


# Define Analysis Nodes
def analyze_genomic_disorders(state):
    """Graph node: add the "Genomic Disorders" insight to the results."""
    return _insight_update(state, "Genomic Disorders")


def analyze_physical_traits(state):
    """Graph node: add the "Physical Characteristics" insight to the results."""
    return _insight_update(state, "Physical Characteristics")


def analyze_disease_risk(state):
    """Graph node: add the "Future Disease Risks" insight to the results."""
    return _insight_update(state, "Future Disease Risks")


def analyze_ancestry(state):
    """Graph node: add the "Ancestry & Heritage" insight to the results."""
    return _insight_update(state, "Ancestry & Heritage")


def analyze_dna_matching(state, second_data):
    """Analyze relationship between two DNA datasets.

    Args:
        state: A ``DNAAnalysisState`` whose ``data`` is the first dataset.
        second_data: The second dataset, interpolated into the prompt as text.

    Returns:
        The same ``state`` object, with ``results["DNA Matching"]`` set to
        the LLM's answer, or to ``"Error during comparison."`` if the
        request raised (the error is also shown via ``st.error``).
    """
    try:
        prompt = f"Compare the following two DNA datasets and determine the relationship: {state.data} and {second_data}"
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            model="llama-3.3-70b-versatile",
            stream=False,
        )
        insights = chat_completion.choices[0].message.content
        state.results["DNA Matching"] = insights
        return state
    except Exception as e:
        st.error(f"Error comparing DNA: {e}")
        state.results["DNA Matching"] = "Error during comparison."
        return state


# Add Nodes to Graph
graph.add_node("genomic_disorders", analyze_genomic_disorders)
graph.add_node("physical_traits", analyze_physical_traits)
graph.add_node("disease_risk", analyze_disease_risk)
graph.add_node("ancestry", analyze_ancestry)

# The four analyses run one after another in this fixed order.
graph.add_edge("genomic_disorders", "physical_traits")
graph.add_edge("physical_traits", "disease_risk")
graph.add_edge("disease_risk", "ancestry")

graph.set_entry_point("genomic_disorders")
graph.set_finish_point("ancestry")

# A StateGraph is only a builder; it must be compiled before it can be run.
analysis_pipeline = graph.compile()

# Streamlit UI
st.title("DNA Analysis Using AI")
uploaded_file = st.file_uploader("Upload your genomic data (CSV, XLSX, TXT)", type=["csv", "xlsx", "txt"])

if uploaded_file:
    df = load_and_preprocess(uploaded_file)
    if df is not None:
        st.dataframe(df.head())
        if st.button("Start Analysis"):
            try:
                final_state = analysis_pipeline.invoke({"data": df.to_json(), "results": {}})
                # Kept in session state so the results survive Streamlit reruns.
                st.session_state["analysis_results"] = final_state["results"]
                st.success("Analysis completed!")
            except Exception as e:
                st.error(f"Error during analysis: {e}")
    else:
        st.error("Invalid file format.")
|
148 |
+
|
149 |
+
def _pdf_safe(text):
    """Return *text* restricted to latin-1, replacing other characters with "?".

    The report uses FPDF's built-in Arial font, which cannot encode
    characters outside latin-1; LLM output often contains such characters
    (curly quotes, dashes, bullets).
    """
    return str(text).encode("latin-1", "replace").decode("latin-1")


if "analysis_results" in st.session_state:
    results = st.session_state["analysis_results"]
    # One collapsible section per analysis category.
    for category, insight in results.items():
        with st.expander(f"{category}"):
            st.write(insight)

    if st.button("Download Report as PDF"):
        pdf_path = "DNA_Analysis_Report.pdf"
        # Page building is inside the try as well, so any rendering failure
        # is reported instead of crashing the script run.
        try:
            pdf = FPDF()
            pdf.set_auto_page_break(auto=True, margin=15)
            pdf.add_page()
            pdf.set_font("Arial", size=12)
            pdf.cell(200, 10, "DNA Analysis Report", ln=True, align="C")
            # Each category starts on its own page: centred title, then the text.
            for category, insight in results.items():
                pdf.add_page()
                pdf.cell(200, 10, _pdf_safe(category), ln=True, align="C")
                pdf.multi_cell(0, 10, _pdf_safe(insight))
            pdf.output(pdf_path)
            with open(pdf_path, "rb") as f:
                st.download_button("Download PDF", f, file_name=pdf_path, mime="application/pdf")
        except Exception as e:
            st.error(f"Error creating PDF: {e}")
|
172 |
+
|
173 |
+
# DNA matching: compare two uploaded datasets with a single LLM call.
st.header("DNA Matching")
file1 = st.file_uploader("Upload First DNA Dataset", type=["csv", "xlsx", "txt"], key="file1")
file2 = st.file_uploader("Upload Second DNA Dataset", type=["csv", "xlsx", "txt"], key="file2")

if file1 and file2:
    df1 = load_and_preprocess(file1)
    df2 = load_and_preprocess(file2)
    if df1 is not None and df2 is not None:
        if st.button("Compare DNA"):
            state = DNAAnalysisState(df1.to_json())
            result = analyze_dna_matching(state, df2.to_json())
            # Kept in session state so the result survives Streamlit reruns.
            st.session_state["dna_matching_result"] = result.results["DNA Matching"]
            st.success("DNA Matching completed!")
    else:
        # Same feedback as the single-file analysis when a file cannot be loaded.
        st.error("Invalid file format.")

if "dna_matching_result" in st.session_state:
    st.subheader("DNA Matching Results")
    st.write(st.session_state["dna_matching_result"])
|