samiee2213 committed on
Commit
a207b64
·
verified ·
1 Parent(s): 2571ddf

Upload 5 files

Browse files
views/define_query.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
def CreatePage():
    """Render the "Define Your Custom Query" page.

    The user picks the entity column of the uploaded data, names the fields
    to extract and edits a query template that uses ``{entity}`` as the
    per-row placeholder.  Pressing "Save Query Configuration" writes the
    choices to ``st.session_state`` under ``column_selection``,
    ``query_template`` and ``extraction_fields``.
    """
    st.header("Define Your Custom Query")

    data = st.session_state.get("data")
    if data is None:
        st.warning("Please upload data first! Use the 'Upload Data' section to upload your data.")
        return

    column = st.selectbox(
        "Select entity column",
        data.columns,
        help="Select the column that contains the entities for which you want to define queries.",
    )

    # This is a plain (non-f) string, so CSS braces must be single; doubled
    # braces would be sent to the browser literally and invalidate the rule.
    st.markdown(
        "<style>\n"
        'div[data-baseweb="select"] div[data-id="select"] {\n'
        "    background-color: #f0f8ff;\n"
        "}\n"
        "</style>",
        unsafe_allow_html=True,
    )

    st.subheader("Define Fields to Extract")
    num_fields = st.number_input(
        "Number of fields to extract",
        min_value=1,
        value=1,
        step=1,
        help="Specify how many fields you want to extract from each entity.",
    )

    fields = []
    for i in range(int(num_fields)):
        field = st.text_input(
            f"Field {i+1} name",
            key=f"field_{i}",
            placeholder=f"Enter field name for {i+1}",
            help="Name the field you want to extract from the entity.",
        )
        # Ignore inputs that are empty or whitespace only.
        field = field.strip()
        if field:
            fields.append(field)

    # Bound up front so the save handler never touches an undefined name.
    query_template = ""
    if fields:
        st.subheader("Query Template")
        query_template = st.text_area(
            "Enter query template (Use '{entity}' to represent each entity)",
            value=f"Find the {', '.join(fields)} for {{entity}}",
            help="You can use {entity} as a placeholder to represent each entity in the query.",
        )

        if "{entity}" in query_template:
            entity_values = data[column]
            # An empty data set has no first row to preview with.
            if len(entity_values) > 0:
                example_entity = str(entity_values.iloc[0])
                example_query = query_template.replace("{entity}", example_entity)
                st.write("### Example Query Preview")
                st.code(example_query)
            else:
                st.info("The selected column has no rows to build a preview from.")
        elif query_template:
            st.warning(
                "The query template does not contain '{entity}', so the same query will be used for every entity."
            )

    if st.button("Save Query Configuration"):
        if not fields:
            st.error("Please define at least one field to extract.")
        elif not query_template:
            st.error("Please enter a query template.")
        else:
            st.session_state["column_selection"] = column
            st.session_state["query_template"] = query_template
            st.session_state["extraction_fields"] = fields
            st.success("Query configuration saved successfully!")
views/extract_information.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from funcs.llm import LLM
3
class ExtractInformation:
    """Streamlit page that runs the saved query template for every entity.

    The page reads ``query_template``, ``column_selection`` and ``data`` from
    ``st.session_state`` and stores the extraction output under ``results``
    as a list of dicts with the keys "Entity", "Extracted Information" and
    "Search Results".
    """

    def __init__(self, llm: "LLM"):
        """Keep the LLM helper used for extraction.

        ``llm`` must provide ``refine_answer_with_searches(entity, query)``
        returning an ``(answer, search_results)`` pair.
        """
        self.llm = llm

    @staticmethod
    def _filter_results(results, search_query):
        """Return ``(position, result)`` pairs whose text contains the query.

        Matching is a case-insensitive substring test against the "Entity"
        and "Extracted Information" values; an empty query matches every
        result.  ``position`` is the 1-based index in the unfiltered list so
        headings keep their original numbering while a filter is active.
        """
        needle = search_query.lower()
        return [
            (position, result)
            for position, result in enumerate(results, start=1)
            if needle in str(result["Entity"]).lower()
            or needle in str(result["Extracted Information"]).lower()
        ]

    def _run_extraction(self, entities_column, query_template):
        """Query the LLM once per entity and store the rows in session state."""
        total = len(entities_column)
        with st.spinner("Extracting information..."):
            progress_bar = st.progress(0)
            progress_text = st.empty()
            try:
                results = []
                for done, selected_entity in enumerate(entities_column, start=1):
                    user_query = query_template.replace("{entity}", str(selected_entity))
                    final_answer, search_results = self.llm.refine_answer_with_searches(
                        selected_entity, user_query
                    )
                    results.append({
                        "Entity": selected_entity,
                        "Extracted Information": final_answer,
                        "Search Results": search_results,
                    })
                    progress_bar.progress(done / total)
                    progress_text.text(f"Processing {done}/{total} entities...")

                st.session_state["results"] = results
                st.success("Extraction completed successfully!")
            except Exception as e:
                # UI boundary: report the failure instead of crashing the app
                # and drop any previous results so stale data is not shown.
                st.error(f"An error occurred during extraction: {str(e)}")
                st.session_state.pop("results", None)
            finally:
                # Remove the progress widgets on failure as well as success.
                progress_bar.empty()
                progress_text.empty()

    def _render_results(self, results, container):
        """Draw the search box plus the compact and detailed result tabs."""
        with container:
            search_query = st.text_input("🔍 Search results", "")
            matches = self._filter_results(results, search_query)
            nothing_found = bool(search_query) and not matches

            tab1, tab2 = st.tabs(["Compact View", "Detailed View"])

            with tab1:
                for _, result in matches:
                    with st.expander(f"📋 {result['Entity']}", expanded=False):
                        st.markdown("#### Extracted Information")
                        st.write(result["Extracted Information"])
                if nothing_found:
                    st.info("No results found for your search.")

            with tab2:
                for position, result in matches:
                    st.markdown(f"### Entity {position}: {result['Entity']}")

                    col1, col2 = st.columns(2)
                    with col1:
                        st.markdown("#### 📝 Extracted Information")
                        st.info(result["Extracted Information"])
                    with col2:
                        st.markdown("#### 🔍 Search Results")
                        st.warning(result["Search Results"])

                    st.divider()
                if nothing_found:
                    st.info("No results found for your search.")

    def CreatePage(self):
        """Render the "Extract Information" page."""
        st.header("Extract Information")

        query_template = st.session_state.get("query_template")
        column_selection = st.session_state.get("column_selection")
        data = st.session_state.get("data")
        if query_template is None or column_selection is None or data is None:
            st.warning("Please upload your data and define the query template.")
            return

        # The data may have been replaced after the query was saved.
        if column_selection not in data.columns:
            st.warning(
                "The saved entity column is not present in the current data. Please define the query again."
            )
            return

        st.write("### Using Query Template:")
        st.code(query_template)

        entities_column = data[column_selection]

        col1, col2 = st.columns([2, 1])
        with col1:
            st.write("### Selected Entity Column:")
            st.dataframe(entities_column, use_container_width=True)
        with col2:
            start_button = st.button("Start Extraction", type="primary", use_container_width=True)

        # Reserved before the extraction widgets so results appear above them.
        # st.empty() holds a single element and would let the tabs replace
        # the search box, hence a multi-element container.
        results_container = st.container()

        if start_button:
            self._run_extraction(entities_column, query_template)

        results = st.session_state.get("results")
        if results:
            self._render_results(results, results_container)
views/home.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
def CreatePage():
    """Render the landing page and return ``True``.

    Draws the welcome banner, a horizontal rule and a two-by-two grid of
    feature cards.  Clicking a card's button stores the card's target page
    name in ``st.session_state.selected_page``.
    """
    banner_html = """
        <h1 style="text-align:center; color:#4CAF50; font-size: 40px;">🚀 Welcome to DataScribe</h1>
        <p style="text-align:center; font-size: 18px; color:#333;">An AI-powered information extraction tool to streamline data retrieval and analysis.</p>
    """
    st.markdown(banner_html, unsafe_allow_html=True)

    st.markdown("""---""")

    def feature_card(title, description, icon, page):
        # Narrow column for the icon, wide column for the button and blurb.
        icon_col, body_col = st.columns([1, 4])
        with icon_col:
            st.markdown(f"<div style='font-size: 40px; text-align:center;'>{icon}</div>", unsafe_allow_html=True)
        with body_col:
            clicked = st.button(f"{title}", key=title, help=description)
            if clicked:
                st.session_state.selected_page = page
            st.markdown(f"<p style='font-size: 14px; color:#555;'>{description}</p>", unsafe_allow_html=True)

    cards = (
        {
            "title": "Upload Data",
            "description": "Upload data from CSV or Google Sheets to get started with your extraction.",
            "icon": "📄",
            "page": "Upload Data",
        },
        {
            "title": "Define Custom Queries",
            "description": "Set custom search queries for each entity in your dataset for specific information retrieval.",
            "icon": "🔍",
            "page": "Define Query",
        },
        {
            "title": "Run Automated Searches",
            "description": "Execute automated web searches and extract relevant information using an AI-powered agent.",
            "icon": "🤖",
            "page": "Extract Information",
        },
        {
            "title": "View & Download Results",
            "description": "View extracted data in a structured format and download as a CSV or update Google Sheets.",
            "icon": "📊",
            "page": "View & Download",
        },
    )

    # Two cards per row, laid out left to right in declaration order.
    for row_start in range(0, len(cards), 2):
        row_slots = st.columns([1, 1])
        for slot, card in zip(row_slots, cards[row_start:row_start + 2]):
            with slot:
                feature_card(**card)

    return True
views/upload_data.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from funcs.googlesheet import get_google_sheet_data
3
+ import pandas as pd
4
+
5
def CreatePage():
    """Render the "Upload or Connect Your Data" page.

    Data comes either from one or more uploaded CSV files (concatenated
    row-wise) or from a Google Sheets range fetched with
    ``get_google_sheet_data``.  The loaded table is stored in
    ``st.session_state["data"]``.
    """
    st.header("Upload or Connect Your Data")
    data_source = st.radio("Choose data source:", ["CSV Files", "Google Sheets"])

    if data_source == "CSV Files":
        current_data = st.session_state.get("data")
        # A stored None counts as "no data", matching the check on the
        # query-definition page.
        if current_data is not None:
            st.success("Data uploaded successfully! Here is a preview:")
            st.dataframe(current_data.head(10))  # Display only the first 10 rows for a cleaner view
        else:
            uploaded_files = st.file_uploader("Upload your CSV files", type=["csv"], accept_multiple_files=True)

            # With accept_multiple_files=True the uploader returns an empty
            # list (never None) until something is uploaded, so test
            # truthiness to avoid warning before the user has done anything.
            if uploaded_files:
                dfs = []
                for uploaded_file in uploaded_files:
                    try:
                        dfs.append(pd.read_csv(uploaded_file))
                    except Exception as e:
                        # Report the bad file and carry on with the others.
                        st.error(f"Error reading file {uploaded_file.name}: {e}")

                if dfs:
                    full_data = pd.concat(dfs, ignore_index=True)
                    st.session_state["data"] = full_data
                    st.success("Data uploaded successfully! Here is a preview:")
                    st.dataframe(full_data.head(10))  # Show preview of first 10 rows
                else:
                    st.warning("No valid data found in the uploaded files.")

        if st.button("Clear Data"):
            # pop() instead of del: the button is shown even when nothing is
            # loaded, where del would raise KeyError.
            if st.session_state.pop("data", None) is not None:
                st.success("Data has been cleared!")
            else:
                st.info("There is no data to clear.")

    elif data_source == "Google Sheets":
        sheet_id = st.text_input("Enter Google Sheet ID")
        range_name = st.text_input("Enter the data range (e.g., Sheet1!A1:C100)")

        if sheet_id and range_name:
            if st.button("Fetch Data"):
                with st.spinner("Fetching data from Google Sheets..."):
                    try:
                        data = get_google_sheet_data(sheet_id, range_name)
                        st.session_state["data"] = data
                        st.success("Data fetched successfully! Here is a preview:")
                        st.dataframe(data.head(10))  # Show preview of first 10 rows
                    except Exception as e:
                        st.error(f"Error fetching data: {e}")
        else:
            st.warning("Please enter both Sheet ID and Range name before fetching data.")
54
+
views/view_and_download.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import gspread
3
+ from google.oauth2.service_account import Credentials
4
+ import pandas as pd
5
+
6
def CreatePage():
    """Render the "View & Download Results" page.

    Shows the rows stored in ``st.session_state["results"]``, offers them as
    a CSV download and can optionally write them to a worksheet of a Google
    Sheet.  The service-account key file is taken from the
    ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable when it is set;
    otherwise the original hard-coded location is used.
    """
    import os  # local import: keeps this block independent of the file header

    st.header("View & Download Results")

    results = st.session_state.get("results")
    if not results:
        st.warning("No results available to view. Please run the extraction process.")
        return

    results_df = pd.DataFrame(results)
    st.write("### Results Preview")

    # Display the results preview
    if "Extracted Information" in results_df.columns and "Search Results" in results_df.columns:
        st.dataframe(
            results_df.style.map(
                lambda val: 'background-color: #d3f4ff' if isinstance(val, str) else '',
                subset=["Extracted Information", "Search Results"],
            )
        )
    else:
        st.warning("Required columns are missing in results data.")

    # Download options: label -> columns to export (None means everything).
    download_columns = {
        "All Results": None,
        "Extracted Information": ["Entity", "Extracted Information"],
        "Web Results": ["Entity", "Search Results"],
    }
    # Only offer subsets whose columns exist, so a selection cannot raise
    # KeyError when the results have an unexpected shape.
    available_options = [
        label
        for label, wanted in download_columns.items()
        if wanted is None or set(wanted).issubset(results_df.columns)
    ]
    download_option = st.selectbox("Select data to download:", available_options)

    wanted_columns = download_columns[download_option]
    data_to_download = results_df if wanted_columns is None else results_df[wanted_columns]

    st.download_button(
        label=f"Download {download_option} as CSV",
        data=data_to_download.to_csv(index=False),
        file_name=f"{download_option.lower().replace(' ', '_')}.csv",
        mime="text/csv",
    )

    # Option to update Google Sheets
    update_option = st.selectbox(
        "Do you want to update Google Sheets?",
        ["No", "Yes"],
    )
    if update_option != "Yes":
        return

    if 'sheet_id' not in st.session_state:
        st.session_state.sheet_id = ''
    if 'range_name' not in st.session_state:
        st.session_state.range_name = ''

    # Input fields for Google Sheets ID and Range
    sheet_id = st.text_input("Enter Google Sheet ID", value=st.session_state.sheet_id)
    range_name = st.text_input("Enter Range (e.g., 'Sheet1!A1')", value=st.session_state.range_name)

    if not (sheet_id and range_name):
        st.warning("Please enter both the Sheet ID and Range name before updating.")
        return

    st.session_state.sheet_id = sheet_id
    st.session_state.range_name = range_name

    # Prepare data for update.  Missing values become empty cells because
    # NaN cannot be encoded in the JSON request body.
    sheet_ready = results_df.astype(object).where(results_df.notna(), "")
    data_to_update = [sheet_ready.columns.tolist()] + sheet_ready.values.tolist()

    # Update Google Sheets button
    if st.button("Update Google Sheet"):
        if '!' not in range_name:
            st.error("Invalid range format. Please use the format 'SheetName!Range'.")
            return

        sheet_name, cell_range = range_name.split('!', 1)
        credentials_path = os.environ.get(
            "GOOGLE_APPLICATION_CREDENTIALS",
            "/Users/sam22ridhi/Desktop/data/DataScribe/credentials/credentials.json",
        )
        try:
            scopes = ["https://www.googleapis.com/auth/spreadsheets"]
            creds = Credentials.from_service_account_file(credentials_path, scopes=scopes)
            client = gspread.authorize(creds)
            sheet = client.open_by_key(sheet_id).worksheet(sheet_name)
            # NOTE: this wipes the whole worksheet before writing.
            sheet.clear()
            # Keyword arguments: the positional order of range and values
            # differs between gspread major versions.
            sheet.update(range_name=cell_range, values=data_to_update)
            st.success("Data updated in the Google Sheet!")
        except Exception as e:
            st.error(f"Error updating Google Sheet: {e}")