engrphoenix committed
Commit 6cbadc6 · verified · 1 Parent(s): 7f034fd

Update app.py

Files changed (1)
  1. app.py +78 -65
app.py CHANGED
@@ -1,17 +1,26 @@
  import streamlit as st
  from PyPDF2 import PdfReader
- import pandas as pd
- from transformers import pipeline
- import random
+ from transformers import pipeline, AutoTokenizer, AutoModel
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ import faiss
+ import numpy as np

- # Load the Hugging Face model for text generation and summarization (FLAN-T5 or T5-Small)
+ # Load the Hugging Face model for text generation
  @st.cache_resource
  def load_text_generator():
-     return pipeline("text2text-generation", model="google/flan-t5-base")  # Efficient and professional model
+     return pipeline("text2text-generation", model="google/flan-t5-base")
+
+ # Load the Hugging Face model for embeddings
+ @st.cache_resource
+ def load_embedding_model():
+     tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
+     model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
+     return tokenizer, model

  text_generator = load_text_generator()
+ embedding_tokenizer, embedding_model = load_embedding_model()

- # Function to extract text from a PDF file
+ # Function to extract text from PDF
  def extract_pdf_content(pdf_file):
      reader = PdfReader(pdf_file)
      content = ""
@@ -19,84 +28,88 @@ def extract_pdf_content(pdf_file):
          content += page.extract_text()
      return content

- # Function to extract content from a text file
- def extract_text_file(file):
-     return file.read().decode("utf-8")
+ # Function to split content into chunks
+ def chunk_text(text, chunk_size=500):
+     words = text.split()
+     return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
+
+ # Function to compute embeddings
+ def compute_embeddings(text_chunks):
+     embeddings = []
+     for chunk in text_chunks:
+         inputs = embedding_tokenizer(chunk, return_tensors="pt", truncation=True, padding=True)
+         outputs = embedding_model(**inputs)
+         embeddings.append(outputs.pooler_output.detach().numpy()[0])
+     return np.array(embeddings)

- # Function to load a CSV file
- def read_csv_file(file):
-     df = pd.read_csv(file)
-     return df.to_string()
+ # Function to build FAISS index
+ def build_faiss_index(embeddings):
+     dimension = embeddings.shape[1]
+     index = faiss.IndexFlatL2(dimension)  # L2 distance for similarity
+     index.add(embeddings)
+     return index

- # Function to search for a topic in the extracted content
- def search_topic_in_content(content, topic):
-     sentences = content.split(".")  # Break content into sentences
-     topic_sentences = [s for s in sentences if topic.lower() in s.lower()]  # Filter sentences containing the topic
-     return ". ".join(topic_sentences) if topic_sentences else None
+ # Function to search in FAISS index
+ def search_faiss_index(index, query_embedding, text_chunks, top_k=3):
+     distances, indices = index.search(query_embedding, top_k)
+     return [(text_chunks[idx], distances[0][i]) for i, idx in enumerate(indices[0])]

- # Function to generate structured content using Hugging Face model
+ # Function to generate structured content
  def generate_professional_content(topic):
-     prompt = f"Explain '{topic}' in bullet points, highlighting the key concepts, examples, and applications in a professional manner for electrical engineering students."
+     prompt = f"Explain '{topic}' in bullet points, highlighting key concepts, examples, and applications."
      response = text_generator(prompt, max_length=300, num_return_sequences=1)
      return response[0]['generated_text']

- # Function to generate a quiz question
- def generate_quiz(topic):
-     questions = [
-         f"What is the fundamental principle of {topic}?",
-         f"Name a practical application of {topic}.",
-         f"What are the key equations associated with {topic}?",
-         f"Describe how {topic} is used in real-world scenarios.",
-         f"List common problems and solutions related to {topic}.",
-     ]
-     return random.choice(questions)
-
- # Streamlit App
- st.title("Generative AI for Electrical Engineering Education")
- st.sidebar.header("AI-Based Tutor")
+ # Function to compute query embedding
+ def compute_query_embedding(query):
+     inputs = embedding_tokenizer(query, return_tensors="pt", truncation=True, padding=True)
+     outputs = embedding_model(**inputs)
+     return outputs.pooler_output.detach().numpy()
+
+ # Streamlit app
+ st.title("Generative AI for Electrical Engineering Education with FAISS")
+ st.sidebar.header("AI-Based Tutor with Vector Search")

  # File upload section
- uploaded_file = st.sidebar.file_uploader("Upload Study Material (PDF/TXT/CSV)", type=["pdf", "txt", "csv"])
- topic = st.sidebar.text_input("Enter a topic (e.g., Newton's Third Law, DC Motors)")
+ uploaded_file = st.sidebar.file_uploader("Upload Study Material (PDF)", type=["pdf"])
+ topic = st.sidebar.text_input("Enter a topic (e.g., Newton's Third Law)")

- # Process uploaded file
- content = ""
  if uploaded_file:
-     file_type = uploaded_file.name.split(".")[-1]
+     # Extract and process file content
+     content = extract_pdf_content(uploaded_file)
+     st.sidebar.success(f"{uploaded_file.name} uploaded successfully!")

-     if file_type == "pdf":
-         content = extract_pdf_content(uploaded_file)
-     elif file_type == "txt":
-         content = extract_text_file(uploaded_file)
-     elif file_type == "csv":
-         content = read_csv_file(uploaded_file)
+     # Chunk and compute embeddings
+     chunks = chunk_text(content)
+     embeddings = compute_embeddings(chunks)

-     st.sidebar.success(f"{uploaded_file.name} uploaded successfully!")
-     st.write("**Extracted Content from File:**")
-     st.write(content[:1000] + "...")  # Display a snippet of the content
+     # Build FAISS index
+     index = build_faiss_index(embeddings)
+
+     st.write("**File Processed and Indexed for Search**")
+     st.write(f"Total chunks created: {len(chunks)}")

  # Generate study material
  if st.button("Generate Study Material"):
      if topic:
          st.header(f"Study Material: {topic}")
-         # Extract relevant content from the uploaded material
-         filtered_content = search_topic_in_content(content, topic) if content else ""
-         if filtered_content:
-             st.write("**Relevant Extracted Content from Uploaded Material:**")
-             st.write(filtered_content)
+
+         # Compute query embedding
+         query_embedding = compute_query_embedding(topic)
+
+         # Search FAISS index
+         if uploaded_file:
+             results = search_faiss_index(index, query_embedding, chunks, top_k=3)
+             st.write("**Relevant Content from Uploaded File:**")
+             for result, distance in results:
+                 st.write(f"- {result} (Similarity: {distance:.2f})")
          else:
-             st.warning("No relevant content found in the uploaded material. Generating AI-based content instead.")
-             ai_content = generate_professional_content(topic)
-             st.write("**AI-Generated Content:**")
-             st.write(ai_content)
+             st.warning("No file uploaded. Generating AI-based content instead.")
+
+         # Generate AI content
+         ai_content = generate_professional_content(topic)
+         st.write("**AI-Generated Content:**")
+         st.write(ai_content)
      else:
          st.warning("Please enter a topic!")

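A note on the new chunking step: chunk_text splits the extracted PDF text into 500-word chunks, but the embedding tokenizer is called with truncation=True, so anything past the tokenizer's model_max_length is silently dropped from each chunk's embedding. A minimal sketch of token-based chunking with overlap, assuming the same sentence-transformers/all-MiniLM-L6-v2 tokenizer (chunk_by_tokens and its parameters are illustrative, not part of this commit):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

def chunk_by_tokens(text, max_tokens=200, stride=50):
    # Encode once, then slide a window over the token ids with overlap
    # so sentences that span a boundary appear in two neighbouring chunks.
    ids = tokenizer.encode(text, add_special_tokens=False)
    chunks = []
    step = max_tokens - stride
    for start in range(0, len(ids), step):
        window = ids[start:start + max_tokens]
        chunks.append(tokenizer.decode(window))
        if start + max_tokens >= len(ids):
            break
    return chunks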
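compute_embeddings and compute_query_embedding read pooler_output from the AutoModel outputs. For sentence-transformers/all-MiniLM-L6-v2 the model card recommends attention-mask-weighted mean pooling over the token embeddings instead; a sketch of that variant, assuming the same model (the mean_pool helper is illustrative, not part of this commit):

import torch
from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

def mean_pool(texts):
    # Tokenize a batch of strings and run the encoder without gradients.
    inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    token_embeddings = outputs.last_hidden_state           # (batch, seq, dim)
    mask = inputs["attention_mask"].unsqueeze(-1).float()  # (batch, seq, 1)
    # Average only over real (non-padding) tokens.
    summed = (token_embeddings * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)
    return (summed / counts).numpy()                       # float32 array, FAISS-ready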
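build_faiss_index uses IndexFlatL2, so the values the UI labels "Similarity" are L2 distances, where smaller means more similar. If cosine similarity is preferred, a common alternative is to L2-normalize the vectors and use an inner-product index; a sketch under the assumption that embeddings arrive as a 2-D NumPy array (build_cosine_index is illustrative, not part of this commit):

import faiss
import numpy as np

def build_cosine_index(embeddings: np.ndarray) -> faiss.IndexFlatIP:
    # FAISS expects contiguous float32 arrays.
    vectors = np.ascontiguousarray(embeddings, dtype="float32")
    faiss.normalize_L2(vectors)                  # in-place L2 normalization
    index = faiss.IndexFlatIP(vectors.shape[1])  # inner product == cosine on unit vectors
    index.add(vectors)
    return index

# Query side: apply faiss.normalize_L2 to the query vector as well before index.search(...),
# so the returned scores are true cosine similarities (larger means more similar).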