suriya7 committed on
Commit
7dfa9c0
1 Parent(s): 65f6708

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -5
app.py CHANGED
@@ -5,7 +5,8 @@ from dotenv import load_dotenv
5
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
6
  from llama_index.core import Settings
7
  import os
8
- import base64
 
9
  import time
10
 
11
  # Load environment variables
@@ -46,7 +47,32 @@ def data_ingestion():
46
  storage_context = StorageContext.from_defaults()
47
  index = VectorStoreIndex.from_documents(documents,show_progress=True)
48
  index.storage_context.persist(persist_dir=PERSIST_DIR)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
 
 
 
 
 
 
 
 
 
 
50
  def handle_query(query):
51
  storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
52
  index = load_index_from_storage(storage_context)
@@ -96,12 +122,22 @@ for message in st.session_state.messages:
96
  with st.sidebar:
97
  st.title("Menu:")
98
  uploaded_file = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button")
 
99
  if st.button("Submit & Process"):
100
  with st.spinner("Processing..."):
101
- filepath = "data/saved_pdf.pdf"
102
- with open(filepath, "wb") as f:
103
- f.write(uploaded_file.getbuffer())
104
- # displayPDF(filepath) # Display the uploaded PDF
 
 
 
 
 
 
 
 
 
105
  data_ingestion() # Process PDF every time new file is uploaded
106
  st.success("Done")
107
 
 
5
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
6
  from llama_index.core import Settings
7
  import os
8
+ from youtube_transcript_api import YouTubeTranscriptApi
9
+ import shutil
10
  import time
11
 
12
  # Load environment variables
 
47
  storage_context = StorageContext.from_defaults()
48
  index = VectorStoreIndex.from_documents(documents,show_progress=True)
49
  index.storage_context.persist(persist_dir=PERSIST_DIR)
50
+
51
def _extract_video_id(youtube_video_url):
    """Return the video ID contained in a YouTube URL.

    Recognises ``watch?v=<id>`` links (with or without further query
    parameters), ``youtu.be/<id>`` short links, and ``/embed/<id>``,
    ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>`` paths.

    Raises:
        ValueError: If no video ID can be located in the URL.
    """
    from urllib.parse import parse_qs, urlparse

    url = youtube_video_url.strip()
    # Without a scheme urlparse puts the host into the path; normalise first.
    if "://" not in url:
        url = "https://" + url
    parsed = urlparse(url)

    # Read the "v" parameter by name so trailing parameters such as
    # "&t=30s" or "&list=..." never end up inside the ID.
    ids = parse_qs(parsed.query).get("v")
    if ids and ids[0]:
        return ids[0]

    segments = [part for part in parsed.path.split("/") if part]
    if parsed.netloc.lower().endswith("youtu.be") and segments:
        return segments[0]
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    raise ValueError(f"Could not find a video ID in URL: {youtube_video_url!r}")


def extract_transcript_details(youtube_video_url):
    """Fetch the transcript of a YouTube video and return it as one string.

    Args:
        youtube_video_url: Link to the video whose transcript is wanted.

    Returns:
        The ``"text"`` field of every transcript entry, each preceded by a
        single space and concatenated in order; ``None`` if the URL could
        not be parsed or the transcript could not be fetched (the error is
        shown to the user via ``st.error``).
    """
    try:
        video_id = _extract_video_id(youtube_video_url)
        transcript_text = YouTubeTranscriptApi.get_transcript(video_id)
        # Same output as the original " " + text accumulation, built in one pass.
        return "".join(" " + entry["text"] for entry in transcript_text)
    except Exception as e:
        # UI boundary: surface any failure in the app instead of crashing it.
        st.error(e)
        return None
65
 
66
def remove_old_files(directory_path="data"):
    """Empty a directory by deleting it and recreating it.

    Args:
        directory_path: Directory to clear. Defaults to ``"data"``.

    After the call the directory exists and contains no files or
    subdirectories, whether or not it existed beforehand.
    """
    # A missing directory has nothing to remove; calling rmtree on it
    # would raise FileNotFoundError.
    if os.path.isdir(directory_path):
        # Remove all files and subdirectories in the specified directory
        shutil.rmtree(directory_path)

    # Recreate an empty directory
    os.makedirs(directory_path, exist_ok=True)
75
+
76
  def handle_query(query):
77
  storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
78
  index = load_index_from_storage(storage_context)
 
122
  with st.sidebar:
123
  st.title("Menu:")
124
  uploaded_file = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button")
125
+ video_url = st.text_input("Enter Youtube Video Link: ")
126
  if st.button("Submit & Process"):
127
  with st.spinner("Processing..."):
128
+ if len(os.listdir("data")) !=0:
129
+ remove_old_files()
130
+
131
+ if uploaded_file:
132
+ filepath = "data/saved_pdf.pdf"
133
+ with open(filepath, "wb") as f:
134
+ f.write(uploaded_file.getbuffer())
135
+
136
+ if video_url:
137
+ extracted_text = extract_transcript_details(video_url)
138
+ with open("data/saved_text.txt", "w") as file:
139
+ file.write(extracted_text)
140
+
141
  data_ingestion() # Process PDF every time new file is uploaded
142
  st.success("Done")
143