Spaces:
Sleeping
Sleeping
Ayush Mangal
committed on
Commit
•
9b89f54
1
Parent(s):
e9240f6
Add resume loader
Browse files
- app.py +14 -1
- requirements.txt +2 -1
app.py
CHANGED
@@ -2,7 +2,7 @@ import streamlit as st
|
|
2 |
from langchain import PromptTemplate
|
3 |
from langchain.llms import Replicate
|
4 |
import os
|
5 |
-
from langchain.document_loaders import YoutubeLoader
|
6 |
import requests
|
7 |
import re
|
8 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
@@ -68,6 +68,19 @@ def get_query_chain():
|
|
68 |
loader = WebBaseLoader(links)
|
69 |
data = loader.load()
|
70 |
video_data.extend(data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
# print(data)
|
72 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 100, chunk_overlap = 0)
|
73 |
all_splits = text_splitter.split_documents(video_data)
|
|
|
2 |
from langchain import PromptTemplate
|
3 |
from langchain.llms import Replicate
|
4 |
import os
|
5 |
+
from langchain.document_loaders import YoutubeLoader, PyPDFLoader
|
6 |
import requests
|
7 |
import re
|
8 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
68 |
loader = WebBaseLoader(links)
|
69 |
data = loader.load()
|
70 |
video_data.extend(data)
|
71 |
+
|
72 |
+
url = 'https://huggingface.co/spaces/ayushtues/personal-assistant/resolve/main/resume.pdf'
|
73 |
+
r = requests.get(url, stream=True)
|
74 |
+
|
75 |
+
with open('resume.pdf', 'wb') as fd:
|
76 |
+
for chunk in r.iter_content(2000):
|
77 |
+
fd.write(chunk)
|
78 |
+
|
79 |
+
loader = PyPDFLoader("resume.pdf")
|
80 |
+
pages = loader.load()
|
81 |
+
video_data.extend(pages)
|
82 |
+
|
83 |
+
|
84 |
# print(data)
|
85 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 100, chunk_overlap = 0)
|
86 |
all_splits = text_splitter.split_documents(video_data)
|
requirements.txt
CHANGED
@@ -8,4 +8,5 @@ sentence_transformers
|
|
8 |
youtube-transcript-api
|
9 |
pytube
|
10 |
bs4
|
11 |
-
lxml
|
|
|
|
8 |
youtube-transcript-api
|
9 |
pytube
|
10 |
bs4
|
11 |
+
lxml
|
12 |
+
pypdf
|