Spaces:
Build error
Build error
File size: 3,002 Bytes
98639ab |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import streamlit as st
from langchain.docstore.document import Document
from chromadb.config import Settings
from load_model import load_embedding
from load_vectors import load_from_file, load_and_split, create_and_add, load_from_web
from utils import retrieve_collections, get_chroma_client
def llm_module():
    """Placeholder for the LLM module; currently performs no work."""
    return None
def _parse_urls(raw):
    """Split a comma-separated string of URLs into a list of clean URLs.

    Double quotes are removed, surrounding whitespace is stripped and empty
    entries (e.g. from a trailing comma) are dropped.
    """
    # Split FIRST, then clean each piece: removing the commas before the
    # split would fuse every URL into a single string.
    cleaned = (piece.replace('"', "").strip() for piece in raw.split(","))
    return [url for url in cleaned if url]


def _parse_chunk_size(raw):
    """Return ``raw`` as a positive ``int``, or ``None`` if it is not one."""
    try:
        size = int(raw)
    except (TypeError, ValueError):
        return None
    return size if size > 0 else None


def _split_and_store(selected_collection, docs, chunk_size):
    """Split ``docs`` into chunks and add them to ``selected_collection``.

    ``selected_collection`` is read by its "name", "model_name" and
    "metadata" keys.
    """
    sub_docs = load_and_split(docs, chunk_size=chunk_size)
    create_and_add(
        selected_collection["name"],
        sub_docs,
        selected_collection['model_name'],
        selected_collection['metadata'],
    )
    st.write("Upload succesful")


def load_files():
    """Render the Streamlit page for managing collections and their documents.

    The page offers two modes, chosen with a radio button:

    * "Add Documents": pick one of the collections returned by
      ``retrieve_collections()``, optionally delete it, or add documents to
      it either from uploaded files or from a comma-separated list of URLs.
    * "Start new collection": create a collection with a user-supplied name
      and embedding model.

    Invalid input (non-numeric chunk size, no files/URLs, too-short
    collection name) is reported in the page instead of raising.
    """
    client = get_chroma_client()
    option = st.radio(
        "",
        options=["Add Documents", "Start new collection"],
    )

    if option == "Add Documents":
        collections = retrieve_collections()
        selected_collection = st.selectbox(
            'Add to exsisting collection or create a new one',
            collections,
        )

        # Nothing is selected when there are no collections; everything below
        # subscripts the selection, so stop here instead of failing on None.
        if not selected_collection:
            return

        if st.button('Delete Collection (⚠️ This is destructive and not reversible)'):
            client.delete_collection(name=selected_collection["name"])
            # NOTE(review): if retrieve_collections is cached, its cache
            # should be cleared here (retrieve_collections.clear()) - confirm.
            st.write("Collection deleted.")
            # Do not offer uploads into the collection that was just removed.
            return

        st.write("Selected Vectorstore:", selected_collection)
        option = st.radio(
            "",
            options=["Upload Files from Local", "Upload Files from Web"],
        )

        if option == "Upload Files from Local":
            st.write('Source Documents:')
            uploaded_files = st.file_uploader("Choose a PDF file", accept_multiple_files=True)
            raw_chunk_size = st.text_area('chunk Size:', 1000)
            if st.button('Upload'):
                chunk_size = _parse_chunk_size(raw_chunk_size)
                if chunk_size is None:
                    st.error("chunk Size must be a positive whole number.")
                elif not uploaded_files:
                    st.warning("Please choose at least one file to upload.")
                else:
                    docs = load_from_file(uploaded_files)
                    _split_and_store(selected_collection, docs, chunk_size)
        else:
            st.write('Urls of Source Documents (Comma separated):')
            raw_urls = st.text_area('Urls:', '')
            raw_chunk_size = st.text_area('chunk Size:', 1000)
            if st.button('Upload'):
                chunk_size = _parse_chunk_size(raw_chunk_size)
                urls = _parse_urls(raw_urls)
                if chunk_size is None:
                    st.error("chunk Size must be a positive whole number.")
                elif not urls:
                    st.warning("Please enter at least one URL.")
                else:
                    docs = load_from_web(urls)
                    _split_and_store(selected_collection, docs, chunk_size)
    else:
        # text_area input may carry stray whitespace/newlines; strip it so it
        # does not end up in the collection or model name.
        collection = st.text_area('Name of your new collection:', '').strip()
        model_name = st.text_area('Choose the embedding function:', "hkunlp/instructor-large").strip()
        if st.button('Create'):
            if len(collection) > 3:
                ef = load_embedding(model_name)
                metadata = {"loaded_docs": [], "Subject": "Terms Example", "model_name": ef.model_name}
                client.create_collection(collection, embedding_function=ef, metadata=metadata)
                # NOTE(review): if retrieve_collections is cached, its cache
                # should be cleared here (retrieve_collections.clear()) - confirm.
                st.write("Collection " + collection + " succesfully created.")
            else:
                # Previously a too-short name was ignored without any feedback.
                st.warning("Collection name must be longer than 3 characters.")