Spaces:
Build error
Build error
import streamlit as st | |
import latex2markdown | |
from langchain.docstore.document import Document | |
import chromadb | |
from chromadb.config import Settings | |
import load_model | |
from load_vectors import load_from_file, load_and_split, create_and_add, load_from_web | |
# Directory handed to the Chroma settings as ``persist_directory``; the value
# is defined once in ``load_model`` and re-exported here for this module.
persist_directory = load_model.persist_directory
def format_document(document: Document):
    """Convert a document into the representation shown in the UI.

    TODO: Implement a nicer presentation style.

    Args:
        document: The document to render.

    Returns:
        Whatever ``document.dict()`` yields for the given document.
    """
    as_mapping = document.dict()
    return as_mapping
def format_result_set(result):
    """Render an answer and, on request, the documents it was derived from.

    Args:
        result: Mapping with a ``"result"`` entry (LaTeX text that is
            converted to Markdown before display) and a
            ``"source_documents"`` entry (iterable of documents).
    """
    answer_markdown = latex2markdown.LaTeX2Markdown(result["result"]).to_markdown()
    st.write(answer_markdown)

    show_sources = st.checkbox('Show source documents')
    # Looked up unconditionally, so a missing key surfaces even when the
    # checkbox is not ticked.
    sources = result["source_documents"]
    if not show_sources:
        return

    st.write('Source Documents:')
    for source in sources:
        st.write(format_document(source))
def get_chroma_client():
    """Build a new Chroma client for this application's vector store.

    Returns:
        A ``chromadb.Client`` configured with the ``duckdb+parquet``
        implementation and the module-level ``persist_directory``.
    """
    client_settings = Settings(
        chroma_db_impl="duckdb+parquet",
        persist_directory=persist_directory,
    )
    return chromadb.Client(client_settings)
@st.cache_data
def retrieve_collections():
    """Return the names of all collections known to the Chroma client.

    The function is wrapped in Streamlit's data cache because callers
    invalidate the listing with ``retrieve_collections.clear()`` after
    creating or deleting a collection; an undecorated function has no
    ``clear`` attribute and that call would raise ``AttributeError``.

    Returns:
        tuple: Collection names in the order reported by
        ``client.list_collections()``.
    """
    client = get_chroma_client()
    # Feed the generator straight into tuple(); no intermediate list needed.
    return tuple(collection.name for collection in client.list_collections())
# Embedding model name passed to ``create_and_add`` for every ingestion.
_EMBEDDING_MODEL = "hkunlp/instructor-large"


def _parse_urls(raw_urls):
    """Split a comma separated string into a list of non-empty, clean URLs."""
    # Split FIRST, then clean each piece: stripping the commas before the
    # split (as the previous code did) glues all URLs into a single string.
    pieces = (piece.replace('"', "").strip() for piece in raw_urls.split(","))
    return [url for url in pieces if url]


def _invalidate_collection_cache():
    """Clear the cached collection listing when ``retrieve_collections`` is cached."""
    # Only a cache-wrapped function exposes ``clear``; a plain function does
    # not, and calling it unconditionally would raise AttributeError.
    clear = getattr(retrieve_collections, "clear", None)
    if callable(clear):
        clear()


def _split_and_store(collection_name, docs, chunk_size):
    """Chunk ``docs`` and add the chunks to the collection ``collection_name``."""
    sub_docs = load_and_split(docs, chunk_size=int(chunk_size))
    create_and_add(collection_name, sub_docs, _EMBEDDING_MODEL)


def load_files():
    """Render the Streamlit UI for managing collections and ingesting documents.

    Two modes are offered:

    * "Add Documents": pick an existing collection, optionally delete it, and
      ingest documents either from uploaded files or from a comma separated
      list of URLs. The documents are split into chunks of the entered size
      and added to the selected collection.
    * "Start new collection": create a collection with the entered name. The
      name must be longer than three characters; shorter names are ignored.

    Raises:
        ValueError: If the entered chunk size cannot be converted with
            ``int()`` when an "Upload" button is pressed.
    """
    client = get_chroma_client()
    mode = st.radio(
        "",
        options=["Add Documents", "Start new collection"],
    )
    collections = retrieve_collections()

    if mode == "Add Documents":
        selected_collection = st.selectbox(
            'Add to exsisting collection or create a new one',
            collections,
        )
        # Evaluate the button first so it is always rendered, then make sure
        # there actually is a collection to delete.
        delete_clicked = st.button(
            'Delete Collection (⚠️ This is destructive and not reversible)'
        )
        if delete_clicked and selected_collection is not None:
            client.delete_collection(name=selected_collection)
            _invalidate_collection_cache()

        source = st.radio(
            "",
            options=["Upload Files", "Download Files"],
        )
        if source == "Upload Files":
            st.write('Source Documents:')
            uploaded_files = st.file_uploader(
                "Choose a PDF file", accept_multiple_files=True
            )
            chunk_size = st.text_area('chunk Size:', 1000)
            if st.button('Upload'):
                docs = load_from_file(uploaded_files)
                _split_and_store(selected_collection, docs, chunk_size)
        else:
            st.write('Source Documents (Comma separated):')
            raw_urls = st.text_area('Urls:', '')
            chunk_size = st.text_area('chunk Size:', 1000)
            urls = _parse_urls(raw_urls)
            if st.button('Upload'):
                docs = load_from_web(urls)
                _split_and_store(selected_collection, docs, chunk_size)
    else:
        collection = st.text_area('Name of your new collection:', '')
        if st.button('Create'):
            if len(collection) > 3:
                # NOTE: the embedding model name used to be appended to the
                # collection name; recording it as collection metadata would
                # be the better approach.
                client.create_collection(collection)
                _invalidate_collection_cache()
                st.write("Collection " + collection + " succesfully created.")