# Page-header residue from the hosting site (not part of the script): "Spaces: Sleeping"
# Import the necessary libraries | |
import subprocess | |
import sys | |
# Function to install a package using pip
def install(package: str) -> None:
    """Install *package* with pip into the running interpreter's environment.

    Runs ``<sys.executable> -m pip install <package>`` so the package is
    installed for the same interpreter that is executing this script.

    Args:
        package: A pip requirement specifier, e.g. ``"gradio"`` or
            ``"openai==1.23.2"``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])
# Install required packages
# Requirement specifiers handed to pip one at a time, in this order.
_REQUIRED_PACKAGES = (
    "gradio",
    "openai==1.23.2",
    "tiktoken==0.6.0",
    "pypdf==4.0.1",
    "langchain==0.1.1",
    "langchain-community==0.0.13",
    "chromadb==0.4.22",
    "sentence-transformers==2.3.1",
)

try:
    for requirement in _REQUIRED_PACKAGES:
        install(requirement)
except subprocess.CalledProcessError as e:
    # Best-effort: the first failing install is reported, the remaining
    # requirements are skipped, and the script carries on.
    print(f"An error occurred: {e}")
import gradio as gr | |
import os | |
import uuid | |
import json | |
import pandas as pd | |
import subprocess | |
from openai import OpenAI | |
from huggingface_hub import HfApi | |
from huggingface_hub import CommitScheduler | |
from huggingface_hub import hf_hub_download | |
import zipfile | |
from langchain_community.vectorstores import Chroma | |
from langchain_community.embeddings.sentence_transformer import ( | |
SentenceTransformerEmbeddings | |
) | |
#from google.colab import userdata, drive | |
from pathlib import Path | |
from langchain.document_loaders import PyPDFDirectoryLoader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
import tiktoken | |
# Create Client (currently disabled; kept for reference)
#os.environ['HUGGINGFACE_TOKEN'] = hf_token
#api = HfApi(token=hf_token)

# Define the embedding model. No vector store is created in this section.
EMBEDDING_MODEL_NAME = 'thenlper/gte-large'
embedding_model = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL_NAME)
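# Disabled notebook cleanup, kept for reference only. The `!` shell escapes
# below are IPython/Colab syntax and are not valid in a plain Python script.
#
# # If the dataset directory exists, remove it and all of the contents within
# if os.path.exists('dataset'):
#     !rm -rf dataset
# # If collection_db exists, remove it and all of the contents within
# if os.path.exists('collection_db'):
#     !rm -rf dataset
#     # NOTE(review): this removes 'dataset' again; presumably 'collection_db'
#     # was intended - confirm before re-enabling.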
# Announce the extraction step (message text kept verbatim).
print("about to unzio")
# Command to unzip the file
command = "unzip Dataset-10k.zip -d dataset"
# Execute the command
try:
    # Run with an argument list and no shell. None of the arguments contain
    # whitespace, so a plain split yields the intended argv.
    subprocess.run(command.split(), check=True)
except (subprocess.CalledProcessError, OSError) as e:
    # CalledProcessError: unzip exited with a non-zero status.
    # OSError: the unzip executable could not be started (for example, it is
    # not installed). Under the shell that was reported as a failed command,
    # so it stays non-fatal here as well.
    print(f"An error occurred: {e}")