# Import the necessary libraries
import subprocess
import sys
# Function to install a package using pip
def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])
# Install required packages
try:
    install("gradio")
    install("openai==1.23.2")
    install("tiktoken==0.6.0")
    install("pypdf==4.0.1")
    install("langchain==0.1.1")
    install("langchain-community==0.0.13")
    install("chromadb==0.4.22")
    install("sentence-transformers==2.3.1")
except subprocess.CalledProcessError as e:
    print(f"Package installation failed: {e}")
import gradio as gr
import os
import uuid
import json
import pandas as pd
from openai import OpenAI
from huggingface_hub import HfApi, CommitScheduler, hf_hub_download
import zipfile
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
#from google.colab import userdata, drive
from pathlib import Path
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import tiktoken
# Create Client
#os.environ['HUGGINGFACE_TOKEN'] = hf_token
#api = HfApi(token=hf_token)
# Define the embedding model and the vectorstore
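# thenlper/gte-large is a sentence-transformer model from the Hugging Face Hub;
# its embeddings are used to build the Chroma vectorstore over the PDF dataset.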
embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
#""" If dataset directory exixts, remove it and all of the contents within
"""
if os.path.exists('dataset'):
!rm -rf dataset
# If collection_db exists, remove it and all of the contents within
if os.path.exists('collection_db'):
!rm -rf dataset
"""
print(f"about to unzio")
# Command to unzip the file
command = "unzip Dataset-10k.zip -d dataset"
# Execute the command
try:
    subprocess.run(command, check=True, shell=True)
except subprocess.CalledProcessError as e:
    print(f"An error occurred while unzipping: {e}")