# Import the necessary libraries
import subprocess
import sys
# Function to install a package using pip
def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])
# Install required packages
try:
    install("gradio")
    install("openai==1.23.2")
    install("tiktoken==0.6.0")
    install("pypdf==4.0.1")
    install("langchain==0.1.1")
    install("langchain-community==0.0.13")
    install("chromadb==0.4.22")
    install("sentence-transformers==2.3.1")
except subprocess.CalledProcessError as e:
    print(f"Package installation failed: {e}")
import gradio as gr
import os
import uuid
import json
import pandas as pd
from openai import OpenAI
from huggingface_hub import HfApi, CommitScheduler, hf_hub_download
import zipfile
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
#from google.colab import userdata, drive
from pathlib import Path
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import tiktoken
# Create Client
#os.environ['HUGGINGFACE_TOKEN'] = hf_token
#api = HfApi(token=hf_token)
# Define the embedding model and the vectorstore
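# thenlper/gte-large is a sentence-transformer model from the Hugging Face Hub;
# its embeddings are used to build the Chroma vectorstore over the PDF dataset.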
embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
#""" If dataset directory exixts, remove it and all of the contents within
"""
if os.path.exists('dataset'):
!rm -rf dataset
# If collection_db exists, remove it and all of the contents within
if os.path.exists('collection_db'):
!rm -rf dataset
"""
print(f"about to unzio")
# Command to unzip the file
command = "unzip Dataset-10k.zip -d dataset"
# Execute the command
try:
    subprocess.run(command, check=True, shell=True)
except subprocess.CalledProcessError as e:
    print(f"An error occurred while unzipping: {e}")