LangchainBot-space-creator

Running

App Files Files Community

LangchainBot-space-creator / app.py

ysharma HF staff

update libraries

1e192c1 over 1 year ago

raw history blame

No virus

6.13 kB

	from langchain.llms import OpenAI
	from langchain.chains.qa_with_sources import load_qa_with_sources_chain
	from langchain.docstore.document import Document
	import requests
	import pathlib
	import subprocess
	import tempfile
	import os
	import gradio as gr
	import pickle
	from huggingface_hub import HfApi, upload_folder
	from huggingface_hub import whoami, list_models

	# using a vector space for our search
	from langchain.embeddings.openai import OpenAIEmbeddings
	from langchain.vectorstores.faiss import FAISS
	from langchain.text_splitter import CharacterTextSplitter


	#Code for extracting the Markdown files from a repo
	#Fetches the Markdown files from GitHub for any (e.g. your own) repo
	def get_github_docs(repo_link):
		"""Yield a ``Document`` for every Markdown file in a GitHub repository.

		The repository is cloned into a temporary directory that only exists
		while this generator is being consumed; every ``*.md`` and ``*.mdx``
		file found in it is read and yielded.

		Args:
			repo_link: ``owner/name`` or a URL ending in ``owner/name``. A
				trailing slash and a ``.git`` suffix are tolerated.

		Yields:
			``Document`` objects whose ``page_content`` is the file text and
			whose ``metadata["source"]`` is a GitHub blob URL pinned to the
			cloned commit.

		Raises:
			ValueError: if ``repo_link`` does not contain an owner and a name
				(raised when iteration starts, since this is a generator).
			subprocess.CalledProcessError: if a ``git`` command fails.
		"""
		# Drop empty segments so "owner/name/" parses the same as "owner/name".
		parts = [part for part in repo_link.strip().split('/') if part]
		if len(parts) < 2:
			raise ValueError(
				f"Expected a repo link of the form 'owner/name', got: {repo_link!r}"
			)
		repo_owner, repo_name = parts[-2], parts[-1]
		if repo_name.endswith(".git"):
			# ".git" is appended to the clone URL below; avoid "name.git.git".
			repo_name = repo_name[:-len(".git")]

		with tempfile.TemporaryDirectory() as d:
			# repo_link comes straight from a UI textbox: pass argument lists
			# (no shell) so it can never be interpreted as shell syntax.
			subprocess.check_call(
				[
					"git",
					"clone",
					f"https://github.com/{repo_owner}/{repo_name}.git",
					".",
				],
				cwd=d,
			)
			git_sha = (
				subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=d)
				.decode("utf-8")
				.strip()
			)
			repo_path = pathlib.Path(d)
			markdown_files = list(repo_path.rglob("*.md")) + list(
				repo_path.rglob("*.mdx")
			)
			for markdown_file in markdown_files:
				try:
					# Decode explicitly as UTF-8 instead of the locale default.
					text = markdown_file.read_text(encoding="utf-8")
				except (OSError, UnicodeDecodeError):
					# Best effort: skip broken symlinks, directories named
					# "*.md" and undecodable files instead of aborting the run.
					print(f"Could not open file: {markdown_file}")
					continue
				# as_posix() keeps forward slashes in the URL on every platform.
				relative_path = markdown_file.relative_to(repo_path).as_posix()
				github_url = f"https://github.com/{repo_owner}/{repo_name}/blob/{git_sha}/{relative_path}"
				yield Document(page_content=text, metadata={"source": github_url})

	#Code for creating a new space for the user
	def create_space(repo_link, hf_token):
		"""Create a public Gradio Space named ``LangChain_<repo>Bot``.

		``<repo>`` is the last ``/``-separated segment of ``repo_link``. The
		Space is created through ``HfApi`` authenticated with ``hf_token``.
		Nothing is returned.
		"""
		print("*********INSIDE CREATE SPACE*************")
		# Example resulting Space: ysharma/LangChain_GradioBot
		space_repo_id = "LangChain_{}Bot".format(repo_link.split('/')[-1])
		hub_client = HfApi(token=hf_token)
		hub_client.create_repo(
			repo_id=space_repo_id,
			repo_type="space",
			space_sdk="gradio",
			private=False,
		)

	#Code for creating the search index
	#Saving search index to disk
	def create_search_index(repo_link, openai_api_key):
		"""Build a FAISS index over a repo's Markdown files and pickle it to disk.

		Each document yielded by ``get_github_docs`` is split on spaces into
		chunks (``chunk_size=1024``, no overlap). Every chunk keeps its parent
		document's metadata. The chunks are embedded with ``OpenAIEmbeddings``
		using ``openai_api_key``.

		Returns:
			The path of the pickle file written, ``"search_index.pickle"``.
		"""
		print("*********INSIDE CREATE SEARCH INDEX*************")
		index_path = "search_index.pickle"
		text_splitter = CharacterTextSplitter(separator=" ", chunk_size=1024, chunk_overlap=0)

		# One Document per chunk, all sharing the metadata of their source file.
		chunk_docs = [
			Document(page_content=piece, metadata=doc.metadata)
			for doc in get_github_docs(repo_link)
			for piece in text_splitter.split_text(doc.page_content)
		]

		embedder = OpenAIEmbeddings(openai_api_key=openai_api_key)
		faiss_index = FAISS.from_documents(chunk_docs, embedder)

		# Persist the FAISS index so it can be uploaded to the Space later.
		with open(index_path, "wb") as out_file:
			pickle.dump(faiss_index, out_file)
		return index_path

	def upload_files_to_space(repo_link, hf_token):
		"""Upload the generated app, search index and requirements to the Space.

		Renders ``template/app_og.py`` by replacing ``$RepoName`` with the repo
		name, then uploads the rendered ``app.py``, ``search_index.pickle`` and
		``template/requirements.txt`` to the ``LangChain_<repo>Bot`` Space owned
		by the user ``hf_token`` belongs to.

		Args:
			repo_link: GitHub repo link; its last ``/`` segment is the repo name.
			hf_token: Hugging Face token used for every Hub call.

		Returns:
			An HTML snippet linking to the Space.
		"""
		print("*********INSIDE UPLOAD FILES TO SPACE*************")
		repo_name = repo_link.split('/')[-1]

		# The Hub client must be created here: no module-level `api` exists.
		api = HfApi(token=hf_token)
		user_name = whoami(token=hf_token)['name']
		# Use the fully qualified "<user>/<space>" id for every upload.
		space_name = f"{user_name}/LangChain_{repo_name}Bot"

		# Fill the repo name into the app template.
		with open("template/app_og.py", "r") as f:
			app = f.read()
		app = app.replace("$RepoName", repo_name)

		# (local path, path inside the Space) for everything that is uploaded.
		uploads = [
			("template/app.py", "app.py"),
			("search_index.pickle", "search_index.pickle"),
			("template/requirements.txt", "requirements.txt"),
		]
		try:
			# Save the rendered app.py to disk so it can be uploaded.
			with open("template/app.py", "w") as f:
				f.write(app)
			for local_path, path_in_repo in uploads:
				api.upload_file(
					path_or_fileobj=local_path,
					path_in_repo=path_in_repo,
					repo_id=space_name,
					token=hf_token,
					repo_type="space",
				)
		finally:
			# The rendered app.py is a throwaway artifact: remove it even when
			# an upload fails so a stale copy is never left behind.
			if os.path.exists("template/app.py"):
				os.remove("template/app.py")
		# The index is only deleted after a fully successful upload, so a
		# failed run can be retried without rebuilding it.
		os.remove("search_index.pickle")

		repo_url = f"https://huggingface.co/spaces/{space_name}"
		return f"Successfully created the Chatbot at: <a href={repo_url} target='_blank'>{space_name}</a>"

	def driver(repo_link, hf_token):
		"""Create the user's Space, upload the bot files to it, and return the result HTML.

		The search index is not built here; this function only calls
		``create_space`` and ``upload_files_to_space``.
		"""
		print("*********INSIDE DRIVER*************")
		# Step 1: create the new Space.
		create_space(repo_link, hf_token)
		# Step 2: push the app, index and requirements into it.
		result_html = upload_files_to_space(repo_link, hf_token)
		print(f"html tag is : {result_html}")
		return result_html



	# Gradio UI: takes a repo link plus credentials, and exposes one button to
	# build the search index file and one to create the chatbot Space.
	with gr.Blocks() as demo:
		# First row: the three text inputs.
		with gr.Row():
			repo_link = gr.Textbox(label="Enter Github repo name")
			hf_token_in = gr.Textbox(type='password', label="Enter hf-token name")
			openai_api_key = gr.Textbox(type='password', label="Enter your OpenAI API key here")
		# Second row: the two action buttons.
		with gr.Row():
			btn_faiss = gr.Button("Create Search index")
			btn_create_space = gr.Button("Create YOur Chatbot")
		# Output areas: link to the created Space, and the generated index file.
		html_out = gr.HTML()
		search_index_file = gr.File()

		btn_faiss.click(
			fn=create_search_index,
			inputs=[repo_link, openai_api_key],
			outputs=search_index_file,
		)
		btn_create_space.click(
			fn=driver,
			inputs=[repo_link, hf_token_in],
			outputs=html_out,
		)

	demo.queue()
	demo.launch(debug=True)