LangchainBot-space-creator

Running

File size: 6,266 Bytes

from langchain.llms import OpenAI
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.docstore.document import Document
import requests
import pathlib
import subprocess
import tempfile
import os
import gradio as gr
import pickle
from huggingface_hub import HfApi, upload_folder
from huggingface_hub import whoami, list_models

# using a vector space for our search
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.faiss import FAISS
from langchain.text_splitter import CharacterTextSplitter


#Code for extracting the markdown files from a repo
#To get markdowns from GitHub for any/your repo
def get_github_docs(repo_link):
    """Clone a GitHub repository and yield its Markdown files as Documents.

    This is a generator: the clone happens on the first iteration, inside a
    temporary directory that is removed once the generator is exhausted or
    closed.

    Args:
        repo_link: Repository reference whose last two ``/``-separated parts
            are the owner and the repository name, e.g.
            ``https://github.com/gradio-app/gradio`` or ``gradio-app/gradio``.
            Surrounding whitespace, a trailing ``/`` and a ``.git`` suffix are
            tolerated.

    Yields:
        One ``Document`` per ``*.md`` / ``*.mdx`` file, with the file text as
        ``page_content`` and ``metadata["source"]`` set to the GitHub blob URL
        of that file at the cloned commit.

    Raises:
        ValueError: If an owner and repository name cannot be extracted.
        subprocess.CalledProcessError: If a ``git`` command fails.
    """
    # Normalise the link so "owner/repo/" and "owner/repo.git" resolve to the
    # same owner/name pair as "owner/repo".
    parts = repo_link.strip().rstrip("/").split("/")
    if len(parts) < 2 or not parts[-2] or not parts[-1]:
        raise ValueError(
            f"Expected a link of the form '<owner>/<repo>', got: {repo_link!r}"
        )
    repo_owner, repo_name = parts[-2], parts[-1]
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]

    with tempfile.TemporaryDirectory() as d:
        # Argument lists (no shell) so that user-supplied text in the link can
        # never be interpreted as shell syntax.
        subprocess.check_call(
            [
                "git",
                "clone",
                f"https://github.com/{repo_owner}/{repo_name}.git",
                ".",
            ],
            cwd=d,
        )
        git_sha = (
            subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=d)
            .decode("utf-8")
            .strip()
        )
        repo_path = pathlib.Path(d)
        markdown_files = list(repo_path.rglob("*.md")) + list(
            repo_path.rglob("*.mdx")
        )
        for markdown_file in markdown_files:
            # Best effort: a file that cannot be read (dangling symlink,
            # directory whose name ends in .md, non-UTF-8 content) is
            # reported and skipped instead of aborting the whole run.
            try:
                with open(markdown_file, "r", encoding="utf-8") as f:
                    text = f.read()
            except (OSError, UnicodeDecodeError):
                print(f"Could not open file: {markdown_file}")
                continue
            # as_posix() keeps forward slashes in the URL on every platform.
            relative_path = markdown_file.relative_to(repo_path).as_posix()
            github_url = f"https://github.com/{repo_owner}/{repo_name}/blob/{git_sha}/{relative_path}"
            yield Document(page_content=text, metadata={"source": github_url})

#Code for creating a new space for the user
def create_space(repo_link, hf_token):
    """Create (or reuse) the public Gradio Space that will host the chatbot.

    The Space id is ``LangChain_<repo>Bot`` where ``<repo>`` is the text after
    the last ``/`` in ``repo_link``. No namespace is given in the id;
    presumably the Hub places it under the account that owns ``hf_token``
    (e.g. ysharma/LangChain_GradioBot) -- confirm against the Hub docs.

    Args:
        repo_link: GitHub repository link or ``owner/repo`` string.
        hf_token: Hugging Face access token used for the API call.
    """
    print("***********INSIDE CREATE SPACE***************")
    *_, repo_name = repo_link.split('/')
    space_id = f'LangChain_{repo_name}Bot'
    hub = HfApi(token=hf_token)
    # exist_ok=True makes a repeated run reuse an already existing Space.
    hub.create_repo(
        repo_id=space_id,
        exist_ok=True,
        repo_type="space",
        space_sdk="gradio",
        private=False,
    )

#Code for creating the search index
#Saving search index to disk
def create_search_index(repo_link, openai_api_key):
    """Build a FAISS index over a repository's Markdown and pickle it to disk.

    Every document produced by ``get_github_docs`` is split on spaces into
    chunks of up to 1024 characters with no overlap; each chunk keeps the
    metadata of the document it came from. The chunks are embedded with
    ``OpenAIEmbeddings`` and the resulting index is written to
    ``search_index.pickle`` in the current working directory.

    Args:
        repo_link: GitHub repository link or ``owner/repo`` string.
        openai_api_key: OpenAI API key handed to ``OpenAIEmbeddings``.

    Returns:
        The path of the pickle file that was written.
    """
    print("***********INSIDE CREATE SEARCH INDEX***************")
    splitter = CharacterTextSplitter(separator=" ", chunk_size=1024, chunk_overlap=0)
    # Flatten "documents -> chunks" into one list of chunk-level Documents.
    source_chunks = [
        Document(page_content=piece, metadata=doc.metadata)
        for doc in get_github_docs(repo_link)
        for piece in splitter.split_text(doc.page_content)
    ]

    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    search_index = FAISS.from_documents(source_chunks, embeddings)

    # Persist the index under the fixed name the upload step looks for.
    index_path = "search_index.pickle"
    with open(index_path, "wb") as out_file:
        pickle.dump(search_index, out_file)
    return index_path

def upload_files_to_space(repo_link, hf_token):
    """Upload the chatbot app, search index and requirements to the Space.

    Renders ``template/app_og.py`` into ``template/app.py`` by replacing the
    ``$RepoName`` placeholder, then uploads ``app.py``,
    ``search_index.pickle`` and ``requirements.txt`` to the Space
    ``<user>/LangChain_<repo>Bot``, where ``<user>`` is the ``name`` field
    returned by ``whoami`` for ``hf_token``.

    ``search_index.pickle`` must already exist in the working directory (it
    is written by ``create_search_index``). It is deleted only after all
    uploads succeed, so a failed upload does not force the index to be
    rebuilt. The generated ``template/app.py`` is always removed.

    Args:
        repo_link: GitHub repository link or ``owner/repo`` string; the text
            after the last ``/`` is used as the repository name.
        hf_token: Hugging Face access token used for every Hub call.

    Returns:
        An HTML snippet linking to the Space.
    """
    print("***********INSIDE UPLOAD FILES TO SPACE***************")
    repo_name = repo_link.split('/')[-1]
    api = HfApi(token=hf_token)
    user_name = whoami(token=hf_token)['name']
    space_name = f"{user_name}/LangChain_{repo_name}Bot"

    # Replacing the repo name in app.py
    with open("template/app_og.py", "r", encoding="utf-8") as f:
        app = f.read()
    app = app.replace("$RepoName", repo_name)

    # (local path, path inside the Space), uploaded in this order.
    uploads = (
        ("template/app.py", "app.py"),
        ("search_index.pickle", "search_index.pickle"),
        ("template/requirements.txt", "requirements.txt"),
    )
    try:
        # Saving the new app.py file to disk
        with open("template/app.py", "w", encoding="utf-8") as f:
            f.write(app)

        for local_path, path_in_repo in uploads:
            api.upload_file(
                path_or_fileobj=local_path,
                path_in_repo=path_in_repo,
                repo_id=space_name,
                # The original passed an undefined name `token` for the last
                # upload, which raised NameError; every upload uses hf_token.
                token=hf_token,
                repo_type="space",
            )
    finally:
        # The rendered app.py is a throw-away artefact: remove it even when
        # an upload fails so that no stale copy is left in the template dir.
        if os.path.exists("template/app.py"):
            os.remove("template/app.py")

    # Only reached when every upload succeeded.
    os.remove("search_index.pickle")

    repo_url = f"https://huggingface.co/spaces/{space_name}"
    return f"Successfully created the Chatbot at: <a href={repo_url} target='_blank'>{space_name}</a>"

def driver(repo_link, hf_token):
    """Create the Space for ``repo_link`` and push the chatbot files to it.

    The search index is not built here; the upload step reads the
    ``search_index.pickle`` that ``create_search_index`` writes.

    Args:
        repo_link: GitHub repository link or ``owner/repo`` string.
        hf_token: Hugging Face access token.

    Returns:
        The HTML snippet produced by ``upload_files_to_space``.
    """
    print("***********INSIDE DRIVER***************")
    # Step 1: make sure the target Space exists.
    create_space(repo_link, hf_token)
    # Step 2: upload app.py, the index and requirements.txt.
    result_html = upload_files_to_space(repo_link, hf_token)
    print(f"html tag is : {result_html}")
    return result_html
    
    

# Gradio UI: takes a GitHub repo, a Hugging Face token and an OpenAI key.
# Two independent actions are offered:
#   1. "Create Search index" builds the FAISS index and offers the pickle file
#      for download.
#   2. "Create YOur Chatbot" creates the Space and uploads the files to it.
# The second action does not build the index itself; it uploads the
# search_index.pickle written by the first, so the index has to exist first.
with gr.Blocks() as demo:
    # Input row: repository plus the two secrets (masked in the UI).
    with gr.Row():
        repo_link = gr.Textbox(label="Enter Github repo name")
        hf_token_in = gr.Textbox(type='password', label="Enter hf-token name")
        openai_api_key = gr.Textbox(type='password', label="Enter your OpenAI API key here")
    # Action row: one button per step.
    with gr.Row():
        btn_faiss = gr.Button("Create Search index")
        btn_create_space = gr.Button("Create YOur Chatbot")
    # Outputs: HTML link to the created Space and the generated index file.
    html_out = gr.HTML()
    search_index_file = gr.File()
    # create_search_index returns the pickle path, which gr.File receives.
    btn_faiss.click(create_search_index, [repo_link, openai_api_key],search_index_file )
    # driver returns an HTML snippet, rendered in html_out.
    btn_create_space.click(driver, [repo_link, hf_token_in], html_out)

# Enable request queuing, then start the app at import time (no __main__ guard).
demo.queue()
demo.launch(debug=True)