|
from langchain.llms import OpenAI |
|
from langchain.chains.qa_with_sources import load_qa_with_sources_chain |
|
from langchain.docstore.document import Document |
|
import requests |
|
import pathlib |
|
import subprocess |
|
import tempfile |
|
import os |
|
import gradio as gr |
|
import pickle |
|
from huggingface_hub import HfApi, upload_folder |
|
from huggingface_hub import whoami, list_models |
|
|
|
|
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
from langchain.vectorstores.faiss import FAISS |
|
from langchain.text_splitter import CharacterTextSplitter |
|
|
|
|
|
|
|
|
|
def get_github_docs(repo_link):
    """Clone a GitHub repo and yield each markdown file as a Document.

    Args:
        repo_link: GitHub repo URL or path ending in ``owner/name``
            (a trailing slash is tolerated).

    Yields:
        Document: one per ``*.md`` / ``*.mdx`` file, with ``page_content`` set
        to the file text and ``metadata["source"]`` set to a permalink pinned
        to the cloned commit SHA.
    """
    repo_owner, repo_name = repo_link.rstrip("/").split("/")[-2:]

    with tempfile.TemporaryDirectory() as d:
        # List-form argv (shell=False) so the user-supplied repo link can
        # never be interpreted by a shell (command-injection hardening).
        subprocess.check_call(
            ["git", "clone", f"https://github.com/{repo_owner}/{repo_name}.git", "."],
            cwd=d,
        )
        # Pin source links to the exact commit we indexed.
        git_sha = (
            subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=d)
            .decode("utf-8")
            .strip()
        )
        repo_path = pathlib.Path(d)
        markdown_files = list(repo_path.rglob("*.md")) + list(
            repo_path.rglob("*.mdx")
        )
        for markdown_file in markdown_files:
            try:
                with open(markdown_file, "r", encoding="utf-8") as f:
                    relative_path = markdown_file.relative_to(repo_path)
                    github_url = f"https://github.com/{repo_owner}/{repo_name}/blob/{git_sha}/{relative_path}"
                    yield Document(page_content=f.read(), metadata={"source": github_url})
            except FileNotFoundError:
                # e.g. a dangling symlink inside the repo — skip, keep going.
                print(f"Could not open file: {markdown_file}")
|
|
|
|
|
def create_space(repo_link, hf_token):
    """Create (or reuse, via ``exist_ok``) a public Gradio Space for the repo.

    The Space id is derived from the last path segment of *repo_link* as
    ``LangChain_<repo>Bot``.
    """
    print("***********INSIDE CREATE SPACE***************")
    bot_repo_id = f"LangChain_{repo_link.split('/')[-1]}Bot"
    hub_api = HfApi(token=hf_token)
    hub_api.create_repo(
        repo_id=bot_repo_id,
        exist_ok=True,
        repo_type="space",
        space_sdk="gradio",
        private=False,
    )
|
|
|
|
|
|
|
def create_search_index(repo_link, openai_api_key):
    """Build a FAISS index over the repo's markdown docs and pickle it.

    Splits every document from ``get_github_docs`` into 1024-char chunks
    (space-separated, no overlap), embeds them with OpenAI, and writes the
    resulting index to ``search_index.pickle``.

    Returns:
        str: the pickle file name, ``"search_index.pickle"``.
    """
    print("***********INSIDE CREATE SEARCH INDEX***************")

    splitter = CharacterTextSplitter(separator=" ", chunk_size=1024, chunk_overlap=0)
    # Each chunk keeps its parent document's metadata (the GitHub source URL).
    source_chunks = [
        Document(page_content=piece, metadata=doc.metadata)
        for doc in get_github_docs(repo_link)
        for piece in splitter.split_text(doc.page_content)
    ]

    search_index = FAISS.from_documents(
        source_chunks, OpenAIEmbeddings(openai_api_key=openai_api_key)
    )

    with open("search_index.pickle", "wb") as fh:
        pickle.dump(search_index, fh)
    return "search_index.pickle"
|
|
|
def upload_files_to_space(repo_link, hf_token):
    """Upload the app, search index, and requirements to the created Space.

    Renders ``template/app_og.py`` with the repo name, uploads ``app.py``,
    ``search_index.pickle``, and ``requirements.txt`` to the
    ``LangChain_<repo>Bot`` Space, then removes the local temp artifacts.

    Returns:
        str: an HTML anchor linking to the newly populated Space.
    """
    print("***********INSIDE UPLOAD FILES TO SPACE***************")
    repo_name = repo_link.split('/')[-1]
    # Single source of truth for the Space id used by all three uploads.
    space_repo_id = f'LangChain_{repo_name}Bot'
    api = HfApi(token=hf_token)

    with open("template/app_og.py", "r") as f:
        app = f.read()
    app = app.replace("$RepoName", repo_name)

    with open("template/app.py", "w") as f:
        f.write(app)

    api.upload_file(
        path_or_fileobj="template/app.py",
        path_in_repo="app.py",
        repo_id=space_repo_id,
        token=hf_token,
        repo_type="space",
    )

    api.upload_file(
        path_or_fileobj="search_index.pickle",
        path_in_repo="search_index.pickle",
        repo_id=space_repo_id,
        token=hf_token,
        repo_type="space",
    )

    # BUG FIX: this call previously used the undefined names `model_id` and
    # `token`, raising NameError before requirements.txt was ever uploaded.
    api.upload_file(
        path_or_fileobj="template/requirements.txt",
        path_in_repo="requirements.txt",
        repo_id=space_repo_id,
        token=hf_token,
        repo_type="space",
    )

    # Clean up local artifacts so stale files never leak into the next run.
    os.remove("template/app.py")
    os.remove("search_index.pickle")

    user_name = whoami(token=hf_token)['name']
    repo_url = f"https://huggingface.co/spaces/{user_name}/{space_repo_id}"
    space_name = f"{user_name}/{space_repo_id}"
    # BUG FIX: the href attribute value was previously unquoted (broken HTML).
    return f"Successfully created the Chatbot at: <a href='{repo_url}' target='_blank'>{space_name}</a>"
|
|
|
def driver(repo_link, hf_token):
    """End-to-end pipeline: create the Space, then upload the chatbot files.

    Returns:
        str: the HTML link tag produced by ``upload_files_to_space``.
    """
    print("***********INSIDE DRIVER***************")
    create_space(repo_link, hf_token)
    result_html = upload_files_to_space(repo_link, hf_token)
    print(f"html tag is : {result_html}")
    return result_html
|
|
|
|
|
|
|
|
|
# --- Gradio UI: collect repo link + credentials, wire buttons to pipeline ---
with gr.Blocks() as demo:
    with gr.Row():
        repo_link = gr.Textbox(label="Enter Github repo name")
        hf_token_in = gr.Textbox(type='password', label="Enter hf-token name")
        openai_api_key = gr.Textbox(type='password', label="Enter your OpenAI API key here")
    with gr.Row():
        btn_faiss = gr.Button("Create Search index")
        # BUG FIX: button label typo "YOur" -> "Your".
        btn_create_space = gr.Button("Create Your Chatbot")
    html_out = gr.HTML()
    search_index_file = gr.File()
    # Build the FAISS index (downloadable pickle) / build + populate the Space.
    btn_faiss.click(create_search_index, [repo_link, openai_api_key], search_index_file)
    btn_create_space.click(driver, [repo_link, hf_token_in], html_out)

demo.queue()
demo.launch(debug=True)