# Spaces: Sleeping
# (Page-status text captured together with the source; not part of the program.)
from llama_index import VectorStoreIndex,download_loader, VectorStoreIndex, ServiceContext, StorageContext, load_index_from_storage | |
from pathlib import Path | |
from github import Github | |
import os | |
import shutil | |
import openai | |
import gradio as gr | |
from pathlib import Path | |
from llama_index import download_loader | |
# --- API configuration ---
# Read the OpenAI key from the environment. "OPENAPI_API_KEY" is the variable
# name this script has always used, so it keeps precedence; the conventional
# "OPENAI_API_KEY" is accepted as a fallback so that a key supplied under the
# standard name is still used instead of being replaced by None.
openai.api_key = os.environ.get("OPENAPI_API_KEY") or os.environ.get("OPENAI_API_KEY")
# username = 'Akhil-Sharma30'
# --- Reading the files for the LLM model ---
# Path of the local repository checkout.
repo_dir = "/content/Akhil-Sharma30.github.io"

# Delete the checkout if one is already present on disk.
if os.path.exists(repo_dir):
    shutil.rmtree(repo_dir)
# Disabled alternative: load every Markdown file from a local checkout.
# NOTE: if this is re-enabled, `documents` must start as [] rather than None,
# otherwise the first `documents += document` fails.
# def combine_md_files(folder_path):
#     MarkdownReader = download_loader("MarkdownReader")
#     loader = MarkdownReader()
#     md_files = [file for file in folder_path.glob('*.md')]
#     documents = None
#     for file_path in md_files:
#         document = loader.load_data(file=file_path)
#         documents += document
#     return documents
# folder_path = Path('/content/Akhil-Sharma30.github.io/content')
# combined_documents = combine_md_files(folder_path)
# combined_documents will be a list containing the contents of all .md files in the folder
# Fetch the site's Markdown pages straight from GitHub's raw-content host.
RemoteReader = download_loader("RemoteReader")
loader = RemoteReader()

# All pages live under the same branch root; only the relative path differs.
_RAW_BASE_URL = "https://raw.githubusercontent.com/Akhil-Sharma30/Akhil-Sharma30.github.io/main"
_PAGE_PATHS = (
    "assets/README.md",
    "content/about.md",
    "content/cv.md",
    "content/post.md",
    "content/opensource.md",
    "content/supervised.md",
)

# One load_data() call per page, in the order listed above.
_loaded_pages = [loader.load_data(url=f"{_RAW_BASE_URL}/{path}") for path in _PAGE_PATHS]

# The individual names are kept so anything that still refers to them keeps working.
document1, document2, document3, document4, document5, document6 = _loaded_pages

# Concatenate the per-page results, preserving page order, into the list that is indexed.
data = [document for page in _loaded_pages for document in page]
# --- Vector embedding ---
# Build the vector index over the loaded documents and expose a query engine
# that the chat handler uses to answer questions.
index = VectorStoreIndex.from_documents(data)
query_engine = index.as_query_engine()

# Start-up smoke test: run two sample questions and print the answers.
# `response` is left bound to the last answer, as before.
for _sample_question in ("know akhil?", "what is name of the person?"):
    response = query_engine.query(_sample_question)
    print(response)
"""# ChatBot Interface"""
def chat(chat_history, user_input):
    """Answer *user_input* and stream the reply into the chat history.

    The question is sent to the module-level ``query_engine``. The answer text
    is then revealed one character at a time: each yielded value is a new list
    made of the existing history plus one ``(user_input, partial_answer)``
    pair, so the caller can redraw the conversation as the reply "types out".

    Args:
        chat_history: Existing list of ``(question, answer)`` pairs; ``None``
            is treated as an empty history.
        user_input: The question typed by the user.

    Yields:
        The history extended with the current question and the answer prefix
        produced so far. When the engine returns no text, a single entry with
        an empty answer is yielded so the question still shows up.
    """
    bot_response = query_engine.query(user_input)
    # The response text may be missing; fall back to an empty answer instead
    # of failing while iterating over it.
    answer = bot_response.response or ""
    history = list(chat_history or [])

    if not answer:
        yield history + [(user_input, "")]
        return

    # Emit progressively longer prefixes: "H", "He", "Hel", ...
    for shown in range(1, len(answer) + 1):
        yield history + [(user_input, answer[:shown])]
# Build the Gradio page: a header, a list of sample questions, and one tab
# holding the chat widgets.
with gr.Blocks() as demo:
    gr.Markdown('# Robotic Akhil')
    # NOTE(review): the trailing characters of the next heading look like
    # mis-decoded emoji. They are kept byte-for-byte here; confirm against the
    # original file before changing them.
    gr.Markdown('## "Innovating Intelligence - Unveil the secrets of a cutting-edge ChatBot project that introduces you to the genius behind the machine. π¨π»βπ»π')
    gr.Markdown('> Hint: Akhil 2.0')
    gr.Markdown('## Some question you can ask to test Bot:')
    # One heading per sample question, rendered in this order.
    for _sample in (
        'know akhil?',
        'write about my work at Agnisys?',
        'write about my work at IIT Delhi?',
        'was work in P1 Virtual Civilization Initiative opensource?',
    ):
        gr.Markdown(f'#### :) {_sample}')
    gr.Markdown('#### many more......')

    with gr.Tab("Knowledge Bot"):
        #inputbox = gr.Textbox("Input your text to build a Q&A Bot here.....")
        chatbot = gr.Chatbot()
        # The textbox starts pre-filled with a sample question.
        message = gr.Textbox("know akhil?")
        # Submitting the textbox calls chat(chatbot_value, message_value) and
        # writes what it yields back into the chatbot component.
        message.submit(chat, [chatbot, message], chatbot)

# chat is a generator, so the queue is enabled before launching the app.
demo.queue().launch()
"""# **GitHub Setup**"""
"""## Launch Phoenix
Define your knowledge base dataset with a schema that specifies the meaning of each column (features, predictions, actuals, tags, embeddings, etc.). See the [docs](https://docs.arize.com/phoenix/) for guides on how to define your own schema and API reference on `phoenix.Schema` and `phoenix.EmbeddingColumnNames`.
"""
# # get a random sample of 500 documents (including retrieved documents)
# # this will be handled by the application in a coming release
# num_sampled_point = 500
# retrieved_document_ids = set(
#     [
#         doc_id
#         for doc_ids in query_df[":feature.[str].retrieved_document_ids:prompt"].to_list()
#         for doc_id in doc_ids
#     ]
# )
# retrieved_document_mask = database_df["document_id"].isin(retrieved_document_ids)
# num_retrieved_documents = len(retrieved_document_ids)
# num_additional_samples = num_sampled_point - num_retrieved_documents
# unretrieved_document_mask = ~retrieved_document_mask
# sampled_unretrieved_document_ids = set(
#     database_df[unretrieved_document_mask]["document_id"]
#     .sample(n=num_additional_samples, random_state=0)
#     .to_list()
# )
# sampled_unretrieved_document_mask = database_df["document_id"].isin(
#     sampled_unretrieved_document_ids
# )
# sampled_document_mask = retrieved_document_mask | sampled_unretrieved_document_mask
# sampled_database_df = database_df[sampled_document_mask]
# database_schema = px.Schema(
#     prediction_id_column_name="document_id",
#     prompt_column_names=px.EmbeddingColumnNames(
#         vector_column_name="text_vector",
#         raw_data_column_name="text",
#     ),
# )
# database_ds = px.Dataset(
#     dataframe=sampled_database_df,
#     schema=database_schema,
#     name="database",
# )
"""Define your query dataset. Because the query dataframe is in OpenInference format, Phoenix is able to infer the meaning of each column without a user-defined schema by using the `phoenix.Dataset.from_open_inference` class method."""
# query_ds = px.Dataset.from_open_inference(query_df)
"""Launch Phoenix. Follow the instructions in the cell output to open the Phoenix UI."""
# session = px.launch_app(primary=query_ds, corpus=database_ds)