Spaces:

DeveloperAkhil
/

Personal-Chatbot

Runtime error

App Files Files Community

Personal-Chatbot / app.py

DeveloperAkhil

Update app.py

572a55c verified 11 months ago

raw

history blame contribute delete

5.86 kB


	from llama_index.core import VectorStoreIndex,download_loader, VectorStoreIndex, ServiceContext, StorageContext, load_index_from_storage
	from pathlib import Path
	from github import Github
	import os
	import shutil
	import openai
	import gradio as gr

	from pathlib import Path

	"""# Github Configeration"""

	openai.api_key = os.environ.get("OPENAPI_API_KEY")

	# username = 'Akhil-Sharma30'


	"""# Reading the Files for LLM Model"""


	# Specify the path to the repository
	repo_dir = "/content/Akhil-Sharma30.github.io"

	# Check if the repository exists and delete it if it does
	if os.path.exists(repo_dir):
	shutil.rmtree(repo_dir)


	# def combine_md_files(folder_path):
	# MarkdownReader = download_loader("MarkdownReader")
	# loader = MarkdownReader()

	# md_files = [file for file in folder_path.glob('*.md')]
	# documents = None

	# for file_path in md_files:
	# document = loader.load_data(file=file_path)
	# documents += document

	# return documents

	# folder_path = Path('/content/Akhil-Sharma30.github.io/content')
	#combined_documents = combine_md_files(folder_path)

	# combined_documents will be a list containing the contents of all .md files in the folder

	RemoteReader = download_loader("RemoteReader")

	loader = RemoteReader()


	document1 = loader.load_data(url="https://raw.githubusercontent.com/Akhil-Sharma30/Akhil-Sharma30.github.io/main/assets/README.md")
	document2 = loader.load_data(url="https://raw.githubusercontent.com/Akhil-Sharma30/Akhil-Sharma30.github.io/main/content/about.md")
	document3 = loader.load_data(url="https://raw.githubusercontent.com/Akhil-Sharma30/Akhil-Sharma30.github.io/main/content/cv.md")
	document4 = loader.load_data(url="https://raw.githubusercontent.com/Akhil-Sharma30/Akhil-Sharma30.github.io/main/content/post.md")
	document5 = loader.load_data(url="https://raw.githubusercontent.com/Akhil-Sharma30/Akhil-Sharma30.github.io/main/content/opensource.md")
	document6 = loader.load_data(url="https://raw.githubusercontent.com/Akhil-Sharma30/Akhil-Sharma30.github.io/main/content/supervised.md")

	data = document1+ document2 + document3+ document4 + document5+document6


	"""# Vector Embedding"""

	index = VectorStoreIndex.from_documents(data)

	query_engine = index.as_query_engine()
	response = query_engine.query("know akhil?")
	print(response)

	response = query_engine.query("what is name of the person?")
	print(response)

	"""# ChatBot Interface"""

	def chat(chat_history, user_input):

	bot_response = query_engine.query(user_input)
	#print(bot_response)
	response = ""
	for letter in ''.join(bot_response.response): #[bot_response[i:i+1] for i in range(0, len(bot_response), 1)]:
	response += letter + ""
	yield chat_history + [(user_input, response)]

	with gr.Blocks() as demo:
	gr.Markdown('# Robotic Akhil')
	gr.Markdown('## "Innovating Intelligence - Unveil the secrets of a cutting-edge ChatBot project that introduces you to the genius behind the machine. 👨🏻‍💻😎')
	gr.Markdown('> Hint: Akhil 2.0')
	gr.Markdown('## Some question you can ask to test Bot:')
	gr.Markdown('#### :) know akhil?')
	gr.Markdown('#### :) write about my work at Agnisys?')
	gr.Markdown('#### :) write about my work at IIT Delhi?')
	gr.Markdown('#### :) was work in P1 Virtual Civilization Initiative opensource?')
	gr.Markdown('#### many more......')
	with gr.Tab("Knowledge Bot"):
	#inputbox = gr.Textbox("Input your text to build a Q&A Bot here.....")
	chatbot = gr.Chatbot()
	message = gr.Textbox ("know akhil?")
	message.submit(chat, [chatbot, message], chatbot)

	demo.queue().launch()


	"""# Github Setup"""



	"""## Launch Phoenix

	Define your knowledge base dataset with a schema that specifies the meaning of each column (features, predictions, actuals, tags, embeddings, etc.). See the [docs](https://docs.arize.com/phoenix/) for guides on how to define your own schema and API reference on `phoenix.Schema` and `phoenix.EmbeddingColumnNames`.
	"""

	# # get a random sample of 500 documents (including retrieved documents)
	# # this will be handled by by the application in a coming release
	# num_sampled_point = 500
	# retrieved_document_ids = set(
	# [
	# doc_id
	# for doc_ids in query_df[":feature.[str].retrieved_document_ids:prompt"].to_list()
	# for doc_id in doc_ids
	# ]
	# )
	# retrieved_document_mask = database_df["document_id"].isin(retrieved_document_ids)
	# num_retrieved_documents = len(retrieved_document_ids)
	# num_additional_samples = num_sampled_point - num_retrieved_documents
	# unretrieved_document_mask = ~retrieved_document_mask
	# sampled_unretrieved_document_ids = set(
	# database_df[unretrieved_document_mask]["document_id"]
	# .sample(n=num_additional_samples, random_state=0)
	# .to_list()
	# )
	# sampled_unretrieved_document_mask = database_df["document_id"].isin(
	# sampled_unretrieved_document_ids
	# )
	# sampled_document_mask = retrieved_document_mask \| sampled_unretrieved_document_mask
	# sampled_database_df = database_df[sampled_document_mask]

	# database_schema = px.Schema(
	# prediction_id_column_name="document_id",
	# prompt_column_names=px.EmbeddingColumnNames(
	# vector_column_name="text_vector",
	# raw_data_column_name="text",
	# ),
	# )
	# database_ds = px.Dataset(
	# dataframe=sampled_database_df,
	# schema=database_schema,
	# name="database",
	# )

	"""Define your query dataset. Because the query dataframe is in OpenInference format, Phoenix is able to infer the meaning of each column without a user-defined schema by using the `phoenix.Dataset.from_open_inference` class method."""

	# query_ds = px.Dataset.from_open_inference(query_df)

	"""Launch Phoenix. Follow the instructions in the cell output to open the Phoenix UI."""

	# session = px.launch_app(primary=query_ds, corpus=database_ds)