version 2 commit
Files changed:
- Code/app.py +44 -0
- app.py +73 -0
- requirements.txt +5 -0
- util.py +115 -0
Code/app.py
ADDED
@@ -0,0 +1,44 @@
import random

# Sample dataset and transition matrix
dataset = [
    "Once upon a time",
    "there was a cat",
    "The cat loved to",
    "explore the forest",
    "One day, it found",
    "a hidden treasure",
    "The treasure was",
    "guarded by a dragon",
]

transition_matrix = {
    "Once upon a time": {"there was a cat": 1.0},
    "there was a cat": {"The cat loved to": 1.0},
    "The cat loved to": {"explore the forest": 1.0},
    "explore the forest": {"One day, it found": 1.0},
    "One day, it found": {"a hidden treasure": 1.0},
    "a hidden treasure": {"The treasure was": 1.0},
    "The treasure was": {"guarded by a dragon": 1.0},
    "guarded by a dragon": {"The end": 1.0},
}

# Generate a story
def generate_story(transition_matrix, initial_state, length=5):
    current_state = initial_state
    story = [current_state]

    for _ in range(length):
        next_state_options = transition_matrix.get(current_state, {})
        if not next_state_options:
            break
        # Sample the next state, weighted by transition probability
        next_state = random.choices(
            list(next_state_options.keys()),
            weights=list(next_state_options.values()),
        )[0]
        story.append(next_state)
        current_state = next_state

    return " ".join(story)

# Generate a story starting from a specific state
initial_state = "Once upon a time"
generated_story = generate_story(transition_matrix, initial_state)
print(generated_story)
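Every state in transition_matrix above has exactly one successor with weight 1.0, so generate_story always emits the same sentence. A minimal sketch of how weighted branching changes the behavior, reusing generate_story as-is (branching_matrix and the extra continuation are made-up illustrations, not part of this commit):

branching_matrix = {
    "Once upon a time": {"there was a cat": 1.0},
    "there was a cat": {"The cat loved to": 1.0},
    # Two possible continuations, sampled roughly 70% / 30% of the time
    "The cat loved to": {"explore the forest": 0.7, "nap in the sun": 0.3},
    "explore the forest": {"One day, it found": 1.0},
    "One day, it found": {"a hidden treasure": 1.0},
}

# Repeated runs now diverge at the branch; "nap in the sun" has no
# outgoing transitions, so generate_story stops early on that path.
print(generate_story(branching_matrix, "Once upon a time"))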
app.py
ADDED
@@ -0,0 +1,73 @@
import streamlit as st
import random
import time
import git
import os
from util import clone_repo
from util import generate_assistant_response
from google.api_core.exceptions import InternalServerError

# NOTE: module-level state resets on every Streamlit rerun, so this flag only
# guards within a single run; clone_repo itself skips an existing clone.
flag = False
repo = st.text_input(label="Paste the .git link of repository.")
btn = st.button(label="Submit")
if btn and not flag:
    clone_repo(repo=repo)
    # if os.path.exists("githubCode") and os.path.isdir("githubCode"):
    #     print("File already exists!!")
    # else:
    #     git.Repo.clone_from(repo, "githubCode")
    flag = True

# def response_generator():
#     response = random.choice(
#         [
#             "Hello there! How can I assist you today?",
#             "Hi, human! Is there anything I can help you with?",
#             "Do you need help?",
#         ]
#     )
#     for word in response.split():
#         yield word + " "
#         time.sleep(0.05)


st.title("Simple chat")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Accept user input
if prompt := st.chat_input("What's your question?"):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(prompt)

    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        # Retry transient Gemini server errors a bounded number of times
        MAX_RETRIES = 2
        response = None
        for attempt in range(MAX_RETRIES + 1):
            try:
                response = generate_assistant_response(prompt)
                break
            except InternalServerError:
                if attempt < MAX_RETRIES:
                    time.sleep(2)
        if response is None:
            # Retries exhausted; surface the upstream error message
            response = "500 An internal error has occurred. Please retry or report in https://developers.generativeai.google/guide/troubleshooting"

        print(response)
        st.markdown(response)
        # Add assistant response to chat history
        st.session_state.messages.append({"role": "assistant", "content": response})
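The bounded-retry pattern above can also be factored into a reusable helper so the same policy can wrap any call; a minimal sketch under the same imports (with_retries is a hypothetical name, not something this commit defines):

import time
from google.api_core.exceptions import InternalServerError

def with_retries(fn, max_retries=2, delay=2.0):
    # Call fn(); on a transient InternalServerError, sleep and retry,
    # re-raising once the retry budget is spent.
    for attempt in range(max_retries + 1):
        try:
            return fn()
        except InternalServerError:
            if attempt == max_retries:
                raise
            time.sleep(delay)

# Usage: response = with_retries(lambda: generate_assistant_response(prompt))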
requirements.txt
ADDED
@@ -0,0 +1,5 @@
langchain
langchain_community
google_generativeai
sentence_transformers
chromadb
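Note: app.py and util.py also import streamlit and git (the GitPython package), which do not appear in this list; streamlit and GitPython would presumably need to be installed alongside the packages above for the app to run.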
util.py
ADDED
@@ -0,0 +1,115 @@
import os
import streamlit as st
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
# from langchain.llms.huggingface_pipeline import HuggingFacePipeline
# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate

import google.generativeai as genai

import git  # pip install gitpython

# Read the Gemini API key from the environment; credentials must never be
# hardcoded or committed to a repository.
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

# quantization_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_compute_dtype=torch.bfloat16
# )


model_kwargs = {'device': 'cpu'}
embeddings = HuggingFaceEmbeddings(model_kwargs=model_kwargs)

# tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
# model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2", device_map='auto', quantization_config=quantization_config)

# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=1000)
# llm = HuggingFacePipeline(pipeline=pipe)


def clone_repo(repo):
    # Clone the target repo into ./githubCode, skipping if it is already there
    if os.path.exists("githubCode") and os.path.isdir("githubCode"):
        print("Repo already cloned!!")
    else:
        print("Cloning repo!!")
        git.Repo.clone_from(repo, "githubCode")

# git.Repo.clone_from("https://github.com/Divyansh3021/Github_code_assistant.git", "githubCode")

llm = genai.GenerativeModel('gemini-pro')

def get_folder_paths(directory="githubCode"):
    folder_paths = []
    for root, dirs, files in os.walk(directory):
        if '.git' in dirs:
            # Skip the directory if a .git folder is found
            dirs.remove('.git')
        for dir_name in dirs:
            folder_paths.append(os.path.join(root, dir_name))
    return folder_paths

directory_paths = get_folder_paths()
directory_paths.append("Code")

# Concatenate every recognised source file into Code.txt for indexing
with open("Code.txt", "w", encoding='utf-8') as output:
    for directory_path in directory_paths:
        for filename in os.listdir(directory_path):
            if filename.endswith((".py", ".ipynb", ".js", ".ts")):
                filepath = os.path.join(directory_path, filename)
                with open(filepath, "r", encoding='utf-8') as file:
                    code = file.read()
                output.write(f"Filepath: {filepath}:\n\n")
                output.write(code + "\n\n")
            elif filename.endswith(".txt"):
                filepath = os.path.join(directory_path, filename)
                with open(filepath, "r", encoding="utf-8") as file:
                    code = file.read()
                output.write("Documentation list:\n\n")
                output.write(code + "\n\n")

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader

# for filename in os.listdir(directory_path):
#     if filename.endswith(".txt"):  # Only process .txt files
#         file_path = os.path.join(directory_path, filename)
loader = TextLoader("Code.txt", encoding="utf-8")
pages = loader.load_and_split()

# Split data into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=4000,
    chunk_overlap=200,
    length_function=len,
    add_start_index=True,
)
chunks = text_splitter.split_documents(pages)

# Store data into database
db = Chroma.from_documents(chunks, embedding=embeddings, persist_directory="test_index")
db.persist()

# Load the database
vectordb = Chroma(persist_directory="test_index", embedding_function=embeddings)

# Load the retriever
retriever = vectordb.as_retriever()

# Generate the assistant's response: retrieve relevant context, then prompt Gemini
def generate_assistant_response(question):
    context = retriever.get_relevant_documents(question)
    qna_prompt_template = f"""### [INST] Instruction: You will be provided with questions and context. Your task is to find the answers to the questions using the given data. If the data doesn't contain the answer to the question, then you must return 'Not enough information.'
Context: ```
{context}
```
### Question: {question} [/INST]"""
    print("Context: ", context)
    answer = llm.generate_content(qna_prompt_template).text
    return answer

# print(generate_assistant_response("Tell me about the instructor_embeddings function."))
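Everything from get_folder_paths downward runs as a side effect of importing util, so the repository has to be present in githubCode before the import happens. A minimal usage sketch under that assumption (the URL is the sample already referenced in the comment above; the question string is illustrative):

import git

# Clone first: util builds and indexes Code.txt at import time.
git.Repo.clone_from("https://github.com/Divyansh3021/Github_code_assistant.git", "githubCode")

from util import generate_assistant_response

print(generate_assistant_response("What does clone_repo do?"))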