Divyanshh committed
Commit 670064c
1 Parent(s): 2d6bddd

version 2 commit

Files changed (4)
  1. Code/app.py +44 -0
  2. app.py +73 -0
  3. requirements.txt +5 -0
  4. util.py +115 -0
Code/app.py ADDED
@@ -0,0 +1,44 @@
import random

# Sample dataset and transition matrix
dataset = [
    "Once upon a time",
    "there was a cat",
    "The cat loved to",
    "explore the forest",
    "One day, it found",
    "a hidden treasure",
    "The treasure was",
    "guarded by a dragon",
]

transition_matrix = {
    "Once upon a time": {"there was a cat": 1.0},
    "there was a cat": {"The cat loved to": 1.0},
    "The cat loved to": {"explore the forest": 1.0},
    "explore the forest": {"One day, it found": 1.0},
    "One day, it found": {"a hidden treasure": 1.0},
    "a hidden treasure": {"The treasure was": 1.0},
    "The treasure was": {"guarded by a dragon": 1.0},
    "guarded by a dragon": {"The end": 1.0},
}

# Generate a story by walking the transition matrix
def generate_story(transition_matrix, initial_state, length=5):
    current_state = initial_state
    story = [current_state]

    for _ in range(length):
        next_state_options = transition_matrix.get(current_state, {})
        if not next_state_options:
            break
        # Weighted random choice among the possible next states
        next_state = random.choices(
            list(next_state_options.keys()),
            weights=list(next_state_options.values()),
        )[0]
        story.append(next_state)
        current_state = next_state

    return " ".join(story)

# Generate a story starting from a specific state
initial_state = "Once upon a time"
generated_story = generate_story(transition_matrix, initial_state)
print(generated_story)
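Note that this transition matrix is fully deterministic (every state has exactly one successor, each with weight 1.0), so generate_story always prints the same sentence. A minimal sketch of what branching would look like; the extra states and weights below are invented for illustration and are not part of this commit:

# Hypothetical branching matrix: the weights in each row should sum to 1.0.
branching_matrix = {
    "Once upon a time": {"there was a cat": 0.6, "there was a dragon": 0.4},
    "there was a cat": {"The cat loved to": 1.0},
    "there was a dragon": {"guarded by a dragon": 1.0},
}
# generate_story(branching_matrix, "Once upon a time") now varies between runs,
# because random.choices samples successors in proportion to their weights.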
app.py ADDED
@@ -0,0 +1,73 @@
import time

import streamlit as st

# Importing from util runs its module-level indexing code (see util.py).
from util import clone_repo, generate_assistant_response
from google.api_core.exceptions import InternalServerError

repo = st.text_input(label="Paste the .git link of the repository.")
btn = st.button(label="Submit")
# Track the clone in session state; a plain module-level flag would
# reset to False on every Streamlit rerun of this script.
if btn and not st.session_state.get("repo_cloned", False):
    clone_repo(repo=repo)
    st.session_state["repo_cloned"] = True

st.title("Simple chat")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Accept user input
if prompt := st.chat_input("What's your question?"):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(prompt)

    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        MAX_RETRIES = 2
        response = None
        for attempt in range(MAX_RETRIES + 1):
            try:
                response = generate_assistant_response(prompt)
                break
            except InternalServerError:
                if attempt < MAX_RETRIES:
                    time.sleep(2)  # brief pause before retrying
                else:
                    # Retries exhausted; surface the error to the user
                    response = ("500 An internal error has occurred. Please retry or report at "
                                "https://developers.generativeai.google/guide/troubleshooting")

        print(response)
        st.markdown(response)
        # Add assistant response to chat history
        st.session_state.messages.append({"role": "assistant", "content": response})
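The retry handling is inlined in the chat callback above; factoring it into a small helper keeps the callback flat. This is a sketch under the same assumption (transient failures surface as InternalServerError); the name with_retries is hypothetical and not part of this commit:

import time
from google.api_core.exceptions import InternalServerError

def with_retries(fn, *args, max_retries=2, delay=2.0, **kwargs):
    """Call fn, retrying on InternalServerError with a fixed delay."""
    for attempt in range(max_retries + 1):
        try:
            return fn(*args, **kwargs)
        except InternalServerError:
            if attempt == max_retries:
                raise  # out of retries; let the caller decide what to show
            time.sleep(delay)

# Usage: response = with_retries(generate_assistant_response, prompt)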
requirements.txt ADDED
@@ -0,0 +1,5 @@
langchain
langchain_community
google-generativeai
sentence_transformers
chromadb
# imported by app.py and util.py but missing from the original list
streamlit
gitpython
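With nothing pinned to a version, a typical local setup and launch (assuming a recent Python with pip on PATH) is:

pip install -r requirements.txt
streamlit run app.py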
util.py ADDED
@@ -0,0 +1,115 @@
import os

import git  # pip install gitpython
import google.generativeai as genai
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Read the Gemini API key from the environment instead of hard-coding a secret.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

# CPU-only embeddings with the default sentence-transformers model
model_kwargs = {"device": "cpu"}
embeddings = HuggingFaceEmbeddings(model_kwargs=model_kwargs)


def clone_repo(repo):
    if os.path.exists("githubCode") and os.path.isdir("githubCode"):
        print("Repo already cloned!!")
    else:
        print("Cloning repo!!")
        git.Repo.clone_from(repo, "githubCode")

# Example: git.Repo.clone_from("https://github.com/Divyansh3021/Github_code_assistant.git", "githubCode")

llm = genai.GenerativeModel("gemini-pro")


def get_folder_paths(directory="githubCode"):
    # Include the repo root itself (when present) so its top-level files are indexed too.
    folder_paths = [directory] if os.path.isdir(directory) else []
    for root, dirs, files in os.walk(directory):
        if ".git" in dirs:
            # Skip the .git directory
            dirs.remove(".git")
        for dir_name in dirs:
            folder_paths.append(os.path.join(root, dir_name))
    return folder_paths


directory_paths = get_folder_paths()
directory_paths.append("Code")

# Concatenate every source and text file into a single Code.txt corpus.
with open("Code.txt", "w", encoding="utf-8") as output:
    for directory_path in directory_paths:
        for filename in os.listdir(directory_path):
            if filename.endswith((".py", ".ipynb", ".js", ".ts")):
                filepath = os.path.join(directory_path, filename)
                with open(filepath, "r", encoding="utf-8") as file:
                    code = file.read()
                output.write(f"Filepath: {filepath}:\n\n")
                output.write(code + "\n\n")
            elif filename.endswith(".txt"):
                filepath = os.path.join(directory_path, filename)
                with open(filepath, "r", encoding="utf-8") as file:
                    code = file.read()
                output.write("Documentation list:\n\n")
                output.write(code + "\n\n")

loader = TextLoader("Code.txt", encoding="utf-8")
pages = loader.load_and_split()

# Split data into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=4000,
    chunk_overlap=200,
    length_function=len,
    add_start_index=True,
)
chunks = text_splitter.split_documents(pages)

# Store the chunks in a persistent Chroma database
db = Chroma.from_documents(chunks, embedding=embeddings, persist_directory="test_index")
db.persist()

# Load the database
vectordb = Chroma(persist_directory="test_index", embedding_function=embeddings)

# Load the retriever
retriever = vectordb.as_retriever()


# Generate the assistant's response: retrieve relevant chunks, then ask Gemini.
def generate_assistant_response(question):
    context = retriever.get_relevant_documents(question)
    qna_prompt_template = f"""### [INST] Instruction: You will be provided with questions and context. Your task is to find the answers to the questions using the given data. If the data doesn't contain the answer to the question, then you must return 'Not enough information.'
Context: ```
{context}
```
### Question: {question} [/INST]"""
    print("Context: ", context)
    answer = llm.generate_content(qna_prompt_template).text
    return answer

# print(generate_assistant_response("Tell me about the instructor_embeddings function."))
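End to end, the module is meant to be used roughly as below (a sketch; the repository URL is the one left in the source's example comment). One caveat follows from the code above: the Code.txt aggregation and Chroma indexing run at import time, so on a first run the import indexes an empty corpus, and only a rerun after the clone exists indexes the repository.

from util import clone_repo, generate_assistant_response

clone_repo("https://github.com/Divyansh3021/Github_code_assistant.git")
print(generate_assistant_response("Tell me about the instructor_embeddings function."))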