pkraman06 committed on
Commit
8dbf12b
·
verified ·
1 Parent(s): 14b6034

Create code_indexer.py

Browse files
Files changed (1) hide show
  1. memory/code_indexer.py +34 -0
memory/code_indexer.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from langchain.docstore.document import Document
4
+
5
+ from memory.vector_store import create_vector_store
6
+
7
+
8
# File-name suffixes that load_code_files treats as indexable source code.
SUPPORTED_EXTENSIONS = [".py", ".js", ".ts", ".java", ".cpp"]
9
+
10
+
11
def load_code_files(repo_path):
    """Collect supported source files under ``repo_path`` as documents.

    Walks the directory tree with ``os.walk`` and, for every file whose name
    ends with one of ``SUPPORTED_EXTENSIONS``, reads it as UTF-8 text and
    wraps it in a ``Document`` whose ``metadata["source"]`` is the file path.

    Files that cannot be opened/read (``OSError``) or that are not valid
    UTF-8 (``UnicodeDecodeError``) are logged at warning level and skipped,
    so a single bad file does not abort the whole scan.

    Args:
        repo_path: Root directory to scan.

    Returns:
        A list of ``Document`` objects, one per readable matching file.
    """
    import logging  # local import keeps this block self-contained

    logger = logging.getLogger(__name__)
    # str.endswith accepts a tuple of suffixes, so one call covers them all.
    suffixes = tuple(SUPPORTED_EXTENSIONS)
    documents = []

    for root, _, files in os.walk(repo_path):
        for file in files:
            if not file.endswith(suffixes):
                continue

            path = os.path.join(root, file)
            try:
                with open(path, "r", encoding="utf-8") as f:
                    content = f.read()
            except (OSError, UnicodeDecodeError) as exc:
                logger.warning("Skipping unreadable file %s: %s", path, exc)
                continue

            documents.append(
                Document(page_content=content, metadata={"source": path})
            )

    return documents
30
+
31
+
32
def index_codebase(repo_path):
    """Build a vector store from the source files under ``repo_path``.

    Loads documents with ``load_code_files`` and hands them to
    ``create_vector_store``, returning whatever that call returns.

    Args:
        repo_path: Root directory of the codebase to index.
    """
    docs = load_code_files(repo_path)
    return create_vector_store(docs)