Aniun committed on
Commit
6443dbc
1 Parent(s): 3bf7b69

ADD: 1. Initialize the repository and add basic info for GitHub projects with over 1k stars. 2. Add basic semantic search functionality for the GitHub repository.

Browse files
Files changed (1) hide show
  1. chat.py +78 -0
chat.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_openai import OpenAIEmbeddings
2
+ from langchain_community.vectorstores import FAISS
3
+
4
+ import os
5
+ import gradio as gr
6
+ import json
7
+ import os
8
+
9
# The directory that contains this file doubles as the project root.
# dirname() of an absolute, normalised path is already absolute and
# normalised, so both names can share a single value.
current_file_path = root_path = os.path.dirname(os.path.abspath(__file__))
12
+
13
class RepoSearch:
    """Semantic search over a local FAISS index of repository metadata.

    Every document returned by the index is expected to carry a JSON object
    in ``page_content``; its ``name``, ``description`` and ``html_url`` keys
    are used to render the result list.
    """

    # Descriptions up to this many characters are shown verbatim; longer ones
    # are cut to this length and suffixed with "...".
    MAX_DESCRIPTION_LENGTH = 300

    def __init__(self, api_key=None, base_url="https://api.wlai.vip/v1",
                 model="text-embedding-3-small"):
        """Load the FAISS index stored under ``<root_path>/database/faiss_index``.

        Args:
            api_key: Key passed to ``OpenAIEmbeddings``. When omitted, the
                ``OPENAI_API_KEY`` environment variable is used.
            base_url: Base URL passed to ``OpenAIEmbeddings``.
            model: Embedding model name passed to ``OpenAIEmbeddings``.

        Raises:
            RuntimeError: If no API key was supplied or configured.
            FileNotFoundError: If the index directory does not exist.
        """
        # Credentials must never live in source control. The key that used to
        # be hard-coded here has been published and should be revoked.
        api_key = api_key or os.environ.get("OPENAI_API_KEY")
        if not api_key:
            raise RuntimeError(
                "No embeddings API key configured: pass api_key=... or set "
                "the OPENAI_API_KEY environment variable."
            )

        db_path = os.path.join(root_path, "database", "faiss_index")
        # An explicit exception instead of `assert`, which is stripped by -O.
        if not os.path.exists(db_path):
            raise FileNotFoundError(f"Database not found: {db_path}")

        embeddings = OpenAIEmbeddings(api_key=api_key,
                                      base_url=base_url,
                                      model=model)

        # NOTE(security): this flag enables pickle-based loading of the index.
        # That is only acceptable because the index ships with the project;
        # never point db_path at a file obtained from an untrusted source.
        self.vector_db = FAISS.load_local(db_path, embeddings,
                                          allow_dangerous_deserialization=True)

    def search(self, query, k=10):
        """Return the ``k`` best matches for ``query`` as one Markdown string.

        Args:
            query: Free-text search phrase. The word " technology" is appended
                before the similarity search is run.
            k: Maximum number of results requested from the index.

        Returns:
            One line per hit in the form
            ``**<rank>. <name>** || **Description:** <desc> || **Url:** <url>``,
            each terminated by a newline. The string is empty when the index
            returns no hits.
        """
        results = self.vector_db.similarity_search(query + " technology", k=k)

        limit = self.MAX_DESCRIPTION_LENGTH
        lines = []
        for rank, doc in enumerate(results, start=1):
            content = json.loads(doc.page_content)
            # A missing or null description is rendered as an empty string.
            desc = content.get("description") or ""
            # Only add the ellipsis when text was actually cut off; a
            # description of exactly `limit` characters is shown unchanged.
            if len(desc) > limit:
                desc = desc[:limit] + "..."
            lines.append(
                f"\t**{rank}. {content['name']}** || **Description:** {desc} || **Url:** {content['html_url']} \n"
            )

        return "".join(lines)
40
+
41
+
42
def main():
    """Build the Gradio chat page around a ``RepoSearch`` instance and launch it."""
    engine = RepoSearch()

    def respond(
        prompt: str,
        history,
    ):
        """Yield the transcript twice: with the user turn, then with the answer.

        Args:
            prompt: Text submitted from the textbox.
            history: Current chatbot messages; an empty or missing value is
                replaced by a transcript holding only the system message.
        """
        history = history or [{"role": "system", "content": "You are a friendly chatbot"}]
        history.append({"role": "user", "content": prompt})

        # First update: show the user's message before the search runs.
        yield history

        # Second update: the same transcript plus the formatted search hits.
        reply = {"role": "assistant", "content": engine.search(prompt)}
        yield [*history, reply]

    # (user avatar, assistant avatar); None keeps the default for the user.
    avatars = (
        None,
        "https://img1.baidu.com/it/u=2193901176,1740242983&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=500",
    )

    with gr.Blocks() as app:
        gr.Markdown("## Semantic github search (基于语义的 github 仓库搜索) 🌐")
        chat_view = gr.Chatbot(
            label="Agent",
            type="messages",
            avatar_images=avatars,
            height="65vh",
        )
        message_box = gr.Textbox(max_lines=2, label="Chat Message")
        # Two handlers on the same submit event: one answers the query, the
        # other clears the textbox.
        message_box.submit(respond, [message_box, chat_view], [chat_view])
        message_box.submit(lambda: "", None, [message_box])

    app.launch(share=True)
75
+
76
+
77
# Start the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()