Chen committed on
Commit
182cbb1
1 Parent(s): 374292c

app.py fixable to match hugging-face's app portal

Browse files
Files changed (3) hide show
  1. Makefile +1 -1
  2. app.py +59 -34
  3. pyproject.toml +1 -2
Makefile CHANGED
@@ -8,7 +8,7 @@ format:
8
  lint:
9
  mypy .
10
  black . --check
11
- ruff check .
12
 
13
  test:
14
  pytest tests
 
8
  lint:
9
  mypy .
10
  black . --check
11
+ ruff check . --fix
12
 
13
  test:
14
  pytest tests
app.py CHANGED
@@ -1,13 +1,11 @@
1
- import os
2
- from llama_index import SimpleDirectoryReader
3
- from llama_index.node_parser import SimpleNodeParser
4
- from llama_index.data_structs.node import Node, DocumentRelationship
5
- from llama_index import VectorStoreIndex
6
- from llama_index import LLMPredictor, VectorStoreIndex, ServiceContext
7
  from langchain.llms import AzureOpenAI
8
  from langchain.embeddings.openai import OpenAIEmbeddings
9
- from llama_index import LangchainEmbedding, ServiceContext
10
- from llama_index import StorageContext, load_index_from_storage
 
11
 
12
  import logging
13
  import sys
@@ -18,24 +16,32 @@ logging.basicConfig(
18
  ) # logging.DEBUG for more verbose output
19
  logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- def main() -> None:
23
- documents = SimpleDirectoryReader("./data").load_data()
24
-
25
- # index = VectorStoreIndex.from_documents(documents)
26
-
27
- # parser = SimpleNodeParser()
28
- # nodes = parser.get_nodes_from_documents(documents)
29
- # index = VectorStoreIndex(nodes)
30
 
 
 
31
  # define embedding
32
  embedding = LangchainEmbedding(OpenAIEmbeddings(client=None, chunk_size=1))
33
  # define LLM
34
  llm_predictor = LLMPredictor(
35
  llm=AzureOpenAI(
36
- client=None,
37
  deployment_name="text-davinci-003",
38
  model="text-davinci-003",
 
39
  )
40
  )
41
 
@@ -43,26 +49,45 @@ def main() -> None:
43
  service_context = ServiceContext.from_defaults(
44
  llm_predictor=llm_predictor, embed_model=embedding
45
  )
 
 
 
 
 
46
 
47
- # build index
48
- index = VectorStoreIndex.from_documents(
49
- documents,
50
- service_context=service_context,
51
- )
 
 
 
 
 
 
 
 
 
52
 
53
- index.storage_context.persist(persist_dir="./dataset")
54
- storage_context = StorageContext.from_defaults(persist_dir="./dataset")
55
- index = load_index_from_storage(
56
- storage_context=storage_context, service_context=service_context
57
- )
58
 
59
- # index.vector_store.persist("./dataset")
60
- # query with embed_model specified
61
- query_engine = index.as_query_engine(
62
- retriever_mode="embedding", verbose=True, service_context=service_context
63
- )
64
- response = query_engine.query("请帮忙推荐一杯咖啡给我,我喜欢咖啡因")
65
- print(response)
 
 
 
 
 
 
 
 
 
66
 
67
 
68
  if __name__ == "__main__":
 
1
+ from llama_hub.github_repo import GithubRepositoryReader, GithubClient
2
+ from llama_index import download_loader, GPTVectorStoreIndex
3
+ from llama_index import LLMPredictor, ServiceContext, LangchainEmbedding
 
 
 
4
  from langchain.llms import AzureOpenAI
5
  from langchain.embeddings.openai import OpenAIEmbeddings
6
+ import os
7
+ import pickle
8
+ import streamlit as st
9
 
10
  import logging
11
  import sys
 
16
  ) # logging.DEBUG for more verbose output
17
  logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
18
 
19
+ # Sidebar contents
20
+ with st.sidebar:
21
+ st.title("🤗💬 LLM Chat App")
22
+ st.markdown(
23
+ """
24
+ ## About
25
+ This app is an LLM-powered chatbot built using:
26
+ - [Streamlit](https://streamlit.io/)
27
+ - [LangChain](https://python.langchain.com/)
28
+ - [X-Pipe](https://github.com/ctripcorp/x-pipe)
29
+ """
30
+ )
31
+ # add_vertical_space(5)
32
+ st.write("Made by Nick")
33
 
 
 
 
 
 
 
 
 
34
 
35
+ def main() -> None:
36
+ st.header("X-Pipe Wiki 机器人 💬")
37
  # define embedding
38
  embedding = LangchainEmbedding(OpenAIEmbeddings(client=None, chunk_size=1))
39
  # define LLM
40
  llm_predictor = LLMPredictor(
41
  llm=AzureOpenAI(
 
42
  deployment_name="text-davinci-003",
43
  model="text-davinci-003",
44
+ client=None,
45
  )
46
  )
47
 
 
49
  service_context = ServiceContext.from_defaults(
50
  llm_predictor=llm_predictor, embed_model=embedding
51
  )
52
+ download_loader("GithubRepositoryReader")
53
+ docs = None
54
+ if os.path.exists("docs/docs.pkl"):
55
+ with open("docs/docs.pkl", "rb") as f:
56
+ docs = pickle.load(f)
57
 
58
+ if docs is None:
59
+ github_client = GithubClient(os.getenv("GITHUB_TOKEN"))
60
+ loader = GithubRepositoryReader(
61
+ github_client,
62
+ owner="ctripcorp",
63
+ repo="x-pipe",
64
+ filter_directories=(
65
+ [".", "doc"],
66
+ GithubRepositoryReader.FilterType.INCLUDE,
67
+ ),
68
+ filter_file_extensions=([".md"], GithubRepositoryReader.FilterType.INCLUDE),
69
+ verbose=True,
70
+ concurrent_requests=10,
71
+ )
72
 
73
+ docs = loader.load_data(branch="master")
 
 
 
 
74
 
75
+ with open("docs/docs.pkl", "wb") as f:
76
+ pickle.dump(docs, f)
77
+
78
+ index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
79
+
80
+ query_engine = index.as_query_engine(service_context=service_context)
81
+
82
+ query = st.text_input("X-Pipe Wiki 问题:")
83
+ if query:
84
+ index = GPTVectorStoreIndex.from_documents(
85
+ docs, service_context=service_context
86
+ )
87
+
88
+ query_engine = index.as_query_engine(service_context=service_context)
89
+ response = query_engine.query(query)
90
+ st.write(response)
91
 
92
 
93
  if __name__ == "__main__":
pyproject.toml CHANGED
@@ -1,7 +1,7 @@
1
  [tool.mypy]
2
  ignore_missing_imports = "True"
3
  disallow_untyped_defs = "True"
4
- exclude = ["notebooks", "build", "examples", "docs", "dataset", "app.py", "github_retriever.py"]
5
 
6
  [tool.ruff]
7
  exclude = [
@@ -14,6 +14,5 @@ exclude = [
14
  "notebooks",
15
  "docs",
16
  "dataset",
17
- "app.py",
18
  "github_retriever.py"
19
  ]
 
1
  [tool.mypy]
2
  ignore_missing_imports = "True"
3
  disallow_untyped_defs = "True"
4
+ exclude = ["notebooks", "build", "examples", "docs", "dataset", "github_retriever.py"]
5
 
6
  [tool.ruff]
7
  exclude = [
 
14
  "notebooks",
15
  "docs",
16
  "dataset",
 
17
  "github_retriever.py"
18
  ]