Luhan1 committed on
Commit
9475cd2
1 Parent(s): faeba93

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -0
app.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from langchain.chat_models import AzureChatOpenAI
4
+ from langchain.schema import format_document
5
+ from langchain.embeddings.openai import OpenAIEmbeddings
6
+ from langchain.prompts import PromptTemplate
7
+ from langchain.prompts import ChatPromptTemplate
8
+ from langchain.schema.output_parser import StrOutputParser
9
+ from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
10
+ from operator import itemgetter
11
+ # import socks
12
+ # import socket
13
+ # import requests
14
+
15
+ # # Set up the SOCKS5 proxy and its authentication credentials
16
+ # socks.set_default_proxy(socks.SOCKS5, "sftp-v-proxy.szh.internet.bosch.com", 1080, True, os.environ["PROXY_USER"], os.environ["PROXY_PASSWORD"])
17
+ #
18
+ # # Route all default socket connections through the SOCKS5 proxy
19
+ # socket.socket = socks.socksocket
20
+
21
# Azure OpenAI connection settings, exported as environment variables
# (presumably picked up by the LangChain clients constructed in this file).
# The non-secret values keep their original defaults but can be overridden
# from the environment.
os.environ.setdefault("OPENAI_API_TYPE", 'azure')
os.environ.setdefault("OPENAI_API_VERSION", '2023-07-01-preview')
os.environ.setdefault("OPENAI_API_BASE", 'https://ostingpteu.openai.azure.com/')

# The API key is a credential: it must be supplied through the environment
# (for example as a deployment secret) and never be committed to source.
# NOTE(review): the key that used to be hard-coded here should be treated as
# leaked and rotated.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; provide the Azure OpenAI key through the "
        "environment instead of hard-coding it in app.py."
    )

# Chat model used for the answering step of the chain.
llm = AzureChatOpenAI(deployment_name='OstinAIEU', model_name="gpt-35-turbo")
27
+ import time
28
+ from langchain.vectorstores import Weaviate
29
+ import weaviate
30
+
31
# Address of the Weaviate instance. The original endpoint stays the default,
# but a non-empty WEAVIATE_URL environment variable now takes precedence so
# the app can be pointed at another instance without a code change.
WEAVIATE_URL = os.environ.get("WEAVIATE_URL") or 'http://40.81.20.137:8080'
client = weaviate.Client(url=WEAVIATE_URL)

# NOTE(review): `embedding` and `vectordb` are created here but nothing else
# visible in this file references them; they are kept so that the module's
# names and start-up behaviour stay unchanged.
embedding = OpenAIEmbeddings(deployment="ostinembedding")
vectordb = Weaviate(client=client, index_name="GS_data", text_key="text")
38
+
39
+ from langchain.retrievers.weaviate_hybrid_search import WeaviateHybridSearchRetriever
40
+ from langchain.schema import Document
41
+
42
# Metadata attributes requested for each hit (presumably surfaced as the
# retrieved documents' metadata -- confirm against the retriever docs).
_RETRIEVED_ATTRIBUTES = ['title', 'update_time', 'source_name', 'url']

# Hybrid-search retriever over the "GS_data" index; note that no metadata
# filter is configured here.
retriever = WeaviateHybridSearchRetriever(
    k=5,
    client=client,
    text_key="text",
    index_name="GS_data",
    attributes=_RETRIEVED_ATTRIBUTES,
    create_schema_if_missing=True,
)
51
+
52
+ from typing import List
53
+
54
+
55
+ def _format_docs(docs: List[Document]) -> str:
56
+ buffer = ''
57
+ for doc in docs:
58
+ # Start with the document's title if available
59
+ # doc_string = f"Title: {doc.metadata.get('title', 'No Title')}\n"
60
+ # Iterate over all metadata key-value pairs
61
+ doc_string = ''
62
+ for key, value in doc.metadata.items():
63
+ doc_string += f"{key.capitalize()}: {value}\n"
64
+
65
+ # Adding this document's string to the buffer
66
+ buffer += doc_string + '\n' # Added an extra newline for separation between documents
67
+
68
+ return buffer
69
+
70
+
71
# Default per-document prompt: renders only the document's page content.
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")


def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    """Format each document with ``document_prompt`` and join the results.

    Args:
        docs: Iterable of documents to render.
        document_prompt: Prompt passed to ``format_document`` for every item.
        document_separator: String placed between the rendered documents.

    Returns:
        A single string containing all rendered documents.
    """
    rendered = (format_document(item, document_prompt) for item in docs)
    return document_separator.join(rendered)
79
+
80
+
81
+
82
# Prompt for the answering step: instructions, then the retrieved context,
# then the user's question. The instruction paragraph starts and ends with a
# literal double-quote character (the fourth quote after the opening
# delimiter is part of the text).
template = """"You are an expert, tasked to answer any question about Global Business Services (GS). Using the
provided context, answer the user's question to the best of your ability using the resources provided. Generate a
comprehensive and informative answer (but no more than 80 words) for a given question based solely on the context.
Use an unbiased and journalistic tone. Combine search results together into a coherent answer. Do not repeat text.
If there is nothing in the context relevant to the question at hand, just say "Sorry, I'm not sure. Could you provide
more information?" Don't try to make up an answer. You should use bullet points in your answer for readability."
{context}

Question: {question}
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(template)
93
+
94
+
95
def ans_format(ans) -> str:
    """Combine the answer text and the source listing into the final reply.

    Args:
        ans: Mapping with the keys ``'answer'`` (answer text) and
            ``'sources'`` (pre-formatted source listing).

    Returns:
        The answer followed by a "Here are the sources:" section. When the
        source listing is empty or only whitespace, just the answer is
        returned so the reply does not end with a header and nothing under it.
    """
    answer = ans['answer']
    sources = ans['sources']
    if not sources or not str(sources).strip():
        return f"{answer}"
    return f"{answer} \n\n \n\nHere are the sources:\n{sources}"
99
+
100
def _context_from_docs(state):
    """Join the documents in ``state["docs"]`` into the prompt context."""
    return _combine_documents(state["docs"])


def _answer_text(state):
    """Return the ``content`` of the LLM output stored in ``state["answer"]``."""
    return state["answer"].content


def _sources_text(state):
    """Format the documents in ``state["docs"]`` as the source listing."""
    return _format_docs(state["docs"])


# Step 1: run the retriever on the question and attach the hits as "docs".
retrieved_documents = RunnablePassthrough.assign(docs=itemgetter('question') | retriever)

# Step 2: build the variables consumed by the answer prompt.
final_inputs = {
    "context": _context_from_docs,
    "question": itemgetter("question"),
}

# Step 3: ask the LLM, carrying the retrieved documents along for later use.
answer = {
    "answer": final_inputs | ANSWER_PROMPT | llm,
    "docs": itemgetter("docs"),
}

# Step 4: turn the LLM output and the documents into one display string,
# exposed under the "ans" key.
organized_ans = {
    'ans': {
        'answer': _answer_text,
        'sources': _sources_text,
    }
    | RunnableLambda(ans_format)
    | StrOutputParser()
}

# Full pipeline: retrieve -> answer -> format.
final_chain = retrieved_documents | answer | organized_ans | RunnablePassthrough()
127
+
128
+
129
def response(msg: str) -> str:
    """Answer a user question through the retrieval chain.

    Args:
        msg: The question typed by the user.

    Returns:
        The formatted reply found under the ``'ans'`` key of the chain
        output, or a short prompt asking for a question when ``msg`` is
        empty, ``None`` or only whitespace.
    """
    if not msg or not msg.strip():
        # Nothing to search for: skip the retrieval and LLM round-trips.
        return "Please enter a question."
    inp = {'question': msg}
    return final_chain.invoke(inp)['ans']
132
+
133
+
134
# Gradio UI: a two-line question box wired to `response`, with text output.
demo = gr.Interface(
    fn=response,
    inputs=gr.Textbox(lines=2, placeholder="Ask Here..."),
    outputs="text",
)
demo.launch()