import os
from operator import itemgetter
from typing import List

import gradio as gr
import weaviate
from langchain.chat_models import AzureChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.retrievers.weaviate_hybrid_search import WeaviateHybridSearchRetriever
from langchain.schema import Document, format_document
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
from langchain.vectorstores import Weaviate

# import socks
# import socket
# import requests

# # Configure the SOCKS5 proxy and authentication info
# socks.set_default_proxy(socks.SOCKS5, "sftp-v-proxy.szh.internet.bosch.com", 1080, True, 'zfn3wx_ftp', 'Bosch@123')
#
# # Redirect default socket connections through the SOCKS5 proxy
# socket.socket = socks.socksocket

# Azure OpenAI credentials and endpoint
os.environ["OPENAI_API_KEY"] = '8b3bb832d6ef4a019a6fbddb4986cb9b'
os.environ["OPENAI_API_TYPE"] = 'azure'
os.environ["OPENAI_API_VERSION"] = '2023-07-01-preview'
os.environ["OPENAI_API_BASE"] = 'https://ostingpteu.openai.azure.com/'

# Chat model used to generate the final answers
llm = AzureChatOpenAI(deployment_name='OstinAIEU', model_name="gpt-35-turbo")

WEAVIATE_URL = 'http://40.81.20.137:8080'
client = weaviate.Client(
    url=WEAVIATE_URL
)
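
# Optional connectivity sketch (assumes the Weaviate instance above is reachable
# from this machine):
#
#   if not client.is_ready():
#       raise RuntimeError(f"Weaviate at {WEAVIATE_URL} is not ready")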

# Embedding model and vector-store handle (defined here but not used below;
# the hybrid retriever queries Weaviate directly)
embedding = OpenAIEmbeddings(deployment="ostinembedding")
vectordb = Weaviate(client=client, index_name="GS_data", text_key="text")

# Hybrid (BM25 + vector) retriever over the GS_data index; `attributes` lists the
# metadata fields returned with each retrieved document
retriever = WeaviateHybridSearchRetriever(
    client=client,
    index_name="GS_data",
    text_key="text",
    attributes=['title', 'update_time', 'source_name', 'url'],
    create_schema_if_missing=True,
    k=5,
)
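
# Minimal retrieval sketch (hypothetical query; results depend on what has been
# indexed into GS_data):
#
#   docs = retriever.get_relevant_documents("What does GS do?")
#   for d in docs:
#       print(d.metadata.get('title'), d.metadata.get('url'))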


def _format_docs(docs: List[Document]) -> str:
    """Render every metadata key/value of each document as "Key: value" lines."""
    buffer = ''
    for doc in docs:
        doc_string = ''
        for key, value in doc.metadata.items():
            doc_string += f"{key.capitalize()}: {value}\n"
        # Extra newline to separate documents
        buffer += doc_string + '\n'
    return buffer
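
# Sketch of the output for a document with made-up metadata
# {'title': 'GS Overview', 'url': 'https://example.invalid/gs'}:
#
#   Title: GS Overview
#   Url: https://example.invalid/gs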


# By default, each document is rendered as its raw page content
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")


def _combine_documents(
        docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    """Format each document with `document_prompt` and join them into one context string."""
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    return document_separator.join(doc_strings)
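
# A tiny self-contained check (hypothetical contents):
#
#   demo = [Document(page_content="foo"), Document(page_content="bar")]
#   assert _combine_documents(demo) == "foo\n\nbar"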



template = """"You are an expert, tasked to answer any question about Global Business Services (GS) . Using the 
provided context, answer the user's question to the best of your ability using the resources provided. Generate a 
comprehensive and informative answer (but no more than 80 words) for a given question based solely on the context. 
Use an unbiased and journalistic tone. Combine search results together into a coherent answer. Do not repeat text 
If there is nothing in the context relevant to the question at hand, just say "Sorry, I'm not sure. Could you provide
more information?" Don't try to make up an answer. You should use bullet points in your answer for readability."
{context}

Question: {question}
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(template)
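
# The prompt can be exercised in isolation (made-up inputs):
#
#   msgs = ANSWER_PROMPT.format_messages(context="GS is ...", question="What is GS?")
#   print(msgs[0].content)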


def ans_format(ans) -> str:
    """Append the formatted source metadata to the model's answer."""
    answer = ans['answer']
    sources = ans['sources']
    return f"{answer}\n\nHere are the sources:\n{sources}"
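
# Example (made-up values):
#
#   ans_format({'answer': 'GS handles shared services.', 'sources': 'Title: GS Overview'})
#   # -> 'GS handles shared services.\n\nHere are the sources:\nTitle: GS Overview'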

# Now we retrieve the documents
retrieved_documents = RunnablePassthrough.assign(docs=itemgetter('question') | retriever)

# Now we construct the inputs for the final prompt
final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "question": itemgetter("question"),
}

# And finally, the step that generates the answer, keeping the docs for source listing
answer = {
    "answer": final_inputs | ANSWER_PROMPT | llm,
    "docs": itemgetter("docs"),
}

# Then we merge the answer text and the formatted sources into a single string
organized_ans = {
    'ans': (
        {
            'answer': lambda x: x["answer"].content,
            'sources': lambda x: _format_docs(x["docs"]),
        }
        | RunnableLambda(ans_format)
        | StrOutputParser()
    )
}

# And now we put it all together!
final_chain = retrieved_documents | answer | organized_ans
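
# End-to-end sketch (requires live Azure OpenAI and Weaviate endpoints; the
# question below is made up):
#
#   result = final_chain.invoke({'question': 'What services does GS provide?'})
#   print(result['ans'])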


def response(msg: str) -> str:
    """Gradio callback: run the RAG chain on the user's question and return the text."""
    inp = {'question': msg}
    return final_chain.invoke(inp)['ans']


# Simple Gradio UI wired to the chain
gr.Interface(fn=response, inputs=gr.Textbox(lines=2, placeholder="Ask Here..."), outputs="text").launch()