from openai import OpenAI
import streamlit as st
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
import markdown
from operator import itemgetter
from langchain_core.prompts import ChatPromptTemplate
from langchain.schema import Document
from dotenv import load_dotenv
from langchain_community.vectorstores import Qdrant
#from langchain_qdrant import Qdrant
import os


load_dotenv()
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
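# The chat model generates the analysis; the embedding model indexes document chunks for retrieval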
base_llm = ChatOpenAI(model="gpt-4o")
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
#========= DATA ================
# UPLOAD DOC
with open("./data/sentiment_index_traffic_index_final.md", "r", encoding="utf-8") as file_content:
    raw_text = file_content.read()

# Render the Markdown source to HTML and wrap it in a single LangChain Document
docs = [Document(page_content=markdown.markdown(raw_text))]
# Split on token counts so each chunk stays well within the embedding context window
split_documents = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1000,
    chunk_overlap=20,
).split_documents(docs)


# Index the chunks in an in-memory Qdrant collection
vectorstore = Qdrant.from_documents(
    split_documents,
    embedding_model,
    location=":memory:",
    collection_name="langchainblogs",
)

print("Loaded Vectorstore")

# Set up the retriever using LangChain
retriever = vectorstore.as_retriever()
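# Note: location=":memory:" means the collection is re-embedded on every Streamlit rerun.
# A rough sketch of persisting it to local disk instead (the "./qdrant_data" path and
# reuse logic are assumptions, not part of the original app):
#
# vectorstore = Qdrant.from_documents(
#     split_documents,
#     embedding_model,
#     path="./qdrant_data",
#     collection_name="langchainblogs",
# )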



#========== APP
st.set_page_config(page_title="LangChain Agent", layout="wide")

st.title("Narrativ 📰")
st.image('./data/Sentiment_index_traffic.png')
st.write('Start by entering a topic into the sidebar.')

sideb = st.sidebar
with st.sidebar:
    prompt = st.text_input("Insert topic: ")

check1 = sideb.button("Submit")

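# Streamlit reruns this script on every interaction, so chat history must live in session_state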
if 'messages' not in st.session_state:
    st.session_state.messages = []

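# On submit, run the RAG chain once over the indexed articles for the chosen topic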
if check1:
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate a summary that rationalizes the dominant sentiment
    RAG_PROMPT = """# Traffic Analyst - Transurban Prompt

    You are a Transurban traffic consultant focusing on the I-495 and I-95 express lanes in the Greater Washington Area (GWA). Your task is to analyze news articles provided by a client on a specific topic. You will receive the full text of the relevant articles for the assigned topic, along with key data points.

    ## Your Tasks:

    ### 1. Summarize Opinions:
    - Extract the key opinions and perspectives from the provided news articles.
    - Each article includes: title, URL, date, text, source, a sentiment index created by Transurban, a sentiment index from a Hugging Face (HF) model, and a confidence score for the HF index.
    - Highlight any significant patterns, agreements, or disagreements across the sources.

    ### 2. Analyze Sentiment:
    - Determine the overall sentiment (positive, negative, neutral) about the topic based on the extracted opinions.
    - Provide a clear explanation of your sentiment conclusion, referencing specific points or trends from the articles.

    ### 3. Provide Chain-of-Thought Reasoning:
    - Detail your reasoning process step by step. Explain how you interpreted the articles, derived insights, and reached your sentiment conclusion.
    - Ensure the reasoning is logical, transparent, and grounded in the content provided.

    ### 4. Collect URL Sources:
    - From the provided context, select 5 critical and recent URL sources related to the topic.

    ## Output Format:

    - **Summary of Opinions:** [Concise summary of key opinions]
    - **Sentiment Analysis:**
      - Sentiment: [Positive/Negative/Neutral]
      - Reasoning: [Detailed explanation here]
    - **Chain-of-Thought Reasoning:** [Step-by-step explanation]
    - **Sources:** [URLs for 5 most critical and recent articles on this topic]

    ## Guidelines:
    - Maintain objectivity and precision in your analysis.
    - Focus on the context specific to the Greater Washington Area.
    - Use professional and analytical language suitable for client reports.
    - Respond in the language of the article (mostly English).

    CONTEXT:
    {context}

    QUERY:
    {question}
    Use the provided context to answer the user's question. Only use the provided context to answer the question. If you do not know the answer, respond with "I don't know".
    """
    rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
    # RAG CHAIN
    lcel_rag_chain = (
            # INVOKE CHAIN WITH: {"question" : "<<SOME USER QUESTION>>"}
            # "question" : populated by getting the value of the "question" key
            # "context"  : populated by piping the "question" value into the retriever
            {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
            # "response" : the "context" and "question" values format the prompt, which is then
            #              piped into the LLM; the LLM output is stored under the "response" key
            # "context"  : carried through so the retrieved documents remain available to the caller
            | {"response": rag_prompt | base_llm, "context": itemgetter("context")}
        )
    
    summary = lcel_rag_chain.invoke({"question": prompt})
    st.chat_message("assistant").write(summary["response"].content)
    st.session_state.messages.append({"role": "assistant", "content": summary["response"].content})

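# Follow-up questions bypass the RAG chain and go straight to the chat model with the full message history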
client = OpenAI(api_key=OPENAI_API_KEY)

if "openai_model" not in st.session_state:
    st.session_state["openai_model"] = "gpt-4o"

if prompt := st.chat_input("Any further questions? "):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(prompt)
    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        stream = client.chat.completions.create(
            model=st.session_state["openai_model"],
            messages=[
                {"role": m["role"], "content": m["content"]}
                for m in st.session_state.messages
            ],
            stream=True,
        )
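        # write_stream renders tokens as they arrive and returns the full response text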
        response = st.write_stream(stream)
    st.session_state.messages.append({"role": "assistant", "content": response})