import json
import os
import uuid
from pathlib import Path

import gradio as gr
from huggingface_hub import CommitScheduler
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from openai import OpenAI


# Read the Anyscale API key from the process environment. The previous code
# called `userdata.get(...)`, but `userdata` (presumably the Colab secrets
# helper) is never imported or defined in this file, so loading the module
# raised NameError before the app could start.
anyscale_api_key = os.environ.get('anyscale_api_key')

# OpenAI-compatible client pointed at the Anyscale Endpoints base URL.
client = OpenAI(
    base_url="https://api.endpoints.anyscale.com/v1",
    api_key=anyscale_api_key,
)


# Embedding model handed to the vector store as its embedding function.
embedding_model = SentenceTransformerEmbeddings(model_name="thenlper/gte-large")

# Chroma collection opened from its persist directory.
collection_name = "finsights-grey-10k-2023"
vectorstore_persisted = Chroma(
    persist_directory="/content/finsightsgrey_db",
    embedding_function=embedding_model,
    collection_name=collection_name,
)

# Similarity retriever (k=5) over the same store.
# NOTE(review): the visible code queries `vectorstore_persisted` directly and
# never uses `retriever` -- confirm whether it is still needed.
retriever = vectorstore_persisted.as_retriever(
    search_kwargs={"k": 5},
    search_type="similarity",
)

# Per-process JSON log path under logs/; the random UUID makes the file name
# unique for each run.
# NOTE(review): nothing in the visible code writes to `log_file` -- confirm
# whether the logging step was dropped.
log_folder = Path("logs/")
log_file = log_folder / f"data_{uuid.uuid4()}.json"


# System prompt for the Q&A model. It names the two section tokens that the
# user message template below emits (###Context and ###Question) and tells
# the model to answer only from the supplied context.
# The literal text is sent to the model verbatim, so it must not carry stray
# table markers or doubled blank lines.
qna_system_message = """
You are an assistant to a financial technology firm who answers user queries on 10-K reports from various industry players which contain detailed information about financial performance, risk factors, market trends, and strategic initiatives.
User input will have the context required by you to answer user questions.
This context will begin with the token: ###Context.

When crafting your response, select the most relevant context or contexts to answer the question.

User questions will begin with the token: ###Question.

Please answer only using the context provided in the input. Do not mention anything about the context in your final answer.

If the answer is not found in the context, respond "I don't know".
"""

# User message template; filled with str.format(context=..., question=...).
qna_user_message_template = """
###Context
Here are some documents that are relevant to the question mentioned below.-
{context}

###Question
{question}
"""


def predict(user_input, company):
    """Answer a question about one company's 2023 10-K report.

    Retrieves the five most similar chunks from the persisted vector store,
    restricted to the selected company's PDF, builds the chat prompt from the
    system message and user template, and asks the hosted model for an answer.

    Args:
        user_input: The question typed by the user.
        company: The company identifier used to build the source-file filter
            (one of the radio choices in the UI).

    Returns:
        The model's answer, a short guidance message when the question or the
        company is missing, or an apology string containing the error text if
        retrieval or the completion call fails.
    """
    # Guard clauses: an unselected radio arrives as None, which previously
    # produced a filter for a non-existent "None-10-k-2023.pdf" source, and a
    # blank question was still sent to the model.
    if not user_input or not user_input.strip():
        return "Please enter a question."
    if not company:
        return "Please select a company."

    # Named `source_filter` so the builtin `filter` is not shadowed.
    source_filter = {"source": f"/content/dataset/{company}-10-k-2023.pdf"}

    # Retrieval now sits inside the try as well, so a vector-store failure is
    # reported to the user instead of crashing the request handler.
    try:
        relevant_document_chunks = vectorstore_persisted.similarity_search(
            user_input, k=5, filter=source_filter
        )

        # Prefix each chunk with its page number; tolerate chunks that were
        # indexed without a 'page' entry instead of raising KeyError.
        context_for_query = ".".join(
            f"Page {doc.metadata.get('page', 'unknown')}: {doc.page_content}"
            for doc in relevant_document_chunks
        )

        prompt = [
            {'role': 'system', 'content': qna_system_message},
            {
                'role': 'user',
                'content': qna_user_message_template.format(
                    context=context_for_query,
                    question=user_input,
                ),
            },
        ]

        response = client.chat.completions.create(
            model="mlabonne/NeuralHermes-2.5-Mistral-7B",
            messages=prompt,
            temperature=0,
        )
        prediction = response.choices[0].message.content
    except Exception as e:
        # UI boundary: surface the failure as text rather than propagating.
        prediction = f'Sorry, I encountered the following error: \n {e}'

    print(prediction)

    return prediction


# Input widgets: the free-text question and the company selector whose value
# predict() uses to build its source-file filter.
textbox = gr.Textbox(placeholder="Enter your query here")
company = gr.Radio(label="Company", choices=["IBM", "META", "aws", "google", "msft"])

# Sample (question, company) rows offered in the interface.
_example_rows = [
    ["Has the company made any significant acquisitions in the AI space, and how are these acquisitions being integrated into the company's strategy?", "IBM"],
    ["How much capital has been allocated towards AI research and development?", "META"],
    ["What initiatives has the company implemented to address ethical concerns surrounding AI, such as fairness, accountability, and privacy?", "aws"],
    ["How does the company plan to differentiate itself in the AI space relative to competitors?", "google"],
]

# Wire predict() to the two inputs and a plain-text output.
demo = gr.Interface(
    fn=predict,
    inputs=[textbox, company],
    outputs="text",
    title="Q&A for IBM, META, AWS, GOOG & MSFT 10-K Statements",
    description="This web API presents an interface to ask questions on contents of IBM, META, AWS, GOOGLE and MSFT 10-K reports for the year 2023",
    article="Note that questions that are not relevant to the aforementioned companies' 10-K reports will not be answered",
    examples=_example_rows,
    concurrency_limit=16,
)

# Enable request queuing, then start the server with a public share link.
demo.queue()
demo.launch(share=True, debug=False)