bubuuunel committed
Commit d01dce1
1 Parent(s): 57079d8

Upload app.py

Files changed (1)
  1. app.py +71 -3
app.py CHANGED
 
@@ -3,18 +3,46 @@
 ## Setup
 # Import the necessary Libraries
 
+import json
+import tiktoken
+import gradio as gr
+import uuid
 
+import pandas as pd
+
+from openai import OpenAI
+
+from langchain_community.embeddings.sentence_transformer import (
+    SentenceTransformerEmbeddings
+)
+from langchain_community.vectorstores import Chroma
+
+from google.colab import userdata, drive
+from huggingface_hub import CommitScheduler
 
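The code added below also calls load_dotenv(), reads os.environ, and (per the next hunk's context line) builds log_file with Path, yet none of those names are imported in the hunk above; from google.colab import userdata, drive likewise only resolves inside Colab, not on a Space. A minimal sketch of the imports the rest of app.py appears to rely on, assuming they live in elided or follow-up changes:

import os
from pathlib import Path
from dotenv import load_dotenv  # python-dotenv; load_dotenv() is called in the client setup below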
 # Create Client
+load_dotenv()
 
+os.environ["ANYSCALE_API_KEY"]=os.getenv("ANYSCALE_API_KEY")
+
+client = OpenAI(
+    base_url="https://api.endpoints.anyscale.com/v1",
+    api_key=os.environ['ANYSCALE_API_KEY']
+)
 
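On a Hugging Face Space the Anyscale key would typically come from a Space secret rather than a .env file, in which case the round trip through os.environ is unnecessary. An equivalent, slightly simpler setup, assuming the ANYSCALE_API_KEY secret is configured:

client = OpenAI(
    base_url="https://api.endpoints.anyscale.com/v1",
    api_key=os.getenv("ANYSCALE_API_KEY")
)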
 # Define the embedding model and the vectorstore
+embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
 
 # Load the persisted vectorDB
 
+reportdb = Chroma(
+    collection_name=collection_name,
+    persist_directory='./report_db1',
+    embedding_function=embedding_model
+)
 
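Neither collection_name (used above) nor retriever (used inside predict further down) is defined in any visible hunk, so both presumably come from unchanged parts of app.py. A sketch of what those definitions would typically look like; the collection name here is a placeholder, not the real value:

collection_name = 'annual-reports'  # placeholder; the actual collection name is not shown in the diff
retriever = reportdb.as_retriever(search_kwargs={'k': 5})  # expose the Chroma store as a LangChain retriever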
 # Prepare the logging functionality
 
@@ -22,7 +50,7 @@ log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
 log_folder = log_file.parent
 
 scheduler = CommitScheduler(
-    repo_id="---------",
+    repo_id="report-logs",
     repo_type="dataset",
     folder_path=log_folder,
     path_in_repo="data",
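The hunk ends inside the CommitScheduler(...) call; its closing arguments sit in lines the diff view hides (the next hunk's header shows the same call as context). For reference, the usual shape of the complete call with huggingface_hub's CommitScheduler; the every value is an assumption:

scheduler = CommitScheduler(
    repo_id="report-logs",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,  # assumed: push accumulated logs roughly every 2 minutes
)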
@@ -31,12 +59,30 @@ scheduler = CommitScheduler(
 
 # Define the Q&A system message
 
+qna_system_message = """
+You are an assistant to a financial services firm who answers user queries on annual reports.
+User input will have the context required by you to answer user questions.
+This context will begin with the token: ###Context.
+The context contains references to specific portions of a document relevant to the user query.
+
+User questions will begin with the token: ###Question.
+
+Please answer only using the context provided in the input. Do not mention anything about the context in your final answer.
+
+If the answer is not found in the context, respond "I don't know".
+"""
 
 # Define the user message template
+qna_user_message_template = """
+###Context
+Here are some documents that are relevant to the question mentioned below.
+{context}
+
+###Question
+{question}
+"""
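tiktoken is imported at the top of the file but never used in the visible hunks; in a retrieval app like this it is usually there to cap the token count of the retrieved context before the template above is filled. A sketch of that pattern; the encoding name and limit are assumptions, not taken from the diff:

encoding = tiktoken.get_encoding("cl100k_base")  # assumed encoding

def truncate_context(text, max_tokens=3000):
    # keep only the first max_tokens tokens of the retrieved context
    tokens = encoding.encode(text)
    return encoding.decode(tokens[:max_tokens])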
 
 
@@ -48,12 +94,33 @@ def predict(user_input,company):
 
     # Create context_for_query
 
-    # Create messages
-
-    # Get response from the LLM
+    relevant_document_chunks = retriever.get_relevant_documents(user_question)
+    context_list = [d.page_content for d in relevant_document_chunks]
+    context_for_query = ". ".join(context_list)
+
+    prompt = [
+        {'role':'system', 'content': qna_system_message},
+        {'role': 'user', 'content': qna_user_message_template.format(
+            context=context_for_query,
+            question=user_question
+        )
+        }
+    ]
+
+    print(prompt)
+
+    # Create messages
+    response = client.chat.completions.create(
+        model=model_name,
+        messages=prompt,
+        temperature=0
+    )
+
+    # Get response from the LLM
+    answer = response.choices[0].message.content.strip()
+    print (answer)
 
     # While the prediction is made, log both the inputs and outputs to a local log file
     # While writing to the log file, ensure that the commit scheduler is locked to avoid parallel
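Two details in this hunk point to code outside the visible lines: the function signature takes user_input while the retrieval call uses user_question (presumably one is assigned from the other in an elided line), and model_name is never defined in any shown hunk. The two comments above describe writing the inputs and outputs to the log file under the scheduler's lock; that code falls in the hidden range between new lines 127 and 148. A sketch of the usual CommitScheduler logging pattern for that block; the JSON field names are assumptions:

    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps({
                'user_input': user_input,   # assumed field names
                'company': company,
                'context': context_for_query,
                'response': answer
            }))
            f.write("\n")

    return answer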
 
@@ -82,6 +149,7 @@ company = gr.Radio()
 
 # Create the interface
 # For the inputs parameter of Interface provide [textbox,company]
 
+demo = gr.Interface(inputs=[textbox,company], fn = predict, output ='text')
 
 demo.queue()
 demo.launch()
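As committed, the Interface call would raise a TypeError: Gradio's Interface takes outputs (plural), not output. The corrected call, with textbox and company assumed to be the components defined in the elided lines (the hunk context shows company = gr.Radio()):

demo = gr.Interface(fn=predict, inputs=[textbox, company], outputs='text')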