# Spaces: Sleeping
import ast
import json
import os

import cohere
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from langchain.document_loaders import JSONLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.vectorstores import FAISS
# Load variables from a local .env file into the process environment before
# the API key is looked up.
load_dotenv()

# The key is read from the API_KEY environment variable; it is None when the
# variable is not set.
api_key = os.environ.get("API_KEY")

# Module-level Cohere client shared by make_api_call.
client = cohere.Client(api_key)
# Define functions | |
def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split *documents* into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Documents to split; handed to ``split_documents`` as-is.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        Whatever ``split_documents`` returns for the given documents.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)
def make_api_call(gpt_assistant_prompt, gpt_user_prompt):
    """Send both prompts to the chat endpoint and return the reply text.

    The two prompts are joined into a single message, separated by a blank
    line, and sent through the module-level ``client``.

    Args:
        gpt_assistant_prompt: Instructions placed first in the message.
        gpt_user_prompt: The task text placed after the instructions.

    Returns:
        The ``text`` attribute of the chat response.
    """
    request = {
        "model": "command",
        "message": f"{gpt_assistant_prompt}\n\n{gpt_user_prompt}",
        # Temperature 0.0 and a 1000-token cap are fixed for every call.
        "temperature": 0.0,
        "max_tokens": 1000,
    }
    return client.chat(**request).text
def _parse_model_json(text):
    """Decode the outermost ``{...}`` object embedded in a model reply.

    Three decoding attempts are made, in order:

    1. strict JSON on the extracted span;
    2. the legacy fallback that rewrites every ``'`` to ``"`` first;
    3. ``ast.literal_eval`` for Python-dict style replies that mix quote
       styles (for example a single-quoted key with an apostrophe in a
       double-quoted value).

    Args:
        text: Raw reply text that may carry prose around the object.

    Returns:
        The decoded object as a dict.

    Raises:
        json.JSONDecodeError: If no brace-delimited span exists or none of
            the attempts yields a dict.
    """
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end < start:
        raise json.JSONDecodeError("No JSON object found in response", text, 0)
    candidate = text[start:end + 1]

    # Try strict JSON first so apostrophes inside valid string values
    # ("customer's") are not corrupted by the quote rewrite below.
    try:
        return json.loads(candidate)
    except json.JSONDecodeError:
        pass

    try:
        return json.loads(candidate.replace("'", '"'))
    except json.JSONDecodeError as error:
        legacy_error = error

    try:
        parsed = ast.literal_eval(candidate)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        raise legacy_error from None
    if not isinstance(parsed, dict):
        # A brace-delimited literal can also be a set; only a dict is a
        # usable result here.
        raise legacy_error
    return parsed


def condition_split(query):
    """Ask the model to break an offer description into a structured dict.

    The prompt asks for three top-level keys (``condition_for_eligibility``,
    ``promotion_offered`` and ``scheduling``); the reply is parsed leniently
    because the model may wrap the object in prose or use single quotes.

    Args:
        query: Free-text description of the customer offer.

    Returns:
        The dict decoded from the model reply.

    Raises:
        json.JSONDecodeError: If the reply contains no decodable object.
    """
    structure = {
        "condition_for_eligibility": {
            "logic": "form the logic from query above like (A and B) or C",
            "definitions": {
                "condition": "the text Condition extracted from the query"
            }
        },
        "promotion_offered": "Description of Promotion",
        "scheduling": "Offer Scheduling Details"
    }
    # NOTE: {structure} is interpolated as a Python dict repr (single
    # quotes), so the example shown to the model is not strict JSON.
    template_initial = f"""
Given a text description of a customer offer that outlines eligibility criteria, the promotion offered, and the scheduling of the offer, your task is to parse the text and organize the information into a structured JSON format. The JSON structure should include three main components: condition_for_eligibility, promotion_offered, and scheduling. Use logical expressions to detail the conditions for eligibility.
Text Description:
{query}
Your Objectives:
Extract and Label Conditions for Eligibility:
Identify specific metrics or actions (e.g., number of calls) that define eligibility.
Use labels (A, B, C) for distinct conditions.
Determine the logical relationship between these conditions (e.g., (A and B) or C).
Outline the Promotion Offered:
Identify the key benefit or discount promised to eligible customers.
Determine the Scheduling for the Offer:
Capture the frequency or timing of when the offer is made (e.g., daily, weekly).
Structure Your Response as Follows:
the structure is
{structure}
the json you produce, replace the sigle quates with doublequates to match json format the json.loads is expecting
"""
    gpt_assistant_prompt = """You are an expert in Semantic Understanding, Marketing, and Business Analysis.
You need to extract specific information like conditions, scheduling details, and promotions from a paragraph, typically blending technical skills and domain-specific knowledge.
Give only the JSON as the response.
"""
    resp = make_api_call(gpt_assistant_prompt, template_initial)
    # Console trace of the raw model reply, kept for debugging.
    print(resp)
    return _parse_model_json(resp)
def make_prompt(query, db1):
    """Build a KPI-grounded prompt for *query* and return the model's answer.

    The four nearest documents are fetched with
    ``db1.similarity_search(query, k=4)``. Each document's ``page_content``
    is decoded as JSON and rendered as ``"<kpis>: <description>"``; the
    rendered lines are listed in the prompt under "Matched KPI's".

    Args:
        query: The user's condition text.
        db1: Vector store exposing ``similarity_search(query, k=...)``.

    Returns:
        The model reply converted to ``str``.

    Raises:
        json.JSONDecodeError: If a matched document's content is not JSON.
        KeyError: If a decoded document lacks ``kpis`` or ``description``.
    """
    kpi_lines = []
    for doc in db1.similarity_search(query, k=4):
        js = json.loads(doc.page_content)
        kpi_lines.append(f"{js['kpis']}: {js['description']}")
    # One KPI per line. The separator used to be the literal text ",/n ",
    # which leaked "/n" into the prompt instead of breaking the line.
    final = ",\n ".join(kpi_lines)
    prompt_template = f"""
Given the following user query and matched information if the KPI's:
User Query: {query}
Matched KPI's:
{final}
Write a condition to fulfill the user query.
IMPORTANT:
- Use only the KPI's mentioned in the Matched KPI's.
- Associate KPI's with the corresponding descriptions provided in the Matched KPI's.
- provide only the condition in response
- only the exact KPI and condition value is to be present in the response no other text should be present.
- only use one matching KPI or the closer once
"""
    gpt_assistant_prompt = """You are an expert in Semantic Understanding, Marketing, and Business Analysis.
You need to check whether the text matches. If the matching probability is high, then proceed with the requirement.
"""
    resp = make_api_call(gpt_assistant_prompt, prompt_template)
    return str(resp)
def replace_with_shielding(json_data, condition_string):
    """Substitute the keys of *json_data* found in *condition_string*.

    Works in three phases:

    1. Every key occurrence is first swapped for a ``__key__`` token, so a
       value inserted later cannot be mistaken for another key.
    2. Tokens are replaced by ``str(value)`` in dict order. The first value
       that contains the "do not match" marker is replaced by a fixed
       notice instead, and no further tokens are processed.
    3. If the notice is present, everything after its first occurrence is
       dropped.

    Args:
        json_data: Mapping of key text to replacement value.
        condition_string: Text in which the keys are substituted.

    Returns:
        The substituted (and possibly truncated) string.
    """
    mismatch_marker = "The provided KPI's do not match the user query."
    mismatch_notice = "Description not matching with kpi so update kpi and description in kpi's json file"

    # Phase 1: shield each key behind its token.
    tokens = {}
    for name in json_data:
        tokens[name] = f"__{name}__"
    for name, token in tokens.items():
        condition_string = condition_string.replace(name, token)

    # Phase 2: resolve tokens in order, stopping at the first mismatch.
    for name, value in json_data.items():
        replacement = str(value)
        if mismatch_marker in replacement:
            condition_string = condition_string.replace(tokens[name], mismatch_notice)
            break
        condition_string = condition_string.replace(tokens[name], replacement)

    # Phase 3: cut the text right after the first notice, if any.
    head, found, _ = condition_string.partition(mismatch_notice)
    if found:
        return head + mismatch_notice
    return condition_string
def form_json_rule(condition_string):
    """Ask the model to turn *condition_string* into a rule JSON document.

    The prompt consists of a sample rule JSON, a field-by-field description
    of its structure, and finally *condition_string* as the text to convert.

    Args:
        condition_string: The condition text to express as a rule.

    Returns:
        The model reply exactly as returned by ``make_api_call``.
    """
    # Everything that precedes the condition text in the prompt.
    rule_prompt_head = """
The task is to generate a json format with the help of english text.
I will help you with some details about the conversion.
a sample rule json structure is
{"featureId":"","appName":"","username":"","password":"","reqTxnId":"","msgOrigin":"","msgDest":"","timestamp":"","id":"","ruletype":"","data":{"detail":{"rules":{"id":"0","pid":"#","childrens":[{"id":"0_0","pid":"0","type":"conditions","option":"All","childrens":[{"id":"0_0_0","pid":"0_0","type":"condition","profile":{"id":1,"name":"P_AON"},"operator":">","values":{"value":"30"}},{"id":"0_0_1","pid":"0_0","type":"condition","profile":{"id":862,"name":"P_DEVICE_TYPE"},"operator":"=","values":{"value":"PHONE"}},{"id":"0_0_2","pid":"0_0","type":"action","action":{"id":98,"name":"Mobile App Notification"},"field":[{"name":"ActionID","value":"0_0_2"},{"name":"ActionName","value":""},{"name":"ActionCall","value":""}],"request":{"field":[]}}]}]}}}}
Root Level Properties
featureId: String. A unique identifier for the feature.
appName: String. The name of the application.
username: String. The username for authentication.
password: String. The password for authentication.
reqTxnId: String. A unique transaction identifier.
msgOrigin: String. The origin of the message.
msgDest: String. The destination of the message.
timestamp: String (ISO 8601 format). The timestamp of the request or action.
id: String. A unique identifier for this particular instance.
ruletype: String. The type of rule being defined.
Data and Detail Section
data: Object container.
detail: Object within 'data'.
rules: Object representing the rule logic.
id: String. The unique identifier of the rule.
pid: String. The parent identifier of the rule.
childrens: Array of child objects. Each object represents a condition or an action.
type: String. Specifies if it's a 'condition' or 'action'
if inside childrens if type is condition then the option can come as a logical operator like 'All' or 'Any'.
ALL is like an and operation where as Any is like an OR operation
If type is 'any' or 'and', it contains a conditions array with condition objects.
If type is 'condition' or 'action', it follows the respective structures below.
Condition Structure
id: String. The unique identifier of the condition.
pid: String. The parent identifier of the condition.
profile: Object containing details of the condition.
name: String. The name of the condition.
id: String. The unique identifier of the condition.
operator: String. The operation applied in the condition (e.g., '=', '<>').
values: String. The values to check against in the condition.
isTextMode: Boolean. Indicates text mode evaluation.
Action Structure
id: String. The unique identifier of the action.
pid: String. The parent identifier of the action.
action: Object containing details of the action.
name: String. The name of the action.
id: String. The unique identifier of the action.
field: Object of parameters related to the action (key-value pairs).
request (Optional): Object detailing an external service request (key-value pairs).
isTextMode: Boolean. Indicates text mode processing.
Schedule Section
schedule: Object defining scheduling details.
field: Array of scheduling parameter objects.
Each object contains scheduling details (key-value pairs) like ScheduleName, ScheduleType, CAMPAIGN_NAME, ExpiryDate.
now from the information provided below convert it to json
"""
    system_prompt = """You are a json rule maker. You make rules based on the structure provided.
IMPORTANT:
Only give the valid json as response no other text should be present.
Give the response json in compact form.
the response should be exactly same to the template used in the prompt
"""
    # The condition text goes on its own line at the very end of the prompt.
    rule_prompt = rule_prompt_head + condition_string + "\n"
    return make_api_call(system_prompt, rule_prompt)
# Streamlit App | |
def _build_vector_store(documents):
    """Chunk *documents*, embed the chunks and persist them in a Chroma store.

    The store is written to the ``embeddings`` directory and returned so the
    caller can search it.
    """
    chunks = split_docs(documents)
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    store = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory="embeddings",
    )
    store.persist()
    return store


def _build_shielded_condition(query, documents):
    """Run retrieval, condition splitting and shielding for *query*."""
    condition_string = make_prompt(query, _build_vector_store(documents))
    json_data = condition_split(query)
    return replace_with_shielding(json_data, condition_string)


def main():
    """Render the Streamlit page and run the pipeline step behind each button.

    The user uploads a JSON Lines file and enters a query. Each button then
    runs the pipeline up to one stage (document splitting, vector store
    creation, condition splitting, prompt building, shielding, rule JSON
    generation) and shows that stage's result.
    """
    st.title("Flight Data Analysis")

    # The loader below reads JSON Lines, so ".jsonl" is the extension users
    # actually have. The original value "jsnol" was a typo that rejected such
    # files; it is still accepted so renamed files keep working.
    uploaded_file = st.file_uploader("Choose a JSON file", type=["jsonl", "jsnol"])
    # NOTE(review): the original indentation was not recoverable; this
    # assumes nothing below is rendered until a file is uploaded, since most
    # buttons need the loaded documents.
    if uploaded_file is None:
        return

    # JSONLoader reads from a path, so the upload is written to disk first.
    with open("flight.jsnol", "wb") as f:
        f.write(uploaded_file.getvalue())
    documents = JSONLoader(
        file_path='flight.jsnol',
        jq_schema='.',
        text_content=False,
        json_lines=True,
    ).load()

    query = st.text_input("Enter your query")

    if st.button("Split Documents"):
        docs = split_docs(documents)
        st.write(f"Number of documents: {len(docs)}")
        st.json(docs)

    if st.button("Create Embeddings and Vector Store"):
        _build_vector_store(documents)
        st.write("Embeddings and vector store created.")

    if st.button("Condition Split"):
        if query:
            json_data = condition_split(query)
            st.write("JSON Data:")
            st.json(json_data)
        else:
            st.write("Please enter a query.")

    if st.button("Make Prompt"):
        if query:
            condition_string = make_prompt(query, _build_vector_store(documents))
            st.write("Condition String:")
            st.write(condition_string)
        else:
            st.write("Please enter a query.")

    if st.button("Replace with Shielding"):
        if query:
            shielded_replaced_string = _build_shielded_condition(query, documents)
            st.write("Shielded Replaced String:")
            st.write(shielded_replaced_string)
        else:
            st.write("Please enter a query.")

    if st.button("Form JSON Rule"):
        if query:
            shielded_replaced_string = _build_shielded_condition(query, documents)
            json_rule = form_json_rule(shielded_replaced_string)
            # Show the rule as plain text and offer it as a download.
            st.write("JSON Rule:")
            st.write(json_rule)
            st.download_button(
                label="Download JSON Rule",
                data=str(json_rule),
                file_name="json_rule.txt",
                mime="text/plain",
            )
        else:
            st.write("Please enter a query.")
# Start the app only when this file is executed as the entry script.
if __name__ == "__main__":
    main()