Spaces:
Sleeping
Sleeping
from dotenv import load_dotenv | |
import os | |
from PyPDF2 import PdfReader | |
from docx import Document | |
from docx.text.paragraph import Paragraph | |
from docx.table import Table | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain_community.vectorstores import Chroma | |
from langchain_community.embeddings import OpenAIEmbeddings | |
import streamlit as st | |
from textwrap import dedent | |
from Prompts_and_Chains import LLMChains | |
from Templates import json_structure | |
import json | |
from Utils import estimate_to_value | |
from Utils import is_key_value_present | |
def extract_text_from_file(file):
    """Return the full contents of a file-like object as text.

    Args:
        file: Object exposing ``read()``. The payload may be ``bytes``
            (binary handle) or an already-decoded ``str`` (text handle).

    Returns:
        The file contents as ``str``. Bytes are decoded as UTF-8; byte
        sequences that are not valid UTF-8 are replaced with U+FFFD rather
        than raising ``UnicodeDecodeError``, so one badly encoded upload
        does not abort the whole processing run.
    """
    payload = file.read()
    if isinstance(payload, str):
        # Text-mode handles already yield str; there is nothing to decode.
        return payload
    return payload.decode("utf-8", errors="replace")
def process_paragraph(paragraph):
    """Return the text carried by a document paragraph.

    Args:
        paragraph: Object exposing a ``text`` attribute.

    Returns:
        The value of ``paragraph.text``, unchanged.
    """
    paragraph_text = paragraph.text
    return paragraph_text
def process_table(table):
    """Flatten a table into plain text, one line per row.

    Cell texts within a row are separated by a tab and rows by a newline,
    so words from neighbouring cells are no longer fused together
    (previously "Name" and "Age" came out as "NameAge").

    Args:
        table: Object exposing ``rows``; each row exposes ``cells`` and
            each cell exposes ``text``.

    Returns:
        The table text, or an empty string when the table has no rows.
    """
    return "\n".join(
        "\t".join(cell.text for cell in row.cells) for row in table.rows
    )
def read_docx(file_path):
    """Extract the text of a .docx document, preserving document order.

    Args:
        file_path: Whatever ``Document`` accepts as its source; the value
            is passed through unchanged.

    Returns:
        The text of every paragraph and table yielded by
        ``iter_inner_content()``, joined with newlines.
    """
    # Each content type is paired with the function that renders it as text.
    handlers = ((Paragraph, process_paragraph), (Table, process_table))
    pieces = [
        render(block)
        for block in Document(file_path).iter_inner_content()
        for block_type, render in handlers
        if isinstance(block, block_type)
    ]
    return "\n".join(pieces)
def get_pdf_text(pdf):
    """Extract the text of a PDF, one newline-separated chunk per page.

    Args:
        pdf: Source handed straight to ``PdfReader``.

    Returns:
        The result of ``extract_text()`` for every page, joined with
        newlines.
    """
    reader = PdfReader(pdf)
    return "\n".join([page.extract_text() for page in reader.pages])
class RFPProcessor:
    """Run the RFP workflow steps and keep their results in Streamlit state.

    Every method reads its inputs from, and writes its outputs to,
    ``st.session_state``; the LLM calls go through the chains exposed by
    ``LLMChains``.
    """

    def __init__(self):
        """Load environment variables and build the LLM chains."""
        load_dotenv()
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        self.chains_obj = LLMChains()

    def generate_roadmap(self):
        """Generate the roadmap and flatten it into display rows.

        Runs the roadmap chain on ``estimation_data_json``, stores the raw
        response under ``roadmap_data_json``, then appends one row per
        feature to ``st.session_state["roadmap_data"]``. A phase name is
        written only on the first row that mentions it; a milestone name
        and its total time are blanked once both the phase and the
        milestone are already present in the rows.

        Raises:
            json.JSONDecodeError: If the chain response is not valid JSON.
                The error is printed before being re-raised.
        """
        roadmap_data = self.chains_obj.roadmap_chain.run(
            {"project_input": st.session_state["estimation_data_json"]}
        )
        print(roadmap_data)
        st.session_state["roadmap_data_json"] = roadmap_data

        # Parse once, inside the guard, so the diagnostic is actually emitted.
        try:
            data = json.loads(roadmap_data)
        except json.JSONDecodeError as e:
            print(f"JSON decoding error: {e}")
            raise
        print(data)

        for phases_data in data["phases"]:
            phase = phases_data["name"]
            for milestone in phases_data["milestones"]:
                milestone_name = milestone["name"]
                total_time = milestone["totalTime"]
                for feature in milestone["features"]:
                    # Checked per feature because rows appended earlier in
                    # this loop change the answer for later features.
                    phase_seen = is_key_value_present(
                        st.session_state["roadmap_data"], "phases", phase
                    )
                    # The milestone is only looked up when the phase is
                    # already present, as in the original branching.
                    milestone_seen = phase_seen and is_key_value_present(
                        st.session_state["roadmap_data"],
                        "milestones",
                        milestone_name,
                    )
                    st.session_state.roadmap_data.append(
                        {
                            "phases": "" if phase_seen else phase,
                            "milestones": "" if milestone_seen else milestone_name,
                            "total_time": "" if milestone_seen else total_time,
                            "features": feature["name"],
                            "rational": feature["rationale"],
                            "effort": feature["effort"],
                        }
                    )
        st.session_state["is_roadmap_data_created"] = True

    def generate_estimations(self, tech_leads, senior_developers, junior_developers):
        """Generate per-story estimates and flatten them into display rows.

        Args:
            tech_leads: Passed to the estimations chain as ``tech_leads``.
            senior_developers: Passed to the chain as ``senior_developers``.
            junior_developers: Passed to the chain as ``junior_developers``.

        The raw chain response is stored under ``estimation_data_json`` and
        one row per story is appended to
        ``st.session_state.estimation_data``.

        Raises:
            json.JSONDecodeError: If the chain response is not valid JSON.
        """
        print(st.session_state["user_stories_json"])
        inputs = {
            "project_summary": st.session_state["rfp_summary"],
            "user_stories": st.session_state["user_stories_json"],
            "tech_leads": tech_leads,
            "senior_developers": senior_developers,
            "junior_developers": junior_developers,
        }
        data = self.chains_obj.estimations_chain.run(inputs)
        st.session_state["estimation_data_json"] = data
        estimation_json_data = json.loads(data)
        for epic_data in estimation_json_data["epics"]:
            epic = epic_data["name"]
            for feature_data in epic_data["features"]:
                feature = feature_data["name"]
                for story in feature_data["stories"]:
                    average = estimate_to_value(story["estimate"])
                    st.session_state.estimation_data.append(
                        {
                            "epic": epic,
                            "Feature": feature,
                            "Story Description": story["description"],
                            "Estimate": story["estimate"],
                            "Person Days Range": story["effort"],
                            "Story Rationale": story["rationale"],
                            "Person Days": average,
                        }
                    )
        st.session_state["is_estimation_data_created"] = True

    def process_rfp_data(self, project_name, files):
        """Ingest the uploaded RFP files and prepare the session state.

        Extracts text from every file, stores the joined text under
        ``rfp_details``, builds a Chroma vector store over its chunks and
        stores the summary chain's output under ``rfp_summary``.

        Args:
            project_name: Name stored as ``st.session_state.project_name``.
            files: Iterable of uploaded files exposing ``name``. Files
                ending in ``.docx`` or ``.pdf`` (any letter case) go to
                the matching reader; everything else is read as text.

        Does nothing when ``project_name`` or ``files`` is empty.
        """
        if not (project_name and files):
            return

        extracted_data = []
        for file in files:
            # Compare case-insensitively so "REPORT.PDF" is not misread
            # as a plain-text file.
            file_name = file.name.lower()
            if file_name.endswith(".docx"):
                extracted_data.append(read_docx(file))
            elif file_name.endswith(".pdf"):
                extracted_data.append(get_pdf_text(file))
            else:
                extracted_data.append(extract_text_from_file(file))
        st.session_state["rfp_details"] = " ".join(extracted_data)

        text_splitter = CharacterTextSplitter(
            separator="\n", chunk_size=1000, chunk_overlap=150, length_function=len
        )
        texts = text_splitter.split_text(st.session_state["rfp_details"])
        # from_texts is a classmethod; calling it on the class avoids
        # constructing a throwaway Chroma instance first.
        st.session_state["vectorstore"] = Chroma.from_texts(
            texts, embedding=OpenAIEmbeddings(openai_api_key=self.openai_api_key)
        )
        st.session_state.project_name = project_name
        st.session_state["rfp_summary"] = self.chains_obj.summary_chain.run(
            {
                "project_name": st.session_state["project_name"],
                "rfp_details": dedent(st.session_state["rfp_details"]),
            }
        )
        st.session_state["is_data_processed"] = True
        st.success("data processed sucessfully")

    def genrate_bot_result(self):
        """Answer the pending chat input using the vector store as context.

        Looks up the text in ``st.session_state["input"]`` in the vector
        store, passes the first match's content (or an empty string when
        nothing matches) to the bot chain, appends the question and answer
        to ``past`` and ``generated``, and clears the input. Does nothing
        when the input is empty.
        """
        user_input = st.session_state["input"]
        if not user_input:
            return

        db = st.session_state["vectorstore"]
        matches = db.similarity_search(user_input)
        # An empty result list would otherwise raise IndexError.
        context_text = matches[0].page_content if matches else ""
        inputs = {
            "context": context_text,
            "input": user_input,
        }
        output = self.chains_obj.bot_chain.run(inputs)
        st.session_state.past.append(user_input)
        st.session_state.generated.append(output)
        st.session_state["input"] = ""

    def genrate_user_stories(self):
        """Generate user stories and flatten them into display rows.

        Runs the user-story chain on the stored project name and RFP text,
        stores its output under ``user_stories``, converts that output via
        the JSON chain using ``json_structure``, stores the parsed result
        under ``user_stories_json`` and appends one row per story to
        ``st.session_state.user_stories_data``.

        Raises:
            json.JSONDecodeError: If the JSON chain response is not valid
                JSON.
        """
        output = self.chains_obj.user_story_chain.run(
            {
                "project_name": st.session_state["project_name"],
                "rfp_details": st.session_state["rfp_details"],
            }
        )
        st.session_state["user_stories"] = output
        json_response = self.chains_obj.json_chain.run(
            {
                "user_stories": st.session_state["user_stories"],
                "json_structure": json_structure,
            }
        )
        user_stories_data = json.loads(json_response)
        print(user_stories_data)
        st.session_state["user_stories_json"] = user_stories_data
        for epic_data in user_stories_data["epics"]:
            epic = epic_data["name"]
            for feature_data in epic_data["features"]:
                feature = feature_data["name"]
                for story in feature_data["stories"]:
                    st.session_state.user_stories_data.append(
                        {
                            "epic": epic,
                            "Feature": feature,
                            "Story Description": story["description"],
                        }
                    )
        st.session_state["is_user_stories_created"] = True