# vipra's picture
# Update app.py
# 4f0fd4b verified
# app.py
import streamlit as st
import openai
import re
import os
import json
import traceback
from collections import defaultdict
# The OpenAI API key is read from the OPENAI_API_KEY environment variable so it
# is never hard-coded in the source; os.getenv returns None when it is unset.
# Alternative when the key is stored in Streamlit secrets instead:
#   openai.api_key = st.secrets["OPENAI_API_KEY"]
openai.api_key = os.getenv("OPENAI_API_KEY")
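# Default marking rubric pre-filled in the UI. Each criterion heading is followed
# by one line per grade band in the form "GRADE (min-max) description".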
DEFAULT_RUBRIC = """
Spelling and Grammar
HD (16.0-20) Spelling and grammar of a very high standard. Writers at this level need to demonstrate strong writing skills and the ability to succinctly express ideas in their own words. Well-crafted topic sentences present.
D (14.0-15.9) Spelling and grammar of a high standard. Written expression adds value to the argument with sound overall argument structure and idea organization. Clear topic sentences.
C (12.0-13.9) Spelling and grammar of an acceptable standard. Written expression problems may be present but are not severe. Topic sentences generally capture the theme and are reasonably clear.
P (10.0-11.9) Frequent minor problems with spelling and grammar, but sufficient organization of thought and expression to allow the reader to follow without too much effort. Topic sentences may be present but are not always clear.
N (0-9.9) Significant problems with spelling and grammar. Work poorly organized or not logically presented. Many short or long paragraphs and no topic sentences.
Depth and Breadth of Analysis
HD (16.0-20) Integrated complex knowledge of frameworks and presented a sound critical analysis that is well supported. Evidence of extensive reading of quality sources and strong analysis that reflects a deep understanding. Broad coverage of causes and remedies.
D (14.0-15.9) Evidence of relevant reading of quality sources and effective integration into arguments. Intelligent organization and expansion of concepts and ideas, with sound analysis and discussion of implications involved.
C (12.0-13.9) Covers the key issues. Analysis of the issues/concepts/theories involved with clear presentation of components. Ideas generally organized and logically presented. Some evidence of additional reading.
P (10.0-11.9) Evidence that most key issues are understood and addressed. Evidence of required reading. Awareness and understanding at a basic level of the major concepts, theories, and frameworks involved. Flaws in thinking may be present in aspects of work.
N (0-9.9) Insufficient work and research. Discussion contains significant flaws such as large gaps in knowledge of key issues and/or errors in application of concepts/theories.
Evidence of Research
HD (4.0-5) Very strong supporting evidence is provided. Group Reports: 30 or more credible sources. Individual reports: 20 credible sources or more.
D (3.5-3.9) Significant supporting evidence is provided. Group Reports: 23-30 credible sources. Individual reports: 15-20 credible sources.
C (3.0-3.4) Satisfactory evidence is provided. Group Reports: 18-23 credible sources. Individual reports: 12-15 credible sources.
P (2.5-2.9) Minimum evidence is provided. Group Reports: 15-18 credible sources. Individual reports: 10-12 credible sources.
N (0-2.4) Minimum evidence is not provided. Group Reports: Less than 15 credible sources. Individual reports: Less than 10 credible sources.
Formatting of Referencing
HD (4.0-5) In-text referencing adheres to all aspects of guidelines. Reference list adheres to all aspects of guidelines.
D (3.5-3.9) In-text referencing errors are minimal and minor. Reference list errors are minimal and minor.
C (3.0-3.4) There are a number of minor in-text referencing errors. There are a number of minor referencing list errors.
P (2.5-2.9) There are some serious in-text referencing errors. There are some serious referencing list errors.
N (0-2.4) There are many serious in-text referencing errors. There are many serious referencing list errors.
Presentation
HD (4.0-5) Very high standard. Strict adherence to format and presentation standards contained in the templates, etc. Use of high-quality appendices that are presented well in table/diagram form.
D (3.5-3.9) Presentation of a high standard. Consistent standard in formatting that observes the standards embodied in the detailed report standards and templates. Effective use of appendices.
C (3.0-3.4) Soundly presented. Occasional format problems or inconsistencies which are generally of a minor nature. Appendices used but not always integrated or presented well.
P (2.5-2.9) Some problems and inconsistencies with overall presentation and formatting but not serious. Appendices used minimally and often not well laid out or presented.
N (0-2.4) Poorly presented report with multiple and/or serious format problems. Sections may be missing.
"""
# Default sample student report pre-filled in the UI.
DEFAULT_REPORT = """
1.0 Executive Summary
This report examines BDE Insurance’s training personnel and points out three inherent issues: poor planning, communication, and leadership. Poor planning has produced reactive decision-making and unclear goals. Reliance on impersonal channels and inadequate feedback have promoted misapprehensions and internal conflict. Unilateral leadership choices have also undermined confidence and morale and caused unnecessary turnover. To solve such issues, this report will advise in-depth the internal examination by SWOT to achieve concise targets, establishing regular meetings and open feedback systems to promote openness and transparency. Adaptation of transformational leadership behaviors by imparting focused management development in conflict resolution and emotional intelligence. These integrated solutions are intended to re-establish team coherence, enhance productivity, and assure lasting success.
2.0 Introduction
2.1 Report Topic
This case study will report how BDE Insurance Company has managed itself, highlighting the internal structure within its organizational planning and the strategies that can be implemented to motivate restructuring. The case study will also describe how to utilize proper communication and management policies by identifying the inadequacies presented and offering recommendations for implementation for a good job design.
2.2 Limitations
This report will only be limited to the information provided by BDE Insurance company. Any other external resources will not be in the discussion because of lack thereof. We will conduct a SWOT Analysis for the external threats and opportunities; however, any other information may limit a proper discussion. The case study only describes one department, which may lack a full scope of the entire company.
2.3 Sources of Data
All the information stated has been sourced from recognized information databases, websites, and peer-reviewed journal articles. Evidence for further information can also be found under references and the appendices.
2.4 Organisation of Report
As illustrated from the table of contents, all parts of this document have been categorized into eight sections, with three primary problems pointed out and described, further recommending ways on rectifying them, and finally offering a conclusion and appendices for the reader’s own reference.
3.0 Organizational Planning
BDE Insurance initiated a massive restructuring to counter its global competitors while simultaneously conducting a training compliance audit. All these changes had been implemented without proper organizational planning. Planning is one of the first measures that BDE Insurance should conduct, and its significance should not be overlooked (Davidson and Griffin, 2005). This process is important to exercise caution and ensure the systematic coordination of resources, activities, and timelines to define and achieve both short-term and long-term objectives, mitigating potential risks for the company (Josiah 2025, 43). After the planning process, (Josiah 2025, 53) concludes how critical it is for organizations to implement a strategy that prioritizes stakeholder interests and promotes responsible business practices for achieving long-term success and enhancing the prospects for a successful organizational change initiative.
3.1 Strategy
Strategy formulation begins with managers examining both internal and external factors that influence the organization’s ability to achieve its current and future goals, as stated by (Jones, George, and Barrett 2016, 437). A tool for business strategic planning that BDE Insurance should use is the S.W.O.T. analysis (see Appendix A), which stands for strengths, weaknesses, opportunities, and threats (Benzaghta et al. 2021, 55). This is particularly important because, using the SWOT analysis, the leadership at BDE Insurance will be more assertive about the company’s position in the market and gain better insight into where the company should focus its efforts for its continued long-term success and profitability. In this case, (Jones, George, and Barrett 2016) suggest that to maintain the company’s competitiveness, the BDE Insurance leadership must develop the necessary skills to manage this change effectively.
3.2 Change and Re-structuring
The new leadership at BDE Insurance sought to implement rapid changes within its organization in an attempt to make a strong impression. However, this approach resulted in negative consequences, such as increased pressure within the organizational environment. From (Josiah 2025, 50) point of view, effective planning for organizational change considers all the resources available, along with any limitations, to create strategies that ensure the achievement of the changed objectives. At BDE Insurance—before implementing any changes, the leadership failed to analyze the resources available, and the modifications ultimately overloaded the team, guiding the company toward failure. As (Kupiek and Marcinkowski 2024, 4) note, without proper planning, change fatigue or complete overload is more likely to occur, leading to a pre-programmed failure. Moving forward, BDE Insurance must prioritize comprehensive resource analysis and strategic planning to ensure that changes are implemented thoughtfully and with careful consideration of organizational capacity and employee well-being, thereby enhancing employee motivation.
3.3 Motivation
At BDE Insurance, employees like Emma and some of her colleagues, appear to lack motivation. Without it, the workforce will struggle to align with the company’s direction toward organizational goals and will be unable to utilize their knowledge and skills effectively (Sale and Jones 2018, 9). The BDE Insurance workforce is experiencing overload in their work activities, leading to projects falling under schedule, increased friction between teams, and several resignations; the main causes for this situation, especially between Emma and Joan, are analyzed in Appendix B. To maintain a high level of motivation within an organization, the employees need to receive comparable rewards and contribute similar efforts (Jones, George, and Barrett, 2016, 310).
4.0 Communication
Workplace communication is one of the most effective ways to solve inadequacy, especially in the case of BDE Insurance, because weak communication practices can cause many root problems. Poor communication caused misunderstandings, workplace hostility, and team division at BDE Insurance. Patrick’s reliance on emails and the lack of transparency has led to resentment and disengagement. According to (Lee et al., 2020), clear and direct communication increases mutual understanding, reduces misinterpretation, and improves workplace collaboration overall. By choosing communication as the solution to this problem, the misunderstandings happening between employees can be solved through open discussions, allowing for all parties to express concerns and expectations. Studies also indicate that strong communication strategies can enhance trust and teamwork, which can be crucial in repairing workplace relationships and preventing further conflicts (Brown & Williams, 2019).
5.0 Recommendations
• Implement contingency planning to address unexpected challenges and reduce the risk of change fatigue.
• Conduct a full financial and resource assessment to determine the company’s position.
• Utilize strategic methods like a SWOT analysis to identify internal and external issues.
• Implement regular, scheduled team meetings (face-to-face or virtual) to ensure real-time information sharing and clarification.
• Create anonymous feedback channels to enable employees to report concerns without reprisal.
• Move towards a leadership style that emphasizes empowerment, participative decision-making, and emotional intelligence.
• Train managers in non-violent communications and conflict resolution to adopt open conversations and reduce misunderstanding.
• Develop protocols for clearly communicating key decisions and changes, involving team members where possible to build trust and inclusivity.
• Provide targeted training programs focused on conflict resolution, effective feedback, and task management for managers.
• Establish policies for workplace bullying and transparent promotion practices, reinforced through regular performance reviews.
• Introduce team-based performance indicators and reward systems (e.g., “Team of the Quarter”) to encourage collaboration and reduce individualism.
6.0 Conclusion
Therefore, issues at BDE Insurance are caused by poor planning, improper communication channels, and unacceptable management, which impact team productivity and cooperation. The organization will turn around its business direction by strategic objective settings utilizing instruments such as SWOT and improved contingency planning, fostering inclusive dialogues by structuring its communication channels. At the same time, reinforcing informal feedback systems and transparent decision-making practices to regain confidence and minimize employee misconceptions. Finally, BDE Insurance will develop an environment of support and cooperation by investing in effective managerial development programs and steadfast commitment to fair and structured company policies. These collective interventions are imperative in improving team morale, sustaining performance, and achieving long-term business success.
7.0 References
Aamir Chughtai, Marann Byrne, Barbara Flood. 2015. "Linking Ethical Leadership to Employee Well-Being: The Role of Trust in Supervisor." Journal of Business Ethics 653-663.
Benzaghta, M. A., A. Elwalda, M. M. Mousa, I. Erkan, and M. Rahman. 2021. "SWOT Analysis Applications: An Integrative Literature Review." Journal of Global Business Insights 6 (1): 55-73. https://www.doi.org/10.5038/2640-6489.6.1.1148.
Brown, Lisa, and Mark Williams. 2019. Workplace Communication Strategies for Effective Leadership. New York: Business Insights Press.
CRYSTAL M. HAROLD, BRIAN C. HOLTZ. 2015. "The Effects of Passive Leadership on Workplace Incivility." Journal of Organizational Behavior 16-18.
Davidson, Paul, and Griffin, . 2005. Management: Australia in a Global Context. Milton, QLD: Wiley.
DOSTIE, BENOIT. 2018. "THE IMPACT OF TRAINING ON INNOVATION." ILR Review 64-66.
Frederiksen, Anders. 2017. "Job satisfaction and employee turnover: A firm-level perspective." German Journal of Human Resource Management 132-161.
Garcia, Daniel, and Robert Lopez. 2021. "Direct Communication and Conflict Resolution in Teams." Journal of Organizational Behavior 45 (2): 102-119.
Jiang, Kaifeng, and Tahira Probst. 2017. "Workplace Inclusion and Employee Performance: The Role of Job Satisfaction." Journal of Applied Psychology 102 (5): 789-803.
Jones, Gareth R., George, Jennifer M., and Barrett, Mary. 2016. Contemporary Management. North Ryde: McGraw-Hill Australia. Accessed March 12, 2025. ProQuest Ebook Central.
Josiah, Donnell S. 2025. “Planning.” In The 5-Ps of Change, 1st ed., 1:42–102. Routledge. https://doi.org/10.4324/9781003544883-3.
Kupiek, Martin, and Marcinkowski, Bettina eds. 2024. Dynamic Change Management: A Context-Oriented Approach to Foster the Adaptability of Organizations. Cham: Springer. https://doi.org/10.1007/978-3-031-70706-3.
Lee, Chang, and Sunwoo Kim. 2018. "Trust and Commitment in Leadership: A Pathway to Reducing Workplace Conflict." Academy of Management Review 43 (3): 330-349.
Lee, Hannah, James Smith, and Patricia Johnson. 2020. "Face-to-Face vs. Electronic Communication: Resolving Workplace Miscommunication." Journal of Business Communication 57 (1): 44-60.
LJUNGHOLM, DOINA POPESCU. 2015. "THE PRACTICE OF PERFORMANCE MANAGEMENT IN PUBLIC SECTOR ORGANIZATIONS." Geopolitics, History, and International Relations 190-192.
Lv, Ping. 2018. "The Role of Emotional Intelligence in Leadership Effectiveness: A Meta-Analysis." Journal of Organizational Psychology 39 (2): 120-134.
Markus Berndt, Jan-Willem Strijbos, Frank Fischer. 2018. "Effects of written peer-feedback content and sender's competence on perceptions, performance, and mindful cognitive processing." European Journal of Psychology of Education 31-34.
MORGEN JOHANSEN, DANIEL P. HAWES. 2016. "THE EFFECT OF THE TASKS MIDDLE MANAGERS PERFORM ON ORGANIZATIONAL PERFORMANCE." Public Administration Quarterly 589-616.
Northouse, Peter G. 2018. Leadership: Theory and Practice. 8th ed. Thousand Oaks, CA: Sage Publications.
Parker, Sharon K., Van den Broeck, Anja, and Holman, David. 2016. "Work Design Influences: A Synthesis of Multilevel Factors That Affect the Design of Jobs." Academy of Management Annals 11 (1): 267–308. https://doi.org/10.5465/annals.2014.0054.
Saha, Suman, and Rahul Kumar. 2017. "Participative Decision-Making and Employee Learning: Evidence from Corporate Case Studies." Management Research Review 40 (3): 234-249.
Sale, James, and Jones, Steve. 2018. Mapping Motivation for Engagement. First edition. Boca Raton, FL: Routledge.
Shore, Lynn M., Jeanette N. Cleveland, and Diana Sanchez. 2018. "Workplace Inclusion and Organizational Outcomes: The Role of Psychological Safety." Human Resource Management Journal 28 (2): 176-191.
Smith, Patricia, and Robert Johnson. 2018. "The Impact of Electronic Communication on Workplace Relationships." Journal of Business Ethics 153 (4): 901-915.
Wang, Xinyue, Mark Thompson, and Rebecca Allen. 2016. "Transformational Leadership and Employee Satisfaction: The Role of Open Communication." Leadership & Organization Development Journal 37 (6): 748-764.
YAN ZHANG, DAVID A. WALDMAN, YU-LAN HAN, XIAO-BEI LI. 2015. "PARADOXICAL LEADER BEHAVIORS IN PEOPLE MANAGEMENT: ANTECEDENTS AND CONSEQUENCES." The Academy of Management Journal 538-539.
Zhang, Wei, Anna Cooper, and Daniel Harrison. 2015. "Employee Involvement in Decision-Making and Job Satisfaction: A Longitudinal Study." International Journal of Management Studies 42 (1): 88-107.
8.0 Appendices
8.1 Appendix A: SWOT Analysis
[Content not provided in the original text]
8.2 Appendix B: Equity Theory Analysis
[Content not provided in the original text]
"""
# System prompt sent to the chat model. The example object uses double-quoted
# keys so that it is valid JSON (the request asks for a JSON object), and the
# model is told to reuse the rubric headings as section names because results
# are matched to rubric sections by name.
DEFAULT_SYSTEM_PROMPT = """You are an expert academic evaluator. Return analysis in this exact JSON format:
{
  "sections": [
    {
      "name": "SectionName",
      "score": 0.0,
      "feedback": "...",
      "improvements": ["..."],
      "hd_improvements": ["Specific examples to achieve HD"]
    }
  ],
  "total_score": 0.0
}
The values above are placeholders. Use each rubric criterion heading verbatim as the section "name", and give every "score" as a number within that criterion's range.
For each criterion:
1. Provide current feedback
2. List general improvements
3. Give specific HD-level improvement examples"""
class ReportEvaluator:
    """Evaluate a report against a rubric using regex metrics plus a GPT review.

    The rubric is parsed into sections, the report is scanned for objective
    metrics, the chat model is asked for per-section scores and feedback, and
    the three are merged into a single result dictionary.
    """

    # Denominator shown when the rubric exposes no usable score ranges.
    _FALLBACK_MAX_SCORE = 60
    # Pulls the numeric bounds out of a criterion range such as "16.0-20".
    _NUMBER_RE = re.compile(r'\d+(?:\.\d+)?')

    def __init__(self, api_key):
        """Create the OpenAI client and the rubric/report helpers.

        Args:
            api_key: Key passed to ``openai.OpenAI``.
        """
        print("Initializing ReportEvaluator...")
        self.client = openai.OpenAI(api_key=api_key)
        self.rubric_parser = RubricParser()
        self.report_analyzer = ReportAnalyzer()

    def evaluate(self, rubric_text, report_text, system_prompt):
        """Run the full pipeline and return the combined result dictionary.

        Args:
            rubric_text: Raw rubric text.
            report_text: Raw report text.
            system_prompt: System message sent to the chat model.

        Returns:
            The dictionary built by ``_combine_results``.
        """
        print("\n=== Starting Evaluation Process ===")
        # Parse rubric structure
        print("\n[1/3] Parsing Rubric...")
        rubric = self.rubric_parser.parse_rubric(rubric_text)
        print(f"Parsed Rubric Structure: {json.dumps(rubric, indent=2)[:500]}...")
        # Perform objective measurements
        print("\n[2/3] Analyzing Report...")
        objective_results = self.report_analyzer.analyze_report(report_text)
        print(f"Objective Analysis Results: {json.dumps(objective_results, indent=2)}")
        # Perform subjective evaluations using GPT
        print("\n[3/3] Running GPT Evaluation...")
        subjective_results = self._gpt_evaluation(rubric_text, report_text, system_prompt)
        # Combine results
        print("\nCombining Results...")
        return self._combine_results(rubric, objective_results, subjective_results)

    def _gpt_evaluation(self, rubric, report, system_prompt):
        """Ask the chat model for a JSON evaluation of *report* against *rubric*.

        This step is best-effort: any failure (API error, undecodable JSON, or
        a response without a non-empty 'sections' list) is printed and turned
        into an empty result that carries the message under 'error', so the
        caller can still show the objective metrics and report the failure.

        Returns:
            The decoded JSON object, or
            ``{'sections': [], 'total_score': 0, 'error': <message>}``.
        """
        try:
            print("\n--- GPT Evaluation Started ---")
            print(f"Rubric length: {len(rubric)} chars")
            print(f"Report length: {len(report)} chars")
            response = self.client.chat.completions.create(
                model="gpt-4-turbo",
                messages=[{
                    "role": "system",
                    "content": system_prompt
                }, {
                    "role": "user",
                    "content": f"RUBRIC:\n{rubric}\nREPORT:\n{report}"
                }],
                temperature=0.2,
                response_format={"type": "json_object"}
            )
            result = json.loads(response.choices[0].message.content)
            print("\n--- GPT Response Received ---")
            print("GPT Raw Response:", json.dumps(result, indent=2))
            if not isinstance(result, dict) or not result.get('sections'):
                raise ValueError("GPT response missing 'sections' key")
            return result
        except Exception as e:
            print(f"\n!!! GPT Evaluation Error: {str(e)}")
            return {'sections': [], 'total_score': 0, 'error': str(e)}

    @staticmethod
    def _normalise_name(name):
        """Return *name* case-folded with whitespace collapsed, for matching."""
        return ' '.join(str(name).split()).casefold()

    @staticmethod
    def _to_score(value):
        """Coerce a model-supplied score to float; unusable values become 0.0."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    @classmethod
    def _section_max_score(cls, section):
        """Return the largest number found in the section's criterion ranges."""
        bounds = [
            float(number)
            for criterion in section.get('criteria', [])
            for number in cls._NUMBER_RE.findall(str(criterion.get('range', '')))
        ]
        return max(bounds, default=0.0)

    def _combine_results(self, rubric, objective, subjective):
        """Merge rubric sections with objective metrics and GPT feedback.

        Objective metrics and GPT sections are matched to rubric sections by
        name, ignoring case and surrounding whitespace, because the three
        sources do not spell the headings identically.

        Returns:
            A dict with 'sections', 'total_score', 'max_score' (sum of the
            highest score each rubric section allows), 'detailed_feedback',
            and 'error' when the GPT step reported one.
        """
        print("\nCombining objective and subjective results...")
        combined = {
            "sections": [],
            "total_score": 0,
            "max_score": 0,
            "detailed_feedback": []
        }
        if subjective.get('error'):
            combined['error'] = subjective['error']
        if 'sections' not in subjective:
            print("!!! Warning: Subjective results missing sections key")
            return combined

        objective_by_name = {
            self._normalise_name(key): value
            for key, value in (objective or {}).items()
        }
        subjective_by_name = {}
        for entry in subjective.get('sections') or []:
            if isinstance(entry, dict):
                # setdefault keeps the first entry when a name is repeated.
                subjective_by_name.setdefault(
                    self._normalise_name(entry.get('name', '')), entry
                )

        print(f"Processing {len(rubric['sections'])} rubric sections...")
        for section in rubric['sections']:
            section_name = section['name']
            lookup_key = self._normalise_name(section_name)
            print(f"\nProcessing section: {section_name}")
            obj_data = objective_by_name.get(lookup_key, {})
            print(f"Objective data: {obj_data}")
            subj_data = subjective_by_name.get(lookup_key, {})
            print(f"Subjective data: {subj_data}")
            combined_section = {
                "name": section_name,
                "score": self._calculate_section_score(section, obj_data, subj_data),
                "objective_metrics": obj_data,
                "subjective_feedback": subj_data.get('feedback', ''),
                "improvements": subj_data.get('improvements', []),
                "hd_improvements": subj_data.get('hd_improvements', [])
            }
            combined['sections'].append(combined_section)
            # Rounding keeps float accumulation noise out of the displayed total.
            combined['total_score'] = round(
                combined['total_score'] + combined_section['score'], 2
            )
            combined['max_score'] = round(
                combined['max_score'] + self._section_max_score(section), 2
            )
        print("\nFinal Combined Scores Calculated")
        return combined

    def format_results_for_display(self, results):
        """Convert JSON results to human-readable format for Streamlit.

        The total is shown out of ``results['max_score']`` when present and
        non-zero, otherwise out of 60.
        """
        print("\nFormatting results for display...")
        max_score = results.get('max_score') or self._FALLBACK_MAX_SCORE
        output = [
            "📝 Final Evaluation Report",
            f"🏆 Total Score: {results['total_score']}/{max_score:g}",
        ]
        if results.get('error'):
            output.append(f"\n⚠️ GPT evaluation failed: {results['error']}")
        output.append("\n---")
        # Section details
        for section in results['sections']:
            output.append(f"\n📋 Section: {section['name']}")
            output.append(f"⭐ Score: {section['score']}")
            if section.get('subjective_feedback'):
                output.append(f"\n📄 Feedback: {section['subjective_feedback']}")
            if section.get('improvements'):
                output.append("\n🚀 General Improvements:")
                output.extend(f"- {imp}" for imp in section['improvements'])
            if section.get('hd_improvements'):
                output.append("\n🎯 HD-Level Improvements:")
                output.extend(f"- {imp}" for imp in section['hd_improvements'])
            if section.get('objective_metrics'):
                output.append("\n🔍 Objective Metrics:")
                output.extend(
                    f"- {k}: {v}" for k, v in section['objective_metrics'].items()
                )
            output.append("\n---")
        return "\n".join(output)

    def _calculate_section_score(self, rubric_section, objective, subjective):
        """Return the score for one rubric section.

        "Evidence of research" (matched case-insensitively) is scored from the
        objective source count when at least one source was detected. A count
        of zero is treated as "nothing detected" and defers to the GPT score
        rather than zeroing the criterion. Every other section uses the GPT
        score, coerced to float.
        """
        if self._normalise_name(rubric_section['name']) == "evidence of research":
            source_count = objective.get('source_count', 0)
            if isinstance(source_count, (int, float)) and source_count > 0:
                return self._score_source_count(source_count)
        # Default to subjective score
        return self._to_score(subjective.get('score', 0))

    def _score_source_count(self, count):
        """Map a source count onto the rubric's group-report bands."""
        if count >= 30:
            return 5.0
        if count >= 23:
            return 3.5
        if count >= 18:
            return 3.0
        if count >= 15:
            return 2.5
        return 0.0
class RubricParser:
    """Parse a plain-text rubric into sections and their grade-band criteria.

    The expected layout is a heading line (letters and spaces only, starting
    with a capital) followed by one line per grade band in the form
    ``GRADE (range) description``, where GRADE is HD, D, C, P or N.
    """

    # Heading: a capital letter followed by letters/whitespace only.
    _HEADER_RE = re.compile(r'^[A-Z][a-zA-Z\s]+$')
    # Start of a grade-band line, e.g. "HD (".
    _CRITERION_START_RE = re.compile(r'^(HD|D|C|P|N)\s*\(')
    # Full grade-band line: grade, parenthesised range, description.
    _CRITERION_RE = re.compile(r'^([A-Z]{1,2})\s*\(([^)]+)\)\s*(.*)')

    def parse_rubric(self, rubric_text):
        """Return ``{'sections': [{'name': str, 'criteria': [dict, ...]}, ...]}``.

        Blank lines are skipped, grade-band lines that appear before any
        heading are ignored, and malformed grade-band lines (for example a
        missing closing parenthesis) are dropped instead of raising. ``None``
        or empty text yields no sections.
        """
        sections = []
        current_section = None
        for raw_line in (rubric_text or '').splitlines():
            line = raw_line.strip()
            if not line:
                continue
            if self._is_section_header(line):
                current_section = {'name': line, 'criteria': []}
                sections.append(current_section)
            elif current_section is not None and self._is_criterion(line):
                criterion = self._parse_criterion(line)
                if criterion is not None:
                    current_section['criteria'].append(criterion)
        return {'sections': sections}

    def _is_section_header(self, line):
        """Return True when *line* is a heading (no parentheses, letters only)."""
        return '(' not in line and self._HEADER_RE.match(line) is not None

    def _is_criterion(self, line):
        """Return True when *line* starts with HD/D/C/P/N followed by '('."""
        return self._CRITERION_START_RE.match(line) is not None

    def _parse_criterion(self, line):
        """Split a grade-band line into grade, range and description.

        Returns:
            ``{'grade', 'range', 'description'}``, or ``None`` when the line
            does not have the ``GRADE (range) description`` shape.
        """
        match = self._CRITERION_RE.match(line)
        if match is None:
            return None
        grade, score_range, description = match.groups()
        return {
            'grade': grade,
            'range': score_range.strip(),
            'description': description.strip()
        }
class ReportAnalyzer:
    """Compute regex-based, model-independent metrics for a report."""

    # A heading line such as "References", "7.0 References" or "Bibliography:".
    _REFERENCE_HEADING_RE = re.compile(
        r'^(?:\d+(?:\.\d+)*\.?\s+)?'
        r'(?:references?|reference list|bibliography|works cited)\s*:?$',
        re.IGNORECASE,
    )
    # A heading line that ends the reference list, e.g. "8.0 Appendices".
    _APPENDIX_HEADING_RE = re.compile(
        r'^(?:\d+(?:\.\d+)*\.?\s+)?(?:appendix|appendices)\b', re.IGNORECASE
    )
    # A publication year (1900-2099), optionally suffixed as in "2020a".
    _YEAR_RE = re.compile(r'\b(?:19|20)\d{2}[a-z]?\b')
    # Innermost parenthesised text.
    _PAREN_RE = re.compile(r'\(([^()]*)\)')
    # "<authors><separator><year>" at the start of one citation.
    _CITATION_RE = re.compile(r'^(.*?)[,\s]+((?:19|20)\d{2})[a-z]?\b')

    def analyze_report(self, report_text):
        """Return the objective metrics, grouped under the keys the evaluator reads.

        ``None`` is treated as an empty report. The heading and appendix
        checks are placeholders that always report True.
        """
        text = report_text or ""
        return {
            "Evidence of research": {
                "source_count": self._count_sources(text),
                "source_types": self._analyze_source_types(text)
            },
            "Formatting": {
                "heading_consistency": self._check_headings(text),
                "appendix_quality": self._check_appendices(text)
            }
        }

    def _count_sources(self, text):
        """Estimate how many distinct sources the report uses.

        Entries in the reference list are counted when one is found;
        otherwise distinct in-text citations are counted. Both are heuristics.
        """
        if not text:
            return 0
        listed = self._count_reference_entries(text)
        if listed:
            return listed
        return self._count_unique_citations(text)

    def _count_reference_entries(self, text):
        """Count lines containing a year between a references heading and the appendices.

        When the heading occurs more than once (for example in a table of
        contents) the largest run is used.
        """
        best = 0
        running = None  # None while outside a reference list
        for raw_line in text.splitlines():
            line = raw_line.strip()
            if not line:
                continue
            if self._REFERENCE_HEADING_RE.match(line):
                if running is not None:
                    best = max(best, running)
                running = 0
            elif running is not None:
                if self._APPENDIX_HEADING_RE.match(line):
                    best = max(best, running)
                    running = None
                elif self._YEAR_RE.search(line):
                    running += 1
        if running is not None:
            best = max(best, running)
        return best

    def _count_unique_citations(self, text):
        """Count distinct (authors, year) pairs among parenthetical citations.

        Handles forms such as ``(Smith 2020)``, ``(Smith 2020, 43)``,
        ``(A and B, 2005)``, ``(Lee et al., 2020)`` and several citations
        separated by ';' inside one pair of parentheses.
        """
        seen = set()
        for group in self._PAREN_RE.findall(text):
            for part in group.split(';'):
                match = self._CITATION_RE.match(part.strip())
                if match is None:
                    continue
                # Strip punctuation/digits so "Lee et al." and "Lee et al" agree.
                authors = re.sub(r'[\W\d_]+', ' ', match.group(1).casefold()).strip()
                if authors:
                    seen.add((authors, match.group(2)))
        return len(seen)

    def _analyze_source_types(self, text):
        """Placeholder: source-type analysis is not implemented; returns {}."""
        types = defaultdict(int)
        # Implement source type analysis
        return dict(types)

    def _check_headings(self, text):
        """Placeholder: heading consistency is not checked; always True."""
        # Implement heading consistency check
        return True

    def _check_appendices(self, text):
        """Placeholder: appendix quality is not checked; always True."""
        # Implement appendix quality check
        return True
def main():
    """Render the evaluator page: editable inputs, a reset control and a run control."""
    st.set_page_config(page_title="Academic Report Evaluator", layout="wide")

    state = st.session_state
    # Each editable field paired with the built-in value it starts from.
    field_defaults = (
        ('rubric_content', DEFAULT_RUBRIC),
        ('report_content', DEFAULT_REPORT),
        ('system_prompt', DEFAULT_SYSTEM_PROMPT),
    )
    # Seed the session on first load only; later runs keep the user's edits.
    for field, default in field_defaults:
        if field not in state:
            state[field] = default

    st.title("📚 Automated Report Evaluation System")

    # Collapsed editor for the system prompt.
    settings = st.expander("⚙️ Evaluation Settings", expanded=False)
    with settings:
        state['system_prompt'] = st.text_area(
            "Evaluation Prompt:",
            value=state['system_prompt'],
            height=300
        )

    # Rubric on the left, report on the right.
    rubric_col, report_col = st.columns(2)
    state['rubric_content'] = rubric_col.text_area(
        "Evaluation Rubric:",
        value=state['rubric_content'],
        height=400
    )
    state['report_content'] = report_col.text_area(
        "Student Report:",
        value=state['report_content'],
        height=400
    )

    # Two buttons followed by a wider empty column.
    reset_col, run_col, _unused_col = st.columns([1, 1, 3])
    if reset_col.button("🔄 Reset to Defaults"):
        for field, default in field_defaults:
            state[field] = default
        st.rerun()

    with run_col:
        run_requested = st.button("🚀 Run Evaluation", use_container_width=True)
    if not run_requested:
        return

    try:
        evaluator = ReportEvaluator(openai.api_key)
        with st.spinner("🔍 Analyzing report..."):
            results = evaluator.evaluate(
                state['rubric_content'],
                state['report_content'],
                state['system_prompt']
            )
        st.success("✅ Evaluation Complete!")
        # Human-readable view first, raw data behind an expander.
        with st.container():
            st.markdown(evaluator.format_results_for_display(results))
        with st.expander("🔍 Raw Evaluation Data"):
            st.json(results)
    except Exception as e:
        # Show the message and the full traceback on the page.
        st.error(f"❌ Error occurred: {e!s}")
        st.text(traceback.format_exc())


# Run the page when this file is executed as a script.
if __name__ == "__main__":
    main()