GRC_framework / utils.py
abdulnim's picture
added all the remaining categories
b08ba8c
raw
history blame contribute delete
No virus
16 kB
# Maps each audit category to the analysis types offered under it.
# Every analysis-type name must match a key of ANALYSIS_TYPES / JSON_SCHEMAS
# exactly: get_system_prompt() looks names up with .get() and silently falls
# back to a generic instruction and an empty schema for unknown names.
ai_audit_analysis_categories = {
    "AI Audit": [
        "sentiment_analysis",
        "emotion_detection",
        "political_bias_detection",
        "stress_level_detection",
        "empathy_level_assessment",
        "mood_detection",
        "toxicity_detection",
    ],
    "GDPR": [
        "Privacy_Assessment",
        "Consent_and_Transparency",
        "Data_Security",
        "Environmental_Impact",
    ],
    "Toxicity": [
        "Content_Moderation",
        "Reporting_Mechanism",
        "Content_Guidelines",
        "User_Education",
    ],
    "Legal": [
        "Privacy_Policy",
        "Data_Retention",
        "Consent_Mechanism",
    ],
    "Context": [
        "Ethical_AI",
        "Bias_Mitigation",
        "Fairness_Assessment",
        "Explainability",
    ],
    "Governance": [
        "Model_Development",
        "Data_Quality",
        "Bias_Mitigation",
        # A trailing comma after every item matters here: adjacent string
        # literals without one are concatenated into a single entry.
        "Fairness_Assessment",
        "Explainability",
        "User_Input",
    ],
    "RiskManagement": [
        "Corporate_Ethics",
        "Board_Management",
        "Stakeholder_Engagement",
    ],
    "Robustness": [
        "System_Reliability",
        "Quality_Assurance",
        "Stress_Testing",
        "Fail_Safe_Procedures",
    ],
    "Sustainability": [
        "Renewable_Resources",
        "Waste_Reduction",
        "Energy_Efficiency",
        "Sustainable_Practices",
    ],
}
# Generic prompt skeleton with three named placeholders:
# {analysis_type}, {specific_instruction} and {json_schema}.
# NOTE(review): nothing in the visible part of this file references it.
STANDARD_PROMPT_TEMPLATE = (
    "You are a data analysis assistant capable of {analysis_type} analysis. "
    "{specific_instruction} "
    "Respond with your analysis in JSON format. "
    "The JSON schema should include '{json_schema}'."
)
def get_system_prompt(analysis_type: str) -> str:
    """Build the system prompt for one analysis type.

    The instruction text is looked up in ANALYSIS_TYPES and the expected
    output fields in JSON_SCHEMAS. An unknown ``analysis_type`` does not
    raise: it falls back to a generic instruction and an empty schema.

    Args:
        analysis_type: Name of the analysis, e.g. ``"sentiment_analysis"``.

    Returns:
        The prompt text, ending with the schema rendered as
        ``{'field': description, ...}``.
    """
    specific_instruction = ANALYSIS_TYPES.get(
        analysis_type, "Perform the analysis as per the specified type."
    )
    json_schema = JSON_SCHEMAS.get(analysis_type, {})
    json_schema_str = ", ".join(
        f"'{key}': {value}" for key, value in json_schema.items()
    )
    # Adjacent literals are concatenated verbatim, so each fragment must end
    # with its own punctuation and trailing space.
    return (
        f"You are a data analyst API capable of {analysis_type} analysis. "
        f"{specific_instruction} Please respond with your analysis directly in JSON format "
        "(without using Markdown code blocks or any other formatting). "
        "Always include confidence_score:number (0-1) with two decimals for result based on analysis. "
        f"The JSON schema should include: {{{json_schema_str}}}."
    )
# Instruction sentence inserted into the system prompt for each analysis type.
# Keys are the analysis-type names passed to get_system_prompt().
ANALYSIS_TYPES = {
    # AI-audit text analyses
    "sentiment_analysis": (
        "Analyze the sentiment of the provided text. "
        "Determine whether the sentiment is positive, negative, or neutral "
        "and provide a confidence score."
    ),
    "emotion_detection": (
        "Detect and identify the primary emotions expressed in the provided text. "
        "Provide a score for the intensity of the detected emotion."
    ),
    "political_bias_detection": (
        "Detect any political bias in the provided text, "
        "identifying leaning towards particular ideologies or parties."
    ),
    "stress_level_detection": (
        "Analyze the text to assess stress levels, "
        "identifying triggers and intensity of stress."
    ),
    "empathy_level_assessment": (
        "Assess the level of empathy expressed in the text, "
        "identifying empathetic responses and tendencies."
    ),
    "mood_detection": (
        "Detect the mood of the individual based on textual cues, "
        "ranging from happy to sad, calm to angry."
    ),
    "toxicity_detection": (
        "Identify and assess the level of toxicity in the provided text. "
        "Determine whether the text contains harmful, offensive, or inappropriate content "
        "and provide a score indicating the severity of the toxicity"
    ),

    # GDPR-related types
    "Consent_and_Transparency": (
        "Evaluate how consent is obtained and the level of transparency "
        "provided to users regarding data usage."
    ),
    "Data_Security": (
        "Assess the measures in place for data security, "
        "including vulnerabilities and compliance with security standards."
    ),
    "Privacy_Assessment": (
        "Analyze the overall privacy practices, "
        "including policy compliance, data minimization, and user data accessibility."
    ),
    "Environmental_Impact": (
        "Assess the environmental impact of data processing practices, "
        "including carbon footprint and energy efficiency."
    ),

    # Toxicity-related types
    "Content_Moderation": (
        "Evaluate the effectiveness of content moderation practices, "
        "including automated and human moderation efforts."
    ),
    "Reporting_Mechanism": (
        "Assess the ease and effectiveness of reporting mechanisms "
        "for inappropriate or harmful content."
    ),
    "Content_Guidelines": (
        "Analyze the clarity and comprehensiveness of content guidelines "
        "and their enforcement consistency."
    ),
    "User_Education": (
        "Evaluate the availability and accessibility of educational resources "
        "for users regarding appropriate content and behavior."
    ),

    # Legal-related types
    "Privacy_Policy": (
        "Analyze the clarity and compliance of a privacy policy "
        "with legal standards."
    ),
    "Data_Retention": (
        "Evaluate the data retention practices, "
        "including periods, deletion policies, and legal compliance."
    ),
    "Consent_Mechanism": (
        "Assess the clarity and effectiveness of the consent mechanism "
        "in place for data collection and usage."
    ),
    "GDPR_Compliance": (
        "Evaluate the level of GDPR compliance in data handling, "
        "protection measures, and breach notification protocols."
    ),

    # Context-related types
    "Ethical_AI": (
        "Assess adherence to ethical standards in AI practices, "
        "including identification and mitigation of ethical issues."
    ),
    "Bias_Mitigation": (
        "Evaluate the presence and mitigation of bias "
        "in data or algorithms."
    ),
    "Fairness_Assessment": (
        "Assess fairness in AI systems, "
        "identifying affected groups and providing recommendations for improvement."
    ),
    "Explainability": (
        "Evaluate the transparency and explainability "
        "of AI models to users."
    ),

    # Governance-related types
    "Model_Development": (
        "Analyze the process of model development, "
        "including team composition and ethical considerations."
    ),
    "Data_Quality": (
        "Assess the quality of data used, "
        "focusing on accuracy, completeness, and timeliness."
    ),
    "User_Input": (
        "Evaluate the mechanisms for and impact of user feedback "
        "on the system."
    ),

    # Risk Management-related types
    "Corporate_Ethics": (
        "Assess the ethical practices within a corporation, "
        "including employee training and ethics code adherence."
    ),
    "Board_Management": (
        "Evaluate the effectiveness and diversity of board management "
        "and its compliance with ethical standards."
    ),
    "Stakeholder_Engagement": (
        "Analyze stakeholder engagement practices, "
        "including inclusion, feedback mechanisms, and satisfaction."
    ),
    "Risk_Management": (
        "Assess the identification, mitigation, and monitoring of risks "
        "within an organization."
    ),

    # Robustness-related types
    "System_Reliability": (
        "Evaluate the reliability and resilience of a system, "
        "including uptime and redundancy measures."
    ),
    "Quality_Assurance": (
        "Assess the quality assurance practices, "
        "including compliance with standards and testing frequency."
    ),
    "Stress_Testing": (
        "Analyze the system's robustness through stress testing "
        "and identify weaknesses."
    ),
    "Fail_Safe_Procedures": (
        "Evaluate the effectiveness of fail-safe procedures "
        "in place for system failures."
    ),

    # Sustainability-related types
    "Renewable_Resources": (
        "Assess the use of renewable resources "
        "and sustainability goals in operations."
    ),
    "Waste_Reduction": (
        "Evaluate waste management practices, "
        "reduction rates, and recycling initiatives."
    ),
    "Energy_Efficiency": (
        "Analyze energy consumption and efficiency, "
        "including energy-saving measures and audits."
    ),
    "Sustainable_Practices": (
        "Evaluate the adoption of sustainable practices, "
        "including training and overall impact."
    ),
}
# Expected response fields per analysis type. get_system_prompt() renders
# each entry into the prompt as "'field': description" pairs, so the values
# are human-readable type hints rather than machine-validated schemas.
JSON_SCHEMAS = {
    "sentiment_analysis": {
        "sentiment": "string (positive, negative, neutral)",
        "confidence_score": "number (0-1)",
        "text_snippets": "array of strings (specific text portions contributing to sentiment)",
    },
    "emotion_detection": {
        "emotion": "string (primary emotion detected)",
        "confidence_score": "number (0-1)",
        "secondary_emotions": "array of objects (secondary emotions and their scores)",
    },
    "political_bias_detection": {
        "bias": "string (left, right, neutral)",
        "confidence_score": "number (0-1)",
        "bias_indicators": "array of strings (elements indicating bias)",
        "political_alignment_score": "number (quantifying degree of political bias)",
    },
    "stress_level_detection": {
        "stress_level": "string",
        "stress_triggers": "array of strings",
    },
    "empathy_level_assessment": {
        "empathy_level": "string",
        "empathetic_responses": "array of strings",
    },
    "mood_detection": {
        "mood": "string",
        "mood_intensity": "number",
    },
    "toxicity_detection": {
        "toxicity_level": "string (none, low, medium, high)",
        "toxicity_flags": "array of strings (specific words or phrases contributing to toxicity)",
        "contextual_factors": "array of objects (additional contextual elements influencing toxicity interpretation)",
    },
    # GDPR-related schemas
    "Consent_and_Transparency": {
        "consent_obtained": "boolean",
        "transparency_level": "string (low, medium, high)",
        "missing_information": "array of strings (information not clearly presented or missing)",
        "user_understanding": "string (poor, average, good)",
    },
    "Data_Security": {
        "security_status": "string (secure, at risk, breached)",
        "vulnerability_points": "array of strings (specific areas of potential vulnerability)",
        "data_encryption": "boolean",
        "compliance_status": "string (compliant, partially compliant, non-compliant)",
    },
    "Environmental_Impact": {
        "carbon_footprint": "number (metric tons of CO2 equivalent)",
        "energy_efficiency": "string (low, moderate, high)",
        "sustainable_practices": "boolean",
        "environmental_impact_score": "number (0-100)",
    },
    "Privacy_Assessment": {
        "overall_privacy_status": "string (positive, negative)",
        "privacy_policy_compliance": "string (compliant, partially compliant, non-compliant)",
        "data_minimization": "boolean",
        "user_data_accessibility": "string (none, limited, full)",
        "anonymization": "boolean",
    },
    # Toxicity-related schemas
    "Content_Moderation": {
        "moderation_effectiveness": "string (low, medium, high)",
        "moderated_content_types": "array of strings (types of content being moderated)",
        "automated_moderation": "boolean",
        "human_moderation": "boolean",
    },
    "Reporting_Mechanism": {
        "reporting_ease": "string (easy, moderate, difficult)",
        "response_time": "string (fast, average, slow)",
        "report_feedback": "string (detailed, minimal, none)",
    },
    "Content_Guidelines": {
        "clarity": "string (clear, somewhat clear, unclear)",
        "comprehensiveness": "string (comprehensive, partial, lacking)",
        "enforcement_consistency": "string (consistent, inconsistent)",
    },
    "User_Education": {
        "educational_resources_available": "boolean",
        "resource_accessibility": "string (easy, moderate, difficult)",
        "user_comprehension_level": "string (high, medium, low)",
    },
    # Legal-related schemas
    "Privacy_Policy": {
        "clarity": "string (clear, somewhat clear, unclear)",
        "compliance": "string (compliant, partially compliant, non-compliant)",
        "user_rights": "array of strings (specific rights mentioned in policy)",
    },
    # Data_Retention is listed under the "Legal" category and has an
    # ANALYSIS_TYPES instruction; without a schema its prompt would end with
    # an empty "{}". Fields mirror that instruction (periods, deletion
    # policies, legal compliance).
    # NOTE(review): field names were chosen to match the instruction text;
    # confirm against whatever consumes the model's JSON output.
    "Data_Retention": {
        "retention_period": "string (defined, partially defined, undefined)",
        "deletion_policy": "boolean",
        "legal_compliance": "string (compliant, partially compliant, non-compliant)",
    },
    "Consent_Mechanism": {
        "mechanism_clarity": "string (clear, somewhat clear, unclear)",
        "user_control": "boolean",
        "opt_in_out": "string (opt-in, opt-out, not applicable)",
    },
    "GDPR_Compliance": {
        "compliance_level": "string (fully compliant, partially compliant, non-compliant)",
        "data_protection_officer": "boolean",
        "breach_notification": "boolean",
    },
    # Context-related schemas
    "Ethical_AI": {
        "ethical_standards_adherence": "string (high, medium, low)",
        "ethical_issues_identified": "array of strings",
        "mitigation_measures": "array of strings",
    },
    "Bias_Mitigation": {
        "bias_identified": "boolean",
        "bias_types": "array of strings",
        "mitigation_strategies": "array of strings",
    },
    "Fairness_Assessment": {
        "fairness_level": "string (high, medium, low)",
        "affected_groups": "array of strings",
        "improvement_recommendations": "array of strings",
    },
    "Explainability": {
        "model_transparency": "string (transparent, opaque)",
        "explanation_comprehensibility": "string (high, medium, low)",
        "user_friendly_explanations": "boolean",
    },
    # Governance-related schemas
    "Model_Development": {
        "development_process": "string (structured, ad-hoc, undefined)",
        "team_composition": "array of strings (roles involved)",
        "ethics_considerations": "boolean",
    },
    "Data_Quality": {
        "accuracy_level": "string (high, medium, low)",
        "completeness": "string (complete, partial, incomplete)",
        "timeliness": "string (up-to-date, outdated)",
    },
    "User_Input": {
        "user_feedback_mechanism": "boolean",
        "feedback_responsiveness": "string (responsive, moderately responsive, unresponsive)",
        "user_input_impact": "string (high, medium, low)",
    },
    # Risk Management-related schemas
    "Corporate_Ethics": {
        "ethics_code": "string (exists, partial, none)",
        "employee_training": "boolean",
        "ethics_violations": "array of strings",
    },
    "Board_Management": {
        "board_structure": "string (effective, average, ineffective)",
        "board_diversity": "boolean",
        "board_ethics_compliance": "string (compliant, non-compliant)",
    },
    "Stakeholder_Engagement": {
        "stakeholder_inclusion": "string (inclusive, partially inclusive, exclusive)",
        "feedback_mechanism": "boolean",
        "stakeholder_satisfaction": "string (high, medium, low)",
    },
    "Risk_Management": {
        "risk_identification": "boolean",
        "risk_mitigation_strategies": "array of strings",
        "risk_monitoring": "boolean",
    },
    # Robustness-related schemas
    "System_Reliability": {
        "uptime_percentage": "number (0-100)",
        "system_resilience": "string (high, medium, low)",
        "redundancy_measures": "boolean",
    },
    "Quality_Assurance": {
        "quality_standards": "array of strings",
        "testing_frequency": "string (frequent, occasional, rare)",
        "quality_assurance_compliance": "string (compliant, partially compliant, non-compliant)",
    },
    "Stress_Testing": {
        "stress_test_pass_rate": "number (0-100)",
        "identified_weaknesses": "array of strings",
        "improvement_actions": "array of strings",
    },
    "Fail_Safe_Procedures": {
        "procedures_defined": "boolean",
        "execution_frequency": "string (regular, occasional, never)",
        "effectiveness": "string (effective, partially effective, ineffective)",
    },
    # Sustainability-related schemas
    "Renewable_Resources": {
        "resource_usage": "string (high, moderate, low)",
        "renewable_resource_percentage": "number (0-100)",
        "sustainability_goals": "boolean",
    },
    "Waste_Reduction": {
        "waste_management_practices": "string (effective, average, poor)",
        "reduction_rate": "number (0-100)",
        "recycling_initiatives": "boolean",
    },
    "Energy_Efficiency": {
        "energy_consumption": "string (high, moderate, low)",
        "energy_saving_measures": "array of strings",
        "energy_audit": "boolean",
    },
    "Sustainable_Practices": {
        "practice_adoption": "string (widespread, partial, none)",
        "sustainability_training": "boolean",
        "sustainability_impact": "string (high, medium, low)",
    },
}