# SummaryGenerator / docs / generate_template.py
# Adive01's picture
# Upload docs/generate_template.py with huggingface_hub
# 067c154 verified
import sys
import subprocess
# Ensure python-docx is installed. If the import fails, install the package
# into the interpreter that is running this script and then import it again.
try:
    import docx
except ImportError:
    print("Installing python-docx...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "python-docx"])
    # The package was installed while this process was already running, so
    # refresh the import system's finder caches before retrying; otherwise
    # the new module may not be noticed.
    import importlib
    importlib.invalidate_caches()
    import docx
from docx import Document
from docx.shared import Pt, Inches, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
# Typeface and line spacing shared by the body text and both heading styles.
_REPORT_FONT = 'Times New Roman'
_LINE_SPACING = 1.5

# Centered placeholder lines printed under the title on the title page.
_TITLE_PAGE_DETAILS = [
    "Course: CSE274 – Applied Machine Learning",
    "Names of Students: [Enter Names]",
    "Roll Numbers: [Enter Roll Numbers]",
    "Instructor Name: [Enter Instructor Name]",
    "Department / University: [Enter Dept/University]",
    "Submission Date: [Enter Date]",
]

# Entry kinds used in _REPORT_OUTLINE.
_H1 = 'h1'            # level-1 heading; payload is the heading text
_H2 = 'h2'            # level-2 heading; payload is the heading text
_PARA = 'p'           # plain paragraph; payload is the paragraph text
_BULLETS = 'bullets'  # bulleted list; payload is a list of item strings

# Everything after the title page, in document order. Section numbering
# starts at 2 because the title page counts as section 1.
_REPORT_OUTLINE = [
    (_H1, '2. Abstract'),
    (_PARA, 'Brief overview of the project (150–250 words).'),
    (_BULLETS, [
        'Problem statement',
        'Techniques used',
        'Key results',
    ]),

    (_H1, '3. Introduction'),
    (_BULLETS, [
        'Background of the problem',
        'Importance of the study',
        'Real-world relevance',
        'Objective of the project',
    ]),

    (_H1, '4. Problem Statement'),
    (_PARA, 'Clearly define:'),
    (_BULLETS, [
        'What problem you are solving',
        'Type: Classification / Regression / Clustering',
    ]),
    (_PARA, 'Example:\n- Disease prediction (Classification)\n- House price prediction (Regression)\n- Customer segmentation (Clustering)'),

    (_H1, '5. Dataset Description'),
    (_BULLETS, [
        'Dataset source (Kaggle / UCI / etc.)',
        'Number of records and features',
        'Feature description (table format recommended)',
        'Target variable (if applicable)',
    ]),

    (_H1, '6. Data Preprocessing'),
    (_BULLETS, [
        'Handling missing values',
        'Outlier detection & treatment',
        'Encoding categorical variables',
        'Feature scaling / normalization',
        'Handling class imbalance (if classification)',
        'Data leakage prevention',
    ]),

    (_H1, '7. Feature Engineering & Dimensionality'),
    (_H2, 'Feature selection techniques:'),
    (_BULLETS, [
        'Variance Threshold',
        'Correlation-based removal',
        'Forward / Backward selection',
    ]),
    (_BULLETS, [
        'Feature extraction',
        'PCA / LDA (if used)',
        'Explanation of selected features',
    ]),

    (_H1, '8. Methodology'),
    (_PARA, '(Choose based on project type)'),
    (_H2, 'A. For Classification'),
    (_BULLETS, ['Models used: Logistic Regression, Naïve Bayes, KNN / SVM / Decision Tree']),
    (_H2, 'B. For Regression'),
    (_BULLETS, ['Models used: Linear Regression, Multiple Regression, Polynomial / Regularized models']),
    (_H2, 'C. For Clustering'),
    (_BULLETS, ['Algorithms used: K-Means, Hierarchical Clustering, DBSCAN']),
    (_PARA, '\nInclude:'),
    (_BULLETS, [
        'Reason for choosing each algorithm',
        'Workflow diagram (recommended)',
    ]),

    (_H1, '9. Implementation Details'),
    (_BULLETS, [
        'Tools used: Python, Jupyter Notebook',
        'Libraries: NumPy, Pandas, Scikit-learn, Matplotlib, Seaborn',
        'Parameter settings: (e.g., K (clusters), Learning rate, Depth of tree)',
    ]),

    (_H1, '10. Model Evaluation'),
    (_H2, 'For Classification'),
    (_BULLETS, ['Confusion Matrix', 'Accuracy, Precision, Recall, F1-score', 'ROC Curve, AUC']),
    (_H2, 'For Regression'),
    (_BULLETS, ['MAE, MSE, RMSE', 'R² Score', 'Residual plots']),
    (_H2, 'For Clustering'),
    (_BULLETS, ['Silhouette Score', 'WCSS (Elbow Method)', 'Davies-Bouldin Index']),

    (_H1, '11. Results & Visualization'),
    (_BULLETS, [
        'Graphs: ROC Curve, Elbow Graph, Cluster plots, Actual vs Predicted plots',
        'Tables comparing models',
    ]),

    (_H1, '12. Hyperparameter Tuning'),
    (_BULLETS, [
        'Grid Search / Random Search',
        'Cross-validation',
        'Best parameters found',
        'Performance improvement',
    ]),

    (_H1, '13. Interpretation & Insights'),
    (_BULLETS, [
        'What did the model learn?',
        'Key patterns or trends',
        'Business/real-world insights',
    ]),

    (_H1, '14. Conclusion'),
    (_BULLETS, [
        'Summary of findings',
        'Best performing model',
        'Limitations',
        'Future scope',
    ]),

    (_H1, '15. Appendix'),
    (_BULLETS, [
        'Code snippets',
        'Additional graphs',
        'Screenshots',
    ]),

    (_H1, '16. References'),
    (_BULLETS, [
        'Dataset source',
        'Research papers / websites',
        'Books',
    ]),
]


def _restyle_heading(style, size_pt):
    """Make a heading style bold black Times New Roman at *size_pt* points."""
    style.font.name = _REPORT_FONT
    style.font.size = Pt(size_pt)
    style.font.bold = True
    style.font.color.rgb = RGBColor(0, 0, 0)
    style.paragraph_format.line_spacing = _LINE_SPACING


def _apply_report_styles(doc):
    """Set the global look: 12 pt body text, 14/12 pt headings, 1.5 spacing."""
    normal = doc.styles['Normal']
    normal.font.name = _REPORT_FONT
    normal.font.size = Pt(12)
    normal.paragraph_format.line_spacing = _LINE_SPACING

    _restyle_heading(doc.styles['Heading 1'], 14)
    _restyle_heading(doc.styles['Heading 2'], 12)


def _add_title_page(doc):
    """Append the centered title page and finish it with a page break."""
    # Five empty paragraphs push the title down the page.
    for _ in range(5):
        doc.add_paragraph('')

    title = doc.add_paragraph('Project Title')
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER
    # The paragraph was created with text, so that text is its first run.
    title_run = title.runs[0]
    title_run.font.size = Pt(16)
    title_run.font.bold = True

    # Spacer paragraph between the title and the detail lines.
    doc.add_paragraph('\n\n')

    for line in _TITLE_PAGE_DETAILS:
        paragraph = doc.add_paragraph(line)
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    doc.add_page_break()


def _render_outline(doc, outline):
    """Append each (kind, payload) entry of *outline* to *doc*, in order.

    Raises:
        ValueError: if an entry has a kind other than the four defined above.
    """
    for kind, payload in outline:
        if kind == _H1:
            doc.add_heading(payload, level=1)
        elif kind == _H2:
            doc.add_heading(payload, level=2)
        elif kind == _PARA:
            doc.add_paragraph(payload)
        elif kind == _BULLETS:
            for item in payload:
                doc.add_paragraph(item, style='List Bullet')
        else:
            raise ValueError(f"Unknown outline entry kind: {kind!r}")


def main(file_path='ML_Project_Report_Template.docx'):
    """Generate the ML project report template and save it as a .docx file.

    The document consists of a title page followed by sections 2 through 16
    (Abstract ... References), each populated with placeholder guidance.

    Args:
        file_path: Where to save the document. Defaults to
            'ML_Project_Report_Template.docx', the name the script has
            always used.
    """
    doc = Document()

    _apply_report_styles(doc)
    _add_title_page(doc)
    _render_outline(doc, _REPORT_OUTLINE)

    # Save the document
    doc.save(file_path)
    print(f"Success! Saved to {file_path}")
# Script entry point: build the template only when this file is executed
# directly, so importing the module has no document-generating side effect.
if __name__ == '__main__':
    main()