#!/usr/bin/env python3
"""Create the Vietnamese food nutrition database.

Writes a CSV file listing Vietnamese foods and drinks together with their
nutrition facts (calories, protein, carbohydrates, fat, fiber) and a
category label. The built-in table holds 73 entries at the time of writing.
"""

import csv
import sys
import traceback
from collections import Counter
from pathlib import Path
from typing import Union

# Directory used when the caller does not supply one (relative to the CWD).
DEFAULT_OUTPUT_DIR = Path("data_mining/datasets")

# Name of the generated file inside the output directory.
CSV_FILENAME = "vietnamese_food_nutrition.csv"

# Header row; every record in VIETNAMESE_FOODS follows this column order.
CSV_COLUMNS = (
    "name_vi",
    "name_en",
    "calories",
    "protein_g",
    "carbs_g",
    "fat_g",
    "fiber_g",
    "category",
)

# Record layout:
# (name_vi, name_en, calories, protein_g, carbs_g, fat_g, fiber_g, category)
VIETNAMESE_FOODS = (
    # Noodle soups
    ("Phở bò", "Beef Pho", 450, 20, 60, 15, 2, "Noodle Soup"),
    ("Phở gà", "Chicken Pho", 380, 18, 55, 10, 2, "Noodle Soup"),
    ("Phở tái", "Rare Beef Pho", 420, 19, 58, 12, 2, "Noodle Soup"),
    ("Phở chín", "Well-done Beef Pho", 460, 21, 60, 16, 2, "Noodle Soup"),
    ("Bún bò Huế", "Hue Beef Noodle", 500, 22, 65, 18, 3, "Noodle Soup"),
    ("Bún riêu", "Crab Noodle Soup", 420, 18, 58, 14, 3, "Noodle Soup"),
    ("Bún chả cá", "Fish Cake Noodle", 380, 20, 52, 12, 2, "Noodle Soup"),
    ("Hủ tiếu", "Hu Tieu Noodle", 400, 16, 60, 10, 2, "Noodle Soup"),
    ("Mì Quảng", "Quang Noodle", 450, 20, 58, 15, 3, "Noodle Soup"),
    ("Cao lầu", "Cao Lau Noodle", 480, 18, 62, 16, 2, "Noodle Soup"),
    # Vermicelli dishes
    ("Bún chả", "Grilled Pork Vermicelli", 550, 20, 70, 20, 2, "Vermicelli"),
    ("Bún thịt nướng", "Grilled Pork Vermicelli", 520, 22, 68, 18, 2, "Vermicelli"),
    ("Bún bò xào", "Stir-fried Beef Vermicelli", 480, 20, 65, 15, 3, "Vermicelli"),
    ("Bún gà nướng", "Grilled Chicken Vermicelli", 450, 24, 62, 12, 2, "Vermicelli"),
    ("Bún nem nướng", "Grilled Pork Patty Vermicelli", 500, 18, 66, 16, 2, "Vermicelli"),
    # Rice dishes
    ("Cơm tấm", "Broken Rice", 600, 25, 80, 20, 2, "Rice"),
    ("Cơm sườn", "Pork Chop Rice", 650, 28, 85, 22, 2, "Rice"),
    ("Cơm gà", "Chicken Rice", 550, 30, 75, 15, 2, "Rice"),
    ("Cơm chiên", "Fried Rice", 580, 15, 78, 22, 2, "Rice"),
    ("Cơm rang dương châu", "Yang Chow Fried Rice", 620, 18, 82, 24, 2, "Rice"),
    ("Cơm hến", "Clam Rice", 480, 20, 70, 12, 3, "Rice"),
    ("Cơm trắng", "White Rice", 200, 4, 45, 0.5, 1, "Rice"),
    # Vietnamese sandwiches
    ("Bánh mì thịt", "Pork Banh Mi", 400, 12, 50, 18, 3, "Bread"),
    ("Bánh mì gà", "Chicken Banh Mi", 380, 14, 48, 15, 3, "Bread"),
    ("Bánh mì pate", "Pate Banh Mi", 420, 10, 52, 20, 2, "Bread"),
    ("Bánh mì chả", "Sausage Banh Mi", 390, 13, 49, 17, 3, "Bread"),
    ("Bánh mì trứng", "Egg Banh Mi", 350, 12, 45, 14, 2, "Bread"),
    # Spring rolls
    ("Gỏi cuốn", "Fresh Spring Rolls", 150, 8, 20, 5, 2, "Appetizer"),
    ("Nem rán", "Fried Spring Rolls", 250, 10, 25, 15, 1, "Appetizer"),
    ("Chả giò", "Fried Rolls", 280, 12, 28, 16, 1, "Appetizer"),
    ("Nem nướng", "Grilled Pork Patty", 200, 15, 10, 12, 1, "Appetizer"),
    # Cakes and pancakes
    ("Bánh xèo", "Vietnamese Pancake", 350, 12, 40, 18, 2, "Pancake"),
    ("Bánh cuốn", "Steamed Rice Rolls", 180, 8, 28, 6, 1, "Pancake"),
    ("Bánh bột lọc", "Tapioca Dumplings", 200, 6, 35, 5, 1, "Pancake"),
    ("Bánh bèo", "Water Fern Cake", 120, 4, 22, 3, 1, "Pancake"),
    ("Bánh khọt", "Mini Pancakes", 280, 8, 32, 14, 2, "Pancake"),
    # Sticky rice
    ("Xôi gà", "Chicken Sticky Rice", 450, 18, 70, 12, 2, "Sticky Rice"),
    ("Xôi thịt", "Pork Sticky Rice", 480, 16, 72, 14, 2, "Sticky Rice"),
    ("Xôi xéo", "Mung Bean Sticky Rice", 400, 12, 68, 10, 3, "Sticky Rice"),
    ("Xôi lạc", "Peanut Sticky Rice", 420, 14, 65, 13, 3, "Sticky Rice"),
    # Soups
    ("Canh chua", "Sour Soup", 180, 12, 15, 8, 3, "Soup"),
    ("Canh rau", "Vegetable Soup", 80, 3, 12, 2, 3, "Soup"),
    ("Canh cá", "Fish Soup", 150, 15, 10, 6, 2, "Soup"),
    ("Lẩu", "Hot Pot", 400, 25, 30, 20, 4, "Soup"),
    # Seafood
    ("Cá kho tộ", "Braised Fish", 280, 25, 8, 18, 1, "Seafood"),
    ("Tôm rang", "Stir-fried Shrimp", 200, 20, 5, 10, 1, "Seafood"),
    ("Mực xào", "Stir-fried Squid", 180, 18, 8, 8, 1, "Seafood"),
    ("Cua rang me", "Tamarind Crab", 220, 16, 12, 12, 1, "Seafood"),
    # Meat dishes
    ("Thịt kho", "Braised Pork", 350, 20, 10, 25, 1, "Meat"),
    ("Sườn nướng", "Grilled Pork Ribs", 400, 22, 8, 30, 1, "Meat"),
    ("Gà nướng", "Grilled Chicken", 280, 28, 5, 15, 0, "Meat"),
    ("Bò lúc lắc", "Shaking Beef", 320, 25, 8, 20, 1, "Meat"),
    # Vegetables
    ("Rau muống xào", "Stir-fried Water Spinach", 60, 3, 8, 2, 2, "Vegetable"),
    ("Cải xào", "Stir-fried Bok Choy", 50, 2, 7, 2, 2, "Vegetable"),
    ("Đậu que xào", "Stir-fried Green Beans", 70, 3, 10, 2, 3, "Vegetable"),
    ("Bí xanh xào", "Stir-fried Zucchini", 55, 2, 8, 2, 2, "Vegetable"),
    # Beverages
    ("Cà phê sữa đá", "Iced Coffee with Milk", 150, 3, 25, 5, 0, "Beverage"),
    ("Cà phê đen", "Black Coffee", 5, 0, 1, 0, 0, "Beverage"),
    ("Trà sữa", "Milk Tea", 250, 4, 45, 8, 0, "Beverage"),
    ("Nước mía", "Sugarcane Juice", 180, 0, 45, 0, 0, "Beverage"),
    ("Sinh tố bơ", "Avocado Smoothie", 280, 4, 35, 15, 6, "Beverage"),
    ("Sinh tố xoài", "Mango Smoothie", 200, 2, 48, 2, 3, "Beverage"),
    ("Nước dừa", "Coconut Water", 45, 1, 9, 0.5, 1, "Beverage"),
    ("Trà đá", "Iced Tea", 2, 0, 0.5, 0, 0, "Beverage"),
    # Desserts
    ("Chè ba màu", "Three Color Dessert", 280, 4, 55, 6, 3, "Dessert"),
    ("Chè đậu xanh", "Mung Bean Dessert", 220, 6, 42, 4, 4, "Dessert"),
    ("Chè bưởi", "Pomelo Dessert", 180, 2, 40, 3, 2, "Dessert"),
    ("Bánh flan", "Flan", 200, 5, 30, 7, 0, "Dessert"),
    ("Sương sa hột lựu", "Tapioca Dessert", 150, 1, 35, 2, 1, "Dessert"),
    # Snacks
    ("Bánh tráng nướng", "Grilled Rice Paper", 180, 4, 32, 4, 1, "Snack"),
    ("Bánh đa", "Rice Cracker", 120, 2, 25, 2, 1, "Snack"),
    ("Khoai lang luộc", "Boiled Sweet Potato", 90, 2, 21, 0.2, 3, "Snack"),
    ("Bắp luộc", "Boiled Corn", 110, 3, 25, 1.5, 3, "Snack"),
)


def vn_food_db(output_dir: Union[str, Path, None] = None) -> Path:
    """Write the food table to a CSV file and print a short report.

    The file is named ``vietnamese_food_nutrition.csv`` and starts with a
    header row followed by one row per entry in ``VIETNAMESE_FOODS``. After
    writing, the function prints the file path, the number of foods, the
    file size in KB, and a per-category count (largest category first).

    Args:
        output_dir: Directory to write the CSV into. It is created, including
            missing parents, when it does not exist. Defaults to
            ``data_mining/datasets`` relative to the current working
            directory.

    Returns:
        Path of the CSV file that was written.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    target_dir = Path(DEFAULT_OUTPUT_DIR if output_dir is None else output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    csv_path = target_dir / CSV_FILENAME

    # newline="" hands line-ending control to the csv module; utf-8 is
    # required because the Vietnamese names contain non-ASCII characters.
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(CSV_COLUMNS)
        writer.writerows(VIETNAMESE_FOODS)

    print("✅ Created Vietnamese Food Database")
    print(f" File: {csv_path}")
    print(f" Foods: {len(VIETNAMESE_FOODS)}")
    print(f" Size: {csv_path.stat().st_size / 1024:.1f} KB")

    # The category is the last field of every record. most_common() orders
    # by descending count and keeps first-seen order among equal counts.
    per_category = Counter(category for *_, category in VIETNAMESE_FOODS)
    print("\n📊 Breakdown by category:")
    for category, count in per_category.most_common():
        print(f" {category}: {count} foods")

    return csv_path


def main() -> int:
    """Generate the database at the default location.

    Returns:
        0 on success, 1 if any exception was raised while generating.
    """
    try:
        vn_food_db()
    except Exception as exc:  # top-level boundary: report, then signal failure
        # Send the summary to stderr so it sits next to the traceback.
        print(f"❌ Error: {exc}", file=sys.stderr)
        traceback.print_exc()
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())