File size: 4,176 Bytes
1f1394a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import os
import json
import logging
import typing as t

import gradio as gr
from huggingface_hub import InferenceClient

# Configure logging once at import time: INFO and above, with timestamp and
# severity in front of every message.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


# Model queried through the Hugging Face inference client.
MODEL_REPO_ID = "google/gemma-2-9b-it"
# Access token read from the environment; None when the variable is absent.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")

# Abort at import time when no token is configured, logging the reason first.
if HF_API_TOKEN is None:
    logger.error("HF_API_TOKEN environment variable is not set.")
    raise ValueError("Error: HF_API_TOKEN environment variable is not set.")


def build_messages(input_json: str) -> t.Sequence[t.Mapping[str, str]]:
    """Build the chat messages asking the model to translate and augment a menu.

    The output-format grammar, the numbered requirements and the menu itself
    are concatenated into a single ``user`` message.
    NOTE(review): presumably a single user turn is used because the target
    model's chat template has no system role -- confirm.

    Args:
        input_json: Raw text of the Spanish menu; embedded verbatim at the
            end of the prompt.

    Returns:
        A one-element list containing the ``user`` message as a dict with
        ``role`` and ``content`` keys.
    """
    data_structure = (
        "You are given a restaurant menu in Spanish. You MUST return a single valid JSON in this format:\n"
        "Menu Format:\n"
        "<menu> ::= '{' \"restaurant_name\": <string>, \"categories\": [ <category>* ] '}'\n"
        "<category> ::= '{' \"category\": <string>, \"items\": [ <item>* ] '}'\n"
        # The closing brace is quoted like in the two rules above; the
        # original rule ended with an unbalanced }' token.
        "<item> ::= '{' \"name\": <string>, \"price\": <number>, \"description\": <string> '}'\n"
    )

    instructions = (
        "Requirements:\n"
        "1. **Translate ALL Spanish text into English**, including:\n"
        "   - restaurant_name (only if it's in Spanish).\n"
        "   - All category names.\n"
        "   - All item names.\n"
        "   - Any Spanish words in the final descriptions.\n\n"
        "2. If an item name is a distinct dish with no direct English equivalent, "
        "   **still attempt** an English literal translation, or provide a parenthetical explanation if needed.\n\n"
        "3. For every item, add a new field called description in **concise, appetizing English**.\n\n"
        "4. **Do not** change or remove existing fields. The only added field is description.\n\n"
        # Terminated with a blank line so requirement 6 starts on its own
        # line instead of being glued to the end of requirement 5.
        "5. **Do not** change meaning or make up information.\n\n"
        "6. Return **only** the JSON object. **No markdown**, no code fences, no extra text.\n"
        "7. Ensure the output is **valid JSON** with correct brackets, commas, and quotes.\n\n"
    )

    system_message = data_structure + instructions

    user_message = (
        "Process the following menu:\n\n"
        f"{input_json}"
    )

    return [
        {"role": "user", "content": system_message + user_message},
    ]


def _strip_code_fences(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence, if present."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped
    # Drop the opening fence line, which may carry a language tag ("```json").
    _, _, body = stripped.partition("\n")
    body = body.rstrip()
    if body.endswith("```"):
        body = body[:-3]
    return body.strip()


def process_menu(input_text: str) -> str:
    """Send the menu to the model and return its answer as pretty-printed JSON.

    Args:
        input_text: Raw text of the Spanish menu to process.

    Returns:
        The model's JSON answer re-serialised with two-space indentation and
        non-ASCII characters preserved.

    Raises:
        ValueError: If the model response contains no choices or no content.
        json.JSONDecodeError: If the model output is not valid JSON (a
            subclass of ``ValueError``).
    """
    client = InferenceClient(model=MODEL_REPO_ID, token=HF_API_TOKEN)
    messages = build_messages(input_text)

    logger.info("Generating response from the model.")
    response = client.chat_completion(
        messages,
        max_tokens=2048,
        temperature=0.1,
        seed=42,
    )
    logger.info(response)

    # Guard against a missing response or a missing/empty choices list instead
    # of failing later on an unbound local or an IndexError.
    choices = getattr(response, "choices", None)
    content = choices[0].message.content if choices else None
    if not content:
        logger.error("Model response contained no content.")
        raise ValueError("Model response contained no content.")
    logger.info(content)

    # The prompt forbids code fences, but tolerate them if the model adds
    # one anyway; valid JSON never starts with a backtick, so this is safe.
    try:
        parsed = json.loads(_strip_code_fences(content))
    except json.JSONDecodeError:
        logger.exception("Model output is not valid JSON.")
        raise
    logger.info("Parsed JSON successfully.")
    return json.dumps(parsed, indent=2, ensure_ascii=False)

        
def process_data(data: t.Any) -> str:
    """Run an uploaded menu file through the model and write the result to disk.

    The output file is named ``preprocessed_<input basename>`` and is written
    relative to the current working directory.

    Args:
        data: The uploaded file, either as a filesystem path or as an object
            exposing the path through a ``name`` attribute.
            NOTE(review): which of the two arrives depends on the Gradio
            version and ``gr.File`` settings -- confirm against the deployed
            version.

    Returns:
        The path of the file containing the processed menu.

    Raises:
        ValueError: If no file was provided (e.g. the button was clicked
            before uploading anything).
    """
    # Fail with a clear message instead of an opaque TypeError from
    # os.path.basename(None) when nothing was uploaded.
    if not data:
        raise ValueError("No input file provided. Please upload a menu JSON file.")

    if isinstance(data, (str, os.PathLike)):
        input_path = os.fspath(data)
    else:
        # File-like upload wrapper: the path lives in its ``name`` attribute.
        input_path = os.fspath(getattr(data, "name", data))

    logger.info("Reading input file: %s", input_path)
    input_name = os.path.basename(input_path)
    preprocessed_name = "preprocessed_" + input_name

    with open(input_path, "r", encoding="utf-8") as raw_data:
        menu = raw_data.read()

    logger.info("Processing the menu data through the model.")
    preprocessed_data = process_menu(menu)

    logger.info("Writing preprocessed data to file: %s", preprocessed_name)
    with open(preprocessed_name, "w", encoding="utf-8") as temp_data:
        temp_data.write(preprocessed_data)

    logger.info("Processing complete. Preprocessed file created: %s", preprocessed_name)
    return preprocessed_name


# Assemble the web UI: a title, a short explanation, an upload slot, a
# download slot and a button that runs process_data on the uploaded file.
with gr.Blocks() as demo:
    gr.Markdown(value="# Restaurant Menu Processor")
    gr.Markdown(
        value=(
            "Upload a JSON file containing a restaurant menu in Spanish. "
            "This tool will translate the menu into English and add descriptions."
        )
    )

    # Only the upload component is created inside the row; the download
    # component and the button are created directly under the Blocks context.
    with gr.Row():
        input_file = gr.File(label="Upload Restaurant Menu JSON (Spanish)")
    output_file = gr.File(label="Download Augmented Menu JSON (English)")

    process_button = gr.Button(value="Process Menu")
    process_button.click(
        fn=process_data,
        inputs=input_file,
        outputs=output_file,
    )


# Start the app as soon as the module is executed.
demo.launch()