# MSheng-Lee's picture
# Upload folder using huggingface_hub
# f20b100 verified
import json
import os
import time

import pandas as pd
from openai import AzureOpenAI
# Azure OpenAI client shared by every request made in this script.
#
# The API key is read from the AZURE_OPENAI_API_KEY environment variable rather
# than being committed to the repository; a missing variable fails fast with a
# KeyError naming it. The key that used to be hard-coded here should be treated
# as leaked and rotated.
# NOTE(review): the endpoint below is plain http, so the key travels
# unencrypted -- confirm whether an https endpoint is available.
client = AzureOpenAI(
    # https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning
    api_version="2025-01-01-preview",
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    # https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource
    azure_endpoint="http://api.gameai-llm.woa.com/llm-service/azure/public",
)
# Load the asset spreadsheet (skiprows=2 skips the first two rows of the sheet)
# and keep its "caption_clip" column as a plain Python list.
local_assets = pd.read_excel(io="../assets/copy.xlsx", skiprows=2)
captions = local_assets.loc[:, "caption_clip"].tolist()
# Prompt sent for every batch; {count} and {captions} are filled in per request.
# NOTE(review): the example objects below end with trailing commas, which is
# not strict JSON. A reply that copies them fails to parse and is retried.
_PROMPT_TEMPLATE = """Generate {count} unique, simple and clear room descriptions.
Each user_input should include both a description of the room and a brief scene setup.
Provide 0 to 5 simple and common objects in the user_input. The objects can only be chosen from {captions}.
Moreover, the length and width of the room are 5.0 meters, and the height is 3.0 meters.
The description should not include spatial relationships (front, back, left, right, above, under, etc.) and should not mention doors or windows.
The description should not include words like 'high-poly', 'material', 'high quality' or similar terms.
The output format should be a JSON list where each item is formatted as:
{{
"user_input": "<room description>",
}}
Examples:
[
{{
"user_input": "A cozy living room in a warm style with a brown fabric sofa, a brand new large screen TV with thin bezel and stand.",
}},
{{
"user_input": "This is a kid bedroom. There is a single bed, a modern style minimalist dressing table with drawers, and a folding wooden dining chair.",
}},
{{
"user_input": "Design me a room with a rectangular billiard table in entertainment area. The room should have a modern style.",
}}
...
]
Now generate {count} descriptions following this format.
"""

_SYSTEM_PROMPT = "You are a creative assistant generating structured room descriptions, with a focus on meeting human practical needs."

# Seed of the first request; every later request uses the next integer.
_BASE_SEED = 30


def _parse_json_list(raw):
    """Return the JSON list embedded in a model reply.

    Everything outside the outermost ``[`` ... ``]`` pair is ignored, so
    Markdown code fences, leading whitespace or a short preamble do not break
    parsing. Raises ``ValueError`` (``json.JSONDecodeError`` is a subclass)
    when no well-formed list is present, including when *raw* is ``None``.
    """
    text = raw or ""
    start = text.find("[")
    end = text.rfind("]")
    if start == -1 or end < start:
        raise ValueError("model reply does not contain a JSON list")
    return json.loads(text[start:end + 1])


# Function to generate room descriptions in batches
def generate_room_descriptions(n=100, batch_size=50, max_retries=3):
    """Ask the chat model for room descriptions and collect the parsed items.

    Requests are issued in batches of at most ``batch_size`` descriptions until
    ``n`` have been requested. A batch whose request or JSON parsing fails is
    retried; once its attempts are exhausted the batch is skipped, so the
    result can hold fewer items than ``n``. The number of items the model
    returns per batch is not validated either.

    Args:
        n: Total number of descriptions to request. Values <= 0 yield ``[]``.
        batch_size: Maximum number of descriptions requested per API call.
        max_retries: Attempts made per batch before giving up (at least 1).

    Returns:
        A list with the items parsed from every successful reply. The prompt
        asks for objects with a ``"user_input"`` key, but items are passed
        through as returned by the model.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    attempts_per_batch = max(1, max_retries)
    descriptions = []
    request_index = 0

    for i in range(0, n, batch_size):
        count = min(batch_size, n - i)
        prompt = _PROMPT_TEMPLATE.format(count=count, captions=captions)

        for attempt in range(1, attempts_per_batch + 1):
            # `seed` is a best-effort determinism knob: reusing one seed with an
            # identical prompt tends to return the same batch again, so every
            # request gets its own seed (the first one keeps the original 30).
            seed = _BASE_SEED + request_index
            request_index += 1
            try:
                response = client.chat.completions.create(
                    model="gpt-4-1106-Preview",
                    messages=[
                        {"role": "system", "content": _SYSTEM_PROMPT},
                        {"role": "user", "content": prompt},
                    ],
                    temperature=0.7,
                    seed=seed,
                )
                batch = _parse_json_list(response.choices[0].message.content)
            except Exception as e:
                # Best-effort boundary: report the failure and try again.
                print(f"Error at batch {i} (attempt {attempt}/{attempts_per_batch}): {e}")
                time.sleep(0.2)  # Wait before retrying
            else:
                descriptions.extend(batch)
                break

        # Avoid rate limits
        time.sleep(0.1)
        print('--------------------------------')
        # Report what was actually collected, not what was requested.
        print(f"Generated {len(descriptions)} descriptions")
        print('--------------------------------')

    return descriptions
import time

# Generate 400 room descriptions in batches of 50 and time the run.
# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments during the (long) generation.
start_time = time.perf_counter()
room_data = generate_room_descriptions(n=400, batch_size=50)
end_time = time.perf_counter()
print(f"Time taken: {(end_time - start_time) / 60} minutes")

# Save to file; the encoding is explicit so the output does not depend on the
# platform default.
file_path = "7.json"
with open(file_path, "w", encoding="utf-8") as f:
    json.dump(room_data, f)
print(f"Room descriptions saved to: {file_path}")