# --- HuggingFace file-page metadata (scrape residue, kept as comments) ---
# GSoC-Super-Rapid-Annotator / src/text_processor.py
# ManishThota's picture
# Update src/text_processor.py
# 4cfac13 verified
# raw | history | blame
# 1.71 kB
# --- src/text_processor.py ---
import os
import re

from huggingface_hub import InferenceClient
from pydantic import BaseModel
# Hugging Face Hub client setup.
# Read the API key explicitly so a missing key fails fast with a clear
# message instead of an opaque KeyError at import time.
_hf_token = os.environ.get("HUGGINGFACE_API_KEY")
if not _hf_token:
    raise RuntimeError(
        "HUGGINGFACE_API_KEY environment variable is not set; "
        "it is required to query the Phi-3 inference endpoint."
    )
client = InferenceClient(
    "microsoft/Phi-3-mini-4k-instruct",
    token=_hf_token,
)
# Pydantic class for output validation
class VideoAnalysis(BaseModel):
    """Schema for the LLM's answers to the four video questions.

    NOTE(review): the prompt in ``process_description`` asks for 0 for True
    and 1 for False — the reverse of the usual convention. Confirm that
    downstream consumers of these flags expect it this way.
    """

    indoor: int  # is the scene indoors?
    hands_free: int  # are the subject's hands free?
    screen_interaction: int  # does the subject interact with a screen?
    standing: int  # is the subject standing?
def process_description(description):
    """Ask the LLM four yes/no questions about a video description.

    Args:
        description: Free-text description of a video clip.

    Returns:
        A JSON string matching the ``VideoAnalysis`` schema on success, or
        a plain dict with an ``"error"`` key when the model output cannot
        be parsed/validated (original error contract preserved).
    """
    # NOTE(review): "0 for True and 1 for False" looks inverted relative to
    # the usual convention — confirm downstream code really expects this.
    prompt = f"""
    You are a helpful AI assistant. Analyze the following video description and answer the questions with 0 for True and 1 for False:

    Video Description: {description}

    Questions:
    - Is the scene indoors?
    - Are the subject's hands free?
    - Is there screen interaction by the subject?
    - Is the subject standing?

    Provide your answers in JSON format like this:
    {{"indoor": 0, "hands_free": 1, "screen_interaction": 0, "standing": 1}}
    """

    # Chat-completion call against the Phi-3 endpoint configured at module level.
    response = client.chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=100,  # small cap: the answer is one short JSON object
    )
    raw_output = response.choices[0].message.content

    # Robustness: models frequently wrap the JSON in prose or ```fences```.
    # Extract the first {...} span before validating; fall back to the raw
    # text so a clean JSON-only reply still works unchanged.
    match = re.search(r"\{.*\}", raw_output, re.DOTALL)
    json_text = match.group(0) if match else raw_output

    try:
        # Validate against the schema, then re-serialize to guarantee the
        # caller always receives well-formed JSON on success.
        analysis_result = VideoAnalysis.model_validate_json(json_text)
        return analysis_result.model_dump_json()
    except Exception as e:
        # Broad catch is deliberate: any parse/validation failure maps to
        # the same caller-facing error payload rather than crashing.
        print(f"Error processing LLM output: {e}")
        return {"error": "Could not process the video description."}