ManishThota committed on
Commit
f4de9a0
1 Parent(s): 8a67c3f

Create text_processor.py

Browse files
Files changed (1) hide show
  1. src/text_processor.py +62 -0
src/text_processor.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from pydantic import BaseModel
import spaces

# Seed torch's RNG so any stochastic generation is reproducible.
torch.random.manual_seed(0)

# Load Phi-3-mini (4k context, instruct-tuned) onto the GPU at import time.
# NOTE: trust_remote_code=True executes code shipped with the checkpoint --
# acceptable only because the repo is pinned to a Microsoft-owned model.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    device_map="cuda",
    torch_dtype="auto",  # let transformers pick fp16/bf16 for the hardware
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")

# Module-level text-generation pipeline, reused by process_description().
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
20
# Pydantic class for output validation
class VideoAnalysis(BaseModel):
    """Schema used to validate the LLM's JSON answer about a video.

    Each field is a binary flag. NOTE(review): the prompt in
    process_description() asks for "0 for True and 1 for False", which
    inverts the usual 1=True convention -- confirm this is intentional.
    """

    indoor: int              # is the scene indoors?
    hands_free: int          # are the subject's hands free?
    screen_interaction: int  # does the subject interact with a screen?
    standing: int            # is the subject standing?
27
+
28
@spaces.GPU(duration=100)
def process_description(description):
    """Classify a video description into four binary flags via the LLM.

    Args:
        description: Free-text description of a video clip.

    Returns:
        A JSON string: either a validated ``VideoAnalysis`` payload
        (keys ``indoor``, ``hands_free``, ``screen_interaction``,
        ``standing``) or ``{"error": ...}`` when the model output cannot
        be parsed.  Previously the error path returned a dict while the
        success path returned a string; both now return JSON strings.
    """
    import json
    import re

    # NOTE(review): "0 for True and 1 for False" inverts the usual
    # convention -- left as-is since downstream consumers may depend on
    # it, but confirm with the prompt author.
    prompt = f"""
    You are a helpful AI assistant. Analyze the following video description and answer the questions with 0 for True and 1 for False:

    Video Description: {description}

    Questions:
    - Is the scene indoors?
    - Are the subject's hands free?
    - Is there screen interaction by the subject?
    - Is the subject standing?

    Provide your answers in JSON format like this:
    {{"indoor": 0, "hands_free": 1, "screen_interaction": 0, "standing": 1}}
    """

    generation_args = {
        "max_new_tokens": 100,          # the JSON answer is short
        "return_full_text": False,      # completion only, not the prompt
        "do_sample": False,             # greedy decoding: deterministic output
        # "temperature" removed: it is ignored (and warned about) by
        # transformers when do_sample=False.
    }

    output = pipe(prompt, **generation_args)
    raw_text = output[0]["generated_text"]

    # The model may wrap the JSON in extra prose; pull out the first
    # {...} object before validating instead of failing on the noise.
    match = re.search(r"\{.*?\}", raw_text, re.DOTALL)
    json_text = match.group(0) if match else raw_text

    try:
        # Parse and validate against the expected schema.
        analysis_result = VideoAnalysis.model_validate_json(json_text)
        return analysis_result.model_dump_json()
    except Exception as e:
        print(f"Error processing LLM output: {e}")
        # Serialize the error too, so callers always receive a JSON string.
        return json.dumps({"error": "Could not process the video description."})