ManishThota committed
Commit eec5a54
1 Parent(s): 5443ab4

Update src/text_processor.py

Files changed (1):
  1. src/text_processor.py +46 -56
src/text_processor.py CHANGED
@@ -1,33 +1,20 @@
-import torch
+import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-from pydantic import BaseModel
 import json
 import warnings
-import spaces
+from pydantic import BaseModel
 from typing import Dict
+import spaces
 
+device = "cuda"
 # Ignore warnings
 warnings.filterwarnings(action='ignore')
 
 # Set random seed
 torch.random.manual_seed(0)
 
-# Define the model path
+# Define model path and generation arguments
 model_path = "microsoft/Phi-3-mini-4k-instruct"
-device= "cuda"
-
-# Load the model and pipeline outside the function
-pipe = pipeline(
-    "text-generation",
-    model=AutoModelForCausalLM.from_pretrained(
-        model_path,
-        device_map=device,
-        torch_dtype="auto",
-        trust_remote_code=True,
-    ),
-    tokenizer=AutoTokenizer.from_pretrained(model_path),
-)
-
 generation_args = {
     "max_new_tokens": 50,
     "return_full_text": False,
@@ -35,34 +22,45 @@ generation_args = {
     "do_sample": True
 }
 
-@spaces.GPU(duration=75)
-class LLMHelper:
-    def __init__(self, pipeline):
-        self.chatbot = pipeline
+# Load the model and pipeline once
+def load_model_pipeline(model_path: str):
+    model = AutoModelForCausalLM.from_pretrained(
+        model_path,
+        device_map=device,
+        torch_dtype="auto",
+        trust_remote_code=True,
+    )
+    tokenizer = AutoTokenizer.from_pretrained(model_path)
+    return pipeline("text-generation", model=model, tokenizer=tokenizer)
+
+pipe = load_model_pipeline(model_path)
+
+# Generate logic from LLM output
+@spaces.GPU(duration=50)
+def generate_logic(llm_output: str, pipeline) -> str:
+    prompt = f"""
+    Provide the response in json string for the below keys and context based on the description: '{llm_output}'.
+
+    Screen.interaction_yes: This field indicates whether there was an interaction of the person with a screen during the activity. A value of 1 means there was screen interaction (Yes), and a value of 0 means there was no screen interaction (No).
+    Hands.free: This field indicates whether the person's hands were free during the activity. A value of 1 means the person was not holding anything (Yes), indicating free hands. A value of 0 means the person was holding something (No), indicating the hands were not free.
+    Indoors: This field indicates whether the activity took place indoors. A value of 1 means the activity occurred inside a building or enclosed space (Yes), and a value of 0 means the activity took place outside (No).
+    Standing: This field indicates whether the person was standing during the activity. A value of 1 means the person was standing (Yes), and a value of 0 means the person was not standing (No).
+    """
 
-    def generate_logic(self, llm_output: str):
-        prompt = f"""
-        Provide the response in json string for the below keys and context based on the description: '{llm_output}'.
-
-        Screen.interaction_yes: This field indicates whether there was an interaction of the person with a screen during the activity. A value of 1 means there was screen interaction (Yes), and a value of 0 means there was no screen interaction (No).
-        Hands.free: This field indicates whether the person's hands were free during the activity. A value of 1 means the person was not holding anything (Yes), indicating free hands. A value of 0 means the person was holding something (No), indicating the hands were not free.
-        Indoors: This field indicates whether the activity took place indoors. A value of 1 means the activity occurred inside a building or enclosed space (Yes), and a value of 0 means the activity took place outside (No).
-        Standing: This field indicates whether the person was standing during the activity. A value of 1 means the person was standing (Yes), and a value of 0 means the person was not standing (No).
-        """
+    messages = [
+        {"role": "system", "content": "Please answer questions just based on this information: " + llm_output},
+        {"role": "user", "content": prompt},
+    ]
 
-        messages = [
-            {"role": "system", "content": "Please answer questions just based on this information: " + llm_output},
-            {"role": "user", "content": prompt},
-        ]
+    response = pipeline(messages, **generation_args)
+    generated_text = response[0]['generated_text']
 
-        response = self.chatbot(messages, **generation_args)
-        generated_text = response[0]['generated_text']
-        # Extract JSON from the generated text
-        start_index = generated_text.find('{')
-        end_index = generated_text.rfind('}') + 1
-        json_str = generated_text[start_index:end_index]
-        return json_str
+    # Extract JSON from the generated text
+    start_index = generated_text.find('{')
+    end_index = generated_text.rfind('}') + 1
+    return generated_text[start_index:end_index]
 
+# Pydantic model for structured output
 class VideoAnalysis(BaseModel):
     screen_interaction_yes: int
     hands_free: int
@@ -70,7 +68,7 @@ class VideoAnalysis(BaseModel):
     standing: int
 
    @classmethod
-    def from_llm_output(cls, llm_output: str, generated_logic: str) -> 'VideoAnalysis':
+    def from_llm_output(cls, generated_logic: str) -> 'VideoAnalysis':
         logic_dict = json.loads(generated_logic)
         return cls(
             screen_interaction_yes=logic_dict.get("Screen.interaction_yes", 0),
@@ -79,16 +77,8 @@ class VideoAnalysis(BaseModel):
             standing=logic_dict.get("Standing", 0)
         )
 
-# Create an instance of LLMHelper (using the already loaded pipeline)
-llm_helper = LLMHelper(pipe)
-
-def process_llm_output(description) -> Dict:
-    # Generate the logic from the LLM output
-    generated_logic = llm_helper.generate_logic(description)
-
-    # Create the structured output
-    structured_output = VideoAnalysis.from_llm_output(input.llm_output, generated_logic)
-
-    # Return the structured output as a dictionary
-    return structured_output.dict()
-
+# Main function to process LLM output
+def process_llm_output(description: str) -> Dict:
+    generated_logic = generate_logic(description, pipe)
+    structured_output = VideoAnalysis.from_llm_output(generated_logic)
+    return structured_output.dict()
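
For context, a minimal usage sketch of the refactored module follows. The import path and the description string are assumptions for illustration; a real call downloads microsoft/Phi-3-mini-4k-instruct and requires a CUDA device, since the module hard-codes device = "cuda".

# Hypothetical caller; assumes this file is importable as src.text_processor
from src.text_processor import process_llm_output

# Invented video description, for illustration only
description = (
    "A person stands at a desk indoors, typing on a laptop "
    "with both hands on the keyboard."
)

result = process_llm_output(description)
print(result)
# Plausible shape of the output (values depend on the sampled model response):
# {'screen_interaction_yes': 1, 'hands_free': 0, 'indoors': 1, 'standing': 1}

Note the design choice the commit makes: the pipeline is built once at import time via load_model_pipeline and passed into the @spaces.GPU-decorated generate_logic, so model loading stays off the GPU-metered path and only the generation call runs under the duration=50 budget.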