ManishThota committed
Commit eec5a54
1 Parent(s): 5443ab4

Update src/text_processor.py

Files changed (1):
  1. src/text_processor.py +46 -56
src/text_processor.py CHANGED
@@ -1,33 +1,20 @@
-import torch
+import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-from pydantic import BaseModel
 import json
 import warnings
-import spaces
+from pydantic import BaseModel
 from typing import Dict
+import spaces
 
+device = "cuda"
 # Ignore warnings
 warnings.filterwarnings(action='ignore')
 
 # Set random seed
 torch.random.manual_seed(0)
 
-# Define the model path
+# Define model path and generation arguments
 model_path = "microsoft/Phi-3-mini-4k-instruct"
-device= "cuda"
-
-# Load the model and pipeline outside the function
-pipe = pipeline(
-    "text-generation",
-    model=AutoModelForCausalLM.from_pretrained(
-        model_path,
-        device_map=device,
-        torch_dtype="auto",
-        trust_remote_code=True,
-    ),
-    tokenizer=AutoTokenizer.from_pretrained(model_path),
-)
-
 generation_args = {
     "max_new_tokens": 50,
     "return_full_text": False,
@@ -35,34 +22,45 @@ generation_args = {
     "do_sample": True
 }
 
-@spaces.GPU(duration=75)
-class LLMHelper:
-    def __init__(self, pipeline):
-        self.chatbot = pipeline
+# Load the model and pipeline once
+def load_model_pipeline(model_path: str):
+    model = AutoModelForCausalLM.from_pretrained(
+        model_path,
+        device_map=device,
+        torch_dtype="auto",
+        trust_remote_code=True,
+    )
+    tokenizer = AutoTokenizer.from_pretrained(model_path)
+    return pipeline("text-generation", model=model, tokenizer=tokenizer)
+
+pipe = load_model_pipeline(model_path)
+
+# Generate logic from LLM output
+@spaces.GPU(duration=50)
+def generate_logic(llm_output: str, pipeline) -> str:
+    prompt = f"""
+    Provide the response in json string for the below keys and context based on the description: '{llm_output}'.
+
+    Screen.interaction_yes: This field indicates whether there was an interaction of the person with a screen during the activity. A value of 1 means there was screen interaction (Yes), and a value of 0 means there was no screen interaction (No).
+    Hands.free: This field indicates whether the person's hands were free during the activity. A value of 1 means the person was not holding anything (Yes), indicating free hands. A value of 0 means the person was holding something (No), indicating the hands were not free.
+    Indoors: This field indicates whether the activity took place indoors. A value of 1 means the activity occurred inside a building or enclosed space (Yes), and a value of 0 means the activity took place outside (No).
+    Standing: This field indicates whether the person was standing during the activity. A value of 1 means the person was standing (Yes), and a value of 0 means the person was not standing (No).
+    """
 
-    def generate_logic(self, llm_output: str):
-        prompt = f"""
-        Provide the response in json string for the below keys and context based on the description: '{llm_output}'.
-
-        Screen.interaction_yes: This field indicates whether there was an interaction of the person with a screen during the activity. A value of 1 means there was screen interaction (Yes), and a value of 0 means there was no screen interaction (No).
-        Hands.free: This field indicates whether the person's hands were free during the activity. A value of 1 means the person was not holding anything (Yes), indicating free hands. A value of 0 means the person was holding something (No), indicating the hands were not free.
-        Indoors: This field indicates whether the activity took place indoors. A value of 1 means the activity occurred inside a building or enclosed space (Yes), and a value of 0 means the activity took place outside (No).
-        Standing: This field indicates whether the person was standing during the activity. A value of 1 means the person was standing (Yes), and a value of 0 means the person was not standing (No).
-        """
+    messages = [
+        {"role": "system", "content": "Please answer questions just based on this information: " + llm_output},
+        {"role": "user", "content": prompt},
+    ]
 
-        messages = [
-            {"role": "system", "content": "Please answer questions just based on this information: " + llm_output},
-            {"role": "user", "content": prompt},
-        ]
+    response = pipeline(messages, **generation_args)
+    generated_text = response[0]['generated_text']
 
-        response = self.chatbot(messages, **generation_args)
-        generated_text = response[0]['generated_text']
-        # Extract JSON from the generated text
-        start_index = generated_text.find('{')
-        end_index = generated_text.rfind('}') + 1
-        json_str = generated_text[start_index:end_index]
-        return json_str
+    # Extract JSON from the generated text
+    start_index = generated_text.find('{')
+    end_index = generated_text.rfind('}') + 1
+    return generated_text[start_index:end_index]
 
+# Pydantic model for structured output
 class VideoAnalysis(BaseModel):
     screen_interaction_yes: int
     hands_free: int
@@ -70,7 +68,7 @@ class VideoAnalysis(BaseModel):
     standing: int
 
    @classmethod
-    def from_llm_output(cls, llm_output: str, generated_logic: str) -> 'VideoAnalysis':
+    def from_llm_output(cls, generated_logic: str) -> 'VideoAnalysis':
         logic_dict = json.loads(generated_logic)
         return cls(
             screen_interaction_yes=logic_dict.get("Screen.interaction_yes", 0),
@@ -79,16 +77,8 @@ class VideoAnalysis(BaseModel):
             standing=logic_dict.get("Standing", 0)
         )
 
-# Create an instance of LLMHelper (using the already loaded pipeline)
-llm_helper = LLMHelper(pipe)
-
-def process_llm_output(description) -> Dict:
-    # Generate the logic from the LLM output
-    generated_logic = llm_helper.generate_logic(description)
-
-    # Create the structured output
-    structured_output = VideoAnalysis.from_llm_output(input.llm_output, generated_logic)
-
-    # Return the structured output as a dictionary
-    return structured_output.dict()
-
+# Main function to process LLM output
+def process_llm_output(description: str) -> Dict:
+    generated_logic = generate_logic(description, pipe)
+    structured_output = VideoAnalysis.from_llm_output(generated_logic)
+    return structured_output.dict()
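
For context, a minimal usage sketch of the refactored module follows. The import path and the description string are assumptions for illustration; a real call downloads microsoft/Phi-3-mini-4k-instruct and requires a CUDA device, since the module hard-codes device = "cuda".

# Hypothetical caller; assumes this file is importable as src.text_processor
from src.text_processor import process_llm_output

# Invented video description, for illustration only
description = (
    "A person stands at a desk indoors, typing on a laptop "
    "with both hands on the keyboard."
)

result = process_llm_output(description)
print(result)
# Plausible shape of the output (values depend on the sampled model response):
# {'screen_interaction_yes': 1, 'hands_free': 0, 'indoors': 1, 'standing': 1}

Note the design choice the commit makes: the pipeline is built once at import time via load_model_pipeline and passed into the @spaces.GPU-decorated generate_logic, so model loading stays off the GPU-metered path and only the generation call runs under the duration=50 budget.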