import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import warnings
import spaces
device = "cuda"
# Ignore warnings
warnings.filterwarnings(action='ignore')
# Set random seed
torch.random.manual_seed(0)
# Define model path and generation arguments
model_path = "microsoft/Phi-3-mini-4k-instruct"
generation_args = {
    "max_new_tokens": 50,
    "return_full_text": False,
    "temperature": 0.1,
    "do_sample": True,
}
# Load the model and pipeline once and keep it in memory
def load_model_pipeline(model_path: str):
    if not hasattr(load_model_pipeline, "pipe"):
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            device_map=device,
            torch_dtype="auto",
            trust_remote_code=True,
        )
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        load_model_pipeline.pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
    return load_model_pipeline.pipe
# Initialize the pipeline and keep it in memory
pipe = load_model_pipeline(model_path)
# Generate output from the LLM; the decorator requests a ZeroGPU slot for up to 40 seconds per call
@spaces.GPU(duration=40)
def generate_logic(llm_output: str) -> str:
prompt = f"""
Provide a detailed response based on the description: '{llm_output}'.
"""
messages = [
{"role": "system", "content": "Please provide a detailed response."},
{"role": "user", "content": prompt},
]
response = pipe(messages, **generation_args)
generated_text = response[0]['generated_text']
# Log the generated text
print(f"Generated Text: {generated_text}")
return generated_text
# Main entry point: pass a description through the LLM and return the raw generated text
def process_description(description: str) -> str:
    return generate_logic(description)