# Source: Hugging Face Space "missbaj/llmaapi" (status when captured: Sleeping).
# File size: 1,153 bytes.
# Commit hashes listed alongside the file: 530df2d, eb889cd, a939c7f, 5427311.

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Checkpoint served by this app; chosen as a small model that should fit
# on a host with 16GB of RAM.
model_name = "distilgpt2"

# Run on the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Fetch the tokenizer and the language model, placing the model on `device`.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

def generate_response(prompt):
    """Generate a text continuation of ``prompt`` with the module-level model.

    Uses the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        prompt: Text to continue. ``None`` or an empty string means there is
            nothing to continue.

    Returns:
        The first generated sequence decoded to text with special tokens
        removed, or ``""`` when ``prompt`` is empty.
    """
    # Total token budget for prompt + continuation (same value the model was
    # always called with).
    max_total_tokens = 150
    # Minimum room kept for new tokens when an over-long prompt is trimmed.
    reserved_new_tokens = 32

    # An empty prompt encodes to zero tokens, leaving generate() nothing to
    # condition on; answer with an empty string instead of failing.
    if not prompt:
        return ""

    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]

    # A prompt that already fills the whole budget leaves no room to generate
    # anything. Trim only in that case, so prompts that fit behave as before.
    if input_ids.shape[-1] >= max_total_tokens:
        keep = max_total_tokens - reserved_new_tokens
        input_ids = input_ids[:, :keep]
        attention_mask = attention_mask[:, :keep]

    # Pass the attention mask explicitly: the pad token is set to the EOS
    # token, so the mask cannot be reliably inferred from the ids alone.
    outputs = model.generate(
        input_ids=input_ids.to(device),
        attention_mask=attention_mask.to(device),
        max_length=max_total_tokens,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Build the web UI: text input, text output, backed by generate_response.
iface = gr.Interface(
    generate_response,
    "text",
    "text",
    title="Crypto Analysis Model",
    description="Enter your prompt related to Bitcoin or cryptocurrency.",
)

# Start serving the app.
iface.launch()