# Load model directly
import os

import streamlit as st
import torch
from unsloth import FastLanguageModel

# Hugging Face token; only needed for gated models like meta-llama/Llama-2-7b-hf.
# Assumption: the token is supplied via the HF_TOKEN environment variable.
hf_token = os.environ.get("HF_TOKEN")

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "shivam9980/mistral-7b-news-cnn-merged", # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B
    max_seq_length = 2048,
    dtype = None,        # None = auto-detect (float16 on T4/V100, bfloat16 on Ampere+)
    load_in_4bit = True, # 4-bit quantization to reduce GPU memory use
    token = hf_token,
)

# alpaca_prompt = You MUST copy from above!
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

FastLanguageModel.for_inference(model) # Enable native 2x faster inference

c = st.text_input('Enter the contents')

if c:
    inputs = tokenizer(
        [
            alpaca_prompt.format(
                "The following passage is content from a news report. Please summarize this passage in one sentence or less.", # instruction
                c,  # input
                "", # output - leave this blank for generation!
            )
        ],
        return_tensors = "pt",
    ).to("cuda")

    outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
    results = tokenizer.batch_decode(outputs, skip_special_tokens = True)
    # The generated response follows the final newline of the prompt template.
    out = results[0].split('\n')[-1]
    st.text_area(label = 'Headline', value = out)
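# Usage note (assumption: the script above is saved as app.py):
#   streamlit run app.py
# A CUDA GPU is required, since the tokenized inputs are moved to "cuda"
# before generation.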