from ctransformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline
import streamlit as st
from langchain.chains import LLMChain
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
# Simple inference example
# output = llm(
#     "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",  # Prompt
#     max_tokens=512,  # Generate up to 512 tokens
#     stop=["</s>"],  # Example stop token - not necessarily correct for this specific model! Please check before using.
#     echo=True  # Whether to echo the prompt
# )
prompt_format = "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
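
# Sketch: how prompt_format could be filled in for direct (non-LangChain) generation with a
# ChatML-style model. The system message and question below are placeholders, not the app's
# real prompts.
# chatml_prompt = prompt_format.format(
#     system_message="You are a helpful assistant.",
#     prompt="What is the capital of France?",
# )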
template = """Question: {question}
Answer:"""
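
# For reference, PromptTemplate renders this template as, e.g. (placeholder question):
#   "Question: What is the capital of France?\nAnswer:"
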
def get_llm_response(repo, filename, model_type, gpu_layers, prompt):
    print("Loading model")
    # Load the GGUF/GGML model via LangChain's CTransformers wrapper (instead of the raw
    # ctransformers model) so that LLMChain accepts it as an LLM
    llm = CTransformers(model=repo, model_file=filename, model_type=model_type,
                        config={"gpu_layers": gpu_layers})
    print("Model loaded")
    #llm_prompt = prompt_format.format(system_message=system_prompt, prompt=prompt)
    print(f"LLM prompt: {prompt}")
    prompt_template = PromptTemplate(template=template, input_variables=["question"])
    llm_chain = LLMChain(prompt=prompt_template, llm=llm)
    # Run the chain on the user's question rather than on the PromptTemplate object
    response = llm_chain.run(prompt)
    return response
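

# Minimal Streamlit UI sketch for this Space (assumption: the default repo id, file name and
# model type below are placeholders, not the Space's actual configuration; any GGUF/GGML model
# supported by ctransformers should work).
st.title("GGUF chat demo")
repo = st.text_input("Hugging Face repo", value="TheBloke/some-model-GGUF")  # placeholder repo id
filename = st.text_input("Model file", value="model.Q4_K_M.gguf")  # placeholder file name
model_type = st.text_input("Model type", value="llama")  # placeholder model type
gpu_layers = st.number_input("GPU layers", min_value=0, value=0, step=1)
question = st.text_area("Question")

if st.button("Generate") and question:
    with st.spinner("Running model..."):
        answer = get_llm_response(repo, filename, model_type, int(gpu_layers), question)
    st.write(answer)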