# utils/epfl_meditron_utils.py
from langchain.chains import LLMChain
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
# Simple inference example
# output = llm(
# "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", # Prompt
# max_tokens=512, # Generate up to 512 tokens
# stop=["</s>"], # Example stop token - not necessarily correct for this specific model! Please check before using.
# echo=True # Whether to echo the prompt
#)
prompt_format = "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
template = """Question: {question}
Answer:"""
def get_llm_response(repo, filename, model_type, gpu_layers, prompt):
    print("Loading model")
    # LLMChain expects a LangChain-compatible LLM, so the GGUF/GGML weights are loaded
    # through LangChain's CTransformers wrapper rather than via ctransformers directly.
    llm = CTransformers(model=repo, model_file=filename, model_type=model_type, config={"gpu_layers": gpu_layers})
    print("Model loaded")
    #llm_prompt = prompt_format.format(system_message=system_prompt, prompt=prompt)
    print(f"LLM prompt: {prompt}")
    # Use a separate name for the template so the caller's prompt string is not shadowed,
    # and pass the question string (not the PromptTemplate object) into the chain.
    prompt_template = PromptTemplate(template=template, input_variables=["question"])
    llm_chain = LLMChain(prompt=prompt_template, llm=llm)
    response = llm_chain.run(question=prompt)
    return response
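

# Example usage: a minimal sketch of calling get_llm_response. The repo id, file name
# and model_type below are assumptions for a quantised Meditron GGUF build, not values
# taken from this project; adjust them to whatever model you actually want to load.
if __name__ == "__main__":
    answer = get_llm_response(
        repo="TheBloke/meditron-7B-GGUF",    # hypothetical Hugging Face repo id
        filename="meditron-7b.Q4_K_M.gguf",  # hypothetical quantised model file
        model_type="llama",                  # Meditron is a Llama-family model
        gpu_layers=0,                        # CPU-only; raise to offload layers to GPU
        prompt="What are common symptoms of anaemia?",
    )
    print(answer)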