# LlamaDutchDemo / app.py
import re
import time
import streamlit as st
from transformers import pipeline, Conversation, AutoTokenizer
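# Note: Conversation and the "conversational" pipeline were removed in
# transformers v4.42; this script assumes an older release (roughly
# transformers>=4.34,<4.42, where conversation messages are role/content dicts).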
from langdetect import detect  # imported but not used in this demo
import torch
print(f"Is CUDA available: {torch.cuda.is_available()}")
# True
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
# Tesla T4
# Choose the model by setting model_chosen_id to 1 or 2
model_chosen_id = 2
model_name_options = {
    1: "meta-llama/Llama-2-13b-chat-hf",
    2: "BramVanroy/Llama-2-13b-chat-dutch",
}
model_chosen = model_name_options[model_chosen_id]
my_config = {
    'model_name': model_chosen,
    'do_sample': True,
    'temperature': 0.1,
    'repetition_penalty': 1.1,
    'max_new_tokens': 500,
}
print(f"Selected model: {my_config['model_name']}")
print(f"Parameters are: {my_config}")
def count_words(text):
    # Use a simple regular expression to count words
    words = re.findall(r'\b\w+\b', text)
    return len(words)
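# e.g. count_words("Dit is een test.") -> 4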
def generate_with_llama_chat(my_config):
    # Read the generation parameters from the config dict
    do_sample = my_config.get('do_sample', True)
    temperature = my_config.get('temperature', 0.1)
    repetition_penalty = my_config.get('repetition_penalty', 1.1)
    max_new_tokens = my_config.get('max_new_tokens', 500)

    start_time = time.time()
    model = my_config['model_name']
    tokenizer = AutoTokenizer.from_pretrained(model)
    chatbot = pipeline(
        "conversational",
        model=model,
        tokenizer=tokenizer,
        do_sample=do_sample,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        # max_length=2000,
        max_new_tokens=max_new_tokens,
        # model_kwargs={"device_map": "auto", "load_in_8bit": True}
        # Note: adding "src_lang": "en", "tgt_lang": "nl" to model_kwargs does not work.
    )
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Loading the model took {elapsed_time:.1f} seconds")
    return chatbot
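# Sketch of an 8-bit variant (not used below): the commented-out model_kwargs
# above suggest quantized loading was considered. Assuming the optional
# `accelerate` and `bitsandbytes` packages are installed, it would look
# roughly like this:
def generate_with_llama_chat_8bit(my_config):
    model = my_config['model_name']
    tokenizer = AutoTokenizer.from_pretrained(model)
    return pipeline(
        "conversational",
        model=model,
        tokenizer=tokenizer,
        do_sample=my_config.get('do_sample', True),
        temperature=my_config.get('temperature', 0.1),
        repetition_penalty=my_config.get('repetition_penalty', 1.1),
        max_new_tokens=my_config.get('max_new_tokens', 500),
        model_kwargs={"device_map": "auto", "load_in_8bit": True},
    )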
def get_answer(chatbot, input_text):
    start_time = time.time()
    print(f"Processing the input\n {input_text}\n")
    print('Processing the answer....')
    conversation = Conversation(input_text)
    print(f"Conversation(input_text): {conversation}")
    # The pipeline appends the assistant reply to the conversation;
    # message index 1 is the first generated response.
    output = chatbot(conversation)[1]['content']
    elapsed_time = time.time() - start_time
    # Append timing and word-count info to the answer
    output += f"\nAnswered in {elapsed_time:.1f} seconds, Nr generated words: {count_words(output)}"
    return output
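# Multi-turn sketch (hypothetical follow-up, same transformers version range):
# the pipeline keeps appending replies to the Conversation object, so a second
# turn could look roughly like:
#
#   conversation.add_message({"role": "user", "content": "Kun je dat inkorten?"})
#   conversation = chatbot(conversation)
#   print(conversation[-1]['content'])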
if "model_name" not in st.session_state.keys():
# Initialize the model with the default option
st.session_state["model_name"] = "BramVanroy/Llama-2-13b-chat-dutch"
my_config.update({'model_name': st.session_state["model_name"]})
llm_chatbot = generate_with_llama_chat(my_config)
st.session_state["model"] = llm_chatbot
text = st.text_area("Enter text to summarize here.")
if text:
    out = get_answer(st.session_state["model"], text)
    st.write(out)
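# Launch locally with: streamlit run app.py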