Spaces:
Runtime error
Runtime error
File size: 3,519 Bytes
fce98ea ea4e7e1 fce98ea ea4e7e1 fce98ea ea4e7e1 fce98ea ea4e7e1 fce98ea ea4e7e1 fce98ea |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
from huggingface_hub import list_models
import streamlit as st
from model import ReplicateModel
import os
import pandas as pd
# Directory containing the topic CSV files (a relative path).
DATASETS_PATH = 'datasets'

# Identifier handed to ReplicateModel for the Mistral 7B Instruct entry.
# NOTE(review): the part after the colon looks like a pinned version hash --
# confirm against the ReplicateModel implementation.
_MISTRAL_INSTRUCT_REF = (
    'mistralai/mistral-7b-instruct-v0.1:'
    '83b6a56e7c828e667f21fd596c338fd4f0039b46bcfa18d973e8e70e455fda70'
)

# Models offered in the UI, keyed by the name shown in the model selector.
models = {'mistral_instruct': ReplicateModel(_MISTRAL_INSTRUCT_REF)}
# Prompt templates offered in the UI, keyed by the name shown in the prompt
# selector. Every template contains the literal marker "[KEYWORDS]", which the
# generation step replaces with the comma-separated keywords of the selected
# topic before the prompt is sent to the model.
prompts = {
    # Zero-shot template: only the keywords and the expected answer format.
    'simple_prompt':
'''
I have a topic that is described by the following keywords: [KEYWORDS]
Based on the information above, extract a short topic label in the following format:
topic: <topic label>
''',
    # Few-shot template: same request, preceded by three worked examples.
    'few_shot_examples':
'''
I have a topic that is described by the following keywords: [KEYWORDS]
Example 1:
Keywords: apple,fruit,healthy,snack,red,orchard
Topic label: Healthy Fruit Snacks
Example 2:
Keywords: computer,technology,silicon,programming,internet,hardware
Topic label: Computer Technology
Example 3:
Keywords: democracy,government,elections,vote,political,representation
Topic label: Democratic Governance
Based on the information above, extract a short topic label in the following format:
topic: <topic label>
'''
    # Disabled entry: the UI code treats the key 'custom_prompt' specially and
    # shows a free-text prompt box instead of a fixed template when selected.
    # 'custom_prompt': ''
}
# Topic datasets offered in the UI: name shown in the dataset selector mapped
# to the path of the CSV file that is read when the dataset is chosen.
topicsets = dict(
    example_topics=os.path.join(DATASETS_PATH, 'topics.csv'),
)
@st.cache_data(show_spinner=False)
def get_available_models() -> list:
    """Return the names of the models that can be selected in the UI.

    Returns:
        A list with the keys of the module-level ``models`` registry, in
        insertion order.
    """
    # Disabled alternative that queries the Hugging Face Hub instead:
    # return [model.modelId for model in list_models(author='textminr')]

    # Return a list rather than ``models.keys()``: ``st.cache_data`` pickles
    # the return value, and a ``dict_keys`` view cannot be pickled.
    return list(models)
@st.cache_resource(show_spinner='Loading model...')
def load_model(model_name: str):
    """Load the registered model called *model_name*.

    The result is cached by ``st.cache_resource``, and a 'Loading model...'
    spinner is shown while the function runs.

    Args:
        model_name: A key of the module-level ``models`` registry.

    Returns:
        Whatever the registry entry's ``load()`` method returns.

    Raises:
        KeyError: If *model_name* is not a key of ``models``.
    """
    # Disabled alternative that builds a local text-generation pipeline:
    # model = AutoGPTQForCausalLM.from_quantized(model_name, device_map='auto')
    # return pipeline('text-generation', model=model, tokenizer=model_name)
    registry_entry = models[model_name]
    return registry_entry.load()
# Page chrome: browser tab title and icon, wide layout, and the page heading.
st.set_page_config(page_title='TL playground', page_icon='π', layout='wide')
st.title('π Topic Labelling playground')

# Share of the window width (in percent) that the main content container may
# occupy on viewports that are at least 1500px wide.
percentage_width_main = 70

# Doubled braces are literal CSS braces inside the f-string.
wide_screen_css = f'''<style>
@media only screen and (min-width: 1500px) {{
.appview-container .main .block-container{{
max-width: {percentage_width_main}%;
}}
}}
</style>
'''

# unsafe_allow_html is required so the <style> element is emitted as HTML.
st.markdown(wide_screen_css, unsafe_allow_html=True)
# Main UI: the left column picks the model, dataset and topic; the right
# column picks the prompt, triggers generation and shows the answer.
col1, col2 = st.columns(2, gap='medium')

# --- Left column: model ------------------------------------------------------
sel_model_name = col1.selectbox('Select a model', models, index=None, placeholder='Select a model')
if sel_model_name:
    model = load_model(sel_model_name)

# --- Left column: dataset and topic -----------------------------------------
sel_dataset_name = col1.selectbox('Select a dataset', topicsets.keys(), index=None)
if sel_dataset_name:
    sel_dataset = pd.read_csv(topicsets[sel_dataset_name])
    # These two columns are removed before the table is displayed or used.
    sel_dataset.drop(columns=['topic_id', 'domain'], inplace=True)
    col1.dataframe(sel_dataset)
    sel_row_index = col1.selectbox('Select a topic', sel_dataset.index)

# --- Right column: prompt ----------------------------------------------------
sel_prompt = col2.selectbox('Select a prompt', prompts.keys())
if sel_prompt != 'custom_prompt':
    col2.code(prompts[sel_prompt], language='text')
    sel_prompt_text = prompts[sel_prompt]
else:
    # Rendered in col2 (not at page level) so the input sits with the other
    # prompt widgets and directly above its caption.
    # NOTE(review): nesting was reconstructed from control flow; confirm that
    # the caption belongs to this custom-prompt branch.
    sel_prompt_text = col2.text_area('Custom prompt', height=200)
    col2.caption('Make sure to use "[KEYWORDS]" to indicate where the keywords should be inserted.')

# --- Right column: generation ------------------------------------------------
# The button stays disabled until both a model and a dataset are selected,
# which is what guarantees `model`, `sel_dataset` and `sel_row_index` exist
# inside the branch below.
btn_generate = col2.button('Generate', disabled=(sel_model_name is None or sel_dataset_name is None))
if btn_generate:
    # The first remaining column is skipped (presumably a non-keyword column
    # such as a label -- TODO confirm against topics.csv).
    row_values = sel_dataset.iloc[sel_row_index].tolist()[1:]
    # Convert every cell to text and skip missing cells, so a blank or
    # numeric CSV cell does not make str.join raise TypeError.
    keywords = ','.join(str(value) for value in row_values if not pd.isna(value))

    placeholder = col2.empty()
    with placeholder, st.spinner('Generating...'):
        prompt = sel_prompt_text.replace('[KEYWORDS]', keywords)
        # Disabled alternative for a local text-generation pipeline:
        # result = model(prompt, max_new_tokens=100, return_full_text=False)[0]['generated_text']
        result = model.generate(prompt)

    message = col2.chat_message("ai")
    message.write(result)
    message.caption('Keywords: ' + keywords)
|