Spaces:
Runtime error
Runtime error
Andreas Sünder
committed on
Commit
·
fce98ea
0
Parent(s):
Add files from previous repo
Browse files- .gitattributes +35 -0
- .gitignore +1 -0
- README.md +14 -0
- app.py +91 -0
- datasets/lda_poe_topics.csv +40 -0
- model.py +25 -0
- requirements.txt +3 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
__pycache__/
|
README.md
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Topic Labelling Playground
|
3 |
+
emoji: 🚀
|
4 |
+
colorFrom: green
|
5 |
+
colorTo: green
|
6 |
+
sdk: streamlit
|
7 |
+
sdk_version: 1.27.2
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: other
|
11 |
+
models:
|
12 |
+
- textminr/llama-2-7b-4bit-gptq
|
13 |
+
- textminr/llama-2-7b-chat-4bit-gptq
|
14 |
+
---
|
app.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from huggingface_hub import list_models
|
2 |
+
import streamlit as st
|
3 |
+
from model import ReplicateModel
|
4 |
+
|
5 |
+
import os
|
6 |
+
import pandas as pd
|
7 |
+
|
8 |
+
# Directory (relative to the working directory) that holds the topic CSV files.
DATASETS_PATH = 'datasets'

# Registry of selectable models: display name -> model wrapper.
models = {
    'mistral': ReplicateModel('mistralai/mistral-7b-instruct-v0.1:83b6a56e7c828e667f21fd596c338fd4f0039b46bcfa18d973e8e70e455fda70'),
}

# Registry of prompt templates: prompt name -> template text.
# The literal marker "[KEYWORDS]" is replaced with the comma-joined keywords
# of the selected topic row before the prompt is sent to the model.
prompts = {
    'simple_prompt':
'''
I have a topic that is described by the following keywords: [KEYWORDS]

Based on the information above, extract a short topic label in the following format:
topic: <topic label>
''',
    # 'custom_prompt': ''  (disabled; the UI already has a branch for this key)
}

# Registry of topic sets: dataset name -> path of its CSV file.
topicsets = {
    'lda_poe_topics': os.path.join(DATASETS_PATH, 'lda_poe_topics.csv'),
}
|
28 |
+
|
29 |
+
@st.cache_data(show_spinner=False)
def get_available_models():
    """Return the names of the models registered in ``models``.

    The names are returned as a list rather than a ``dict_keys`` view:
    ``st.cache_data`` pickles the return value, and a ``dict_keys`` view
    cannot be pickled.

    Returns:
        A list with the keys of the module-level ``models`` registry.
    """
    # NOTE: an earlier variant queried the Hub instead:
    #   [model.modelId for model in list_models(author='textminr')]
    return list(models)
|
33 |
+
|
34 |
+
@st.cache_resource(show_spinner='Loading model...')
def load_model(model_name: str):
    """Load the model registered under ``model_name``.

    Args:
        model_name: Key into the module-level ``models`` registry.

    Returns:
        Whatever the registered wrapper's ``load()`` returns.

    Raises:
        KeyError: If ``model_name`` is not a key of ``models``.
    """
    # NOTE: an earlier variant built a local text-generation pipeline from an
    # AutoGPTQ-quantized checkpoint; the registry wrapper replaces it.
    wrapper = models[model_name]
    return wrapper.load()
|
39 |
+
|
40 |
+
# Page chrome: wide layout, browser-tab title/icon and the visible heading.
st.set_page_config(page_title='TL playground', page_icon='🚀', layout='wide')
st.title('🚀 Topic Labelling playground')

# On viewports at least 1500px wide, cap the main container's width to this
# percentage by injecting a small stylesheet.
percentage_width_main = 70
_main_width_css = f'''<style>
@media only screen and (min-width: 1500px) {{
.appview-container .main .block-container{{
max-width: {percentage_width_main}%;
}}
}}
</style>
'''
st.markdown(_main_width_css, unsafe_allow_html=True)
|
55 |
+
|
56 |
+
# Two-column layout: model/dataset selection on the left, prompt and output
# on the right.
col1, col2 = st.columns(2, gap='medium')

# --- Left column: model ---------------------------------------------------
sel_model_name = col1.selectbox('Select a model', models, index=None, placeholder='Select a model')
if sel_model_name:
    model = load_model(sel_model_name)

# --- Left column: dataset and row -----------------------------------------
sel_dataset_name = col1.selectbox('Select a dataset', topicsets.keys(), index=None)
if sel_dataset_name:
    # The CSV is read without a header row, so columns are positional.
    sel_dataset = pd.read_csv(topicsets[sel_dataset_name], header=None)
    col1.dataframe(sel_dataset)

    sel_row_index = col1.selectbox('Select a row', sel_dataset.index)

# --- Right column: prompt --------------------------------------------------
sel_prompt = col2.selectbox('Select a prompt', prompts.keys())
if sel_prompt != 'custom_prompt':
    col2.code(prompts[sel_prompt], language='text')
    sel_prompt_text = prompts[sel_prompt]
else:
    # Render the editor in the right column, next to its caption and the
    # other prompt widgets (it was previously created on the page root).
    sel_prompt_text = col2.text_area('Custom prompt', height=200)
    col2.caption('Make sure to use "[KEYWORDS]" to indicate where the keywords should be inserted.')
|
76 |
+
|
77 |
+
# The button stays disabled until both a model and a dataset are selected,
# so `model`, `sel_dataset` and `sel_row_index` exist whenever it is clicked.
_selection_incomplete = sel_model_name is None or sel_dataset_name is None
btn_generate = col2.button('Generate', disabled=_selection_incomplete)
if btn_generate:
    # The first cell of the row is skipped; the rest are joined as keywords.
    _row_cells = sel_dataset.iloc[sel_row_index].tolist()
    keywords = ','.join(_row_cells[1:])

    placeholder = col2.empty()
    with placeholder, st.spinner('Generating...'):
        prompt = sel_prompt_text.replace('[KEYWORDS]', keywords)
        # NOTE: an earlier variant called a local text-generation pipeline
        # with max_new_tokens=100 here.
        result = model.generate(prompt)

    # Show the model answer plus the keywords it was generated from.
    message = col2.chat_message("ai")
    message.write(result)
    message.caption('Keywords: ' + keywords)
|
90 |
+
|
91 |
+
|
datasets/lda_poe_topics.csv
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Topic 1,howev,even,found,period,altogeth,imposs,precis,perhap,short,prove
|
2 |
+
Topic 2,water,wind,black,vessel,larg,sea,river,white,heavi,float
|
3 |
+
Topic 3,beauti,shall,name,whose,new,tree,angel,find,flower,fair
|
4 |
+
Topic 4,will,say,mean,rememb,speak,know,none,jupit,dare,limb
|
5 |
+
Topic 5,call,may,doubt,matter,question,exist,mind,beyond,term,now
|
6 |
+
Topic 6,one,everi,thing,moment,just,instant,anoth,almost,inde,frame
|
7 |
+
Topic 7,point,must,certain,fact,case,view,captain,given,consid,import
|
8 |
+
Topic 8,immedi,found,near,saw,state,now,present,sever,discov,approach
|
9 |
+
Topic 9,made,make,way,attempt,get,forc,difficulti,account,escap,effort
|
10 |
+
Topic 10,two,three,feet,year,hour,half,four,minut,thousand,hundr
|
11 |
+
Topic 11,whole,form,earth,small,around,figur,stood,portion,surfac,vast
|
12 |
+
Topic 12,upon,eye,look,fell,face,floor,fall,spot,depend,tabl
|
13 |
+
Topic 13,one,hope,power,leav,follow,entir,scarc,consider,pass,mad
|
14 |
+
Topic 14,man,old,die,away,live,ladi,young,busi,gentleman,pass
|
15 |
+
Topic 15,time,first,long,second,keep,thought,circumst,instanc,letter,care
|
16 |
+
Topic 16,much,evid,gave,although,truth,express,mean,seem,sens,felt
|
17 |
+
Topic 17,like,light,ever,life,dream,dark,moon,wild,deep,appear
|
18 |
+
Topic 18,effect,appear,step,upon,except,found,caus,event,discov,produc
|
19 |
+
Topic 19,day,night,last,long,continu,cours,late,arriv,bring,raven
|
20 |
+
Topic 20,lie,side,full,went,length,peter,augustus,lay,deck,board
|
21 |
+
Topic 21,far,air,seem,heaven,high,sun,breath,grew,atmospher,rise
|
22 |
+
Topic 22,now,becam,soon,distinct,object,absolut,necessari,appar,render,felt
|
23 |
+
Topic 23,poem,poe,origin,work,first,read,poet,note,paper,line
|
24 |
+
Topic 24,great,degre,island,sea,reach,set,land,strong,measur,sight
|
25 |
+
Topic 25,natur,feel,excit,interest,true,differ,intens,result,principl,peculiar
|
26 |
+
Topic 26,less,part,appear,thus,regard,posit,person,number,greater,mention
|
27 |
+
Topic 27,hand,left,place,take,took,right,arm,hold,put,end
|
28 |
+
Topic 28,without,bodi,reason,believ,suppos,corps,becom,madam,murder,least
|
29 |
+
Topic 29,still,idea,think,fanci,human,dead,possess,impress,smile,wonder
|
30 |
+
Topic 30,death,voic,sound,bell,heard,ear,low,without,fire,proceed
|
31 |
+
Topic 31,word,let,thus,attent,utter,spoke,alon,gone,repeat,scene
|
32 |
+
Topic 32,head,turn,came,upon,back,extrem,sudden,come,near,round
|
33 |
+
Topic 33,mani,manner,charact,particular,subject,singular,success,weather,alway,articl
|
34 |
+
Topic 34,littl,mere,remain,purpos,better,longer,suffer,use,wish,home
|
35 |
+
Topic 35,now,thou,friend,sure,inde,say,repli,art,fear,sir
|
36 |
+
Topic 36,well,good,among,observ,world,general,known,men,may,knew
|
37 |
+
Topic 37,said,might,possibl,thought,taken,king,still,bird,yes,dupin
|
38 |
+
Topic 38,open,door,within,close,room,chamber,wall,enter,main,box
|
39 |
+
Topic 39,never,can,noth,see,yet,seen,even,done,know,eye
|
40 |
+
Topic 40,love,thi,heart,soul,spirit,thee,god,shadow,within,passion
|
model.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from abc import ABC, abstractmethod
|
2 |
+
import replicate
|
3 |
+
|
4 |
+
class GenericModel(ABC):
    """Abstract base for model wrappers used by the app.

    Deriving from ``ABC`` makes the ``@abstractmethod`` markers effective:
    a subclass must implement both ``load`` and ``generate`` before it can
    be instantiated.
    """

    def __init__(self, name: str):
        """Store the model identifier.

        Args:
            name: Identifier of the model; kept on ``self.name``.
        """
        self.name = name

    @abstractmethod
    def load(self):
        """Prepare the model for use and return the object to call."""

    @abstractmethod
    def generate(self, prompt: str):
        """Produce a completion for ``prompt``.

        Args:
            prompt: The full prompt text to send to the model.
        """
|
15 |
+
|
16 |
+
class ReplicateModel(GenericModel):
    """Model wrapper that runs a model through ``replicate.run``."""

    def __init__(self, name: str):
        """Create a wrapper for the model identified by ``name``.

        Args:
            name: Model identifier passed as the first argument to
                ``replicate.run``.
        """
        super().__init__(name)

    def load(self):
        """Return this wrapper itself; no loading step is performed here."""
        return self

    def generate(self, prompt: str):
        """Run the model on ``prompt`` and return the concatenated output.

        Args:
            prompt: Text passed to the model as the ``prompt`` input.

        Returns:
            The items yielded by ``replicate.run`` joined into one string.
        """
        payload = {'prompt': prompt}
        chunks = replicate.run(self.name, input=payload)
        return ''.join(chunks)
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
transformers @ git+https://github.com/huggingface/transformers
|
2 |
+
replicate
|
3 |
+
auto_gptq
|