Spaces:

rycont
/

Biblify

Runtime error

File size: 2,032 Bytes

9234bd3
 
 
 
 
 
 
 
 
 
 
 
 
f2de50e
 
3a2997a
53637e0
1fbbb31
f2de50e
9234bd3
 
 
 
 
 
 
 
 
 
2defe66
 
9234bd3
 
 
 
c5ecc2d
f2de50e
 
aeb7c06
9234bd3
 
 
 
 
2defe66
9234bd3
2defe66
9234bd3
 
 
 
 
 
 
 
 
 
084831c
 
2defe66
 
 
084831c
2defe66
084831c
 
 
2defe66
084831c

import streamlit as lit
import torch
from transformers import BartForConditionalGeneration, PreTrainedTokenizerFast

# NOTE(review): `lit.cache` looks like Streamlit's legacy caching decorator;
# newer releases offer `cache_resource` for model objects — confirm against
# the Streamlit version this Space pins before changing it.
@lit.cache(allow_output_mutation = True)
def loadModels():
  """Load the Biblify BART model and its tokenizer from the "rycont/biblify" repository.

  The call is wrapped in Streamlit's cache so the pretrained weights are
  not re-loaded on each script run.

  Returns:
    A ``(model, tokenizer)`` tuple: a ``BartForConditionalGeneration`` and
    a ``PreTrainedTokenizerFast``, both loaded from the same repository id.
  """
  repo_id = "rycont/biblify"
  bart = BartForConditionalGeneration.from_pretrained(repo_id)
  fast_tokenizer = PreTrainedTokenizerFast.from_pretrained(repo_id)

  # Console trace marking that the (uncached) load actually ran.
  print("Loaded :)")
  return bart, fast_tokenizer

# Page header. Streamlit renders these in call order: the title
# ("Bible-style speech generator"), a usage hint ("works best on a single
# sentence; if it does not work properly, use the link below"), and the
# fallback endpoint URL.
lit.title("성경말투 생성기")
lit.caption("한 문장을 가장 잘 변환합니다. 제대로 동작하지 않다면 아래 링크로 이동해주세요")
lit.caption("https://main-biblify-space-rycont.endpoint.ainize.ai/")

# Module-level model/tokenizer used by the generation code below.
model, tokenizer = loadModels()

# Used both as the padded input length and as `max_length` for generation.
MAX_LENGTH = 128

def biblifyWithBeams(beam, tokens, attention_mask):
  """Generate one output sentence with beam search and return it as text.

  Args:
    beam: Number of beams passed to ``model.generate`` as ``num_beams``.
    tokens: List of input token ids for a single sequence.
    attention_mask: List of 0/1 flags, the same length as ``tokens``.

  Returns:
    The decoded first generated sequence with the literal ``<s>`` and
    ``</s>`` markers stripped out.
  """
  generated = model.generate(
    # Build int64 tensors directly; going through the float `torch.Tensor`
    # constructor and casting back is an unnecessary float round-trip.
    input_ids=torch.tensor([tokens], dtype=torch.int64),
    # Use the mask that was passed in rather than a same-named global, so
    # the function does not depend on module state set up by the caller.
    attention_mask=torch.tensor([attention_mask], dtype=torch.int64),
    num_beams=beam,
    max_length=MAX_LENGTH,
    eos_token_id=tokenizer.eos_token_id,
    # Ban the unknown-token id from appearing in the generated output.
    bad_words_ids=[[tokenizer.unk_token_id]],
  )[0]

  decoded = tokenizer.decode(generated)
  return decoded.replace('<s>', '').replace('</s>', '')

# Input form: a single text field ("enter a sentence") and a submit button
# ("generate").
with lit.form("gen"):
  text_input = lit.text_input("문장 입력")
  submitted = lit.form_submit_button("생성")

# Only generate when the field holds something other than whitespace.
if text_input.strip():
  print(text_input)

  # Wrap the raw sentence in the start/end markers before encoding.
  text_input = "<s>" + text_input + "</s>"

  tokens = tokenizer.encode(text_input)
  tokenLength = len(tokens)

  # Right-pad ids and mask up to MAX_LENGTH. If the input is already longer,
  # the pad count is negative and list repetition yields no padding.
  padCount = MAX_LENGTH - tokenLength
  attentionMasks = [1] * tokenLength + [0] * padCount
  tokens = tokens + [tokenizer.pad_token_id] * padCount

  results = []
  # Try five beam widths (6 through 10) and show each distinct output once.
  beamWidths = range(6, 11)

  for beam in beamWidths:
    generated = biblifyWithBeams(beam, tokens, attentionMasks)
    if generated in results:
      print("중복됨")
      continue

    results.append(generated)

    with lit.expander(str(len(results)) + "번째 결과 (" + str(beam) + ")", True):
      lit.write(generated)
      print(generated)

  # Report the duplicate count once, after all beam widths have run. Doing
  # this inside the loop printed a caption after every unique result with a
  # count that was not yet final.
  duplicateCount = len(beamWidths) - len(results)
  if duplicateCount > 0:
    lit.caption("및 " + str(duplicateCount) + " 개의 중복된 결과")