YAML Metadata Warning: empty or missing YAML metadata in repo card
(https://huggingface.co/docs/hub/model-cards#model-card-metadata)
import nlp2
import json
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from asrp.code2voice_model.hubert import hifigan_hubert_layer6_code100
import IPython.display as ipd
def _units_to_text(units):
    """Render a sequence of unit ids as concatenated ``v_tok_<id>`` tokens."""
    return "".join(f"v_tok_{unit}" for unit in units)


# Load the fine-tuned seq2seq checkpoint and its tokenizer by repo id.
tokenizer = AutoTokenizer.from_pretrained("Oscarshih/long-t5-base-SQA-15ep")
model = AutoModelForSeq2SeqLM.from_pretrained("Oscarshih/long-t5-base-SQA-15ep")
dataset = load_dataset("voidful/NMSQA-CODE")
# `cs` is called below with a list of unit codes to produce audio data, and
# exposes the `sample_rate` used for playback.
cs = hifigan_hubert_layer6_code100()

# Use the first example of the 'dev' split. Each *_unit field is a JSON string;
# the "merged_code" entry of its first element is the unit sequence used here.
qa_item = dataset['dev'][0]
question_unit = json.loads(qa_item['hubert_100_question_unit'])[0]["merged_code"]
context_unit = json.loads(qa_item['hubert_100_context_unit'])[0]["merged_code"]
answer_unit = json.loads(qa_item['hubert_100_answer_unit'])[0]["merged_code"]

# groundtruth answer
# Display explicitly: a bare `ipd.Audio(...)` expression is only rendered when
# it is the last expression of a notebook cell, so this player would otherwise
# never appear when the snippet is run as a whole.
ipd.display(ipd.Audio(data=cs(answer_unit), autoplay=False, rate=cs.sample_rate))

# predict answer
# The model input is the question units followed directly by the context
# units, each written as a `v_tok_<id>` token with no separator.
inputs = tokenizer(
    _units_to_text(question_unit) + _units_to_text(context_unit),
    return_tensors="pt",
)
code = tokenizer.batch_decode(model.generate(**inputs, max_length=1024))[0]
# Strip the sentence markers, then split on the token prefix. The text before
# the first `v_tok_` is dropped by `[1:]`; every remaining piece must parse as
# an integer unit id.
code = [
    int(piece)
    for piece in code.replace("</s>", "").replace("<s>", "").split("v_tok_")[1:]
]
ipd.display(ipd.Audio(data=cs(code), autoplay=False, rate=cs.sample_rate))
- Downloads last month: 2
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social
visibility and check back later, or deploy to Inference Endpoints (dedicated)
instead.