adrien1 commited on
Commit
60816d5
1 Parent(s): 40a1e4d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -32
app.py CHANGED
@@ -1,36 +1,20 @@
1
import torch
import gradio as gr
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load the pre-trained GPT-2 model and its tokenizer once at startup
# (downloads from the HuggingFace Hub on first run).
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")


def generate_text(input_text):
    """Generate a continuation of *input_text* with GPT-2.

    Returns the decoded string (prompt included), sampled with
    temperature 0.9 and capped at 100 tokens total.
    """
    input_ids = tokenizer.encode(input_text, return_tensors="pt")

    # Inference only — disable gradient tracking.
    with torch.no_grad():
        # BUG FIX: no_repeat_ngram_size expects an int (the n-gram length),
        # but the original passed True. Since True == 1, that forbade any
        # token from ever appearing twice in the output, badly degrading
        # generation. 2 is the conventional value (block repeated bigrams).
        output = model.generate(
            input_ids,
            max_length=100,
            no_repeat_ngram_size=2,
            do_sample=True,
            temperature=0.9,
        )

    return tokenizer.decode(output[0], skip_special_tokens=True)


# Gradio UI: one multi-line input textbox, one output textbox.
iface = gr.Interface(
    generate_text,
    gr.Textbox(lines=5, label="Input Text"),
    "textbox",
    examples=[
        ["Once upon a time, in a land far, far away..."]
    ],
    title="GPT-2 Text Generation",
    description="Enter some text and GPT-2 will generate more!",
    theme="compact",
)
iface.launch()
 
1
from transformers import AutoProcessor, SeamlessM4Tv2Model
import torchaudio
from IPython.display import Audio

# SeamlessM4T-v2: one model for both text-to-speech and speech-to-speech
# translation. Processor and weights come from the HuggingFace Hub.
MODEL_ID = "facebook/seamless-m4t-v2-large"
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = SeamlessM4Tv2Model.from_pretrained(MODEL_ID)

# --- English text -> Russian speech ---
inputs_from_text = processor(text = "Hello, my dog is cute", src_lang="eng", return_tensors="pt")
generated = model.generate(**inputs_from_text, tgt_lang="rus")
audio_array_from_text = generated[0].cpu().numpy().squeeze()

# --- Speech -> Russian speech ---
# NOTE(review): assumes the torchaudio backend in use can open URLs directly
# — confirm, or download the file first.
waveform, orig_freq = torchaudio.load("https://www2.cs.uic.edu/~i101/SoundFiles/preamble10.wav")
# must be a 16 kHz waveform array
waveform = torchaudio.functional.resample(waveform, orig_freq=orig_freq, new_freq=16_000)
inputs_from_audio = processor(audios=waveform, return_tensors="pt")
audio_array_from_audio = model.generate(**inputs_from_audio, tgt_lang="rus")[0].cpu().numpy().squeeze()


# Play the text-translation result inline (notebook only).
sample_rate = model.config.sampling_rate
Audio(audio_array_from_text, rate=sample_rate)
# Audio(audio_array_from_audio, rate=sample_rate)