Files changed (1)
  1. app.py +21 -67
app.py CHANGED
@@ -3,77 +3,31 @@ import librosa
  import numpy as np
  import torch

- from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan

- checkpoint = "microsoft/speecht5_tts"
- processor = SpeechT5Processor.from_pretrained(checkpoint)
- model = SpeechT5ForTextToSpeech.from_pretrained(checkpoint)
- vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

- speaker_embeddings = {
-     "BDL": "Speakers/cmu_us_bdl_arctic-wav-arctic_a0009.npy",
-     "CLB": "Speakers/cmu_us_clb_arctic-wav-arctic_a0144.npy",
-     "KSP": "Speakers/cmu_us_ksp_arctic-wav-arctic_b0087.npy",
-     "RMS": "Speakers/cmu_us_rms_arctic-wav-arctic_b0353.npy",
-     "SLT": "Speakers/cmu_us_slt_arctic-wav-arctic_a0508.npy",
- }

- def predict(text, speaker):
-     if len(text.strip()) == 0:
-         return (16000, np.zeros(0).astype(np.int16))
-
-     inputs = processor(text=text, return_tensors="pt")
-
-     # limit input length
-     input_ids = inputs["input_ids"]
-     input_ids = input_ids[..., :model.config.max_text_positions]
-
-     if speaker == "Surprise Me!":
-         # load one of the provided speaker embeddings at random
-         idx = np.random.randint(len(speaker_embeddings))
-         key = list(speaker_embeddings.keys())[idx]
-         speaker_embedding = np.load(speaker_embeddings[key])
-
-         # randomly shuffle the elements
-         np.random.shuffle(speaker_embedding)
-
-         # randomly flip half the values
-         x = (np.random.rand(512) >= 0.5) * 1.0
-         x[x == 0] = -1.0
-         speaker_embedding *= x
-
-         #speaker_embedding = np.random.rand(512).astype(np.float32) * 0.3 - 0.15
-     else:
-         speaker_embedding = np.load(speaker_embeddings[speaker[:3]])
-
-     speaker_embedding = torch.tensor(speaker_embedding).unsqueeze(0)
-
-     speech = model.generate_speech(input_ids, speaker_embedding, vocoder=vocoder)
-
-     speech = (speech.numpy() * 32767).astype(np.int16)
-     return (16000, speech)
-
- title = "LoreWeaver: A Novel Generation Multimodal LLM"
-
- gr.Interface(
-     fn=predict,
-     inputs=[
-         gr.Text(label="Input Text"),
-         gr.Radio(label="Speaker", choices=[
-             "BDL (male)",
-             "CLB (female)",
-             "KSP (male)",
-             "RMS (male)",
-             "SLT (female)",
-             "Surprise Me!"
-         ],
-         value="BDL (male)"),
-     ],
-     outputs=[
-         gr.Audio(label="Generated Speech", type="numpy"),
-     ],
-     title=title,
- ).launch()
 
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

+ # Load the model and tokenizer
+ model_name = "Reverb/Mistral-7B-LoreWeaver"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name)

+ # Initialize the pipeline
+ generator = pipeline('text-generation', model=model, tokenizer=tokenizer)

+ def generate_story(prompt):
+     # Generate a response using the model
+     responses = generator(prompt, max_length=200, num_return_sequences=1)
+     return responses[0]['generated_text']

+ # Define the Gradio interface
+ iface = gr.Interface(
+     fn=generate_story,
+     inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
+     outputs=gr.Textbox(label="Generated Story"),
+     title="Mistral-7B-LoreWeaver Story Generator",
+     description="Enter a prompt to generate a narrative text using the Mistral-7B-LoreWeaver model."
+ )

+ # Launch the interface
+ iface.launch()
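
A note on the added loading code: a 7B-parameter model is roughly 14 GB of weights in float32, so AutoModelForCausalLM.from_pretrained(model_name) as written may load slowly or fail to fit in memory on CPU-only hardware. The sketch below shows a lower-memory variant of the same loading step, assuming a CUDA GPU and the accelerate package are available; the repo id comes from the diff above, and everything else is illustrative rather than part of the PR.

# Sketch, not part of the PR: load the 7B model in half precision on GPU.
# Assumes a CUDA device and the accelerate package are installed.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_name = "Reverb/Mistral-7B-LoreWeaver"  # repo id taken from the diff
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # halves memory versus the float32 default
    device_map="auto",          # lets accelerate place weights on the GPU
)
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

# The pipeline returns a list of dicts keyed by "generated_text",
# which is the structure generate_story() above indexes into.
out = generator("Once upon a time", max_length=200, num_return_sequences=1)
print(out[0]["generated_text"])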