kjysmu committed on
Commit
f1a5bcb
1 Parent(s): 0e18a50

modify app

Files changed (1)
  1. app.py +119 -79
app.py CHANGED
@@ -78,6 +78,8 @@ flatsharpDic = {
     'Bb':'A#'
 }
 
+chordList = ['C','C#','D','D#','E','F','F#','G','G#','A','A#','B']
+
 max_conseq_N = 0
 max_conseq_chord = 2
 tempo = 120
@@ -89,6 +91,17 @@ min_velocity = 49 # Minimum velocity value in the output range
 max_velocity = 112 # Maximum velocity value in the output range
 
 
+# def get_video_duration(file_path):
+#     try:
+#         clip = VideoFileClip(file_path)
+#         duration = clip.duration
+#         clip.close()
+#         return duration
+#     except Exception as e:
+#         print(f"An error occurred: {e}")
+#         return None
+
+
 def split_video_into_frames(video, frame_dir):
     output_path = os.path.join(frame_dir, f"%03d.jpg")
     cmd = f"ffmpeg -i {video} -vf \"select=bitor(gte(t-prev_selected_t\,1)\,isnan(prev_selected_t))\" -vsync 0 -qmin 1 -q:v 1 {output_path}"
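The hunk above adds a `get_video_duration` helper but leaves it commented out. Below is a minimal runnable sketch of the same duration guard, assuming `moviepy` is installed; the 300-second limit mirrors the check added later in this commit and is not a constant defined in this hunk.

```python
# Sketch only: mirrors the commented-out helper added above; not part of the commit.
from moviepy.editor import VideoFileClip  # assumes moviepy is available

def get_video_duration(file_path):
    """Return the video duration in seconds, or None if the file cannot be opened."""
    try:
        clip = VideoFileClip(file_path)
        duration = clip.duration
        clip.close()
        return duration
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

# Usage matching the (also commented-out) guard added later in generate():
duration = get_video_duration("example.mp4")  # hypothetical local file
if duration is not None and duration >= 300:
    raise ValueError("We only support duration of video less than 300 seconds")
```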
@@ -390,77 +403,6 @@ class Video2music:
 
     def generate(self, video, primer, key):
 
-        feature_dir = Path("./feature")
-        output_dir = Path("./output")
-        if feature_dir.exists():
-            shutil.rmtree(str(feature_dir))
-        if output_dir.exists():
-            shutil.rmtree(str(output_dir))
-
-        feature_dir.mkdir(parents=True)
-        output_dir.mkdir(parents=True)
-
-        frame_dir = feature_dir / "vevo_frame"
-
-        #video features
-        semantic_dir = feature_dir / "vevo_semantic"
-        emotion_dir = feature_dir / "vevo_emotion"
-        scene_dir = feature_dir / "vevo_scene"
-        scene_offset_dir = feature_dir / "vevo_scene_offset"
-        motion_dir = feature_dir / "vevo_motion"
-
-        frame_dir.mkdir(parents=True)
-        semantic_dir.mkdir(parents=True)
-        emotion_dir.mkdir(parents=True)
-        scene_dir.mkdir(parents=True)
-        scene_offset_dir.mkdir(parents=True)
-        motion_dir.mkdir(parents=True)
-
-        #music features
-        chord_dir = feature_dir / "vevo_chord"
-        loudness_dir = feature_dir / "vevo_loudness"
-        note_density_dir = feature_dir / "vevo_note_density"
-
-        chord_dir.mkdir(parents=True)
-        loudness_dir.mkdir(parents=True)
-        note_density_dir.mkdir(parents=True)
-
-        split_video_into_frames(video, frame_dir)
-        gen_semantic_feature(frame_dir, semantic_dir)
-        gen_emotion_feature(frame_dir, emotion_dir)
-        gen_scene_feature(video, scene_dir, frame_dir)
-        gen_scene_offset_feature(scene_dir, scene_offset_dir)
-        gen_motion_feature(video, motion_dir)
-
-        feature_scene_offset = get_scene_offset_feature(scene_offset_dir)
-        feature_motion = get_motion_feature(motion_dir)
-        feature_emotion = get_emotion_feature(emotion_dir)
-        feature_semantic = get_semantic_feature(semantic_dir)
-
-        # cuda
-        feature_scene_offset = feature_scene_offset.to(self.device)
-        feature_motion = feature_motion.to(self.device)
-        feature_emotion = feature_emotion.to(self.device)
-
-        feature_scene_offset = feature_scene_offset.unsqueeze(0)
-        feature_motion = feature_motion.unsqueeze(0)
-        feature_emotion = feature_emotion.unsqueeze(0)
-
-        feature_semantic = feature_semantic.to(self.device)
-        feature_semantic_list = []
-        feature_semantic = torch.unsqueeze(feature_semantic, 0)
-        feature_semantic_list.append( feature_semantic.to(self.device) )
-        #feature_semantic_list.append( feature_semantic )
-
-        if "major" in key:
-            feature_key = torch.tensor([0])
-            feature_key = feature_key.float()
-        elif "minor" in key:
-            feature_key = torch.tensor([1])
-            feature_key = feature_key.float()
-
-        feature_key = feature_key.to(self.device)
-
         with open('dataset/vevo_meta/chord.json') as json_file:
             chordDic = json.load(json_file)
         with open('dataset/vevo_meta/chord_inv.json') as json_file:
@@ -504,14 +446,30 @@ class Video2music:
                 pChord = pChord[0:type_idx] + ":maj6"
             if pChord[type_idx+1:] == "M7":
                 pChord = pChord[0:type_idx] + ":maj7"
-            if pChord[type_idx+1:] == "":
+            if pChord[type_idx+1:] == "" or pChord[type_idx+1:] == "maj" or pChord[type_idx+1:] == "M":
                 pChord = pChord[0:type_idx]
 
             print("pchord is ", pChord)
-            chordID = chordDic[pChord]
-            primerCID.append(chordID)
+            if pChord not in chordDic:
+                raise gr.Error("Not Supported Chord Type!")
 
             chord_arr = pChord.split(":")
+
+            trans = traspose_key_dic[key]
+            trasindex = (chordList.index( chord_arr[0] ) - trans) % 12
+
+            if len(chord_arr) == 1:
+                pChordTrans = chordList[trasindex]
+            elif len(chord_arr) == 2:
+                pChordTrans = chordList[trasindex] + ":" + chord_arr[1]
+
+            print(pChordTrans)
+
+
+            chordID = chordDic[pChordTrans]
+            primerCID.append(chordID)
+            chord_arr = pChordTrans.split(":")
+
             if len(chord_arr) == 1:
                 chordRootID = chordRootDic[chord_arr[0]]
                 primerCID_root.append(chordRootID)
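The hunk above is the core of this commit: before a primer chord is looked up in `chordDic`, its root is transposed from the user-selected key into the model's internal key space using `chordList` and `traspose_key_dic`. The self-contained sketch below reproduces that arithmetic; the `traspose_key_dic` offsets shown are illustrative assumptions, since that dictionary is defined elsewhere in app.py and is not visible in this diff.

```python
# Standalone sketch of the transposition added above. chordList matches the diff;
# the traspose_key_dic values here are assumed for illustration only.
chordList = ['C','C#','D','D#','E','F','F#','G','G#','A','A#','B']
traspose_key_dic = {"C major": 0, "A minor": 0, "E minor": 4}  # assumed semitone offsets

def transpose_primer_chord(p_chord: str, key: str) -> str:
    """Shift the root of a chord like 'D:min7' by the key's offset, wrapping mod 12."""
    chord_arr = p_chord.split(":")
    trans = traspose_key_dic[key]
    trasindex = (chordList.index(chord_arr[0]) - trans) % 12
    if len(chord_arr) == 1:
        return chordList[trasindex]
    return chordList[trasindex] + ":" + chord_arr[1]

print(transpose_primer_chord("D:min7", "E minor"))  # 'A#:min7' under the assumed offset of 4
print(transpose_primer_chord("C", "C major"))       # 'C' (offset 0 leaves the root unchanged)
```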
@@ -537,6 +495,84 @@ class Video2music:
         primerCID_attr = primerCID_attr.to(torch.long)
         primerCID_attr = primerCID_attr.to(self.device)
 
+        # duration = get_video_duration(video)
+
+        # if duration >= 300:
+        #     raise gr.Error("We only support duration of video less than 300 seconds")
+
+        feature_dir = Path("./feature")
+        output_dir = Path("./output")
+        if feature_dir.exists():
+            shutil.rmtree(str(feature_dir))
+        if output_dir.exists():
+            shutil.rmtree(str(output_dir))
+
+        feature_dir.mkdir(parents=True)
+        output_dir.mkdir(parents=True)
+
+        frame_dir = feature_dir / "vevo_frame"
+
+        #video features
+        semantic_dir = feature_dir / "vevo_semantic"
+        emotion_dir = feature_dir / "vevo_emotion"
+        scene_dir = feature_dir / "vevo_scene"
+        scene_offset_dir = feature_dir / "vevo_scene_offset"
+        motion_dir = feature_dir / "vevo_motion"
+
+        frame_dir.mkdir(parents=True)
+        semantic_dir.mkdir(parents=True)
+        emotion_dir.mkdir(parents=True)
+        scene_dir.mkdir(parents=True)
+        scene_offset_dir.mkdir(parents=True)
+        motion_dir.mkdir(parents=True)
+
+        #music features
+        chord_dir = feature_dir / "vevo_chord"
+        loudness_dir = feature_dir / "vevo_loudness"
+        note_density_dir = feature_dir / "vevo_note_density"
+
+        chord_dir.mkdir(parents=True)
+        loudness_dir.mkdir(parents=True)
+        note_density_dir.mkdir(parents=True)
+
+        split_video_into_frames(video, frame_dir)
+        gen_semantic_feature(frame_dir, semantic_dir)
+        gen_emotion_feature(frame_dir, emotion_dir)
+        gen_scene_feature(video, scene_dir, frame_dir)
+        gen_scene_offset_feature(scene_dir, scene_offset_dir)
+        gen_motion_feature(video, motion_dir)
+
+        feature_scene_offset = get_scene_offset_feature(scene_offset_dir)
+        feature_motion = get_motion_feature(motion_dir)
+        feature_emotion = get_emotion_feature(emotion_dir)
+        feature_semantic = get_semantic_feature(semantic_dir)
+
+        # cuda
+        feature_scene_offset = feature_scene_offset.to(self.device)
+        feature_motion = feature_motion.to(self.device)
+        feature_emotion = feature_emotion.to(self.device)
+
+        feature_scene_offset = feature_scene_offset.unsqueeze(0)
+        feature_motion = feature_motion.unsqueeze(0)
+        feature_emotion = feature_emotion.unsqueeze(0)
+
+        feature_semantic = feature_semantic.to(self.device)
+        feature_semantic_list = []
+        feature_semantic = torch.unsqueeze(feature_semantic, 0)
+        feature_semantic_list.append( feature_semantic.to(self.device) )
+        #feature_semantic_list.append( feature_semantic )
+
+        if "major" in key:
+            feature_key = torch.tensor([0])
+            feature_key = feature_key.float()
+        elif "minor" in key:
+            feature_key = torch.tensor([1])
+            feature_key = feature_key.float()
+
+        feature_key = feature_key.to(self.device)
+
+
+
         # self.model.eval()
         # self.modelReg.eval()
 
@@ -616,6 +652,7 @@ class Video2music:
         midi_chords = voice(midi_chords_orginal)
         trans = traspose_key_dic[key]
 
+
         for i, chord in enumerate(midi_chords):
             if densitylist[i] == 0:
                 if len(chord) >= 4:
@@ -727,6 +764,9 @@ def gradio_generate2(input_youtube, input_primer, input_key):
     youtube_dir.mkdir(parents=True)
 
     yObject = YouTube(input_youtube)
+    if yObject.length >= 300:
+        raise gr.Error("We only support duration of video less than 300 seconds")
+
     yObject_stream = yObject.streams.get_by_resolution("240p")
     fname = yObject.video_id +".mp4"
     if yObject_stream == None:
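For the YouTube path, the hunk above rejects long videos before any stream is downloaded, using pytube's `YouTube.length` (duration in seconds). A standalone sketch of that guard, assuming `pytube` and `gradio` are installed:

```python
# Sketch of the early length check added above; not part of the commit itself.
from pytube import YouTube
import gradio as gr

def assert_short_enough(url: str, limit_s: int = 300) -> None:
    """Raise a Gradio-visible error when the YouTube video is limit_s seconds or longer."""
    y_object = YouTube(url)
    if y_object.length >= limit_s:
        raise gr.Error("We only support duration of video less than 300 seconds")
```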
@@ -813,11 +853,11 @@ with gr.Blocks(css=css) as demo:
         # with gr.Column(visible=True) as colA:
         with gr.Column(visible=True) as rowA:
             with gr.Row():
-                input_video = gr.Video(label="Input Video")
+                input_video = gr.Video(label="Input Video", max_length=299)
             with gr.Row():
                 with gr.Row():
-                    input_primer = gr.Textbox(label="Input Primer", value="C Am F G")
-                    input_key = gr.Dropdown(choices=["C major", "A minor"], value="C major", label="Input Key")
+                    input_primer = gr.Textbox(label="Input Primer", value="C Am F G", info="Supported types: dim, sus4, min7(m7), min(m), sus2, aug, dim7, maj6(M6), hdim7, 7, min6(m6), maj7(M7)")
+                    input_key = gr.Dropdown(choices=all_key_names, value="C major", label="Input Key")
             with gr.Row():
                 btn = gr.Button("Generate")
 
@@ -826,8 +866,8 @@ with gr.Blocks(css=css) as demo:
                 input_video_yt = gr.Textbox(label="YouTube URL")
             with gr.Row():
                 with gr.Row():
-                    input_primer_yt = gr.Textbox(label="Input Primer", value="C Am F G")
-                    input_key_yt = gr.Dropdown(choices=["C major", "A minor"], value="C major", label="Input Key")
+                    input_primer_yt = gr.Textbox(label="Input Primer", value="C Am F G", info="Supported types: dim, sus4, min7(m7), min(m), sus2, aug, dim7, maj6(M6), hdim7, 7, min6(m6), maj7(M7)")
+                    input_key_yt = gr.Dropdown(choices=all_key_names, value="C major", label="Input Key")
             with gr.Row():
                 btn_yt = gr.Button("Generate")
 
 