File size: 3,107 Bytes
f62b8d3
 
e864e26
f62b8d3
1185ec1
e864e26
 
4c34e70
e864e26
f62b8d3
 
 
93f8352
 
f62b8d3
 
93f8352
 
0f35d4c
f62b8d3
 
 
 
93f8352
 
0f35d4c
761239a
4c34e70
93f8352
 
4c34e70
f62b8d3
 
e864e26
f62b8d3
 
 
 
 
93f8352
 
0f35d4c
761239a
 
93f8352
 
4c34e70
f62b8d3
 
e864e26
f62b8d3
 
 
 
 
 
 
 
 
 
93f8352
761239a
 
93f8352
 
4c34e70
f62b8d3
 
93f8352
0f35d4c
f62b8d3
93f8352
 
 
f62b8d3
 
 
93f8352
f62b8d3
93f8352
 
f62b8d3
 
93f8352
 
 
761239a
 
93f8352
 
4c34e70
93f8352
 
0f35d4c
f62b8d3
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96

import metadataParser from "markdown-yaml-metadata-parser"
import { defaultMediaOrientation, parseMediaOrientation } from "@aitube/clap"

import { ParsedDatasetReadme, ParsedMetadataAndContent } from "@/types/general"
import { defaultVideoModel } from "@/app/config"

import { parseVideoModelName } from "./parseVideoModelName"


/**
 * Parses a dataset README (front-matter metadata followed by a Markdown body)
 * into a ParsedDatasetReadme.
 *
 * The metadata supplies `license`, `pretty_name` and `hf_tags`; every other
 * field is read from the Markdown sections returned by `parseMarkdown`.
 * Parsing is best-effort: if anything throws, an object filled with
 * empty/default values is returned instead of propagating the error.
 *
 * @param markdown Raw README text (defaults to an empty string)
 * @returns The parsed README, or the empty/default README on failure
 */
export function parseDatasetReadme(markdown: string = ""): ParsedDatasetReadme {
  try {
    const { metadata, content } = metadataParser(markdown.trim()) as ParsedMetadataAndContent

    // console.log("DEBUG README:", { metadata, content })

    const { model, lora, style, thumbnail, voice, music, description, prompt, tags, orientation } = parseMarkdown(content)

    return {
      license: typeof metadata?.license === "string" ? metadata.license : "",
      pretty_name: typeof metadata?.pretty_name === "string" ? metadata.pretty_name : "",
      hf_tags: Array.isArray(metadata?.tags) ? metadata.tags : [],
      tags: parseBulletList(tags),
      model: parseVideoModelName(model, defaultVideoModel),
      lora,
      // the style list is flattened into a single comma-separated string
      style: parseBulletList(style).join(", "),
      thumbnail,
      voice,
      music,
      description,
      prompt,
      orientation: parseMediaOrientation(orientation, defaultMediaOrientation),
    }
  } catch {
    // deliberate best-effort: a README that cannot be parsed yields defaults
    return {
      license: "",
      pretty_name: "",
      hf_tags: [], // Hugging Face tags
      tags: [],
      model: defaultVideoModel,
      lora: "",
      style: "",
      thumbnail: "",
      voice: "",
      music: "",
      description: "",
      prompt: "",
      orientation: defaultMediaOrientation,
    }
  }
}

/**
 * Splits a Markdown bullet list ("- a\n- b") into its trimmed, non-empty items.
 *
 * Only a dash at the start of a line (optionally indented) is treated as a
 * bullet marker, so dashes inside an item (e.g. "sci-fi") are preserved.
 * Text without any bullet marker is returned as a single item.
 */
function parseBulletList(text: string): string[] {
  if (typeof text !== "string" || !text) {
    return []
  }
  return text
    .split(/^[ \t]*-[ \t]*/m)
    .map(item => item.trim())
    .filter(item => item)
}

/**
 * Minimal Markdown section parser.
 *
 * Walks the text line by line. A line made of one or more `#` followed by
 * whitespace and a title opens a section; every following line up to the next
 * such heading is that section's content. Titles are matched
 * case-insensitively and, when a title appears more than once, the last
 * occurrence wins. Text before the first heading is ignored.
 *
 * A `#` that does not start a heading line (e.g. "Episode #1", a URL fragment,
 * a hex colour) is kept as ordinary content.
 *
 * @param markdown Markdown text to scan
 * @returns The trimmed content of each known section ("" when a section is missing)
 */
function parseMarkdown(markdown: string): {
  model: string
  lora: string
  style: string
  thumbnail: string
  voice: string
  music: string
  description: string
  prompt: string
  tags: string
  orientation: string
} {
  // Matches a single heading line such as "# Title" or "### Title".
  const headingRegex = /^#+\s+(?<key>.+?)\s*$/;

  const sections = new Map<string, string[]>();
  let currentLines: string[] | undefined;

  for (const line of markdown.split(/\r?\n/)) {
    const key = headingRegex.exec(line)?.groups?.key;
    if (key) {
      // a new heading starts a fresh section (replacing any earlier one with the same title)
      currentLines = [];
      sections.set(key.trim().toLowerCase(), currentLines);
    } else if (currentLines) {
      currentLines.push(line);
    }
  }

  const section = (name: string): string => (sections.get(name) ?? []).join("\n").trim();

  return {
    description: section("description"),
    model: section("model"),
    lora: section("lora"),
    style: section("style"),
    thumbnail: section("thumbnail"),
    voice: section("voice"),
    music: section("music"),
    prompt: section("prompt"),
    tags: section("tags"),
    orientation: section("orientation"),
  };
}