<!doctype html>
<html lang="en">
<head>
<!-- Doctype keeps the page in standards mode; charset must come first in <head>. -->
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Community Highlights Parser</title>
<script>
// Shared parser used to turn pasted HTML into a detached document.
const parser = new DOMParser();

// State of the most recent paste. The functions below store the clipboard
// `html` / `text`, the parsed `dom` and the parse `result` on this object.
let CURRENT_CONTENT = {};
/**
 * Splits the HTML of a copied Discord "Community Highlights" message into
 * individual highlight records.
 *
 * The direct element children of the parsed <body> are visited in order:
 *  - <a> elements contribute their href to the current record's links;
 *  - <h1>..<h6> elements are only scanned for an edition marker ("#123")
 *    and contribute no text;
 *  - elements with the "mention" class contribute an author name;
 *  - every element's text is buffered, and an element whose text contains a
 *    line break closes the current record.
 *
 * Side effect: the parsed document is stored on CURRENT_CONTENT.dom.
 *
 * @param {string} html - HTML markup of the copied message.
 * @returns {{dom: Document, content: Array<{text: string, links: string,
 *            authors: string, edition: (number|undefined)}>}}
 *          `links` and `authors` are comma-joined strings; `edition` is
 *          undefined until a heading with "#<digits>" has been seen.
 */
function FormatDiscordMessage(html){
  let dom = parser.parseFromString(html, "text/html");
  CURRENT_CONTENT.dom = dom;

  let FullContent = [];
  let buffContent = [];
  let buffLinks = [];
  let authorName = [];
  let edition;

  // Emits one record from the buffers. A record is only emitted when at
  // least one link was collected; otherwise the buffers are left untouched,
  // so link-less lines carry over into the next record.
  let flushContent = function(){
    console.log("line break found!");
    let links = buffLinks.join(",");
    if(!links)
      return;

    FullContent.push({
      text: buffContent.join("").trim()
      ,links
      ,authors: authorName.join(",")
      ,edition
    });

    buffContent = [];
    buffLinks = [];
    authorName = [];
  };

  for(let c of dom.querySelectorAll("body > *")){
    // Anchors without an href report "" and would only add stray commas.
    if(c.tagName == "A" && c.href)
      buffLinks.push(c.href);

    // Match real headings only; a first-letter test would also catch
    // <hr>, <header> and <hgroup>.
    if(/^H[1-6]$/.test(c.tagName)){
      let editionMatch = c.textContent.match(/#(\d+)/);
      if(editionMatch)
        edition = parseInt(editionMatch[1], 10);
      continue;
    }

    if(c.classList.contains("mention"))
      authorName.push(c.textContent);

    let text = c.textContent;
    if(text)
      buffContent.push(text);

    // A line break inside the element's text ends the current highlight.
    if(text.includes("\n"))
      flushContent();
  }

  // Emit whatever is still pending after the last element. (An array is
  // always truthy, so the link count is what has to be tested here.)
  if(buffLinks.length)
    flushContent();

  return { dom, content: FullContent };
}
/**
 * Parses the pasted HTML held in CURRENT_CONTENT.html and renders the result.
 *
 * Builds an XML document of the form
 *   <highlights><highlight><text/><links/><edition/><author/></highlight>…</highlights>
 * from the records returned by FormatDiscordMessage, writes its serialized
 * form into the #result textarea and a short summary into #stats.
 *
 * Side effect: the parse result is stored on CURRENT_CONTENT.result.
 */
function ProcessPastedMessage(){
  let res = FormatDiscordMessage(CURRENT_CONTENT.html);
  CURRENT_CONTENT.result = res;

  let out = document.querySelector("#result");
  let xDoc = document.implementation.createDocument(null, "highlights");
  let rootDoc = xDoc.documentElement;

  // Appends <tag>value</tag> to `parent`; textContent takes care of escaping.
  let appendField = function(parent, tag, value){
    let node = xDoc.createElement(tag);
    node.textContent = value;
    parent.appendChild(node);
  };

  let Stats = {
    total: 0
    // The edition shown is the one of the first highlight; a paste that
    // yields no highlight at all must not crash here.
    ,edition: res.content.length ? res.content[0].edition : null
  };

  for(let high of res.content){
    let xHigh = xDoc.createElement("highlight");
    appendField(xHigh, "text", high.text);
    appendField(xHigh, "links", high.links);
    appendField(xHigh, "edition", high.edition);
    appendField(xHigh, "author", high.authors);
    rootDoc.appendChild(xHigh);
    Stats.total++;
  }

  // Plain text only, so textContent is enough and avoids HTML parsing.
  document.querySelector("#stats").textContent = `Stats: total = ${Stats.total}, edition = ${Stats.edition}`;

  // Use .value, not .innerHTML: innerHTML on a <textarea> decodes character
  // references, turning "&amp;" / "&lt;" in the XML back into raw "&" / "<".
  let serializer = new XMLSerializer();
  out.value = serializer.serializeToString(xDoc);
}
/**
 * "paste" event handler. Reads the first clipboard item through the async
 * Clipboard API, stores its HTML and plain-text flavours in CURRENT_CONTENT
 * and, when HTML is available, schedules ProcessPastedMessage.
 *
 * @param {Event} content - the paste event supplied by addEventListener;
 *        it is not used, the clipboard is read via navigator.clipboard.
 * @returns {boolean} always false.
 */
function ProcessPasted(content){
  navigator.clipboard.read()
    .then(async (items) => {
      let item = items[0];
      if(!item){
        // Empty clipboard: nothing to parse.
        console.log("NotContainsHtml");
        return;
      }

      console.log("content", item.types);
      let contentTypes = item.types;

      // getType() rejects for a flavour the item does not carry, so check first.
      let plainText = null;
      if(contentTypes.includes("text/plain"))
        plainText = await (await item.getType("text/plain")).text();

      let html = null;
      if(contentTypes.includes("text/html")){
        let htmlContent = await item.getType("text/html");
        console.log("html:", htmlContent);
        html = await htmlContent.text();
      } else {
        console.log("NotContainsHtml");
      }

      CURRENT_CONTENT = {
        html
        ,text: plainText
      };

      // Without HTML there is nothing to parse. The short delay is kept from
      // the original code (presumably to let the native paste finish first).
      if(html !== null)
        setTimeout(ProcessPastedMessage, 100);
    })
    .catch((err) => {
      // Permission denied, insecure context, unsupported browser, ...
      console.error("Could not read clipboard:", err);
    });
  return false;
}

// Pasting anywhere on the page triggers the parser.
addEventListener("paste", ProcessPasted);
</script> |
<style> |
/* Text areas fill their column. border-box keeps the 100% width/height from
   overflowing once the textarea's own padding and border are added. */
textarea {
  box-sizing: border-box;
  width: 100%;
}

/* Two equal columns side by side: pasted content on the left, XML on the right. */
.container {
  display: flex;
  flex-direction: row;
}

.container > div {
  width: 50%;
  height: 70vh;
  padding: 5px;
}

.container textarea {
  height: 100%;
}
</style> |
</head> |
<body> |
<p>This is a simple parser for the Community Highlights posted weekly on the Hugging Face Discord.</p>
<p>Community Highlights are valuable information. While Hugging Face doesn't provide an official list (via some API), use this tool to parse them and import them anywhere.</p>
<p>Use it to transform the content into something easier to parse (for example, to import into a database or a blog).</p>
<p>Instructions:</p>
<ol> |
<li>Open Discord in a browser (simply opening it in the browser is enough)</li>
<li>Go to the desired Community Highlights message. Select the whole message and copy it</li>
<li>Paste it into the Content field</li>
<li>The parsed data is then generated in the field next to it, in XML format. That format is easy to import anywhere</li>
</ol> |
<p>TODO: JSON Support, API import</p> |
<div>
  <!-- role="status" lets assistive technology announce the summary when it changes. -->
  <p id="stats" role="status"></p>
</div>
<div class="container">
  <div>
    <p><label for="content">Content</label></p>
    <textarea id="content"></textarea>
  </div>
  <div>
    <p><label for="result">XML</label></p>
    <textarea readonly id="result"></textarea>
  </div>
</div>
</body> |
</html> |