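<!doctype html>
<html lang="en">

<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Community Highlights Parser</title>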
|
|
|
<script type="text/javascript"> |
|
// State of the most recent paste. ProcessPasted assigns `{ html, text }` here;
// FormatDiscordMessage attaches `.dom` and ProcessPastedMessage attaches `.result`.
let CURRENT_CONTENT;

// Single DOMParser instance, reused for every pasted HTML string.
let parser = new DOMParser();
|
|
|
/**
 * Parses the HTML of a copied Discord "Community Highlights" message into a
 * list of highlight entries.
 *
 * Only the direct element children of the parsed <body> are visited, in order:
 *   - <a> elements contribute their `href` to the current entry's links;
 *   - <h1>..<h6> elements are never added to the text; the first `#<digits>`
 *     found in one becomes the edition number of the entries flushed afterwards;
 *   - elements with class "mention" contribute their text as an author;
 *   - the text of every non-heading element is appended to the entry's text.
 *
 * An entry is flushed whenever an element's text contains a line break, and
 * once more after the last element. A flush without any link emits nothing and
 * keeps the buffers, so link-less lines are carried over into the next entry.
 *
 * Side effect: when CURRENT_CONTENT is set, the parsed document is stored on
 * CURRENT_CONTENT.dom.
 *
 * @param {string} html - HTML markup to parse; null/undefined is treated as "".
 * @returns {{dom: Document, content: Array<{text: string, links: string, authors: string, edition: (number|undefined)}>}}
 *          `links` and `authors` are comma-joined strings.
 */
function FormatDiscordMessage(html){
    // Passing null straight through would be stringified to the text "null".
    let dom = parser.parseFromString(html ?? "", "text/html");

    // Guarded so the function can also be called before anything was pasted.
    if(CURRENT_CONTENT)
        CURRENT_CONTENT.dom = dom;

    let allChilds = dom.querySelectorAll("body > *");

    let FullContent = [];

    // Buffers for the entry currently being assembled.
    let buffContent = [];
    let buffLinks = [];
    let authorName = [];
    let edition;

    // Emits the buffered entry (if it has at least one link) and resets the buffers.
    let flushContent = function(){
        let links = buffLinks.join(",");

        // No link yet: nothing to emit, keep buffering.
        if(!links)
            return;

        FullContent.push({
            text: buffContent.join("").trim()
            ,links
            ,authors: authorName.join(",")
            ,edition
        });

        buffContent = [];
        buffLinks = [];
        authorName = [];
    };

    for(let c of allChilds){
        if(c.tagName == "A")
            buffLinks.push(c.href);

        // Match real headings only; checking just the first letter would also
        // swallow <hr>, <header> and <hgroup>.
        if(/^H[1-6]$/.test(c.tagName)){
            let editionMatch = c.textContent.match(/#(\d+)/);

            if(editionMatch)
                edition = parseInt(editionMatch[1], 10);

            continue;
        }

        if(c.classList.contains("mention"))
            authorName.push(c.textContent);

        let text = c.textContent;

        if(text)
            buffContent.push(text);

        // A line break ends the current entry.
        if(text.includes("\n")){
            console.log("line break found!");
            flushContent();
        }
    }

    // Emit whatever is still buffered after the last element. (Testing the
    // array itself would always be truthy, so test the lengths.)
    if(buffContent.length || buffLinks.length){
        flushContent();
    }

    return { dom, content: FullContent };
}
|
|
|
/**
 * Parses CURRENT_CONTENT.html with FormatDiscordMessage and renders the
 * outcome: the highlights serialized as XML go into the #result textarea and a
 * one-line summary goes into #stats. The parse result is also kept on
 * CURRENT_CONTENT.result.
 *
 * Generated document shape:
 *   <highlights>
 *     <highlight><text/><links/><edition/><author/></highlight>
 *     ...
 *   </highlights>
 *
 * Does nothing when no content has been stored in CURRENT_CONTENT yet.
 */
function ProcessPastedMessage(){
    if(!CURRENT_CONTENT)
        return;

    let res = FormatDiscordMessage(CURRENT_CONTENT.html);

    CURRENT_CONTENT.result = res;

    let out = document.querySelector("#result");

    let xDoc = document.implementation.createDocument(null, "highlights");
    let rootDoc = xDoc.documentElement;

    let Stats = {
        total: res.content.length
        // Edition of the first entry, or null when nothing was parsed
        // (reading content[0].edition unguarded throws on an empty list).
        ,edition: res.content[0]?.edition ?? null
    };

    for(let high of res.content){
        let xHigh = xDoc.createElement("highlight");

        // Child order is part of the output: text, links, edition, author.
        let fields = [
            ["text", high.text]
            ,["links", high.links]
            ,["edition", high.edition]
            ,["author", high.authors]
        ];

        for(let [tag, value] of fields){
            let node = xDoc.createElement(tag);
            node.textContent = value ?? "";
            xHigh.appendChild(node);
        }

        rootDoc.appendChild(xHigh);
    }

    document.querySelector("#stats").textContent = `Stats: total = ${Stats.total}, edition = ${Stats.edition}`;

    // Assign through .value: writing innerHTML on a <textarea> decodes
    // character references, so the serializer's "&amp;" / "&lt;" would turn
    // back into raw "&" / "<" and the displayed XML would be malformed.
    out.value = new XMLSerializer().serializeToString(xDoc);
}
|
|
|
|
|
/**
 * "paste" event handler. Reads the first clipboard item through the async
 * Clipboard API, stores its HTML and plain-text flavours in CURRENT_CONTENT as
 * `{ html, text }` (a missing flavour is stored as null) and, when HTML is
 * present, schedules ProcessPastedMessage.
 *
 * @param {Event} content - the event object when invoked as a listener; it is
 *        not used (the parameter name is kept for compatibility).
 * @returns {boolean} always false. Kept for compatibility; the return value of
 *          a listener registered with addEventListener is ignored.
 */
function ProcessPasted(content){
    if(!navigator.clipboard || !navigator.clipboard.read){
        console.error("Async Clipboard API is not available in this context");
        return false;
    }

    navigator.clipboard.read()
        .then( async (items) => {
            let item = items[0];

            if(!item){
                console.log("NotContainsHtml");
                return;
            }

            let contentTypes = item.types;
            console.log("content", contentTypes);

            // Reads one flavour as text; resolves to null when the item does
            // not offer that type (getType() would reject for it).
            let readAs = async (type) =>
                contentTypes.includes(type) ? (await item.getType(type)).text() : null;

            let plainText = await readAs("text/plain");
            let html = await readAs("text/html");

            CURRENT_CONTENT = {
                html
                ,text: plainText
            };

            if(html === null){
                // Nothing to parse without the HTML flavour.
                console.log("NotContainsHtml");
                return;
            }

            setTimeout(ProcessPastedMessage, 100);
        })
        .catch( (err) => {
            // e.g. clipboard permission denied
            console.error("Could not read clipboard:", err);
        });

    return false;
}
|
|
|
// Run ProcessPasted for every "paste" event that reaches the window.
window.addEventListener("paste", ProcessPasted);
|
|
|
|
|
</script> |
|
<style> |
|
/* Text areas fill the width of their pane. border-box keeps their own padding
   and border inside that width instead of overflowing the pane. */
textarea {
  box-sizing: border-box;
  width: 100%;
}

/* The two panes (pasted content / generated XML) sit side by side. */
.container {
  display: flex;
  flex-direction: row;
}

/* Each pane takes half of the row; with border-box the 5px padding is counted
   inside the 50% rather than added on top of it. */
.container > div {
  box-sizing: border-box;
  width: 50%;
  height: 70vh;
  padding: 5px;
}

.container textarea {
  height: 100%;
}
|
</style> |
|
|
|
</head> |
|
<body> |
|
<p>This is a simple parser for Community Highlights, posted weekly on the Hugging Face Discord.</p>
|
<p>Community Highlights are valuable information. While Hugging Face doesn't provide an official list (via some API), use this tool to parse them and import them anywhere.</p>
|
<p>Use it to transform the content into something easier to parse (for example, to import into a database or a blog).</p>
|
<p>Instructions:</p>
|
<ol> |
|
<li>Open Discord in a browser (simply opening it in the browser works).</li>
|
<li>Go to the desired Community Highlights message. Select the whole message and copy it.</li>
|
<li>Paste it into the Content field.</li>
|
<li>The parsed data is then generated in the field beside it, in XML format. That format is easier to import anywhere.</li>
|
</ol> |
|
<p>TODO: JSON Support, API import</p> |
|
<div> |
|
<p id="stats"></p> |
|
</div> |
|
<div class="container"> |
|
<div> |
|
<p>Content</p> |
|
<textarea></textarea> |
|
</div> |
|
|
|
<div> |
|
<p>XML</p> |
|
<textarea readonly id="result"></textarea> |
|
</div> |
|
</div> |
|
|
|
|
|
|
|
|
|
</body> |
|
</html> |