rrg92's picture
Add some description
2ede869 verified
<html>
<head>
<script type="text/javascript">
// Shared state for the most recent paste. ProcessPasted assigns an object with
// `html` and `text`; FormatDiscordMessage adds `dom` and ProcessPastedMessage
// adds `result`. It stays undefined until the first paste has been read.
let CURRENT_CONTENT;
// DOMParser instance used by FormatDiscordMessage to parse the pasted HTML.
let parser = new DOMParser()
/**
 * Parse the HTML of a copied Discord "Community Highlights" message into a
 * list of highlight records.
 *
 * Only the direct element children of the parsed <body> are walked:
 *  - h1..h6 elements are never added to the text; when one contains
 *    "#<digits>" that number becomes the current edition,
 *  - <a> elements contribute their href to the current highlight,
 *  - elements with the "mention" class contribute an author name,
 *  - every other element's text is buffered, and an element whose text
 *    contains a newline closes the current highlight.
 *
 * A highlight is only emitted once at least one link has been buffered; text
 * seen before that stays in the buffer and is prepended to the next emitted
 * highlight.
 *
 * @param {string} html - HTML markup to parse.
 * @returns {{dom: Document, content: Array<{text: string, links: string, authors: string, edition: (number|undefined)}>}}
 *   The parsed document and the extracted highlights. `links` and `authors`
 *   are comma-joined strings; `edition` is undefined until a heading with an
 *   edition number has been seen.
 */
function FormatDiscordMessage(html){
    const dom = parser.parseFromString(html, "text/html");

    // Expose the parsed document on the shared state, but do not fail when the
    // function is called before any paste has populated that state.
    if(CURRENT_CONTENT)
        CURRENT_CONTENT.dom = dom;

    const topLevelElements = dom.querySelectorAll("body > *");
    const highlights = [];
    let textParts = [];
    let linkParts = [];
    let authorNames = [];
    let edition;

    // Emit the buffered highlight and reset the buffers.
    const flushContent = function(){
        const links = linkParts.join(",");

        // Without a link this is not a highlight yet: keep buffering so the
        // text is carried over into the next one.
        if(!links)
            return;

        highlights.push({
            text: textParts.join("").trim()
            ,links
            ,authors: authorNames.join(",")
            ,edition
        });

        textParts = [];
        linkParts = [];
        authorNames = [];
    };

    for(const el of topLevelElements){
        if(el.tagName === "A")
            linkParts.push(el.href);

        // Match real headings only; testing just the first letter also caught
        // HR/HEADER/HGROUP and dropped their text.
        if(/^H[1-6]$/.test(el.tagName)){
            const editionMatch = el.textContent.match(/#(\d+)/);
            if(editionMatch)
                edition = parseInt(editionMatch[1], 10);
            continue;
        }

        if(el.classList.contains("mention"))
            authorNames.push(el.textContent);

        const text = el.textContent;
        if(text)
            textParts.push(text);

        // A newline in the element's text marks the end of a highlight.
        if(text.includes("\n"))
            flushContent();
    }

    // Emit whatever is still buffered after the last element (flushContent is
    // a no-op when no link is pending).
    flushContent();

    return { dom, content: highlights };
}
/**
 * Convert the most recently pasted content into XML and show it on the page.
 *
 * Reads CURRENT_CONTENT.html, runs it through FormatDiscordMessage, stores the
 * parse result on CURRENT_CONTENT.result, builds a <highlights> XML document
 * with one <highlight> per parsed entry (children: text, links, edition,
 * author), then writes a summary into #stats and the serialized XML into the
 * #result textarea.
 *
 * @returns {void}
 */
function ProcessPastedMessage(){
    const res = FormatDiscordMessage(CURRENT_CONTENT.html);
    CURRENT_CONTENT.result = res;

    const xDoc = document.implementation.createDocument(null, "highlights");
    const rootDoc = xDoc.querySelector("highlights");

    const Stats = {
        total: 0
        // The edition is taken from the first highlight; when nothing was
        // parsed there is no first entry, so report null instead of throwing.
        ,edition: res.content[0]?.edition ?? null
    };

    for(const high of res.content){
        const xHigh = xDoc.createElement("highlight");

        // Child elements in the order they appear in the output.
        const fields = [
            ["text", high.text]
            ,["links", high.links]
            ,["edition", high.edition]
            ,["author", high.authors]
        ];

        for(const [tagName, value] of fields){
            const xField = xDoc.createElement(tagName);
            xField.textContent = value;
            xHigh.appendChild(xField);
        }

        rootDoc.appendChild(xHigh);
        Stats.total++;
    }

    // Plain text, so no HTML parsing is needed.
    document.querySelector("#stats").textContent = `Stats: total = ${Stats.total}, edition = ${Stats.edition}`;

    // Assign through .value: setting innerHTML on a textarea re-parses the
    // string and decodes character references (&amp;, &lt;), which would leave
    // the displayed XML no longer well-formed.
    document.querySelector("#result").value = new XMLSerializer().serializeToString(xDoc);
}
/**
 * Paste handler: read the clipboard through the async Clipboard API, store its
 * HTML and plain-text representations in CURRENT_CONTENT, and schedule
 * ProcessPastedMessage to render the result.
 *
 * Only the first clipboard item is used. A representation the item does not
 * offer is stored as null. Clipboard failures (for example a denied
 * permission) are logged instead of being left as unhandled rejections.
 *
 * @param {Event} content - Event object passed by the listener; not used, the
 *   data is read from navigator.clipboard instead.
 * @returns {boolean} Always false; the clipboard is read asynchronously.
 */
function ProcessPasted(content){
    navigator.clipboard.read()
        .then( async (clipboardItems) => {
            const item = clipboardItems[0];
            if(!item){
                console.log("Clipboard is empty");
                return;
            }

            const contentTypes = item.types;
            console.log("content", contentTypes);

            // Read one representation as text, or null when the item lacks it
            // (getType rejects for types that are not present).
            const readAsText = async (type) => {
                if(!contentTypes.includes(type))
                    return null;
                const blob = await item.getType(type);
                return blob.text();
            };

            const plainText = await readAsText("text/plain");
            const html = await readAsText("text/html");
            if(html === null)
                console.log("NotContainsHtml");

            CURRENT_CONTENT = {
                html
                ,text: plainText
            };

            setTimeout(ProcessPastedMessage, 100);
        })
        .catch((err) => {
            console.error("Could not read the clipboard:", err);
        });

    return false;
}
// Register ProcessPasted as the handler for "paste" events on the global object (window).
addEventListener("paste", ProcessPasted);
</script>
<style>
/* Every textarea spans the full width of its parent. */
textarea {
width: 100%;
}
/* Flex row that lays out its child panels side by side. */
.container {
display: flex;
flex-direction: row;
}
/* Each direct child panel takes half the width and 70% of the viewport height. */
.container > div {
width: 50%;
height: 70vh;
padding: 5px;
}
/* Textareas inside the container are set to the full height of their panel. */
.container textarea {
height: 100%;
}
</style>
</head>
<body>
<p>This is a simple parser for the Community Highlights posted weekly on the Hugging Face Discord.</p>
<p>Community Highlights are a valuable source of information. While Hugging Face does not provide an official list (for example via an API), use this tool to parse them and import them anywhere.</p>
<p>Use it to transform the content into a format that is easier to parse (for example, to import it into a database or a blog).</p>
<p>Instructions:</p>
<ol>
<li>Open Discord in a browser (simply opening it in the browser works)</li>
<li>Go to the desired Community Highlights message, select the whole message, and copy it</li>
<li>Paste it into the Content field</li>
<li>The parsed data is then generated in the field beside it, in XML format, which makes it easier to import anywhere</li>
</ol>
<p>TODO: JSON Support, API import</p>
<div>
<p id="stats"></p>
</div>
<div class="container">
<div>
<p>Content</p>
<textarea></textarea>
</div>
<div>
<p>XML</p>
<textarea readonly id="result"></textarea>
</div>
</div>
</body>
</html>