spaces-ai-description / getDescriptionsAndSave.mjs
victor's picture
victor HF staff
init
ec86b59
import { promises as fs } from "fs";
import { textGeneration } from "@huggingface/inference";
import { AutoTokenizer } from "@xenova/transformers";
const HF_TOKEN = "";
if (!HF_TOKEN) {
console.error("Please set the HF_TOKEN environment variable");
process.exit(1);
}
const buildPrompt = (
appFile
) => `Write a short, imperative description of the provided app's purpose. It MUST ALWAYS be under 80 characters and a single-sentence. You can mention some technology names that you extract from the source code.
Example descriptions: "Remove background from images.", "Generate captions for images using ViT and GPT2.", "Predict the nutritional value of food based on an image of the food." Only answer with the description and nothing else. DON'T ADD ANY EXPLANATIONS.
The provided app.py file:
\`\`\`python
${appFile}
\`\`\``;
async function readTrendingSpacesFile() {
const data = await fs.readFile("trendingSpaces.json", "utf8");
return JSON.parse(data).filter((space) => space.appFile);
}
const extractTextAfterLastInst = (str) => {
const match = str.match(/\[\/INST\]([^]*?)$/);
return match ? match[1].replace(/"/g, "").trim() : "";
};
async function processSpaces() {
const spaces = await readTrendingSpacesFile();
const tokenizer = await AutoTokenizer.from_pretrained(
"mistralai/Mistral-7B-Instruct-v0.1"
);
for (const space of spaces) {
const chat = [{ role: "user", content: buildPrompt(space.appFile) }];
try {
const inputs = await tokenizer.apply_chat_template(chat, {
tokenize: false,
});
const result = await textGeneration({
model: "mistralai/Mixtral-8x7B-Instruct-v0.1",
accessToken: HF_TOKEN,
inputs,
});
space.ai_description = extractTextAfterLastInst(result.generated_text);
console.log(
`Generated text for space ${space.id}: ${space.ai_description}`
);
} catch (error) {
console.error(`Failed to generate text for space ${space.id}: ${error}`);
}
}
await saveOutputFile(spaces);
}
async function saveOutputFile(spaces) {
const filteredSpaces = spaces.filter(
(space) => space.ai_description && space.ai_description.length <= 120
);
const sortedSpaces = filteredSpaces.sort((a, b) => b.likes - a.likes);
const output = sortedSpaces.map(
({ id, emoji, likes, ai_description }) =>
`${id},${emoji},${likes},"${ai_description}"`
);
const csvOutput = `id,emoji,likes,ai_description\n${output.join("\n")}`;
await fs.writeFile("output.csv", csvOutput);
console.log("CSV output written to file");
}
// Run the process
processSpaces().catch(console.error);