| | const puppeteer = require('puppeteer-core'); |
| |
|
| | |
/**
 * Rebuilds a direct MP4 URL from the first Pinterest video link found in a
 * pin page's HTML.
 *
 * The first `https://v1.pinimg.com/videos/mc/...` link is located, its
 * 32-character hex hash is extracted, and a URL under `/videos/mc/720p/` is
 * assembled from that hash (the first three byte pairs of the hash form the
 * directory path).
 *
 * @param {string} html - Serialized HTML of a pin page.
 * @returns {string|null} The rebuilt MP4 URL, or `null` when the page has no
 *   video link or the link carries no 32-character hex hash.
 */
function extractPinterestVideoUrl(html) {
  const found = html.match(/https:\/\/v1\.pinimg\.com\/videos\/mc\/[^\s"']+/);
  if (!found) {
    return null;
  }

  // NOTE(review): the second character class contains two CJK characters that
  // look like an encoding artefact (possibly an escaped backslash originally).
  // It is kept byte-for-byte here; confirm the intended character set.
  const rawUrl = found[0].replace(/\\u002F/g, '/').replace(/[奖励"']/g, '');

  const hashMatch = rawUrl.match(/[a-f0-9]{32}/);
  if (!hashMatch) {
    return null;
  }

  const hash = hashMatch[0];
  return `https://v1.pinimg.com/videos/mc/720p/${hash.slice(0, 2)}/${hash.slice(2, 4)}/${hash.slice(4, 6)}/${hash}.mp4`;
}

/**
 * Searches Pinterest for `query`, opens up to 40 distinct pin pages from the
 * results, and writes one rebuilt MP4 URL per line to stdout for every pin
 * page that references a Pinterest-hosted video.
 *
 * A pin page that fails to load or parse is skipped (the reason is logged to
 * stderr) so that one bad pin does not abort the whole run. Failures on the
 * search page itself propagate to the caller. Every page opened here is
 * closed again, including on failure.
 *
 * @param {object} browser - Browser handle exposing `newPage()`.
 * @param {string} query - Free-text search term; it is URL-encoded here.
 * @returns {Promise<void>}
 */
async function scrapePinterest(browser, query) {
  const maxPins = 40;
  const page = await browser.newPage();

  try {
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36');

    const searchUrl = `https://www.pinterest.com/search/pins/?q=${encodeURIComponent(query)}&rs=typed`;
    await page.goto(searchUrl, { waitUntil: 'networkidle2', timeout: 30000 });

    // Scroll three times, pausing after each, so that more results get a
    // chance to be added to the page before links are collected.
    await page.evaluate(async () => {
      for (let i = 0; i < 3; i++) {
        window.scrollBy(0, window.innerHeight * 2);
        await new Promise((resolve) => setTimeout(resolve, 1500));
      }
    });

    const pinUrls = await page.evaluate(() => {
      return Array.from(document.querySelectorAll('a[href*="/pin/"]'))
        .map((a) => a.href)
        .filter((href) => href.includes('/pin/'));
    });

    const uniquePins = [...new Set(pinUrls)].slice(0, maxPins);

    for (const pinUrl of uniquePins) {
      const pinPage = await browser.newPage();
      try {
        await pinPage.goto(pinUrl, { waitUntil: 'domcontentloaded', timeout: 8000 });
        const videoUrl = extractPinterestVideoUrl(await pinPage.content());
        if (videoUrl) {
          process.stdout.write(`${videoUrl}\n`);
        }
      } catch (err) {
        // Best effort: report the failure on stderr and move on to the next pin.
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`Skipping ${pinUrl}: ${reason}`);
      } finally {
        await pinPage.close();
      }
    }
  } finally {
    // Release the search page even when navigation or evaluation failed.
    await page.close();
  }
}
| |
|
| | |
/**
 * Searches Pexels videos for `query` and writes each distinct MP4 source URL
 * found on the results page to stdout, one per line, with any query string
 * removed.
 *
 * URLs are de-duplicated after the query string is stripped, so the same file
 * referenced with different parameters is printed once. `<source>` elements
 * with an empty `src` are ignored. The page opened here is closed again even
 * when navigation or evaluation fails; such failures propagate to the caller.
 *
 * @param {object} browser - Browser handle exposing `newPage()`.
 * @param {string} query - Free-text search term; it is URL-encoded here.
 * @param {string} [orientation='Any'] - `'Portrait'` or `'Landscape'` adds the
 *   matching `orientation` query parameter; any other value adds no filter.
 * @returns {Promise<void>}
 */
async function scrapePexels(browser, query, orientation = 'Any') {
  const page = await browser.newPage();

  try {
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36');

    let pexelsUrl = `https://www.pexels.com/search/videos/${encodeURIComponent(query)}/`;
    if (orientation === 'Portrait') {
      pexelsUrl += '?orientation=portrait';
    } else if (orientation === 'Landscape') {
      pexelsUrl += '?orientation=landscape';
    }

    // Progress goes to stderr so that stdout carries nothing but result URLs.
    console.error(`Fallback: Searching Pexels for ${orientation}...`);
    await page.goto(pexelsUrl, { waitUntil: 'networkidle2', timeout: 30000 });

    const videoUrls = await page.evaluate(() => {
      const sources = Array.from(document.querySelectorAll('source[type="video/mp4"]'));
      return sources.map((s) => s.src);
    });

    // Strip query strings first, then de-duplicate; a Set keeps first-seen order.
    const cleanUrls = new Set(
      videoUrls
        .filter((url) => typeof url === 'string' && url !== '')
        .map((url) => url.split('?')[0]),
    );

    for (const cleanUrl of cleanUrls) {
      process.stdout.write(`${cleanUrl}\n`);
    }
  } finally {
    // Release the page even when navigation or evaluation failed.
    await page.close();
  }
}
| |
|
| | |
// Command-line entry point.
//
// Usage: node <script> <pinterest|pexels> <query> [Portrait|Landscape]
//
// Result URLs are written to stdout by the scrapers; diagnostics go to stderr.
(async () => {
  const args = process.argv.slice(2);
  const mode = args[0];
  const query = args[1];
  const orientation = args[2] || 'Any';

  // Validate before launching the browser: a missing query would otherwise be
  // URL-encoded as the literal text "undefined", and an unknown mode would
  // start Chromium only to do nothing.
  const isKnownMode = mode === 'pinterest' || mode === 'pexels';
  if (!isKnownMode || !query) {
    console.error('Usage: <pinterest|pexels> <query> [Portrait|Landscape]');
    process.exitCode = 1;
    return;
  }

  const browser = await puppeteer.launch({
    executablePath: '/usr/bin/chromium',
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'],
  });

  try {
    if (mode === 'pinterest') {
      await scrapePinterest(browser, query);
    } else {
      await scrapePexels(browser, query, orientation);
    }
  } catch (e) {
    // Scrape failures are reported but do not change the exit code, so URLs
    // already written to stdout remain usable by the caller.
    console.error(e);
  } finally {
    // Always shut the browser down so no Chromium process is left behind.
    await browser.close();
  }
})();