Spaces:

issaocean
/

cursor

Running

App Files Files Community

cursor / src /utils /extractCookieFromCsv.js

0412Xu's picture

Upload 2 files

89a92e3 verified 5 months ago

history blame contribute delete

9.18 kB

	const fs = require('fs');
	const path = require('path');
	const csv = require('csv-parser');

	/**
	 * Extract cookies from a CSV file.
	 *
	 * Extraction runs in stages and stops at the first stage that finds anything:
	 *   1. Regex scan of the raw file text: JWT-shaped tokens, plus whatever
	 *      extractCookiesFromText returns when the text contains `user_`.
	 *   2. Row-by-row parsing with csv-parser: well-known column names are
	 *      checked first, then every column of the row.
	 *   3. Line-by-line regex scan of the raw text.
	 * Results are de-duplicated in first-seen order and filtered through
	 * validateCookies before being returned.
	 *
	 * A missing or empty file resolves to an empty array. The promise rejects
	 * only when a synchronous error is thrown while reading the file.
	 *
	 * @param {string} csvFilePath - Path of the CSV file to read.
	 * @returns {Promise<string[]>} The extracted cookies.
	 */
	async function extractCookiesFromCsv(csvFilePath) {
	  return new Promise((resolve, reject) => {
	    try {
	      // Bail out early when the file does not exist.
	      if (!fs.existsSync(csvFilePath)) {
	        console.error(`CSV文件不存在: ${csvFilePath}`);
	        return resolve([]);
	      }

	      const fileContent = fs.readFileSync(csvFilePath, 'utf8');
	      // NOTE(review): this prints raw file content, which may include credentials.
	      console.log(`文件内容前200个字符: ${fileContent.substring(0, 200)}`);

	      // Nothing to extract from an empty or whitespace-only file.
	      if (!fileContent || fileContent.trim() === '') {
	        console.error('CSV文件为空');
	        return resolve([]);
	      }

	      // A Set keeps first-seen order and makes de-duplication O(1) per insert.
	      const found = new Set();
	      const addAll = (values) => {
	        for (const value of values) {
	          found.add(value);
	        }
	      };
	      const validated = () => validateCookies([...found]);

	      // Stage 1: scan the raw text for JWT-shaped tokens (new format).
	      const jwtRegex = /ey[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+/g;
	      const jwtMatches = fileContent.match(jwtRegex);
	      if (jwtMatches) {
	        console.log(`直接从文件内容中提取到 ${jwtMatches.length} 个JWT token格式的Cookie`);
	        addAll(jwtMatches);
	      }

	      const hasTokenKeyword = fileContent.includes('token');
	      const hasUserPrefix = fileContent.includes('user_');
	      console.log(`文件包含"token"关键字: ${hasTokenKeyword}`);
	      console.log(`文件包含"user_"前缀: ${hasUserPrefix}`);

	      // Old-format cookies are only looked for when the `user_` prefix is present.
	      if (hasUserPrefix) {
	        const oldFormatCookies = extractCookiesFromText(fileContent);
	        if (oldFormatCookies.length > 0) {
	          console.log(`从文件内容中提取到 ${oldFormatCookies.length} 个旧格式Cookie`);
	          addAll(oldFormatCookies);
	        }
	      }

	      if (found.size > 0) {
	        console.log(`总共提取到 ${found.size} 个Cookie`);
	        return resolve(validated());
	      }

	      // Stage 2: parse the file as CSV and inspect each row.
	      const possibleTokenFields = ['token', 'cookie', 'value', 'Token', 'Cookie', 'Value', 'jwt', 'JWT'];

	      const handleStreamError = (error) => {
	        console.error('解析CSV文件时出错:', error);

	        // Return whatever was collected before the failure.
	        if (found.size > 0) {
	          console.log(`解析出错但已提取到 ${found.size} 个Cookie，进行验证后返回`);
	          resolve(validated());
	          return;
	        }

	        // Otherwise fall back to regex extraction on the raw text.
	        console.log('尝试其他方法提取Cookie...');
	        addAll(fileContent.match(jwtRegex) || []);
	        addAll(extractCookiesFromText(fileContent));
	        console.log(`通过其他方法提取到 ${found.size} 个Cookie`);
	        resolve(validated());
	      };

	      fs.createReadStream(csvFilePath)
	        // pipe() does not forward source errors, so listen on the reader too.
	        .on('error', handleStreamError)
	        .pipe(csv())
	        .on('data', (row) => {
	          const value = pickCookieFromRow(row, possibleTokenFields);
	          if (value !== undefined) {
	            found.add(value);
	          }
	        })
	        .on('end', () => {
	          console.log(`从CSV解析中提取到 ${found.size} 个Cookie`);

	          // Stage 3: CSV parsing found nothing, so scan the text line by line.
	          if (found.size === 0) {
	            console.log('尝试按行读取文件...');
	            for (const line of fileContent.split('\n')) {
	              if (line.includes('ey')) {
	                addAll(line.match(jwtRegex) || []);
	              }
	              if (line.includes('user_')) {
	                addAll(extractCookiesFromText(line));
	              }
	            }
	            console.log(`按行读取后提取到 ${found.size} 个Cookie`);
	          }

	          resolve(validated());
	        })
	        .on('error', handleStreamError);
	    } catch (error) {
	      console.error('提取Cookie时出错:', error);
	      reject(error);
	    }
	  });
	}

	/**
	 * Tell whether a CSV cell looks like a cookie: a string that either starts
	 * with `ey` and contains a dot, or contains `user_`.
	 * @param {*} value - Cell value to test.
	 * @returns {boolean} True when the value looks like a cookie.
	 */
	function isCookieLikeValue(value) {
	  if (typeof value !== 'string' || value === '') {
	    return false;
	  }
	  return (value.startsWith('ey') && value.includes('.')) || value.includes('user_');
	}

	/**
	 * Pick at most one cookie from a parsed CSV row. The preferred column names
	 * are tried in order; if none of them holds a cookie-like value, every
	 * column of this row is scanned.
	 * @param {Object} row - Parsed CSV row (column name -> cell value).
	 * @param {string[]} preferredFields - Column names to try first.
	 * @returns {string|undefined} The cookie-like value, or undefined if the row has none.
	 */
	function pickCookieFromRow(row, preferredFields) {
	  for (const field of preferredFields) {
	    if (isCookieLikeValue(row[field])) {
	      return row[field];
	    }
	  }
	  // The fallback is decided per row, so rows after the first hit are still scanned.
	  for (const value of Object.values(row)) {
	    if (isCookieLikeValue(value)) {
	      return value;
	    }
	  }
	  return undefined;
	}

	/**
	 * Collect every cookie-shaped token that appears in a piece of text.
	 *
	 * Two shapes are recognised, in this order: the old `user_...%3A%3A...`
	 * form, then JWT-style tokens beginning with `ey`. Duplicates are dropped
	 * and first-seen order is kept.
	 *
	 * @param {string} text - Text to search.
	 * @returns {string[]} The distinct matches, old-format matches first.
	 */
	function extractCookiesFromText(text) {
	  const legacyPattern = /user_[a-zA-Z0-9%]+%3A%3A[a-zA-Z0-9%\.\_\-]+/g;
	  const tokenPattern = /ey[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+/g;

	  // A Set remembers insertion order, so the result order matches a manual dedupe.
	  const unique = new Set();
	  for (const pattern of [legacyPattern, tokenPattern]) {
	    const hits = text.match(pattern) || [];
	    for (const hit of hits) {
	      unique.add(hit);
	    }
	  }

	  return [...unique];
	}

	/**
	 * Drop cookies whose JWT part is visibly incomplete.
	 *
	 * - A value starting with `ey` that contains a dot is kept only when it
	 *   splits into exactly three dot-separated parts.
	 * - A value containing `%3A%3A` that splits into exactly two pieces is kept
	 *   only when the second piece has exactly three dot-separated parts.
	 * - Anything else is kept unchanged.
	 * A warning is logged for each rejected cookie.
	 *
	 * @param {string[]} cookies - Cookies to check.
	 * @returns {string[]} The cookies that passed the check.
	 */
	function validateCookies(cookies) {
	  const isComplete = (candidate) => {
	    // New format: a bare JWT.
	    const looksLikeJwt = candidate.startsWith('ey') && candidate.includes('.');
	    if (looksLikeJwt) {
	      if (candidate.split('.').length === 3) {
	        return true;
	      }
	      console.warn(`检测到不完整的JWT(新格式): ${candidate}`);
	      return false;
	    }

	    // Unrecognised shapes are kept by default.
	    if (!candidate.includes('%3A%3A')) {
	      return true;
	    }
	    const pieces = candidate.split('%3A%3A');
	    if (pieces.length !== 2) {
	      return true;
	    }

	    // Old format: the piece after the separator must be a three-part JWT.
	    const segmentCount = pieces[1].split('.').length;
	    if (segmentCount === 3) {
	      return true;
	    }
	    console.warn(`检测到不完整的JWT(旧格式): ${candidate}`);
	    return false;
	  };

	  return cookies.filter(isComplete);
	}

	// Public API of this module: only the CSV entry point is exported.
	module.exports = {
	extractCookiesFromCsv
	};