图集短视频去水印云函数开发实践——小红书
前两篇主要讲解了抖音和快手的图集短视频去水印解析的云函数开发实践,今天说一说小红书图集解析的云函数实践。
图集短视频去水印云函数开发实践——抖音
图集短视频去水印云函数开发实践——快手
其实各平台的思路大同小异:首先获取到小红书的分享链接,请求它拿到重定向后的原地址,然后直接请求这个地址,获取到网页HTML,再从页面里的 window.__INITIAL_STATE__ 脚本中提取笔记数据,这里比抖音快手还要简单一些。具体代码如下:
async getRedirectUrl(url) {
try {
const response = await this.curl(url, {
method: "GET",
headers: this.headers,
followRedirect: false,
});
return this.safeGet(response, 'headers.location', url);
} catch (error) {
console.error("获取重定向URL时出错:", error);
throw error;
}
}
async getHtml(url) {
try {
const response = await this.curl(url, {
headers: this.headers,
dataType: "text",
});
return this.safeGet(response, 'data', null);
} catch (error) {
console.error("获取网页内容失败:", error);
return null;
}
}
parseHtml(html) {
const jsonMatch = html.match(
/<script>window\.__INITIAL_STATE__=(.*?)<\/script>/
);
if (!jsonMatch || jsonMatch.length < 2) {
console.error("无法找到笔记信息");
return null;
}
try {
let jsonString = jsonMatch[1].replace(/undefined/g, "null");
const data = JSON.parse(jsonString);
const noteId = Object.keys(this.safeGet(data, 'note.noteDetailMap', {}))[0];
if (!noteId) {
console.error("无法找到笔记ID");
return null;
}
const noteData = this.safeGet(data, `note.noteDetailMap.${noteId}.note`, null);
if (!noteData) {
console.error("无法获取笔记数据");
return null;
}
const result = {
title: this.safeGet(noteData, 'title', ''),
desc: this.safeGet(noteData, 'desc', ''),
type: this.safeGet(noteData, 'type', ''),
user: {
nickname: this.safeGet(noteData, 'user.nickname', ''),
avatar: this.safeGet(noteData, 'user.avatar', ''),
userId: this.safeGet(noteData, 'user.userId', ''),
},
time: this.safeGet(noteData, 'time', ''),
likes: this.safeGet(noteData, 'interactInfo.likedCount', '0'),
comments: this.safeGet(noteData, 'interactInfo.commentCount', '0'),
collects: this.safeGet(noteData, 'interactInfo.collectedCount', '0'),
view_count: this.safeGet(noteData, 'interactInfo.viewCount', '0'),
share_count: this.safeGet(noteData, 'interactInfo.shareCount', '0'),
platform: "xiaohongshu",
};
if (noteData.type === "video") {
result.video = {
url: this.safeGet(noteData, 'video.media.stream.h264.0.masterUrl', ''),
cover: this.safeGet(noteData, 'video.cover.url', ''),
};
} else {
result.images = this.safeGet(noteData, 'imageList', []).map((img) => ({
url: this.safeGet(img, 'urlDefault', '') || this.safeGet(img, 'url', ''),
width: this.safeGet(img, 'width', 0),
height: this.safeGet(img, 'height', 0),
}));
}
return result;
} catch (error) {
console.error("解析笔记信息失败:", error);
return null;
}
}
// 辅助方法:将字符串解析为数字
parseNumber(value) {
if (typeof value === "number") return value;
if (!value) return 0;
const num = parseInt(value.replace(/[^0-9]/g, ""));
return isNaN(num) ? 0 : num;
}
safeGet(obj, path, defaultValue = '') {
return path.split('.').reduce((acc, part) => {
if (acc && typeof acc === 'object' && part in acc) {
return acc[part];
}
return defaultValue;
}, obj);
}
没那么多废话了,看代码应该就可以明白了,不明白的留言问就好了。