Refactor bot blocking setup

This commit is contained in:
2025-05-09 18:28:05 +08:00
parent 74a3b7764d
commit b5ae44c822
3 changed files with 95 additions and 62 deletions

View File

@@ -1,33 +1,32 @@
{
"name": "helenchong.dev",
"description": "Helen Chong's developer portfolio and blog website.",
"author": "Helen Chong",
"repository": {
"type": "git",
"url": "https://github.com/helenclx/helenchong.dev.git"
},
"scripts": {
"start": "bunx eleventy --serve --quiet",
"build": "bunx eleventy",
"build-ghpages": "bunx eleventy"
},
"type": "module",
"devDependencies": {
"@types/bun": "^1.2.12"
},
"dependencies": {
"@11ty/eleventy": "^3.1.0-beta.1",
"@11ty/eleventy-fetch": "^5.1.0",
"@11ty/eleventy-navigation": "^1.0.4",
"@11ty/eleventy-plugin-rss": "^2.0.4",
"@11ty/eleventy-plugin-syntaxhighlight": "^5.0.1",
"@uncenter/eleventy-plugin-toc": "^1.0.3",
"@zachleat/details-utils": "^2.0.2",
"eleventy-plugin-embed-everything": "^1.21.0",
"eleventy-plugin-vento": "^4.2.1",
"eleventy-plugin-wordcount-extended": "^0.2.1",
"markdown-it-anchor": "^9.2.0",
"markdown-it-attrs": "^4.3.1",
"markdown-it-bracketed-spans": "^1.0.1"
}
}
"name": "helenchong.dev",
"description": "Helen Chong's developer portfolio and blog website.",
"author": "Helen Chong",
"repository": {
"type": "git",
"url": "https://github.com/helenclx/helenchong.dev.git"
},
"scripts": {
"start": "bunx eleventy --serve --quiet",
"build": "bunx eleventy",
"build-ghpages": "bunx eleventy"
},
"type": "module",
"devDependencies": {
"@types/bun": "^1.2.12"
},
"dependencies": {
"@11ty/eleventy": "^3.1.0-beta.1",
"@11ty/eleventy-navigation": "^1.0.4",
"@11ty/eleventy-plugin-rss": "^2.0.4",
"@11ty/eleventy-plugin-syntaxhighlight": "^5.0.1",
"@uncenter/eleventy-plugin-toc": "^1.0.3",
"@zachleat/details-utils": "^2.0.2",
"eleventy-plugin-embed-everything": "^1.21.0",
"eleventy-plugin-vento": "^4.2.1",
"eleventy-plugin-wordcount-extended": "^0.2.1",
"markdown-it-anchor": "^9.2.0",
"markdown-it-attrs": "^4.3.1",
"markdown-it-bracketed-spans": "^1.0.1"
}
}

View File

@@ -1,32 +1,65 @@
/*
Modified from Robb Knight's script:
https://rknight.me/blog/blocking-bots-with-nginx/
*/
/**
 * User-agent names of the crawlers this site blocks.
 *
 * Each entry is used verbatim as a robots.txt `User-agent:` value and as one
 * alternative of a `|`-joined pattern, so names are kept exactly as listed
 * (including embedded spaces, slashes and letter-case variants).
 */
const blockedUserAgents = [
  "AI2Bot",
  "Ai2Bot-Dolma",
  "aiHitBot",
  "Amazonbot",
  "anthropic-ai",
  "Applebot-Extended",
  "Brightbot 1.0",
  "Bytespider",
  "ChatGPT-User",
  "Claude-Web",
  "ClaudeBot",
  "cohere-ai",
  "cohere-training-data-crawler",
  "Cotoyogi",
  "Crawlspace",
  "Diffbot",
  "DuckAssistBot",
  "FacebookBot",
  "Factset_spyderbot",
  "FirecrawlAgent",
  "FriendlyCrawler",
  "Google-Extended",
  "GoogleOther",
  "GoogleOther-Image",
  "GoogleOther-Video",
  "GPTBot",
  "iaskspider/2.0",
  "ICC-Crawler",
  "ImagesiftBot",
  "img2dataset",
  "imgproxy",
  "ISSCyberRiskCrawler",
  "Kangaroo Bot",
  "meta-externalagent",
  "Meta-ExternalAgent",
  "meta-externalfetcher",
  "Meta-ExternalFetcher",
  "NovaAct",
  "OAI-SearchBot",
  "omgili",
  "omgilibot",
  "Operator",
  "PanguBot",
  "Perplexity-User",
  "PerplexityBot",
  "PetalBot",
  "Scrapy",
  "SemrushBot-OCOB",
  "SemrushBot-SWA",
  "Sidetrade indexer bot",
  "TikTokSpider",
  "Timpibot",
  "VelenPublicWebCrawler",
  "Webzio-Extended",
  "YouBot",
];
import EleventyFetch from "@11ty/eleventy-fetch";
// robots.txt fragment: one `User-agent: <name>` line per blocked crawler,
// separated by newlines (no trailing newline).
const txt = Array.from(blockedUserAgents, (agent) => `User-agent: ${agent}`).join("\n");
// All blocked names joined with `|`, i.e. a single alternation of every entry.
const htaccess = blockedUserAgents.join("|");
/**
 * Builds the bot-blocking data from the community-maintained ai.robots.txt list.
 *
 * Downloads the upstream robots.txt as text through EleventyFetch, drops the
 * `User-agent:` lines of the crawlers listed in `botExceptions`, and derives a
 * `|`-joined pattern from the remaining `User-agent:` lines.
 *
 * @returns {Promise<{txt: string, htaccess: string}>} `txt` is the filtered
 *   robots.txt text; `htaccess` is the user-agent names joined with `|`, with
 *   every whitespace character inside a name replaced by `.*`.
 */
export default async function () {
// Upstream list of AI crawler user agents, already in robots.txt format.
const url = "https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/main/robots.txt";
// NOTE(review): `duration: "1w"` presumably keeps the fetched copy cached for
// one week — confirm against the Eleventy Fetch documentation.
let txt = await EleventyFetch(url, {
duration: "1w",
type: "text",
});
// Crawlers that stay allowed even though the upstream list names them.
const botExceptions = ["Applebot", "CCBot"];
// Exact line text to look for; only lines equal to one of these are removed.
const botExceptionsFullStr = botExceptions.map(bot => "User-agent: " + bot)
// Remove the exception lines, leaving every other line of the file untouched.
txt = txt
.split("\n")
.filter((line) => !botExceptionsFullStr.includes(line))
.join("\n");
// Collect the name after each `User-agent:` prefix; whitespace inside a name
// becomes `.*` so the name can be used as a pattern alternative.
const bots = txt
.split("\n")
.filter((line) => line.startsWith("User-agent:"))
.map((line) => line.split(":")[1].trim().replace(/\s/gi, ".*"));
return {
txt: txt,
htaccess: bots.join('|'),
};
// Default export: the robots.txt text plus the `|`-joined pattern, in which
// every whitespace character is replaced by `.*` so that multi-word names
// (e.g. "Kangaroo Bot") still match as one alternative.
export default {
  txt,
  htaccess: htaccess.replace(/\s/gi, ".*"),
};

View File

@@ -6,5 +6,6 @@ User-agent: *
Disallow:
{{ robots.txt }}
Disallow: /
Sitemap: {{ sitemeta.siteUrl }}/sitemap.xml
Sitemap: {{ sitemeta.siteUrl }}/sitemap.xml