Refactor bot blocking setup
This commit is contained in:
63
package.json
63
package.json
@@ -1,33 +1,32 @@
|
||||
{
|
||||
"name": "helenchong.dev",
|
||||
"description": "Helen Chong's developer portfolio and blog website.",
|
||||
"author": "Helen Chong",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/helenclx/helenchong.dev.git"
|
||||
},
|
||||
"scripts": {
|
||||
"start": "bunx eleventy --serve --quiet",
|
||||
"build": "bunx eleventy",
|
||||
"build-ghpages": "bunx eleventy"
|
||||
},
|
||||
"type": "module",
|
||||
"devDependencies": {
|
||||
"@types/bun": "^1.2.12"
|
||||
},
|
||||
"dependencies": {
|
||||
"@11ty/eleventy": "^3.1.0-beta.1",
|
||||
"@11ty/eleventy-fetch": "^5.1.0",
|
||||
"@11ty/eleventy-navigation": "^1.0.4",
|
||||
"@11ty/eleventy-plugin-rss": "^2.0.4",
|
||||
"@11ty/eleventy-plugin-syntaxhighlight": "^5.0.1",
|
||||
"@uncenter/eleventy-plugin-toc": "^1.0.3",
|
||||
"@zachleat/details-utils": "^2.0.2",
|
||||
"eleventy-plugin-embed-everything": "^1.21.0",
|
||||
"eleventy-plugin-vento": "^4.2.1",
|
||||
"eleventy-plugin-wordcount-extended": "^0.2.1",
|
||||
"markdown-it-anchor": "^9.2.0",
|
||||
"markdown-it-attrs": "^4.3.1",
|
||||
"markdown-it-bracketed-spans": "^1.0.1"
|
||||
}
|
||||
}
|
||||
"name": "helenchong.dev",
|
||||
"description": "Helen Chong's developer portfolio and blog website.",
|
||||
"author": "Helen Chong",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/helenclx/helenchong.dev.git"
|
||||
},
|
||||
"scripts": {
|
||||
"start": "bunx eleventy --serve --quiet",
|
||||
"build": "bunx eleventy",
|
||||
"build-ghpages": "bunx eleventy"
|
||||
},
|
||||
"type": "module",
|
||||
"devDependencies": {
|
||||
"@types/bun": "^1.2.12"
|
||||
},
|
||||
"dependencies": {
|
||||
"@11ty/eleventy": "^3.1.0-beta.1",
|
||||
"@11ty/eleventy-navigation": "^1.0.4",
|
||||
"@11ty/eleventy-plugin-rss": "^2.0.4",
|
||||
"@11ty/eleventy-plugin-syntaxhighlight": "^5.0.1",
|
||||
"@uncenter/eleventy-plugin-toc": "^1.0.3",
|
||||
"@zachleat/details-utils": "^2.0.2",
|
||||
"eleventy-plugin-embed-everything": "^1.21.0",
|
||||
"eleventy-plugin-vento": "^4.2.1",
|
||||
"eleventy-plugin-wordcount-extended": "^0.2.1",
|
||||
"markdown-it-anchor": "^9.2.0",
|
||||
"markdown-it-attrs": "^4.3.1",
|
||||
"markdown-it-bracketed-spans": "^1.0.1"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,32 +1,65 @@
|
||||
/*
|
||||
Modified from Robb Knight's script:
|
||||
https://rknight.me/blog/blocking-bots-with-nginx/
|
||||
*/
|
||||
/**
 * User-agent names of the crawlers this site blocks.
 *
 * Each entry is used verbatim elsewhere in this module to build a
 * "User-agent: <name>" line and an alternation pattern, so spelling,
 * capitalization and embedded spaces matter. A few names are listed in two
 * capitalizations on purpose; keep both variants.
 *
 * @type {string[]}
 */
const blockedUserAgents = [
    "AI2Bot",
    "Ai2Bot-Dolma",
    "aiHitBot",
    "Amazonbot",
    "anthropic-ai",
    "Applebot-Extended",
    "Brightbot 1.0",
    "Bytespider",
    "ChatGPT-User",
    "Claude-Web",
    "ClaudeBot",
    "cohere-ai",
    "cohere-training-data-crawler",
    "Cotoyogi",
    "Crawlspace",
    "Diffbot",
    "DuckAssistBot",
    "FacebookBot",
    "Factset_spyderbot",
    "FirecrawlAgent",
    "FriendlyCrawler",
    "Google-Extended",
    "GoogleOther",
    "GoogleOther-Image",
    "GoogleOther-Video",
    "GPTBot",
    "iaskspider/2.0",
    "ICC-Crawler",
    "ImagesiftBot",
    "img2dataset",
    "imgproxy",
    "ISSCyberRiskCrawler",
    "Kangaroo Bot",
    "meta-externalagent",
    "Meta-ExternalAgent",
    "meta-externalfetcher",
    "Meta-ExternalFetcher",
    "NovaAct",
    "OAI-SearchBot",
    "omgili",
    "omgilibot",
    "Operator",
    "PanguBot",
    "Perplexity-User",
    "PerplexityBot",
    "PetalBot",
    "Scrapy",
    "SemrushBot-OCOB",
    "SemrushBot-SWA",
    "Sidetrade indexer bot",
    "TikTokSpider",
    "Timpibot",
    "VelenPublicWebCrawler",
    "Webzio-Extended",
    "YouBot",
];
|
||||
|
||||
import EleventyFetch from "@11ty/eleventy-fetch";
|
||||
/**
 * Backslash-escapes regular-expression metacharacters so a user-agent name is
 * matched literally when it is embedded in a pattern. Whitespace is left
 * untouched on purpose: the default export rewrites it to ".*" afterwards.
 *
 * @param {string} name - Raw user-agent name.
 * @returns {string} The name with regex metacharacters escaped.
 */
const escapeRegexChars = (name) => name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

// One "User-agent: <name>" line per blocked bot, joined with newlines.
const txt = blockedUserAgents.map((bot) => `User-agent: ${bot}`).join("\n");

// Alternation of every blocked name. Names are escaped first so characters
// such as the "." in "Brightbot 1.0" do not act as regex wildcards.
// NOTE(review): assumes the consumer treats this as a regex (e.g. an
// .htaccess rewrite condition) — confirm against the template that uses it.
const htaccess = blockedUserAgents.map(escapeRegexChars).join("|");
|
||||
|
||||
export default async function () {
|
||||
const url = "https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/main/robots.txt";
|
||||
let txt = await EleventyFetch(url, {
|
||||
duration: "1w",
|
||||
type: "text",
|
||||
});
|
||||
|
||||
const botExceptions = ["Applebot", "CCBot"];
|
||||
const botExceptionsFullStr = botExceptions.map(bot => "User-agent: " + bot)
|
||||
|
||||
txt = txt
|
||||
.split("\n")
|
||||
.filter((line) => !botExceptionsFullStr.includes(line))
|
||||
.join("\n");
|
||||
|
||||
const bots = txt
|
||||
.split("\n")
|
||||
.filter((line) => line.startsWith("User-agent:"))
|
||||
.map((line) => line.split(":")[1].trim().replace(/\s/gi, ".*"));
|
||||
|
||||
return {
|
||||
txt: txt,
|
||||
htaccess: bots.join('|'),
|
||||
};
|
||||
/**
 * Default export of this module.
 *
 * - `txt`: the newline-separated "User-agent: <name>" lines built above.
 * - `htaccess`: the "|"-joined name pattern with every whitespace character
 *   replaced by ".*", so multi-word names such as "Kangaroo Bot" match
 *   regardless of what separates the words.
 *
 * NOTE(review): presumably consumed by templates as `robots.txt` and
 * `robots.htaccess` — confirm the data key against the file name.
 */
export default {
    txt,
    // The "i" flag has no effect on \s, so only the global flag is needed.
    htaccess: htaccess.replace(/\s/g, ".*"),
};
|
||||
|
||||
@@ -6,5 +6,6 @@ User-agent: *
|
||||
Disallow:
|
||||
|
||||
{{ robots.txt }}
|
||||
Disallow: /
|
||||
|
||||
Sitemap: {{ sitemeta.siteUrl }}/sitemap.xml
|
||||
Sitemap: {{ sitemeta.siteUrl }}/sitemap.xml
|
||||
|
||||
Reference in New Issue
Block a user