diff --git a/robots.txt b/robots.txt
new file mode 100644
index 0000000..f01ac7d
--- /dev/null
+++ b/robots.txt
@@ -0,0 +1,132 @@
+# many entries lifted from https://seirdy.one/robots.txt
+# several also come from https://radar.cloudflare.com/bots#verified-bots
+# and still others from https://github.com/ai-robots-txt/ai.robots.txt
+#
+# Versioned entries ("name/1.0") are paired with the bare product token:
+# RFC 9309 matches groups on the product token alone, and parsers such as
+# Python's urllib.robotparser cut the crawler name at the first "/" first.
+
+# adtech
+
+User-Agent: peer39_crawler
+User-Agent: peer39_crawler/1.0
+Disallow: /
+
+
+
+
+
+# IP-violation scanners
+
+User-Agent: TurnitinBot
+Disallow: /
+
+User-Agent: AcademicBotRTU
+Disallow: /
+
+User-Agent: NPBot
+Disallow: /
+
+User-Agent: SlySearch
+Disallow: /
+
+User-Agent: BLEXBot
+Disallow: /
+
+User-agent: CheckMarkNetwork
+User-agent: CheckMarkNetwork/1.0 (+https://www.checkmarknetwork.com/spider.html)
+Disallow: /
+
+User-agent: BrandVerity
+User-agent: BrandVerity/1.0
+Disallow: /
+
+
+
+
+# Identity data scraper
+
+User-agent: PiplBot
+Disallow: /
+
+
+
+
+
+# Gen-AI data scrapers
+
+User-agent: GPTBot
+User-agent: ChatGPT-User
+User-agent: OAI-SearchBot
+Disallow: /
+
+User-agent: Google-Extended
+User-Agent: GoogleOther
+User-Agent: Google-CloudVertexBot
+User-Agent: Gemini-Deep-Research
+User-Agent: GoogleAgent-Mariner
+Disallow: /
+
+User-agent: Applebot
+User-agent: Applebot-Extended
+Disallow: /
+
+User-agent: ClaudeBot
+User-agent: Claude-User
+User-agent: Claude-Web
+Disallow: /
+
+User-Agent: FacebookBot
+User-Agent: meta-externalagent
+User-Agent: Meta-ExternalAgent
+User-Agent: meta-externalfetcher
+User-Agent: Meta-ExternalFetcher
+Disallow: /
+
+User-agent: Cotoyogi
+Disallow: /
+
+User-agent: Webzio-extended
+Disallow: /
+
+User-agent: Kangaroo Bot
+Disallow: /
+
+User-Agent: GenAI
+Disallow: /
+
+User-Agent: SemrushBot-OCOB
+User-Agent: SemrushBot-FT
+Disallow: /
+
+User-Agent: VelenPublicWebCrawler
+Disallow: /
+
+User-Agent: Amazonbot
+Disallow: /
+
+User-Agent: PetalBot
+Disallow: /
+
+User-Agent: PerplexityBot
+User-Agent: Perplexity-User
+Disallow: /
+
+User-Agent: DuckAssistBot
+Disallow: /
+
+User-Agent: anthropic-ai
+Disallow: /
+
+User-Agent: MistralAI-User
+User-Agent: MistralAI-User/1.0
+Disallow: /
+
+User-Agent: cohere-ai
+Disallow: /
+
+User-Agent: Quora-Bot
+Disallow: /
+
+User-Agent: Scrapy
+Disallow: /