# many entries lifted from https://seirdy.one/robots.txt
# several also come from https://radar.cloudflare.com/bots#verified-bots
# and still others from https://github.com/ai-robots-txt/ai.robots.txt
|
|
|
|
# adtech
# The bare product token is listed first: matchers that follow RFC 9309
# compare against the product token only, so a value carrying "/1.0" may
# never match. The versioned form is kept for crawlers that compare the
# literal string.
User-agent: peer39_crawler
User-agent: peer39_crawler/1.0
Disallow: /
|
|
|
|
|
|
|
|
|
|
|
|
# IP-violation scanners
# A single group: every user agent named here is denied the whole site.
User-agent: TurnitinBot
User-agent: AcademicBotRTU
User-agent: NPBot
User-agent: SlySearch
User-agent: BLEXBot
Disallow: /
|
|
|
|
# The bare product token is added because matchers that follow RFC 9309
# compare against the product token only (letters, "-" and "_"); a value
# that embeds the version and info URL may never match. The original
# literal is kept for crawlers that compare the full string.
User-agent: CheckMarkNetwork
User-agent: CheckMarkNetwork/1.0 (+https://www.checkmarknetwork.com/spider.html)
Disallow: /
|
|
|
|
# The bare product token is added because matchers that follow RFC 9309
# compare against the product token only, so a value carrying "/1.0" may
# never match. The versioned form is kept for crawlers that compare the
# literal string.
User-agent: BrandVerity
User-agent: BrandVerity/1.0
Disallow: /
|
|
|
|
|
|
|
|
|
|
# Identity data scraper
# Denied the whole site.
User-agent: PiplBot
Disallow: /
|
|
|
|
|
|
|
|
|
|
|
|
# Gen-AI data scrapers
# Each group below lists one or more user-agent tokens and denies them the
# whole site.
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
Disallow: /

User-agent: Google-Extended
User-agent: GoogleOther
User-agent: Google-CloudVertexBot
User-agent: Gemini-Deep-Research
User-agent: GoogleAgent-Mariner
Disallow: /

# NOTE(review): presumably "Applebot" also identifies Apple's search crawler,
# not only AI-training use - confirm that blocking it outright is intended.
User-agent: Applebot
User-agent: Applebot-Extended
Disallow: /

User-agent: ClaudeBot
User-agent: Claude-User
User-agent: Claude-Web
Disallow: /

# Both capitalisations are listed, presumably for matchers that compare
# case-sensitively.
User-agent: FacebookBot
User-agent: meta-externalagent
User-agent: Meta-ExternalAgent
User-agent: meta-externalfetcher
User-agent: Meta-ExternalFetcher
Disallow: /
|
|
|
|
# One group per crawler; each is denied the whole site.
User-agent: Cotoyogi
Disallow: /

User-agent: Webzio-extended
Disallow: /

User-agent: Kangaroo Bot
Disallow: /

User-agent: GenAI
Disallow: /

User-agent: SemrushBot-OCOB
User-agent: SemrushBot-FT
Disallow: /

User-agent: VelenPublicWebCrawler
Disallow: /

User-agent: Amazonbot
Disallow: /

User-agent: PetalBot
Disallow: /
|
|
|
|
# One group per crawler family; each is denied the whole site.
User-agent: PerplexityBot
User-agent: Perplexity-User
Disallow: /

User-agent: DuckAssistBot
Disallow: /

User-agent: anthropic-ai
Disallow: /

# Listed both with and without the version suffix.
User-agent: MistralAI-User
User-agent: MistralAI-User/1.0
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: Quora-Bot
Disallow: /

User-agent: Scrapy
Disallow: /
|