dairydemon.net/robots.txt

126 lines
1.8 KiB
Plaintext

# many entries lifted from https://seirdy.one/robots.txt
# several also come from https://radar.cloudflare.com/bots#verified-bots
# and still others from https://github.com/ai-robots-txt/ai.robots.txt
# adtech
# A User-agent value is meant to be the crawler's product token, without the
# "/version" suffix, so the bare token is listed too. The versioned spelling is
# kept for any parser that compares the whole string literally.
User-Agent: peer39_crawler
User-Agent: peer39_crawler/1.0
Disallow: /
# IP-violation scanners
# Each group below denies the whole site ("Disallow: /") to the named crawler.
User-agent: TurnitinBot
Disallow: /

User-agent: AcademicBotRTU
Disallow: /

User-agent: NPBot
Disallow: /

User-agent: SlySearch
Disallow: /

User-agent: BLEXBot
Disallow: /

# The next two crawlers were listed only by a full User-Agent header string
# (version and, for the first, a URL). Matching is meant to happen on the bare
# product token, so that token is listed first; the original spelling is kept
# for any parser that compares the whole string literally.
User-agent: CheckMarkNetwork
User-agent: CheckMarkNetwork/1.0 (+https://www.checkmarknetwork.com/spider.html)
Disallow: /

User-agent: BrandVerity
User-agent: BrandVerity/1.0
Disallow: /

# Identity data scraper
User-agent: PiplBot
Disallow: /
# Gen-AI data scrapers
# Every group in this section is one or more User-agent lines followed by a
# single "Disallow: /", i.e. the named crawlers are denied the entire site.
# Keep the User-agent lines of a group contiguous: some parsers treat a blank
# line as the end of a group.

User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
Disallow: /

User-agent: Google-Extended
User-agent: GoogleOther
User-agent: Google-CloudVertexBot
User-agent: Gemini-Deep-Research
User-agent: GoogleAgent-Mariner
Disallow: /

# NOTE(review): "Applebot" looks like Apple's general crawler rather than an
# AI-only one, so this group presumably blocks more than Gen-AI use - confirm
# that is intended.
User-agent: Applebot
User-agent: Applebot-Extended
Disallow: /

User-agent: ClaudeBot
User-agent: Claude-User
User-agent: Claude-Web
Disallow: /

# The lower-case and mixed-case spellings are both listed, presumably for
# parsers that compare names case-sensitively.
User-agent: FacebookBot
User-agent: meta-externalagent
User-agent: Meta-ExternalAgent
User-agent: meta-externalfetcher
User-agent: Meta-ExternalFetcher
Disallow: /

User-agent: Cotoyogi
Disallow: /

User-agent: Webzio-extended
Disallow: /

User-agent: Kangaroo Bot
Disallow: /

User-agent: GenAI
Disallow: /

User-agent: SemrushBot-OCOB
User-agent: SemrushBot-FT
Disallow: /

User-agent: VelenPublicWebCrawler
Disallow: /

User-agent: Amazonbot
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: PerplexityBot
User-agent: Perplexity-User
Disallow: /

User-agent: DuckAssistBot
Disallow: /

User-agent: anthropic-ai
Disallow: /

# Listed both with and without the version suffix.
User-agent: MistralAI-User
User-agent: MistralAI-User/1.0
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: Quora-Bot
Disallow: /

User-agent: Scrapy
Disallow: /