add robots.txt
This commit is contained in:
		
							parent
							
								
									8cb98a5735
								
							
						
					
					
						commit
						91df2b8529
					
				| 
						 | 
				
			
			@ -0,0 +1,125 @@
 | 
			
		|||
# many entries lifted from https://seirdy.one/robots.txt
 | 
			
		||||
# several also come from https://radar.cloudflare.com/bots#verified-bots
 | 
			
		||||
# and still others from https://github.com/ai-robots-txt/ai.robots.txt
 | 
			
		||||
 | 
			
		||||
# adtech
 | 
			
		||||
 | 
			
		||||
User-Agent: peer39_crawler/1.0
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Intellectual-property (IP) violation scanners
 | 
			
		||||
 | 
			
		||||
User-Agent: TurnitinBot
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-Agent: AcademicBotRTU
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-Agent: NPBot
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-Agent: SlySearch
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-Agent: BLEXBot
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-agent: CheckMarkNetwork/1.0 (+https://www.checkmarknetwork.com/spider.html)
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-agent: BrandVerity/1.0
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Identity data scraper
 | 
			
		||||
 | 
			
		||||
User-agent: PiplBot
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Gen-AI data scrapers
 | 
			
		||||
 | 
			
		||||
User-agent: GPTBot
 | 
			
		||||
User-agent: ChatGPT-User
 | 
			
		||||
User-agent: OAI-SearchBot
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-agent: Google-Extended
 | 
			
		||||
User-Agent: GoogleOther
 | 
			
		||||
User-Agent: Google-CloudVertexBot
 | 
			
		||||
User-Agent: Gemini-Deep-Research
 | 
			
		||||
User-Agent: GoogleAgent-Mariner
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
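# NOTE(review): "Applebot" is also Apple's search crawler (Siri/Spotlight); "Applebot-Extended" alone is the AI-training opt-out -- confirm blocking both is intended
User-agent: Applebot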
 | 
			
		||||
User-agent: Applebot-Extended
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-agent: ClaudeBot
 | 
			
		||||
User-agent: Claude-User
 | 
			
		||||
User-agent: Claude-Web
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-Agent: FacebookBot
 | 
			
		||||
User-Agent: meta-externalagent
 | 
			
		||||
User-Agent: Meta-ExternalAgent
 | 
			
		||||
User-Agent: meta-externalfetcher
 | 
			
		||||
User-Agent: Meta-ExternalFetcher
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-agent: Cotoyogi
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-agent: Webzio-extended
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-agent: Kangaroo Bot
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-Agent: GenAI
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-Agent: SemrushBot-OCOB
 | 
			
		||||
User-Agent: SemrushBot-FT
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-Agent: VelenPublicWebCrawler
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-Agent: Amazonbot
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-Agent: PetalBot
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-Agent: PerplexityBot
 | 
			
		||||
User-Agent: Perplexity-User
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-Agent: DuckAssistBot
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-Agent: anthropic-ai
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-Agent: MistralAI-User
 | 
			
		||||
User-Agent: MistralAI-User/1.0
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-Agent: cohere-ai
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-Agent: Quora-Bot
 | 
			
		||||
Disallow: /
 | 
			
		||||
 | 
			
		||||
User-Agent: Scrapy
 | 
			
		||||
Disallow: /
 | 
			
		||||
		Loading…
	
		Reference in New Issue