# robots.txt — Wasema.com
# Updated: 2026-06-06
#
# Strategy: allow AI SEARCH bots (citations + referral traffic),
#           block AI TRAINING bots (no traffic in return).
#
# Format notes:
#   * robots.txt is line-oriented: every directive must sit on its own line.
#     Anything that follows a '#' on the same line is a comment.
#   * A crawler obeys only the group that most specifically matches its
#     user-agent token; it does NOT additionally inherit rules from
#     "User-agent: *". Private-path exclusions are therefore repeated in
#     every group that allows crawling.

# ─────────────────────────────────────────────
# ✅ ALLOW — Search-focused bots (bring citations and referral traffic)
# ─────────────────────────────────────────────
# NOTE(review): Anthropic's crawler documentation appears to describe
#   ClaudeBot as its training-data crawler, with Claude-User and
#   Claude-SearchBot as the user-fetch / search agents. If that is accurate,
#   allowing ClaudeBot contradicts the strategy above — confirm and adjust.
# NOTE(review): Claude-Web looks like a legacy token — confirm it is still honored.
# NOTE(review): Applebot-Extended appears to be a usage-control token (whether
#   content crawled by Applebot may be used for Apple model training), not a
#   search crawler. Allowing it may opt IN to training — confirm intent.
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
User-agent: PerplexityBot
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: Applebot-Extended
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Bingbot
Allow: /
# Same private paths as the general group below; they must be repeated here
# because these bots ignore the "*" group once a named group matches them.
Disallow: /admin/
Disallow: /login
Disallow: /register
Disallow: /go/

# ─────────────────────────────────────────────
# ❌ BLOCK — Training-only bots (scrape content, give nothing back)
# ─────────────────────────────────────────────

# OpenAI training crawler (different from the ChatGPT-User search bot)
User-agent: GPTBot
Disallow: /

# Google Gemini training control token (does not affect Google Search crawling)
User-agent: Google-Extended
Disallow: /

# Anthropic training token
# NOTE(review): anthropic-ai looks like a legacy token — confirm it is still honored.
User-agent: anthropic-ai
Disallow: /

# Common Crawl — feeds many AI training datasets
User-agent: CCBot
Disallow: /

# ByteDance / TikTok training crawler
User-agent: Bytespider
Disallow: /

# Cohere AI training
User-agent: cohere-ai
Disallow: /

# ─────────────────────────────────────────────
# General crawl settings (every bot not named above)
# ─────────────────────────────────────────────
User-agent: *
Allow: /
Disallow: /admin/
Disallow: /login
Disallow: /register
Disallow: /go/
# Crawl-delay is a non-standard extension; some crawlers ignore it.
Crawl-delay: 2

# Sitemap (applies independently of any user-agent group)
Sitemap: https://wasema.com/sitemap.xml

# LLM context file: llms.txt is a community proposal, not a robots.txt
# directive, so the location is recorded here as a comment only.
# LLMs: https://wasema.com/llms.txt