# ======================================================================
# robots.txt for https://propt.global
# Goal: maximum visibility in classic search AND AI search / answer engines.
# Last updated: 2026-06-29
# NOTE: robots.txt grants permission to CRAWL. Getting cited also needs
# server-side rendering, clean structure and schema.
# A crawler with its own named group below ignores the "*" group, so each
# allowed bot simply sees "Allow: /".
# ======================================================================

# ----------------------------------------------------------------------
# DEFAULT — allow every crawler to read all public content
# ----------------------------------------------------------------------
User-agent: *
Allow: /
# Template for keeping non-public areas out of crawlers. Uncomment as needed
# and keep the lines directly below "Allow: /" (no blank line in between) so
# that every parser attaches them to this group.
# CAUTION: these rules reach only crawlers that fall back to "*". A bot that
# matches one of the named groups in this file ignores this group entirely,
# so copy the same Disallow lines into every named group as well.
# Disallow: /admin/
# Disallow: /wp-admin/
# Disallow: /api/
# Disallow: /search

# ----------------------------------------------------------------------
# AI SEARCH / CITATION CRAWLERS
# ----------------------------------------------------------------------

# OpenAI — both agents share one rule set, so they are declared as one group.
#   OAI-SearchBot : crawler behind ChatGPT search results
#   ChatGPT-User  : fetches triggered by a ChatGPT user
User-agent: OAI-SearchBot
User-agent: ChatGPT-User
Allow: /

# Anthropic — both agents share one rule set, so they are declared as one group.
#   Claude-SearchBot : crawler behind Claude search citations
#   Claude-User      : fetches triggered by a Claude user
User-agent: Claude-SearchBot
User-agent: Claude-User
Allow: /

# Perplexity — both agents share one rule set, so they are declared as one group.
#   PerplexityBot   : index crawler
#   Perplexity-User : fetches triggered by a Perplexity user
User-agent: PerplexityBot
User-agent: Perplexity-User
Allow: /

# Search engines and assistants whose crawlers also feed AI answers.
# Every agent below gets the same rule, so they form a single group; split an
# agent back out into its own group if it ever needs different rules.
# Microsoft Bing — powers Bing + Copilot
User-agent: Bingbot
# Google — powers Search + AI Overviews + AI Mode
User-agent: Googlebot
# Apple — Siri / Spotlight / Apple Intelligence answers
User-agent: Applebot
# Amazon — Alexa / Rufus answers
User-agent: Amazonbot
# DuckDuckGo — DuckAssist answers
User-agent: DuckAssistBot
# Meta AI
# NOTE(review): Meta appears to document meta-externalagent as a training /
# indexing crawler — confirm whether it belongs in the training section.
User-agent: meta-externalagent
Allow: /

# ----------------------------------------------------------------------
# AI TRAINING / GROUNDING CRAWLERS AND CONTROL TOKENS
# ----------------------------------------------------------------------

# Training / grounding agents. Every agent below gets the same rule, so they
# form a single group; split an agent back out into its own group if it ever
# needs different rules (for example, to opt out of training).
# OpenAI training crawler
User-agent: GPTBot
# Anthropic training crawler
User-agent: ClaudeBot
# Google — Gemini / Vertex training & grounding
User-agent: Google-Extended
# Apple — Apple Intelligence training
User-agent: Applebot-Extended
# Common Crawl (feeds many open LLM datasets)
User-agent: CCBot
Allow: /

# ----------------------------------------------------------------------
# SITEMAPS
# ----------------------------------------------------------------------
# Sitemap locations as absolute URLs: the main sitemap, then the news sitemap.
Sitemap: https://propt.global/sitemap.xml
Sitemap: https://propt.global/news-sitemap.xml