# Atiga Restaurant — robots.txt
# https://www.atigarestaurant.ca

# ==========================================
# All crawlers — default rules
# ==========================================
User-agent: *
# Rules for this group are kept contiguous (comment lines instead of blank
# lines) because legacy parsers treat a blank line as the end of the group
# and would drop every rule that follows it.
#
# Internal / non-public paths
# NOTE(review): if /static/ or /components/ serve CSS, JS or images that
# public pages need in order to render, blocking them can hurt how search
# engines render those pages — confirm before keeping these two rules.
Disallow: /config/
Disallow: /search/
Disallow: /account/
Disallow: /api/
Disallow: /static/
Disallow: /components/
#
# Block URL parameter junk
Disallow: /*?*author=*
Disallow: /*?*tag=*
Disallow: /*?*month=*
Disallow: /*?*view=*
Disallow: /*?*format=*
#
# Legal pages — do not crawl
# NOTE: Disallow only stops crawling; a blocked URL can still be indexed if
# other pages link to it. If these pages must stay out of search results,
# allow crawling and serve a noindex meta tag or X-Robots-Tag header instead.
Disallow: /privacy-policy.html
Disallow: /terms-of-use.html
#
# Explicit catch-all, placed last so that first-match parsers still honor
# the Disallow rules above (longest-match parsers are unaffected by order).
Allow: /
#
# AI reference file — noindex is enforced via the X-Robots-Tag header.
# "Noindex" is not part of the robots.txt standard and most major crawlers
# ignore it; it is listed only for bots that still respect it.
Noindex: /llms.txt

# ==========================================
# AI / LLM crawlers — allow everything except the legal pages
# (llms.txt is intentionally accessible to these bots)
# ==========================================
# A crawler obeys only the most specific group that matches it, so the agents
# below do NOT inherit any rule from the "User-agent: *" group; every
# restriction meant to apply to them has to be restated in this group.
# NOTE(review): as written, the internal paths blocked for other crawlers
# (/config/, /account/, /api/, ...) are open to these bots — confirm that
# this is intended.
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: CCBot
User-agent: anthropic-ai
User-agent: Claude-Web
User-agent: Google-Extended
User-agent: FacebookBot
User-agent: cohere-ai
User-agent: PerplexityBot
User-agent: YouBot
# Disallow rules come first so that first-match parsers do not stop at the
# catch-all Allow and skip them; longest-match parsers give the same result.
Disallow: /privacy-policy.html
Disallow: /terms-of-use.html
Allow: /llms.txt
Allow: /

# ==========================================
# Google Ads bots — allow all
# ==========================================
# One shared group for all Google Ads agents (most specific token first).
# Because these agents match this named group, none of the Disallow rules
# from the "User-agent: *" group apply to them.
User-agent: AdsBot-Google-Mobile-Apps
User-agent: AdsBot-Google-Mobile
User-agent: AdsBot-Google
Allow: /

# ==========================================
# Throttle heavy / low-value crawlers
# ==========================================
# A crawler that matches a named group ignores the "User-agent: *" group
# entirely. A group containing only Crawl-delay would therefore throttle the
# bot but also lift every default restriction for it, so the default
# Disallow rules are restated in each group below. Keep them in sync with
# the "User-agent: *" group.
User-agent: Baiduspider
Crawl-delay: 10
Disallow: /config/
Disallow: /search/
Disallow: /account/
Disallow: /api/
Disallow: /static/
Disallow: /components/
Disallow: /*?*author=*
Disallow: /*?*tag=*
Disallow: /*?*month=*
Disallow: /*?*view=*
Disallow: /*?*format=*
Disallow: /privacy-policy.html
Disallow: /terms-of-use.html

# SemrushBot and AhrefsBot share the same delay, so they share one group.
User-agent: SemrushBot
User-agent: AhrefsBot
Crawl-delay: 5
Disallow: /config/
Disallow: /search/
Disallow: /account/
Disallow: /api/
Disallow: /static/
Disallow: /components/
Disallow: /*?*author=*
Disallow: /*?*tag=*
Disallow: /*?*month=*
Disallow: /*?*view=*
Disallow: /*?*format=*
Disallow: /privacy-policy.html
Disallow: /terms-of-use.html

# ==========================================
# Sitemap
# ==========================================
# The Sitemap directive is not tied to any User-agent group and applies to
# every crawler that reads this file; the value must be an absolute URL.
Sitemap: https://www.atigarestaurant.ca/sitemap.xml