# robots.txt - crawl policy for https://cm-sousel.pt/
#
# Format reminder: one record per line. A "#" starts a comment that runs to the
# end of the line, so directives must never share a line with a leading comment.
# Groups start at a "User-agent" line; a crawler obeys the most specific group
# that matches its name and ignores the "*" group when a named group applies.

# ============================================
# Default rules - applies to all crawlers
# ============================================
User-agent: *

# WordPress core - admin and internals
# NOTE(review): blocking /wp-includes/, /wp-content/plugins/ and
# /wp-content/themes/ can also block the CSS/JS that search engines fetch to
# render pages - confirm this is intended.
Disallow: /wp-admin/
Disallow: /wp-includes/
Disallow: /wp-content/plugins/
Disallow: /wp-content/themes/
Disallow: /wp-content/cache/
Disallow: /wp-content/upgrade/
Disallow: /wp-content/uploads/wpforms/
Disallow: /wp-json/
Disallow: /xmlrpc.php
Disallow: /readme.html
Disallow: /license.txt

# Allow the admin-ajax.php file. This is an exception to the /wp-admin/ block
# above; the longer, more specific path wins over the shorter Disallow.
Allow: /wp-admin/admin-ajax.php

# Login, registration, and user pages
Disallow: /wp-login.php
Disallow: /login/
Disallow: /register/
Disallow: /lost-password/

# Internal search results
Disallow: /*?s=
Disallow: /search/

# WordPress query parameters that create duplicate content
# NOTE(review): each "/*?name=" pattern only matches when that parameter is the
# first one in the query string (directly after "?"); URLs carrying it as
# "&name=" are not covered - confirm whether that matters here.
Disallow: /*?replytocom=
Disallow: /*?unapproved=
Disallow: /*?attachment_id=
Disallow: /*?p=
Disallow: /*?preview=true

# Feeds and trackbacks
# NOTE(review): "/*/feed/" requires at least one path segment before "feed",
# so the site-wide "/feed/" stays crawlable - presumably intentional; verify.
Disallow: /*/feed/
Disallow: /*/trackback/
Disallow: /comments/feed/

# Author archives
Disallow: /author/

# ============================================
# Block AI training scrapers
# ============================================
User-agent: GPTBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: PerplexityBot
Disallow: /

User-agent: Bytespider
Disallow: /

# ============================================
# Block known aggressive SEO scrapers
# ============================================
User-agent: AhrefsBot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

# ============================================
# Sitemap
# ============================================
# Sitemap is independent of any User-agent group and must be an absolute URL.
Sitemap: https://cm-sousel.pt/sitemap_index.xml