# robots.txt for https://dramsch.net
# Last updated: 2025

# Primary sitemap reference
Sitemap: https://dramsch.net/sitemapindex.xml

# ==========================================
# Search Engine Crawlers - Full Access
# ==========================================
User-agent: Googlebot
User-agent: Bingbot
User-agent: Slurp
User-agent: DuckDuckBot
User-agent: Baiduspider
User-agent: YandexBot
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
User-agent: DotBot
Allow: /
Crawl-delay: 1

# ==========================================
# Social Media Crawlers - Full Access
# ==========================================
User-agent: FacebookBot
User-agent: facebookexternalhit
User-agent: Meta-ExternalAgent
User-agent: LinkedInBot
User-agent: WhatsApp
User-agent: TwitterBot
User-agent: Twitterbot
User-agent: Pinterest
User-agent: Pinterestbot
Allow: /

# ==========================================
# AI Training Scrapers - Blocked
# ==========================================
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: CCBot
User-agent: anthropic-ai
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: cohere-ai
User-agent: Bytespider
User-agent: img2dataset
User-agent: Scrapy
User-agent: Timpibot
User-agent: VelenPublicWebCrawler
User-agent: YouBot
User-agent: PetalBot
User-agent: omgili
User-agent: omgilibot
User-agent: Diffbot
User-agent: Apify
User-agent: DataForSeoBot
Disallow: /

# ==========================================
# Search Engine AI Features - Allowed
# ==========================================
User-agent: Google-Extended
User-agent: GoogleOther
User-agent: GoogleOther-Image
User-agent: GoogleOther-Video
User-agent: Applebot-Extended
User-agent: perplexity-ai
User-agent: PerplexityBot
User-agent: OAI-SearchBot
User-agent: Amazonbot
Allow: /

# ==========================================
# All Other Crawlers - Standard Rules
# ==========================================
User-agent: *
# Block private/temporary pages
Disallow: /thank-you/
Disallow: /goodbye/
Disallow: /files/
Disallow: /admin/
Disallow: /private/
Disallow: /tmp/
Disallow: /temp/
Disallow: /cache/
Disallow: /.git/
Disallow: /.claude/

# Block duplicate content paths
Disallow: /tag/
Disallow: /category/
Allow: /tag/machine-learning/
Allow: /tag/python/
Allow: /tag/data-science/
Allow: /category/tutorial/
Allow: /category/research/

# Block search and filter pages
Disallow: /*?*sort=
Disallow: /*?*filter=
Disallow: /*?*page=
Disallow: /search?
Disallow: /search/

# Allow important assets
Allow: /images/
Allow: /css/
Allow: /js/
Allow: /*.css$
Allow: /*.js$
Allow: /*.jpg$
Allow: /*.jpeg$
Allow: /*.png$
Allow: /*.webp$
Allow: /*.svg$

# ==========================================
# Crawl Rate Recommendations
# ==========================================
# Respect crawl delays for non-critical bots
# NOTE(review): SemrushBot, AhrefsBot, MJ12bot, and DotBot also appear in the
# full-access group (Crawl-delay: 1) at the top of the file; merged-group
# semantics make the effective delay parser-dependent — confirm intent
User-agent: SemrushBot
Crawl-delay: 2

User-agent: AhrefsBot
Crawl-delay: 2

User-agent: MJ12bot
Crawl-delay: 5

User-agent: DotBot
Crawl-delay: 5

# ==========================================
# Additional Directives
# ==========================================
# Specify the preferred host (without www)
# NOTE(review): Host and Clean-param are Yandex-only directives; other crawlers ignore them
Host: dramsch.net

# Clean URLs - help crawlers understand URL structure
Clean-param: utm_source&utm_medium&utm_campaign&utm_term&utm_content /
Clean-param: ref&source /

# Request indexing of important sections
User-agent: *
Allow: /writing/
Allow: /projects/

# ==========================================
# Bad Bots - Complete Block
# ==========================================
User-agent: SEOkicks
User-agent: Alexibot
User-agent: SurveyBot
User-agent: Xenu's
User-agent: Xenu's Link Sleuth 1.1c
User-agent: rogerbot
User-agent: 360Spider
User-agent: psbot
User-agent: TurnitinBot
User-agent: Clickagy
User-agent: Nutch
User-agent: BLEXBot
User-agent: Ezooms
User-agent: Majestic-12
User-agent: Majestic-SEO
User-agent: DSearch
User-agent: MegaIndex
User-agent: BlekkoBot
User-agent: NerdyBot
User-agent: JamesBOT
User-agent: TinEye
User-agent: TinEye-bot
User-agent: Konqueror
User-agent: SearchmetricsBot
User-agent: SeznamBot
User-agent: ExtLinksBot
User-agent: Mail.RU_Bot
User-agent: spbot
User-agent: LinkpadBot
User-agent: BDCbot
User-agent: Updownerbot
User-agent: VoilaBot
User-agent: ZumBot
User-agent: YoudaoBot
User-agent: ApiTool
User-agent: Thumbnail.CZ robot
# NOTE(review): MJ12bot is also listed in the full-access group at the top of
# the file (Allow: /), which conflicts with the Disallow: / here — confirm
# which rule is intended
User-agent: MJ12bot
User-agent: heritrix
User-agent: CopyRightCheck
User-agent: Aboundex
User-agent: Bytedance
User-agent: Bytedance Spider
User-agent: magpie-crawler
Disallow: /

# ==========================================
# End of robots.txt
# ==========================================