# cyclearchive.com — single source of truth for robots rules.
# (Served from static/. There is intentionally no templates/robots.txt;
#  a static file and a rendered template would otherwise both claim /robots.txt.)

# Single rule group, addressed to every crawler.
User-agent: *
# Everything is crawlable by default; the Disallow lines below carve out the
# only exceptions.
Allow: /
# In-browser PDF viewer app — no indexable content of its own.
Disallow: /pdfjs/
# /magazines/ and /books/ are thin "Redirect" stubs that bounce to the homepage
# (redirect_to). The trailing $ is an end-of-URL anchor, so only the exact
# paths /magazines/ and /books/ are blocked; real pages below them
# (/magazines/the-wheel/ etc.) stay crawlable.
Disallow: /magazines/$
Disallow: /books/$

# Absolute URL of the XML sitemap. This line stands outside the User-agent
# group above.
Sitemap: https://cyclearchive.com/sitemap.xml

# AI crawlers are intentionally allowed (no GPTBot/CCBot/etc. blocks).
#
# NOTE: raw PDFs are served from the DigitalOcean Spaces CDN
# (cycle-archive.lon1.cdn.digitaloceanspaces.com). robots.txt rules only apply
# to the host that serves the file, so to discourage crawling of the raw PDFs a
# robots.txt must be hosted on the CDN domain itself — it cannot be done here.