diff --git a/html/robots.txt b/html/robots.txt
index 52f6d45a..e92e503a 100644
--- a/html/robots.txt
+++ b/html/robots.txt
@@ -1,6 +1,16 @@
-User-agent: SiteimproveBot
-Disallow: /versions
-Disallow: /old_site
-
+# General bots
+# Note: Google does not respect Crawl-delay, but that's fine.
User-agent: *
-Allow: /
+Allow: /project/*/users/*
+Disallow: /project/
+Disallow: /api/
+Crawl-delay: 10
+
+# AI crawlers — slower, same path rules duplicated
+User-agent: GPTBot
+User-agent: ClaudeBot
+User-agent: PerplexityBot
+Allow: /project/*/users/*
+Disallow: /project/
+Disallow: /api/
+Crawl-delay: 30
diff --git a/models/projects.lua b/models/projects.lua
index 7f9aa476..986d469d 100644
--- a/models/projects.lua
+++ b/models/projects.lua
@@ -117,7 +117,7 @@ local ActiveProjects = Model:extend('active_projects', {
'#present:Username=' .. escape(self.username) ..
'&ProjectName=' .. escape(self.projectname) ..
'&editMode&noRun',
- download = '/project/' .. escape(self.id),
+ download = '/api/v1/project/' .. escape(self.id),
site = '/project?username=' .. escape(self.username) ..
'&projectname=' .. escape(self.projectname),
author = '/user?username=' .. escape(self.username),
diff --git a/nginx.conf b/nginx.conf
index 1b6f60a4..d751fdb1 100644
--- a/nginx.conf
+++ b/nginx.conf
@@ -46,6 +46,7 @@ http {
include nginx.conf.d/logging.conf;
include nginx.conf.d/mime.types;
+ include nginx.conf.d/snap-bot-mitigation.conf;
resolver ${{DNS_RESOLVER}};
diff --git a/nginx.conf.d/locations.conf b/nginx.conf.d/locations.conf
index 2b2998c6..468349f1 100644
--- a/nginx.conf.d/locations.conf
+++ b/nginx.conf.d/locations.conf
@@ -1,6 +1,10 @@
# Shared location configurations for all environments
# These are included in both the main SSL server block (prod) and non-SSL server block (dev).
+# Bot mitigation (robots.txt + UA blocklist). Included here so it is applied
+# to every server block — both prod hosts, both staging hosts, and dev.
+include nginx.conf.d/snap-bot-mitigation-server.conf;
+
# Specify the cloud domain each page should use.
set_by_lua_block $cloud_url { return os.getenv('CLOUD_URL') }
@@ -48,6 +52,19 @@ location @lapisapp {
}
}
+# Raw project XML at /project/<id> — Lapis-served and the largest
+# crawler egress source, so it gets rate-limited via the snap_project zone
+# (defined in snap-bot-mitigation.conf). The upcoming HTML route
+# /project/<id>/users/<username> does not match this regex and is unaffected.
+location ~ ^/project/[0-9]+/?$ {  # case-sensitive regex: /project/ + digits, optional trailing slash, nothing after
+ limit_req zone=snap_project burst=120 nodelay;  # up to 120 excess requests are served without delay; anything beyond that is rejected
+ access_log logs/lapis_access.log main_ext if=$should_log;  # request is logged only when $should_log is neither empty nor "0"
+ default_type text/html;
+ content_by_lua_block {
+ require("lapis").serve("app")
+ }
+}
+
# Static content from snapCloud
# Avoid unnecessarily logging data for CSS, JS, etc.
location /static/ {
diff --git a/nginx.conf.d/snap-bot-mitigation-server.conf b/nginx.conf.d/snap-bot-mitigation-server.conf
new file mode 100644
index 00000000..163d475a
--- /dev/null
+++ b/nginx.conf.d/snap-bot-mitigation-server.conf
@@ -0,0 +1,18 @@
+# Bot-mitigation directives that live in server{} context. Included from
+# locations.conf so every server block (prod + staging, both hosts, plus
+# dev) picks them up. The http-context map and limit_req_zone live in
+# nginx.conf.d/snap-bot-mitigation.conf.
+
+# Serve robots.txt as a static file from the repo. Host-independent — does
+# not depend on the per-host static root or the Lapis app.
+location = /robots.txt {  # exact-match location: applies only to the URI /robots.txt
+ access_log off;  # robots.txt fetches are not written to the access log
+ default_type text/plain;
+ root html;  # root + URI => the file served is html/robots.txt
+}
+
+# 403 any user-agent flagged by $snap_block_ua. `return` is one of the few
+# directives that is safe to use inside `if`.
+if ($snap_block_ua) {  # true when the variable is neither empty nor "0", i.e. the UA map set it to 1
+ return 403;
+}
diff --git a/nginx.conf.d/snap-bot-mitigation.conf b/nginx.conf.d/snap-bot-mitigation.conf
new file mode 100644
index 00000000..51329a0d
--- /dev/null
+++ b/nginx.conf.d/snap-bot-mitigation.conf
@@ -0,0 +1,24 @@
+# Bot-mitigation directives that must live in the http{} context. The
+# matching server-context directives are in snap-bot-mitigation-server.conf,
+# included from locations.conf.
+
+# UA blocklist for SEO crawlers that ignore robots.txt. \b word-boundaries
+# guard against substring false positives (e.g. "yeti" inside a longer word).
+map $http_user_agent $snap_block_ua {  # derives $snap_block_ua from the User-Agent request header
+ default 0;  # a UA matching none of the patterns below is not blocked
+ "~*ahrefsbot" 1;  # "~*" prefix = case-insensitive regular expression
+ "~*semrushbot" 1;
+ "~*\bdotbot\b" 1;
+ "~*mj12bot" 1;
+ "~*sleepbot" 1;
+ "~*\byeti\b" 1;
+ "~*blexbot" 1;
+ "~*petalbot" 1;
+}
+
+# Rate-limit zone for the raw XML at /project/<id>. The rate is
+# deliberately generous because Snap! is used in classrooms where 20–40
+# students share one NAT IP; tighten only with evidence from error.log
+# ("limiting requests" lines).
+limit_req_zone $binary_remote_addr zone=snap_project:10m rate=60r/s;  # keyed per client address; 10 MB of shared state; 60 requests/second per key
+limit_req_status 429;  # rejected requests get 429 instead of nginx's default 503
diff --git a/views/partials/flag_list.etlua b/views/partials/flag_list.etlua
index 86b9924c..a5c1083a 100644
--- a/views/partials/flag_list.etlua
+++ b/views/partials/flag_list.etlua
@@ -43,7 +43,7 @@
'Are you sure you want to remove this flag?',
() => {
cloud.delete(
- '/project/<%= project.id %>/flag',
+ '/api/v1/project/<%= project.id %>/flag',
null,
{ flagger: flagger }
);
@@ -56,7 +56,7 @@
'report the flagger for abusing the flagging system?',
() => {
cloud.delete(
- '/project/<%= project.id %>/flag',
+ '/api/v1/project/<%= project.id %>/flag',
null,
{ flagger: flagger, report: true }
);
diff --git a/views/partials/project_buttons.etlua b/views/partials/project_buttons.etlua
index c407de74..ae5eb508 100644
--- a/views/partials/project_buttons.etlua
+++ b/views/partials/project_buttons.etlua
@@ -10,14 +10,14 @@
aria-label="<%= locale.get('unbookmark') %>"
title="<%= locale.get('unbookmark') %>"
onclick="cloud.delete(
- '/project/<%= project.id %>/bookmark/<%= current_user.id %>')"
+ '/api/v1/project/<%= project.id %>/bookmark/<%= current_user.id %>')"
>
<% else %>
<% end %>
<% end %>
@@ -28,7 +28,8 @@
%>
<%= locale.get('download') %>
@@ -128,7 +129,7 @@ function confirmDelete () {
'<%- package.loaded.dialog(
'confirm_delete',
{ item_name = 'project'}) %>',
- () => { cloud.delete('/project/<%= project.id %>'); }
+ () => { cloud.delete('/api/v1/project/<%= project.id %>'); }
);
}
@@ -143,7 +144,7 @@ function confirmFlag () {
var form =
document.querySelector('form.reasons');
cloud.post(
- '/project/<%= project.id %>/flag',
+ '/api/v1/project/<%= project.id %>/flag',
null,
{
reason: form.querySelector(
@@ -161,7 +162,7 @@ function confirmFlag () {
}
function unflagProject() {
- cloud.delete('/project/<%= project.id %>/flag')
+ cloud.delete('/api/v1/project/<%= project.id %>/flag')
}
function markAsRemix () {
@@ -170,7 +171,7 @@ function markAsRemix () {
input => {
var url = new URL(input);
cloud.post(
- '/project/<%= project.id %>/mark_as_remix',
+ '/api/v1/project/<%= project.id %>/mark_as_remix',
null,
{
username: '<%= project.username %>',
diff --git a/views/partials/project_details.etlua b/views/partials/project_details.etlua
index a3e8e5d1..db0f3cfb 100644
--- a/views/partials/project_details.etlua
+++ b/views/partials/project_details.etlua
@@ -42,34 +42,34 @@
<% elseif project.ispublic then %>
<% else %>
<% end %>
<% end %>
diff --git a/views/static/resources.lua b/views/static/resources.lua
index 17fe4620..f515b624 100644
--- a/views/static/resources.lua
+++ b/views/static/resources.lua
@@ -40,7 +40,7 @@ local materials = {
{
title = "Get Coding with Snap!",
author = 'openSAP',
- url = "https://open.sap.com/courses/snap1",
+ url = "https://learning.sap.com/learning-journeys/get-coding-with-snap-building-up-to-ai",
language = {"English"},
type = "course",
level = 'beginner',
@@ -51,7 +51,7 @@ local materials = {
{
title = "From Media Computation to Data Science",
author = 'openSAP',
- url = "https://open.sap.com/courses/snap2",
+ url = "https://learning.sap.com/courses/from-media-computation-to-data-science",
language = {"English"},
type = "course",
level = nil,
@@ -70,7 +70,7 @@ local materials = {
description = nil,
image = nil
},
- {
+ {
title = "BJC Sparks",
author = 'UC Berkeley and EDC',
url = "https://bjc.berkeley.edu/sparks/",