/ tools / WebFetchTool / preapproved.ts
preapproved.ts
  1  // For legal and security concerns, we typically only allow Web Fetch to access
  2  // domains that the user has provided in some form. However, we make an
  3  // exception for a list of preapproved domains that are code-related.
  4  //
  5  // SECURITY WARNING: These preapproved domains are ONLY for WebFetch (GET requests only).
  6  // The sandbox system deliberately does NOT inherit this list for network restrictions,
  7  // as arbitrary network access (POST, uploads, etc.) to these domains could enable
  8  // data exfiltration. Some domains like huggingface.co, kaggle.com, and nuget.org
  9  // allow file uploads and would be dangerous for unrestricted network access.
 10  //
 11  // See test/utils/sandbox/webfetch-preapproved-separation.test.ts for verification
 12  // that sandbox network restrictions require explicit user permission rules.
 13  
 14  export const PREAPPROVED_HOSTS = new Set([
 15    // Anthropic
 16    'platform.claude.com',
 17    'code.claude.com',
 18    'modelcontextprotocol.io',
 19    'github.com/anthropics',
 20    'agentskills.io',
 21  
 22    // Top Programming Languages
 23    'docs.python.org', // Python
 24    'en.cppreference.com', // C/C++ reference
 25    'docs.oracle.com', // Java
 26    'learn.microsoft.com', // C#/.NET
 27    'developer.mozilla.org', // JavaScript/Web APIs (MDN)
 28    'go.dev', // Go
 29    'pkg.go.dev', // Go docs
 30    'www.php.net', // PHP
 31    'docs.swift.org', // Swift
 32    'kotlinlang.org', // Kotlin
 33    'ruby-doc.org', // Ruby
 34    'doc.rust-lang.org', // Rust
 35    'www.typescriptlang.org', // TypeScript
 36  
 37    // Web & JavaScript Frameworks/Libraries
 38    'react.dev', // React
 39    'angular.io', // Angular
 40    'vuejs.org', // Vue.js
 41    'nextjs.org', // Next.js
 42    'expressjs.com', // Express.js
 43    'nodejs.org', // Node.js
 44    'bun.sh', // Bun
 45    'jquery.com', // jQuery
 46    'getbootstrap.com', // Bootstrap
 47    'tailwindcss.com', // Tailwind CSS
 48    'd3js.org', // D3.js
 49    'threejs.org', // Three.js
 50    'redux.js.org', // Redux
 51    'webpack.js.org', // Webpack
 52    'jestjs.io', // Jest
 53    'reactrouter.com', // React Router
 54  
 55    // Python Frameworks & Libraries
 56    'docs.djangoproject.com', // Django
 57    'flask.palletsprojects.com', // Flask
 58    'fastapi.tiangolo.com', // FastAPI
 59    'pandas.pydata.org', // Pandas
 60    'numpy.org', // NumPy
 61    'www.tensorflow.org', // TensorFlow
 62    'pytorch.org', // PyTorch
 63    'scikit-learn.org', // Scikit-learn
 64    'matplotlib.org', // Matplotlib
 65    'requests.readthedocs.io', // Requests
 66    'jupyter.org', // Jupyter
 67  
 68    // PHP Frameworks
 69    'laravel.com', // Laravel
 70    'symfony.com', // Symfony
 71    'wordpress.org', // WordPress
 72  
 73    // Java Frameworks & Libraries
 74    'docs.spring.io', // Spring
 75    'hibernate.org', // Hibernate
 76    'tomcat.apache.org', // Tomcat
 77    'gradle.org', // Gradle
 78    'maven.apache.org', // Maven
 79  
 80    // .NET & C# Frameworks
 81    'asp.net', // ASP.NET
 82    'dotnet.microsoft.com', // .NET
 83    'nuget.org', // NuGet
 84    'blazor.net', // Blazor
 85  
 86    // Mobile Development
 87    'reactnative.dev', // React Native
 88    'docs.flutter.dev', // Flutter
 89    'developer.apple.com', // iOS/macOS
 90    'developer.android.com', // Android
 91  
 92    // Data Science & Machine Learning
 93    'keras.io', // Keras
 94    'spark.apache.org', // Apache Spark
 95    'huggingface.co', // Hugging Face
 96    'www.kaggle.com', // Kaggle
 97  
 98    // Databases
 99    'www.mongodb.com', // MongoDB
100    'redis.io', // Redis
101    'www.postgresql.org', // PostgreSQL
102    'dev.mysql.com', // MySQL
103    'www.sqlite.org', // SQLite
104    'graphql.org', // GraphQL
105    'prisma.io', // Prisma
106  
107    // Cloud & DevOps
108    'docs.aws.amazon.com', // AWS
109    'cloud.google.com', // Google Cloud
110    'learn.microsoft.com', // Azure
111    'kubernetes.io', // Kubernetes
112    'www.docker.com', // Docker
113    'www.terraform.io', // Terraform
114    'www.ansible.com', // Ansible
115    'vercel.com/docs', // Vercel
116    'docs.netlify.com', // Netlify
117    'devcenter.heroku.com', // Heroku
118  
119    // Testing & Monitoring
120    'cypress.io', // Cypress
121    'selenium.dev', // Selenium
122  
123    // Game Development
124    'docs.unity.com', // Unity
125    'docs.unrealengine.com', // Unreal Engine
126  
127    // Other Essential Tools
128    'git-scm.com', // Git
129    'nginx.org', // Nginx
130    'httpd.apache.org', // Apache HTTP Server
131  ])
132  
// Partition PREAPPROVED_HOSTS a single time, when the module is first loaded:
//   - entries without a "/" go into HOSTNAME_ONLY, so the usual lookup is a
//     plain Set.has() on the hostname;
//   - entries with a "/" (e.g., "github.com/anthropics") are split at the first
//     slash and grouped in PATH_PREFIXES as host -> list of path prefixes, each
//     prefix keeping its leading "/".
const { HOSTNAME_ONLY, PATH_PREFIXES } = (() => {
  const bareHosts = new Set<string>()
  const scopedByHost = new Map<string, string[]>()

  for (const entry of PREAPPROVED_HOSTS) {
    const firstSlash = entry.indexOf('/')

    // No slash at all: the entry is a plain hostname.
    if (firstSlash < 0) {
      bareHosts.add(entry)
      continue
    }

    // Everything before the first slash is the host; the remainder, slash
    // included, is the path prefix.
    const hostPart = entry.substring(0, firstSlash)
    const pathPart = entry.substring(firstSlash)

    const bucket = scopedByHost.get(hostPart)
    if (bucket === undefined) {
      scopedByHost.set(hostPart, [pathPart])
    } else {
      bucket.push(pathPart)
    }
  }

  return { HOSTNAME_ONLY: bareHosts, PATH_PREFIXES: scopedByHost }
})()
153  
/**
 * Reports whether a URL's host (and, for path-scoped entries, its path) is on
 * the preapproved list.
 *
 * @param hostname - Host to look up; compared by exact string match.
 * @param pathname - Path of the URL, including its leading "/". Only consulted
 *   when the host has path-scoped entries.
 * @returns `true` if the host is approved outright, or if `pathname` equals one
 *   of the host's approved prefixes or lies beneath it; `false` otherwise.
 */
export function isPreapprovedHost(hostname: string, pathname: string): boolean {
  // Fast path: the whole host is approved, whatever the path.
  if (HOSTNAME_ONLY.has(hostname)) {
    return true
  }

  const scopedPrefixes = PATH_PREFIXES.get(hostname)
  if (scopedPrefixes === undefined) {
    return false
  }

  // Match on whole path segments only: the prefix "/anthropics" covers
  // "/anthropics" and "/anthropics/...", but never "/anthropics-evil/malware".
  return scopedPrefixes.some(
    prefix => pathname === prefix || pathname.startsWith(prefix + '/'),
  )
}