JavaScript - Parse Amazon S3 URLs

A function that parses a variety of Amazon S3 URLs, including both “virtual-hosted-style” and “path-style” formats.

Using the function.

// Parse a virtual-hosted-style S3 URL and print the parsed parts.
const url = "https://my-bucket.s3.us-west-2.amazonaws.com/puppy.png";
const s3 = parseS3Url(url);
// Indent with two spaces so the printed JSON matches the sample output shown below;
// without the indent argument JSON.stringify emits everything on a single line.
console.log(JSON.stringify(s3, null, 2));

Output.

{
  "bucketName": "my-bucket",
  "bucketRegion": "us-west-2",
  "objectKey": "puppy.png"
}
  • Returns undefined if the URL cannot be parsed as an S3 URL.
  • If the URL does not include the object key, returns an empty string ("") for objectKey.
  • For S3 URL formats that do not include the bucket region, returns an empty string ("") for bucketRegion.

Implementation.

/**
 * Parse an Amazon S3 URL (virtual-hosted-style or path-style) into its parts.
 *
 * Any query string or fragment (for example the signature parameters of a
 * presigned URL, or "?versionId=...") is ignored and is not part of objectKey.
 * The object key is returned exactly as it appears in the URL; it is not
 * percent-decoded.
 *
 * @param {string} url - The S3 URL to parse.
 * @returns {{bucketName: string, bucketRegion: string, objectKey: string}|undefined}
 *   The parsed parts, or undefined if url is not a string or does not match
 *   any supported S3 URL format. bucketRegion is "" for formats that do not
 *   carry the region, and objectKey is "" when the URL has no key.
 */
const parseS3Url = function(url) {
  // The contract is "undefined on failure"; without this guard a non-string
  // argument (undefined, null, a number, ...) would throw a TypeError instead.
  if (typeof url !== "string") {
    return undefined;
  }

  // Drop the query string and fragment: they are not part of the object key,
  // but the key character classes below would otherwise swallow "?" and "#".
  const queryOrFragmentStart = url.search(/[?#]/);
  const target = queryOrFragmentStart === -1 ? url : url.slice(0, queryOrFragmentStart);

  // Marker used in captureGroupIndexes for a part the URL format does not contain.
  const notInUrl = -1;

  // Each entry pairs a regex with the zero-based index of the capture group
  // holding each part. Order matters: the first matching pattern wins.
  const s3UrlPatterns = [

    // Virtual hosted-style

    // https://my-bucket.s3.us-west-2.amazonaws.com/puppy.png
    {regex: /^https?:\/\/([a-z0-9.\-]+)\.s3\.([a-z0-9\-]+)\.amazonaws\.com(\.cn)?(\/([^${}]*))?$/, captureGroupIndexes: {bucketName: 0, bucketRegion: 1, objectKey: 4}},
    // https://jbarr-public.s3.amazonaws.com/images/ritchie_and_thompson_pdp11.jpeg
    {regex: /^https?:\/\/([a-z0-9.\-]+)\.s3\.amazonaws\.com(\.cn)?(\/([^${}]*))?$/, captureGroupIndexes: {bucketName: 0, bucketRegion: notInUrl, objectKey: 3}},
    // https://awsmp-fulfillment-cf-templates-prod.s3-external-1.amazonaws.com/key
    {regex: /^https?:\/\/([a-z0-9.\-]+)\.s3-[a-z0-9.\-]+\.amazonaws\.com(\.cn)?(\/([^${}]*))?$/, captureGroupIndexes: {bucketName: 0, bucketRegion: notInUrl, objectKey: 3}},

    // Path styles

    // https://s3.us-west-2.amazonaws.com/mybucket/puppy.jpg
    {regex: /^https?:\/\/s3\.([a-z0-9\-]+)\.amazonaws\.com(\.cn)?(\/([^${}\/]*)(\/([^${}]*))?)?$/, captureGroupIndexes: {bucketName: 3, bucketRegion: 0, objectKey: 5}},
    // https://s3.amazonaws.com/jsb-public/classic_amazon_door_desk.png
    {regex: /^https?:\/\/s3\.amazonaws\.com(\.cn)?(\/([^${}\/]*)(\/([^${}]*))?)?$/, captureGroupIndexes: {bucketName: 2, bucketRegion: notInUrl, objectKey: 4}},
    // https://s3-us-east-2.amazonaws.com/jsb-public/classic_amazon_door_desk.png
    {regex: /^https?:\/\/s3-([a-z0-9\-]+)\.amazonaws\.com(\.cn)?(\/([^${}\/]*)(\/([^${}]*))?)?$/, captureGroupIndexes: {bucketName: 3, bucketRegion: 0, objectKey: 5}}
  ];

  // A match result holds the whole matched string at index 0, followed by one
  // entry per capture group, so group i lives at index i + offset.
  const offset = 1;

  for (const {regex, captureGroupIndexes} of s3UrlPatterns) {
    const matched = target.match(regex);
    if (!matched) {
      continue;
    }
    const result = {};
    for (const [key, groupIndex] of Object.entries(captureGroupIndexes)) {
      // "" when the format has no such part, or when the group did not take
      // part in the match (e.g. it is nested in an optional group that was absent).
      result[key] = groupIndex === notInUrl ? "" : matched[offset + groupIndex] || "";
    }
    return result;
  }
  return undefined;
};