// ==UserScript==
// @name          AutoLink
// @namespace     http://www.squarefree.com/userscripts
// @description   Turns plain text URLs, email addresses, bug numbers, ISBNs, and US phone numbers into links. You can add new filters if you know how to use JavaScript regular expressions.
// @include http://*
// @include https://*
// @include file://*
// ==/UserScript==

/*

  Included filters:

    * Plain text link
    * Email address
    * Bug number (links to bugzilla.mozilla.org)
    * Bug number with comment number
    * ISBN (links to Amazon)
    * US Phone number (creates a callto: link for Skype)
  
  Features:

    * You can add new filters if you know how to use JavaScript regular expressions.
    * Works even on pages with dynamic content, such as Gmail.
    * Avoids slowing down Firefox by calling setTimeout after working for a while.
    
  Author: Jesse Ruderman - http://www.squarefree.com/
  Contributors: David James - http://www.cs.toronto.edu/~james
  
  Test page: http://www.squarefree.com/userscripts/test-pages/autolink-test.html

  License: MPL, GPL, LGPL.

  Version history:
    
    2005-05-23 17:42: Correctly handle URLs which have HTML tags in the middle of them.
                      E.g. http://www.<b>google</b>.com
                      From David James - http://www.cs.toronto.edu/~james

    2005-05-22 05:30: Make skipping work correctly.
    
    2005-05-22 05:00: Use fewer deprecated features of regular expressions.  See
                        http://developer-test.mozilla.org/docs/Core_JavaScript_1.5_Reference:Objects:RegExp and
                        http://developer-test.mozilla.org/docs/Core_JavaScript_1.5_Reference:Deprecated_Features
                        
    2005-05-22 01:00: Initial release. See http://www.squarefree.com/2005/05/22/autolink/.

*/


// Timestamp taken when the script starts running. In this file it is only
// referenced by the commented-out timing alert at the end of createAutoLinks.
const timeBefore = new Date();

/***********************************
 *             Filters             *
 ***********************************/

/*

  I encourage you to create new filters in your copy of AutoLink. 

  Filters have three fields:

   * name (string)
       Used for tooltip on created links, e.g. "Link added by AutoLink filter: Plain text link".
       Used for class attribute of created links, e.g. "autolink autolink-plain-text-link".

   * regexp (regular expression)
       The entire text matching the regular expression will be linked.
       Must be global (/g).
       May be case-insensitive (/i).

   * href (function)
       Arguments: |match|, an output of regexp.exec.  (May also treat RegExp.leftContext, etc. as inputs.)
       Returns: The URL to be used for a link, or |null| to cancel link creation.
       Must not use filter.regexp, but may use other regular expressions.
    
  This regular expression reference might be useful:
  http://developer-test.mozilla.org/docs/Core_JavaScript_1.5_Reference:Objects:RegExp
  
  If multiple filters match a string, the first filter will win.

*/


// The filter list. Each entry has:
//   name   - used in the tooltip and (after fixFilters) in the class name of created links
//   regexp - global regular expression; the whole match becomes the link text
//   href   - function(match) returning the link URL, or null to cancel the link
// Order matters: when several filters match the same text, the earlier one wins.
const filters = [
  {
    // http:// or https:// followed by non-space characters, not ending in
    // punctuation that usually belongs to the surrounding sentence.
    name: "Plain text link",
    regexp: /https?\:\/\/[^"\s\<\>]*[^.,;'">\:\s\<\>\)\]\!]/g,
    href: function(match) { return match[0]; }
  },
  {
    name: "Email address",
    regexp: /[a-z0-9_\-+=.]+@[a-z0-9\-]+(\.[a-z0-9-]+)+/ig,
    href: function(match) { return "mailto:" + match[0]; }
  },
  {
    // Listed before "Bug number" so that the longer form is the one that gets linked.
    name: "Bug number with comment number",
    regexp: /bug \#?(\d+) comment \#?(\d+)/ig,
    href: function(match) { return "https://bugzilla.mozilla.org/show_bug.cgi?id=" + match[1] + "#c" + match[2]; }
  },
  {
    name: "Bug number",
    regexp: /bug \#?(\d+)/ig,
    href: function(match) { return "https://bugzilla.mozilla.org/show_bug.cgi?id=" + match[1]; }
  },
  {
    // match[2] is the ISBN itself, possibly with hyphens. The regexp is
    // case-insensitive, so the check character may arrive as a lowercase "x";
    // it is upper-cased first because alphanumeric() only keeps A-Z and 0-9.
    name: "ISBN --> Amazon",
    regexp: /ISBN( number)?:? \#?((\d(-)?){9}[\dx])\b/ig,
    href: function(match) { return "http://www.amazon.com/exec/obidos/ASIN/" + alphanumeric(match[2].toUpperCase()); }
  },
  {
    // (555) 123-4567, 555-123-4567 or 555 123 4567.
    name: "US phone number --> Skype",
    regexp: /((\(\d{3}\)[\s-]?)|(\b\d{3}(\s|-)))\d{3}[\s-]\d{4}\b/g,
    href: function(match) { return "callto:+1 " + digits(match[0]); }
  }
];


/***********************************
 *  Helper functions for filters   *
 ***********************************/


// Returns |s| with every character other than 0-9 removed.
function digits(s)
{
  var nonDigit = /[^0-9]/g;
  return s.replace(nonDigit, "");
}

// Returns |s| with every character other than A-Z (uppercase only) and 0-9 removed.
function alphanumeric(s)
{
  var unwanted = /[^A-Z0-9]/g;
  return s.replace(unwanted, "");
}

/***********************************
 *           Link styling          *
 ***********************************/
    
/*

  You can make links generated by AutoLink look different from normal links
  by editing styleLink below and/or by setting up user style sheet rules.
  
  Example: on squarefree.com, make autolinked plain text links orange. (Firefox trunk only.)
  
    @-moz-document domain(squarefree.com) { 
      .autolink-plain-text-link { color: orange ! important; }
    }
      
*/

// Applies the inline look of an AutoLink-generated link.
//   a      - the freshly created <a> element
//   filter - the filter that produced the link (currently unused, available for per-filter styling)
function styleLink(a, filter)
{
  var linkStyle = a.style;
  linkStyle.borderBottom = "1px solid orange";
}


/***********************************
 *           Fix filters           *
 ***********************************/

// Prepares every entry of |filters| for use:
//  * derives classNamePart from the filter name (lowercased, with each run of
//    non-alphanumeric characters collapsed into a single hyphen);
//  * warns through alert() when a filter's regexp lacks the global flag.
// Iteration stops at the first falsy entry of |filters|.
function fixFilters()
{
  var index = 0;
  var filter = filters[index];

  while (filter) {
    var lowered = filter.name.toLowerCase();
    filter.classNamePart = lowered.replace(/[^0-9a-z]+/ig, "-");

    var isGlobal = filter.regexp.global;
    if (!isGlobal) {
      alert("AutoLink filter " + filter.name + " is not global! This will break stuff!");
    }

    index += 1;
    filter = filters[index];
  }
}
// Run once while the script loads, so every filter already carries the
// classNamePart that runFiltersOnTextNodes puts into the class of created links.
fixFilters();


/***********************************
 *      When and where to run      *
 ***********************************/

// True only while this script itself is inserting a link into the page
// (set and cleared in runFiltersOnTextNodes). nodeInserted checks it so that
// the script's own DOM changes do not trigger another scan.
var moddingDOM = false;

// Starts AutoLink for the current document:
//  * listens for nodes inserted later, so dynamically added content is scanned too;
//  * schedules the first scan of document.body 50ms from now.
function initAutoLink()
{
  document.addEventListener("DOMNodeInserted", nodeInserted, false);
  window.setTimeout(createAutoLinks, 50, document.body);
}

// Normally wait for the page to finish loading. If the script is injected
// after the "load" event has already fired, that event will not fire again,
// so start right away instead of registering a listener that never runs.
if (document.readyState == "complete")
  initAutoLink();
else
  window.addEventListener("load", initAutoLink, false);

// DOMNodeInserted handler: scans newly inserted content for linkable text.
// This is what makes the script work on dynamic pages such as Gmail.
// Cost: about a 20% performance penalty on a plain text file with a link on
// almost every line; a tiny penalty on pages with few automatically added links.
function nodeInserted(e)
{
  // Insertions made by this script must not trigger another scan:
  //  - the regular expression objects must not get confused, and
  //  - the script wants to control when it recurses.
  if (moddingDOM)
    return;

  createAutoLinks(e.target);
}



/***********************************
 *          DOM traversal          *
 ***********************************/


/*

  This script uses manual DOM traversal, in an iterative way without a stack!

  Advantages of snapshot XPath:
    * Much less code
    * 20-40% faster
    * May be possible to get another speed boost by including the regexp in the XPath expression - http://www.developer.com/xml/article.php/10929_3344421_3
    * All the cool people are using it
  
  Advantages of manual DOM traversal:
    * Lets us stop+continue (snapshot xpath doesn't let us)
    * Lets us modify DOM in strange ways without worrying.
    * Easier to control which elements we recurse into.

*/


// Lookup table (lowercase tag name -> true) of elements whose children are
// never scanned for linkable text.
const skippedElements = {
  // Existing links are left alone; this also stops the script from
  // recursing into the links it created itself.
  a: true,
  // noscript has uninterpreted, unshown text children; not worth the time.
  noscript: true,
  head: true,
  script: true,
  style: true,
  textarea: true,
  label: true,
  select: true,
  button: true
};

// Lookup table (lowercase tag name -> true) of formatting elements that may
// sit in the middle of a link, e.g. http://www.<b>google</b>.com.
// Meeting any other element ends the run of text that is matched as one unit.
const allowedInterLinkElements = {
  b: true,
  strong: true,
  em: true,
  i: true,
  u: true,
  span: true,
  font: true,
  big: true,
  small: true
};

// True when the current page is served from gmail.google.com; skipChildren
// uses it to leave Gmail's autocomplete box alone.
const gmail = location.host === "gmail.google.com";

// Work around bug #135928 in the built-in 'surroundContents' by replacing it.
// (The bug tracker is not named here; presumably bugzilla.mozilla.org - TODO confirm.)
// The replacement moves the range's contents into |newParent| and then
// inserts |newParent| into the document through the range.
// NOTE(review): this assigns to Range.prototype, so it affects every Range
// that shares this prototype, not only the ranges created by this script.
Range.prototype.surroundContents = function(newParent) {
  // extractContents() takes the selected content out of the document; it becomes newParent's content.
  newParent.appendChild(this.extractContents());
  // Put the now-filled wrapper back into the document through the range.
  this.insertNode(newParent);
}

// Tells the traversal whether the children of |node| must be left alone.
// Returns true for elements listed in skippedElements and, on Gmail, for the
// autocomplete box (<div class="ac">); false for everything else, including
// nodes without a tagName such as text nodes.
function skipChildren(node)
{
  var tag = node.tagName;

  // Only elements have a tagName; nothing to skip otherwise.
  if (!tag)
    return false;

  if (skippedElements[tag.toLowerCase()])
    return true;

  // gmail autocomplete
  if (gmail && tag == "DIV" && node.className == "ac")
    return true;

  return false;
}


// Scans the DOM starting at |traversalRoot| and turns matching text into links.
//
// Text nodes are collected, in document order, into nodeAccumulator (the
// nodes) and textAccumulator (their concatenated text). Formatting elements
// listed in allowedInterLinkElements do not interrupt a run; when any other
// element is reached, the run collected so far is handed to
// runFiltersOnTextNodes and the accumulators are cleared.
//
// The walk is iterative (no recursion, no stack) and is cut into chunks of at
// most 100 steps; the next chunk is scheduled with setTimeout so the browser
// stays responsive.
//
// Does nothing when |traversalRoot| is null/undefined, or when it is, or lies
// inside, an element whose children must be skipped (see skipChildren).
//
// Note: when |traversalRoot| has no children (for example a text node), the
// walk moves on to its following siblings and then up through its ancestors.
function createAutoLinks(traversalRoot)
{
  var m;
  var textAccumulator = "";
  var nodeAccumulator = [];

  // Nothing to scan. initAutoLink passes document.body, which can be null;
  // without this guard normalize() below would throw.
  if (traversalRoot == null)
    return;

  // Ensure we're not already in a forbidden element.
  for (m = traversalRoot; m != undefined; m = m.parentNode) {
    if (skipChildren(m)) {
      return;
    }
  }

  // work around bug, or in case previous user scripts did crazy stuff
  traversalRoot.normalize();

  // Performs one chunk of the walk.
  //   n               - node to process next
  //   didChildren     - true when the subtree below |n| has already been visited
  //                     (|n| was reached by moving up from its last child)
  //   nodeAccumulator - text nodes of the current run
  //   textAccumulator - concatenated text of those nodes
  function cont(n, didChildren, nodeAccumulator, textAccumulator)
  {
    var k = 0; // split work into chunks so Firefox doesn't freeze
    var q;

    while (n && k < 100)
    {
      ++k;

      // Add this node to our text and node accumulators
      if (n.nodeType == 3) {
        if (!didChildren) {
          textAccumulator = textAccumulator + n.data;
          nodeAccumulator[nodeAccumulator.length] = n;
        }
      } else if (nodeAccumulator.length && n.tagName && !allowedInterLinkElements[n.tagName.toLowerCase()]) {
        // If we run into a type of node that can't occur in the middle of a link,
        // run our filters on our text so far and clear the accumulators
        q = runFiltersOnTextNodes(nodeAccumulator, textAccumulator);
        nodeAccumulator = [];
        textAccumulator = "";

        // Links were created: restart the walk at the node the filters
        // handed back, so the text around the new links is looked at again.
        if (q) {
            n = q;
            didChildren = false;
            continue;
        }
      }

      // Traverse to the "next" node in depth-first order
      if (didChildren && n == traversalRoot)
        break;
      else if (!didChildren && n.firstChild && !skipChildren(n)) {
        n = n.firstChild;
        // didChildren is already false and should stay false
      } else {
        if (n.nextSibling) {
          n = n.nextSibling;
          didChildren = false;
        }
        else {
          n = n.parentNode;
          didChildren = true;
        }
      }
    } // end while

    if (n && n != traversalRoot) {
      // Continue after 10ms.
      window.setTimeout(cont, 10, n, didChildren, nodeAccumulator, textAccumulator);
    }
    // Otherwise: done. The accumulators are parameters of this call and need
    // no cleanup. ("delete" on them did nothing, and is a SyntaxError in
    // strict-mode code.)
    // alert("AutoLink time: " + (new Date() - timeBefore))

  } // end function cont

  cont(traversalRoot, false, nodeAccumulator, textAccumulator);
}


/***********************************
 *         Running filters         *
 ***********************************/

// runFiltersOnTextNodes(nodeAccumulator, textAccumulator)
//
// Runs every filter over one run of text and wraps each match in a link.
//
//   nodeAccumulator - array of text nodes, in document order. It is modified
//                     in place: nodes that have been dealt with are replaced
//                     by the text node following the newest link.
//   textAccumulator - the concatenated .data of those nodes.
//
// Return: node at which to continue traversal (the node just before the first
// link created by this call), or null to mean no changes were made.
//
// A match that spans several text nodes gets one <a> per text node, all with
// the same href. While a link is being inserted the global moddingDOM flag is
// true, so nodeInserted ignores the insertion.

function runFiltersOnTextNodes(nodeAccumulator, textAccumulator)
{
  var j, regexp, match, k, filter, href; // things
  var a, range; // nodes
  var accumLength = 0;
  var firstUnused = null;

  // runFiltersOnTextNodes has its own do-too-much-at-once avoider thingie:
  // k counts the links created by this call (across all filters) and is capped at 40.
  // assumption: if there is one text node with a lot of matches,
  // it's more important to finish quickly than be transparent.
  // (e.g. plain text file FULL of links)
  k=0;

  for (j = 0; filter = filters[j]; ++j) {
    regexp = filter.regexp;

    // The regexps are global and shared between calls; always start from the beginning.
    regexp.lastIndex = 0;
    if (regexp.test(textAccumulator)) {
      regexp.lastIndex = 0;

      // Optimization from the linkify that came with Greasemonkey(?):
      // instead of splitting a text node multiple times, take advantage
      // of global regexps and substring.

      for (match = null; k < 40 && (match = regexp.exec(textAccumulator)); ) {

        // this should happen first, so RegExp.foo is still good :)
        href = genLink(filter, match);

        // Skip cancelled links (null) and links that point at the current page.
        if (href != null && href != location.href) {
          ++k;

          // Offset of the match relative to the start of the current node;
          // it goes negative once the match began in an earlier node.
          var firstMatchIndex = match.index;
          for (var i = 0; i < nodeAccumulator.length && firstMatchIndex + match[0].length > 0; i++) {
            var oldNode = nodeAccumulator[i];
            var len = oldNode.data.length;

            if (firstMatchIndex < len) {

              // Set up range object covering the part of the match inside this node
              range = document.createRange();
              range.setStart(oldNode, Math.max(0, firstMatchIndex));
              range.setEnd(oldNode, Math.min(firstMatchIndex + match[0].length, len));

              // Create link
              a = document.createElement("a");
              a.target = "_top";
              a.href = href;
              a.title = "Link added by AutoLink filter: " + filter.name;
              a.className = "autolink autolink-" + filter.classNamePart;
              styleLink(a, filter);

              // Surround this text node with a link.
              // The flag is reset in |finally| so that a failed insertion
              // cannot leave nodeInserted disabled for good.
              moddingDOM = true;
              try {
                range.surroundContents(a);
              } finally {
                moddingDOM = false;
              }

              // Free memory used by the Range object
              range.detach();

              // Recurse on any unused portions
              if (firstUnused == null) {
                firstUnused = a.previousSibling;
              }
            }
            firstMatchIndex = firstMatchIndex - len;
          }

          // Start after the last match
          nodeAccumulator.splice(0, i, a.nextSibling);
          regexp.lastIndex = 0;

          // Recalculate the contents of our text accumulator
          textAccumulator = "";
          for (i = 0; i < nodeAccumulator.length; i++) {
            textAccumulator = textAccumulator + nodeAccumulator[i].data;
          }
        }
      }
    }
  }
  return firstUnused;
}

// Computes the URL for one match by calling the filter's href function.
//   filter - the filter whose regexp produced |match|
//   match  - result of regexp.exec
// Returns whatever filter.href returns. If filter.href throws, a data: URL
// describing the filter and the error is returned instead, so the problem
// shows up in the generated link rather than stopping the scan.
function genLink(filter, match)
{
  var url;

  try {
    url = filter.href(match);
  }
  catch (failure) {
    var prefix = "data:text/plain,Error running AutoLink function for filter: ";
    url = prefix + encodeURIComponent(filter.name) + "%0A%0A" + encodeURIComponent(failure);
  }

  return url;
}

