• atwiki
  • TEST
  • JavaScript for HTML Browsers: HTML Outlines and Microdata

TEST

JavaScript for HTML Browsers: HTML Outlines and Microdata

最終更新:

eriax

- view
管理者のみ編集可

制限

  • セクション構造からのアウトライン抽出は HTML: 4.4.11.1 に準拠。効率は考慮外。
  • Microdata アイテム抽出は HTML: 5.2.5, 5.5.1 に準拠。ただし絶対 URI のチェックはしない。
  • どちらも抽出するだけ。

使用例

アウトライン

var hapi = HTML_Outlines_and_Microdata_for_HTML_Browsers;

// 文字列として確認
alert(hapi.createOutlineString(document));

// ul 要素を生成
var toc = hapi.createOutlineList(document);

Microdata

var hapi = HTML_Outlines_and_Microdata_for_HTML_Browsers;

// 文書内のアイテムを JSON で抽出
var items = hapi.findAllItems(document);
var jsonData = JSON.stringify(items);

ソースコード

// Fallback for engines that do not provide Array.prototype.map.
if ('undefined' === typeof Array.prototype.map) {
  // Builds a new array holding callbackfn(value, index, array) for every
  // index that exists in the receiver; holes in the receiver stay holes.
  // The optional second argument is used as `this` inside callbackfn.
  // Throws TypeError when callbackfn is not callable, as the native method does.
  Array.prototype.map = function (callbackfn) {
    var thisArg = arguments[1];
    var result;
    var count;
    var i;
    
    if ('function' !== typeof callbackfn) {
      throw new TypeError('Array.prototype.map: callback must be a function');
    }
    // Coerce to an unsigned 32-bit integer so array-likes without a usable
    // length are treated as empty.
    count = this.length >>> 0;
    result = new Array(count);
    
    for (i = 0; i < count; i++) {
      if (i in this) {
        result[i] = callbackfn.call(thisArg, this[i], i, this);
      }
    }
    return result;
  };
}

// Fallback for engines that do not provide Array.prototype.indexOf.
if ('undefined' === typeof Array.prototype.indexOf) {
  // Math.ceil / Math.floor are captured once so the method keeps working
  // even if those globals are replaced later.
  Array.prototype.indexOf = function (ceil, floor) {
    // Returns the first index >= fromIndex whose value is strictly equal to
    // searchElement, or -1. A negative fromIndex counts back from the end.
    return function (searchElement) {
      var fromIndex = arguments[1];
      var count = this.length >>> 0;
      var i = Number(fromIndex) || 0;
      // Truncate toward zero.
      i = (i < 0) ? ceil(i) : floor(i);
      
      if (i < 0) {
        i += count;
        // An offset further back than the array length starts at index 0
        // instead of walking (and possibly matching) negative keys.
        if (i < 0) {
          i = 0;
        }
      }
      for (; i < count; i++) {
        if (i in this) {
          if (this[i] === searchElement) {
            return i;
          }
        }
      }
      return -1;
    };
  }(Math.ceil, Math.floor);
}

// Fallback for engines that do not provide Array.prototype.filter.
if ('undefined' === typeof Array.prototype.filter) {
  // Returns a new dense array of the values for which
  // callbackfn(value, index, array) is truthy. Holes are skipped.
  // The optional second argument is used as `this` inside callbackfn.
  // Throws TypeError when callbackfn is not callable, as the native method does.
  Array.prototype.filter = function (callbackfn) {
    var thisArg = arguments[1];
    var result;
    var count;
    var i;
    var v;
    
    if ('function' !== typeof callbackfn) {
      throw new TypeError('Array.prototype.filter: callback must be a function');
    }
    count = this.length >>> 0;
    result = [];
    
    for (i = 0; i < count; i++) {
      if (i in this) {
        // Read the value before the call so a mutating callback cannot
        // change what gets stored.
        v = this[i];
        if (callbackfn.call(thisArg, v, i, this)) {
          result[result.length] = v;
        }
      }
    }
    return result;
  };
}

////////////////////////////////////////////////////////////////////////

var HTML_Outlines_and_Microdata_for_HTML_Browsers = new function () { /*@cc_on@*/
  
  //////////////////////
  // Outlines
  //
  // Tells whether `node` is an element named section, nav, article or aside
  // (case-insensitive). A falsy `node` is handed back unchanged; any
  // non-element node yields false.
  var isSectioningContent = (function (pattern) {
    return function (node) {
      if (!node) {
        return node;
      }
      if (node.nodeType !== 1) {
        return false;
      }
      return pattern.test(node.tagName);
    };
  })(/^(?:section|nav|article|aside)$/i);
  
  // Tells whether `node` is an element named body, blockquote, details,
  // fieldset, figure or td (case-insensitive). A falsy `node` is handed back
  // unchanged; any non-element node yields false.
  var isSectioningRoot = (function (pattern) {
    return function (node) {
      if (!node) {
        return node;
      }
      if (node.nodeType !== 1) {
        return false;
      }
      return pattern.test(node.tagName);
    };
  })(/^(?:body|blockquote|details|fieldset|figure|td)$/i);
  
  // Tells whether `node` is an h1-h6 or hgroup element (case-insensitive).
  // A falsy `node` is handed back unchanged; any non-element node yields false.
  var isHeadingContent = (function (pattern) {
    return function (node) {
      if (!node) {
        return node;
      }
      if (node.nodeType !== 1) {
        return false;
      }
      return pattern.test(node.tagName);
    };
  })(/^(?:h[1-6]|hgroup)$/i);
  
  // Tells whether `node` is an hgroup element (case-insensitive).
  // A falsy `node` is handed back unchanged; any non-element node yields false.
  var isHeadingGroupContent = (function (pattern) {
    return function (node) {
      if (!node) {
        return node;
      }
      if (node.nodeType !== 1) {
        return false;
      }
      return pattern.test(node.tagName);
    };
  })(/^hgroup$/i);
  
  // Numeric level of a heading element: an hgroup is delegated to
  // getHeadingGroupLevel, anything else to getSingleHeadingLevel.
  var getLevel = function (elt) {
    return isHeadingGroupContent(elt)
      ? getHeadingGroupLevel(elt)
      : getSingleHeadingLevel(elt);
  };
  
  // Returns the numeric level encoded in the last character of the element's
  // tag name (e.g. 2 for "H2"). Yields NaN when that character is not a digit.
  var getSingleHeadingLevel = function (elt) {
    // Explicit radix: never rely on the engine's default number base.
    return parseInt(elt.tagName.slice(-1), 10);
  };
  
  // Returns the level of an hgroup: the smallest level number (= highest
  // rank) among its direct h1-h6 children. When the hgroup has no such child
  // the result is 1, because HTML gives a heading-less hgroup the rank of h1.
  var getHeadingGroupLevel = function (elt) {
    var bestLevel = Infinity;
    var currentLevel;
    var nodes = elt.childNodes;
    var node;
    var i;
    for (i = 0; (node = nodes[i++]);) {
      if (isHeadingContent(node)) {
        currentLevel = getSingleHeadingLevel(node);
        // A nested hgroup yields NaN here, which never compares smaller
        // and is therefore ignored.
        if (currentLevel < bestLevel) {
          bestLevel = currentLevel;
        }
      }
    }
    return (bestLevel === Infinity) ? 1 : bestLevel;
  };
  
  //
  // Record describing one outlined element together with the sections found
  // directly inside it. Every own property of `arg` (if given) is copied
  // onto the new instance.
  function HTMLOutlinee(arg) {
    var key;
    if (!arg) {
      return;
    }
    for (key in arg) {
      if (arg.hasOwnProperty(key)) {
        this[key] = arg[key];
      }
    }
  }
  
  // Factory: an outlinee for `element` with an empty section list and no parent.
  HTMLOutlinee.create = function (element) {
    var init = {
      element: element,
      sections: [],
      parent: null
    };
    return new HTMLOutlinee(init);
  };
  
  // Prototype: default field values plus the basic section-list accessors.
  HTMLOutlinee.prototype = new function () {
    var proto = this;
    
    proto.element = null;
    proto.sections = null;
    proto.parent = null;
    proto.type = '#outlinee';
    
    // Appends `section`, points its `outlinee` back here, and returns it.
    proto.addSection = function (section) {
      var list = this.sections;
      list.push(section);
      section.outlinee = this;
      return section;
    };
    
    // First registered section (undefined when there is none).
    proto.getFirstSection = function () {
      return this.sections[0];
    };
    
    // Most recently registered section (undefined when there is none).
    proto.getLastSection = function () {
      var list = this.sections;
      var lastIndex = list.length - 1;
      return list[lastIndex];
    };
    
    proto.toString = function () {
      return '[object HTMLOutlinee]';
    };
  };
  
  // Record describing one section of an outline. Every own property of
  // `arg` (if given) is copied onto the new instance.
  function HTMLSection(arg) {
    var key;
    if (!arg) {
      return;
    }
    for (key in arg) {
      if (arg.hasOwnProperty(key)) {
        this[key] = arg[key];
      }
    }
  }
  
  // Factory: a section with the given heading, parent and owning outlinee,
  // and with empty `children` and `associated` lists.
  HTMLSection.create = function (heading, parent, outlinee) {
    var init = {
      heading: heading,
      parent: parent,
      children: [],
      outlinee: outlinee,
      associated: []
    };
    return new HTMLSection(init);
  };
  
  // Default field values and tree helpers, installed on the prototype.
  (function () {
    var proto = this;
    
    proto.heading = null;
    proto.parent = null;
    proto.children = null;
    proto.outlinee = null;
    proto.associated = null;
    proto.type = '#section';
    
    // Appends `div` as the last child, sets its `parent` to this section,
    // and returns it.
    proto.appendChild = function (div) {
      this.children.push(div);
      div.parent = this;
      return div;
    };
    
    // True when at least one child is itself an HTMLSection.
    proto.hasChildSections = function () {
      var kids = this.children;
      var total = kids.length;
      var idx = 0;
      while (idx < total) {
        if (kids[idx] instanceof HTMLSection) {
          return true;
        }
        idx += 1;
      }
      return false;
    };
    
    // Last child that is an HTMLSection, or null when there is none.
    proto.getLastSection = function () {
      var kids = this.children;
      var idx;
      var candidate;
      for (idx = kids.length - 1; idx >= 0; idx -= 1) {
        candidate = kids[idx];
        if (candidate instanceof HTMLSection) {
          return candidate;
        }
      }
      return null;
    };
    
    // Records `node` in the `associated` list and returns it.
    proto.associate = function (node) {
      this.associated.push(node);
      return node;
    };
    
    proto.toString = function () {
      return '[object HTMLSection]';
    };
  }).call(HTMLSection.prototype);
  
  // Walks the subtree below `root` in document order and builds the outline
  // as HTMLOutlinee / HTMLSection objects.
  //
  // Returns the outlinee that is current when the walk ends, or null when no
  // sectioning content / sectioning root element was entered at all.
  function createOutlinee(root) {
    // Outlinee being filled, and the section that receives content next.
    var currentOutlinee = null;
    var currentSection = null;
    // Holds suspended outer outlinees and, while inside a heading, that
    // heading element itself.
    var stack = [];
    
    // Iterative depth-first walker: calls callbackfn(node, 'enter') before a
    // node's children and callbackfn(node, 'exit') after them.
    (function (root, callbackfn) {
      var node = root;
      var n;
      
      while (node) {
        callbackfn(node, 'enter');
        
        // Descend first.
        if ((n = node.firstChild)) {
          node = n;
          continue;
        }
        // No children: exit this node, then climb until a next sibling is
        // found or the walk is back at `root`.
        do {
          callbackfn(node, 'exit');
          if (node == root) {
            node = null;
            break;
          }
          if ((n = node.nextSibling)) {
            node = n;
            break;
          }
        }
        while ((node = node.parentNode))
      }
    })(root, function (node, mode) {
      var top = stack[stack.length - 1];
      
      // Leaving the element on top of the stack (a heading pushed below).
      if (top === node && mode === 'exit') {
        stack.pop();
      }
      // Still inside a heading element: its descendants are ignored.
      else if (isHeadingContent(top)) {
        ;
      }
      // Entering a sectioning element: suspend the current outlinee and start
      // a new one holding a single, heading-less section.
      else if (mode === 'enter' && (isSectioningContent(node) || isSectioningRoot(node))) {
        // A section left without a heading is marked `undefined` here, which
        // keeps it distinguishable from a fresh section (heading === null).
        if (currentOutlinee !== null && currentSection.heading === null) {
          currentSection.heading = undefined;
        }
        if (currentOutlinee !== null) {
          stack.push(currentOutlinee);
        }
        currentOutlinee = HTMLOutlinee.create(node);
        currentSection = HTMLSection.create(null, currentSection, currentOutlinee);
        currentOutlinee.addSection(currentSection);
      }
      // Leaving nested sectioning content: resume the outer outlinee and
      // attach the finished outlinee to the outer outlinee's last section.
      else if (mode === 'exit' && (isSectioningContent(node) && stack.length > 0)) {
        var outlinee = currentOutlinee;
        currentOutlinee = stack.pop();
        currentSection = currentOutlinee.getLastSection();
        currentSection.appendChild(outlinee);
      }
      // Leaving a nested sectioning root: resume the outer outlinee and
      // attach the finished outlinee to its deepest last subsection.
      else if (mode === 'exit' && (isSectioningRoot(node) && stack.length > 0)) {
        var outlinee = currentOutlinee;
        currentOutlinee = stack.pop();
        currentSection = currentOutlinee.getLastSection();
        while (currentSection.hasChildSections()) {
          currentSection = currentSection.getLastSection();
        }
        currentSection.appendChild(outlinee);
      }
      // Leaving a sectioning element with nothing suspended (outermost one):
      // the current section becomes the outlinee's first section.
      else if (mode === 'exit' && (isSectioningContent(node) || isSectioningRoot(node))) {
        currentSection = currentOutlinee.getFirstSection();
      }
      // Nothing is being outlined yet: ignore the node.
      else if (currentOutlinee === null) {}
      // Entering a heading element.
      else if (mode === 'enter' && isHeadingContent(node)) {
        // The current section has no heading yet (null or undefined): use this one.
        if (currentSection.heading == null) {
          currentSection.heading = node;
        }
        // Level number not greater than that of the outlinee's last section
        // heading: start a new section directly in the outlinee.
        else if (getLevel(node) <= getLevel(currentOutlinee.getLastSection().heading)) {
          currentSection = currentOutlinee.addSection(HTMLSection.create(node, currentSection.parent, currentOutlinee));
        }
        // Otherwise climb from the current section through `parent` links
        // until a section with a smaller level number is found, and nest a
        // new section under it.
        else {
          var candidateSection = currentSection;
          while (true) {
            if (getLevel(node) > getLevel(candidateSection.heading)) {
              currentSection = candidateSection.appendChild(HTMLSection.create(node, candidateSection.parent, currentOutlinee));
              break;
            }
            var newCandidateSection = candidateSection.parent;
            candidateSection = newCandidateSection;
          }
        }
        // Remember the heading so that its descendants are skipped until it
        // is exited.
        stack.push(node);
      }
      // NOTE: the `if` below is NOT part of the chain above; it runs on every
      // 'exit' call and records the node on the current section.
      else {} if (mode === 'exit' && currentSection !== null) {
        currentSection.associate(node);
      }
    });
    
    // Both paths return currentOutlinee; the explicit check only spells out
    // the "nothing found" result.
    if (currentOutlinee === null) {
      return null;
    }
    return currentOutlinee;
  }
  
  // Returns the text carried by node `n`:
  //   - element: `alt` for img, `value` for input, otherwise the
  //     concatenated text of all child nodes (recursively);
  //   - text / CDATA node: its `data`;
  //   - any other node type: the empty string.
  var getTextContent = function (n) {
    switch (n.nodeType) {
      
    case 1:
      if (/^img$/i.test(n.nodeName)) {
        return n.alt;
      }
      if (/^input$/i.test(n.nodeName)) {
        return n.value;
      }
      // Recurse by name rather than through arguments.callee, which is
      // forbidden in strict-mode code.
      return Array.prototype.map.call(n.childNodes, function (child) {
        return getTextContent(child);
      }).join('');
    case 3:
    case 4:
      return n.data;
      
    default:
      return '';
    }
  };
  
  // Outline-level helpers added to HTMLOutlinee.prototype.
  (function () {
    var proto = this;
    
    // Whether the outlined element is a sectioning root.
    proto.isSectioningRoot = function () {
      return isSectioningRoot(this.element);
    };
    
    // False with no sections, true with two or more; with exactly one
    // section the answer is that section's isEffectiveSection().
    proto.hasEffectiveSections = function () {
      var list = this.sections;
      var total = list.length;
      if (total === 0) {
        return false;
      }
      if (total === 1) {
        return list[0].isEffectiveSection();
      }
      return true;
    };
    
    // Serialises the outlinee. The JSON of every section is concatenated
    // into one array; that bare array is returned when `internal` is truthy
    // and the element is not a sectioning root, otherwise it is wrapped in
    // a '#root' record.
    proto.toJSON = function (internal) {
      var list = this.sections;
      var total = list.length;
      var collected = [];
      var idx = 0;
      
      while (idx < total) {
        collected = collected.concat(list[idx].toJSON(true));
        idx += 1;
      }
      if (!internal || this.isSectioningRoot()) {
        return {
          type: '#root',
          context: this.element.tagName,
          children: collected
        };
      }
      return collected;
    };
  }).call(HTMLOutlinee.prototype);
  
  // Outline-level helpers added to HTMLSection.prototype.
  (function () {
    var proto = this;
    
    // A section counts as effective when its outlinee element is sectioning
    // content, when it has child sections, or when its heading is not null.
    proto.isEffectiveSection = function () {
      if (isSectioningContent(this.outlinee.element)) {
        return true;
      }
      if (this.hasChildSections()) {
        return true;
      }
      return this.heading !== null;
    };
    
    // Serialises the section as a '#section' record. The label is the
    // heading text, or a placeholder naming the outlinee's tag when the
    // heading is null ("anonymous") or otherwise falsy ("undefined").
    proto.toJSON = function () {
      var headingNode = this.heading;
      var contextName = this.outlinee.element.tagName;
      var anchorId;
      var caption;
      var kids;
      var total;
      var collected;
      var idx;
      
      if (headingNode) {
        anchorId = headingNode.id;
        caption = getTextContent(headingNode);
      }
      else if (headingNode === null) {
        caption = '(anonymous\x20' + contextName + ')';
      }
      else {
        caption = '(undefined\x20' + contextName + ')';
      }
      
      kids = this.children;
      total = kids.length;
      collected = [];
      
      for (idx = 0; idx < total; idx += 1) {
        collected = collected.concat(kids[idx].toJSON(true));
      }
      return {
        context: contextName,
        type: '#section',
        id: anchorId,
        label: caption,
        children: collected
      };
    };
  }).call(HTMLSection.prototype);
  
  //
  // Converts outline JSON (as produced by toJSON) into DOM list markup.
  //
  //   jsonData - record with a `children` array of outline records
  //   params   - { ownerDocument: document used to create nodes,
  //                tagName: list element name, defaults to 'ul' }
  //
  // Returns a list element when `jsonData` has children, otherwise an empty
  // DocumentFragment. Each '#section' child becomes an <li><a>label</a></li>
  // (with href="#id" when the record has an id); the list built for its own
  // children is appended right after that <li>, as a sibling.
  var outlineToList = function (jsonData, params) {
    var doc = params.ownerDocument;
    // Templates, cloned for every list / item / fragment that is needed.
    var list0 = doc.createElement(params.tagName || 'ul');
    var item0 = doc.createElement('li');
    var frag0 = doc.createDocumentFragment();
    item0.appendChild(doc.createElement('a')).appendChild(doc.createTextNode('_'));
    
    // Named recursive builder; replaces arguments.callee, which throws in
    // strict-mode code.
    var build = function (data) {
      var list;
      var item;
      var frag = frag0.cloneNode(false);
      var children = data.children;
      var childCount = children.length;
      var child;
      var i;
      var n;
      var s;
      
      if (childCount > 0) {
        for (i = 0; i < childCount; i++) {
          child = children[i];
          
          // Only '#section' records are rendered; skip the rest before
          // cloning anything.
          if (child.type !== '#section') {
            continue;
          }
          item = item0.cloneNode(true);
          n = item.firstChild;
          
          if ((s = child.id)) {
            n.href = '#' + encodeURIComponent(s);
          }
          n.firstChild.data = child.label;
          frag.appendChild(item);
          frag.appendChild(build(child));
        }
        list = list0.cloneNode(false);
        list.appendChild(frag);
      }
      else {
        list = frag;
      }
      return list;
    };
    
    return build(jsonData);
  };
  
  // Public API: builds a `ul` list reflecting the outline found under `root`.
  //
  //   root - document or node to outline
  //   doc  - document used to create the list nodes; defaults to `root`
  //          itself when it is a document node, else to root.ownerDocument
  //
  // When `root` contains no sectioning element there is no outline; an empty
  // `ul` is returned in that case instead of failing on a null outlinee.
  this.createOutlineList = function (root, doc) {
    var outlinee = createOutlinee(root);
    if (!doc) {
      doc = (root.nodeType === 9) ? root : root.ownerDocument;
    }
    if (outlinee === null) {
      return doc.createElement('ul');
    }
    return outlineToList(outlinee.toJSON(), {
      ownerDocument: doc,
      tagName: 'ul'
    });
  };
  
  // Renders outline JSON (as produced by toJSON) as indented plain text.
  //
  //   jsonData - outline record with a `children` array
  //   level    - indentation counter for this record; defaults to 1
  //
  // A '#root' record prints as "= [CONTEXT]" and each '#section' child as
  // "+ [CONTEXT] label". Labels are trimmed, line breaks become " -- " and
  // runs of whitespace collapse to one space. Lines are joined with "\n";
  // a record that prints nothing yields the empty string.
  var outlineToString = function (jsonData, level) {
    // Amount added to `level` for each nesting step.
    var tab = 2;
    
    if (!level) {
      level = 1;
    }
    var padding;
    var result = [];
    
    if (jsonData.type === '#root') {
      // new Array(level).join(s) repeats s (level - 1) times.
      padding = new Array(level).join('\x20\x20') + '= ';
      result.push(padding + '[' + jsonData.context + ']');
      level += tab;
    }
    var children = jsonData.children;
    var childCount = children.length;
    var child;
    var i;
    var c;
    
    if (childCount > 0) {
      padding = new Array(level).join('\x20\x20') + '+ ';
  
      for (i = 0; i < childCount; i++) {
        child = children[i];
  
        if (child.type === '#section') {
          result.push(padding + '[' + child.context + '] ' + child.label.replace(/^\s+|\s+$/g, '').replace(/(?:\r\n|\r|\n)+/g, ' -- ').replace(/\s{2,}/g, '\x20'));
        }
        // Recurse by name rather than through arguments.callee, which is
        // forbidden in strict-mode code.
        if ((c = outlineToString(child, level + tab))) {
          result.push(c);
        }
      }
    }
    return result.join('\n');
  };
  
  // Public API: returns the outline found under `root` as indented plain
  // text. When `root` contains no sectioning element there is no outline;
  // the empty string is returned in that case instead of failing on a null
  // outlinee.
  this.createOutlineString = function (root) {
    var outlinee = createOutlinee(root);
    if (outlinee === null) {
      return '';
    }
    return outlineToString(outlinee.toJSON());
  };
  
  //////////////////////
  // Microdata
  //
  // Collects the elements that contribute properties to the microdata item
  // rooted at `root` (an element carrying `itemscope`).
  //
  // The crawl starts from root's child elements plus every element named in
  // root's `itemref`, does not descend into nested items, and visits each
  // element at most once. Returns the visited elements that carry an
  // `itemprop` attribute, sorted in document order. `root` itself is never
  // part of the result.
  //
  // The /*@if(1)...@else@*/ ... /*@end@*/ comments are JScript conditional
  // compilation: there the code inside the comment is used in place of the
  // line that follows it.
  var findItemProperties = function (root) {
    var push = Array.prototype.push;
    var filter = Array.prototype.filter;
    var isElement = function (n) {
      return n.nodeType === 1;
    };
    var results = [];
    var memory = [];
    var pending = [];
    
    // The root is only remembered as visited. Adding it to `results` would
    // make an item that carries its own itemprop show up as one of its own
    // properties.
    memory.push(root);
    push.apply(pending, filter.call(root.childNodes, isElement));
    
    /*@if(1)var a;if((a=root.getAttributeNode('itemref'))&&a.specified){@else@*/
    if (root.hasAttribute('itemref')) { /*@end@*/
      var refs;
      var TrailWS = /^\s+|\s+$/g;
      var WS = /\s+/;
      
      if ((refs = root.getAttribute('itemref'/*@,0@*/)) && (refs = refs.replace(TrailWS, ''))) {
        refs = refs.split(WS);
        var refCount = refs.length;
        var doc = root.ownerDocument;
        var m;
        var i;
        for (i = 0; i < refCount; i++) {
          if ((m = doc.getElementById(refs[i]))) {
            pending.push(m);
          }
        }
      }
    }
    while (pending.length > 0) {
      var current = pending.shift();
      // Already visited (e.g. referenced twice, or an itemref cycle): skip.
      if (memory.indexOf(current) >= 0) {
        continue;
      }
      memory.push(current);
      
      // Do not descend into a nested item; its content belongs to that item.
      /*@if(1)var a;if(!(a=current.getAttributeNode('itemscope'))||!a.specified){@else@*/
      if (!current.hasAttribute('itemscope')) { /*@end@*/
        push.apply(pending, filter.call(current.childNodes, isElement));
      }
      
      /*@if(1)var a;if((a=current.getAttributeNode('itemprop'))&&a.specified){@else@*/
      if (current.hasAttribute('itemprop')) { /*@end@*/
        results.push(current);
      }
    }
    // Sort into document order with whichever facility the host offers.
    if ('undefined' !== typeof root.compareDocumentPosition) {
      results.sort(function (e1, e2) {
        // Identity comparison; Node.isSameNode is not available everywhere.
        if (e1 === e2) {
          return 0;
        }
        if (0 !== (e1.compareDocumentPosition(e2) & Node.DOCUMENT_POSITION_FOLLOWING)) {
          return -1;
        }
        return 1;
      });
    }
    else {
      /*@if(1)results.sort(function(e1,e2){return e1.sourceIndex-e2.sourceIndex});@else@*/
      var r1 = root.ownerDocument.createRange();
      var r2 = root.ownerDocument.createRange();
      results.sort(function (e1, e2) {
        r1.selectNode(e1);
        r2.selectNode(e2);
        return r1.compareBoundaryPoints(Range.START_TO_START, r2);
      }); /*@end@*/
    }
    return results;
  };
  
  // Extracts the top-level microdata items below `root`.
  //
  // Returns { items: [...] } holding one getObject_JSON() record for every
  // descendant element that has `itemscope` but no `itemprop`. An item that
  // also carries `itemprop` is a property value of another item and is
  // reported inside that item, not as an entry of its own.
  //
  // The /*@if(1)...@else@*/ ... /*@end@*/ comments are JScript conditional
  // compilation: there the code inside the comment is used in place of the
  // line that follows it.
  var findAllItems_JSON = function (root) {
    var result = {};
    var items = [];
    var elems = root.getElementsByTagName('*');
    var elemCount = elems.length;
    var elem;
    var i;
    for (i = 0; i < elemCount; i++) {
      elem = elems[i];
      
      /*@if(1)var a;if(elem.nodeType===1&&(a=elem.getAttributeNode('itemscope'))&&a.specified){@else@*/
      if (elem.hasAttribute('itemscope')) { /*@end@*/
        // Top-level items only.
        /*@if(1)var b;if(!(b=elem.getAttributeNode('itemprop'))||!b.specified){@else@*/
        if (!elem.hasAttribute('itemprop')) { /*@end@*/
          // Each top-level item gets its own, fresh cycle-detection list.
          items.push(getObject_JSON(elem, []));
        }
      }
    }
    result['items'] = items;
    return result;
  };
  
  // Builds the JSON-ready record for one microdata item.
  //
  //   item   - element carrying `itemscope`
  //   memory - list of items already being serialised on this path; `item`
  //            is appended to it. Used to detect reference cycles.
  //
  // Returns { type?, id?, properties }, where `type` / `id` mirror the
  // `itemtype` / `itemid` attributes when present and `properties` maps each
  // property name to an array of values. A value is a nested record for a
  // nested item, the string 'ERROR' when that nested item is already in
  // `memory`, or a string read from the element according to its tag name.
  //
  // The /*@if(1)...@else@*/ ... /*@end@*/ comments are JScript conditional
  // compilation: there the code inside the comment is used in place of the
  // line that follows it.
  var getObject_JSON = function (item, memory) {
    var result = {};
    // Borrowed so that a property literally named "hasOwnProperty" cannot
    // shadow the check used on `properties` below.
    var hasOwn = Object.prototype.hasOwnProperty;
    memory.push(item);
    
    /*@if(1)var a;if((a=item.getAttributeNode('itemtype'))&&a.specified){@else@*/
    if (item.hasAttribute('itemtype')) { /*@end@*/
      result['type'] = item.getAttribute('itemtype' /*@,0@*/ );
    }
    
    /*@if(1)var a;if((a=item.getAttributeNode('itemid'))&&a.specified){@else@*/
    if (item.hasAttribute('itemid')) { /*@end@*/
      result['id'] = item.getAttribute('itemid' /*@,0@*/ );
    }
    var properties = {};
    var elems = findItemProperties(item);
    var elemCount = elems.length;
    var elem;
    var i;
    var TrailWS = /^\s+|\s+$/g;
    var WS = /\s+/;
    
    for (i = 0; i < elemCount; i++) {
      elem = elems[i];
      
      /*@if(1)var a;if(!(a=elem.getAttributeNode('itemprop'))||!a.specified){@else@*/
      if (!elem.hasAttribute('itemprop')) { /*@end@*/
        continue;
      }
      // One element may declare several space-separated property names.
      var names = elem.getAttribute('itemprop' /*@,0@*/ ).replace(TrailWS, '');
      if (!names) {
        continue;
      }
      names = names.split(WS);
      var value;
      
      /*@if(1)var a;if((a=elem.getAttributeNode('itemscope'))&&a.specified){@else@*/
      if (elem.hasAttribute('itemscope')) { /*@end@*/
        // Nested item: recurse unless it is already being serialised.
        if (memory.indexOf(elem) < 0) {
          value = getObject_JSON(elem, memory);
        }
        else {
          value = 'ERROR';
        }
      }
      else {
        // Plain property: where the value comes from depends on the element.
        switch (elem.tagName.toUpperCase()) {
        case 'META':
          value = elem.content || '';
          break;
        case 'EMBED':
        case 'IFRAME':
        case 'IMG':
          value = elem.src || '';
          break;
        case 'AUDIO':
        case 'SOURCE':
        case 'TRACK':
        case 'VIDEO':
          value = elem.src;
          // Hosts without a `src` property on these elements: fall back to
          // the raw attribute.
          if ('undefined' === typeof value) {
            value = elem.getAttribute('src' /*@,0@*/ ) || '';
          }
          break;
        case 'A':
        case 'AREA':
        case 'LINK':
          value = elem.href;
          break;
        case 'OBJECT':
          value = elem.data;
          break;
        case 'TIME':
          value = elem.dateTime;
          // Hosts without a `dateTime` property: fall back to the raw
          // attribute.
          if ('undefined' === typeof value) {
            value = elem.getAttribute('datetime' /*@,0@*/ ) || '';
          }
          break;
        default:
          /*@if(1)var s;if('undefined'!==typeof(s=elem.innerText)){value=s}@else@*/
          value = elem.textContent; /*@end@*/
          break;
        }
      }
      var nameCount = names.length;
      var name;
      var j;
      
      // The same value is recorded under every name the element declares.
      for (j = 0; j < nameCount; j++) {
        name = names[j];
        if (hasOwn.call(properties, name)) {
          properties[name].push(value);
        }
        else {
          properties[name] = [value];
        }
      }
    }
    result['properties'] = properties;
    return result;
  };
  
  this.findAllItems = findAllItems_JSON;
};
  • 初出 2011-09-04/05
  • 修正 2012-03-05
目安箱バナー