String.implement({
    /**
     * Returns a copy of this string with word-processor HTML clutter removed
     * (judging by the method name, the input is markup pasted from Microsoft Word).
     *
     * Removed, in this order:
     *   - HTML comments (including conditional-comment blocks),
     *   - <title> and <style> elements together with their content,
     *   - layout/document tags (div, span, table parts, head/html/body, meta, link)
     *     -- only the tags themselves, their inner content is kept,
     *   - namespaced elements such as <o:p>, and processing instructions,
     *   - v:* attributes, style attributes and class attributes.
     * Runs of two or more &nbsp; entities become a single space and all
     * whitespace runs (line breaks included) collapse to one space.
     *
     * Empty paragraphs are not removed and unbalanced tags are left as they are.
     * This is a regex-based clean-up, not an HTML parser or a security filter.
     *
     * @return {String} the cleaned markup
     */
    sanitiseWord: function() {
        // Flatten line breaks up front so no pattern has to span several lines.
        var html = String(this).replace(/[\r\n]/g, ' ');

        // Order matters: whole comment/title/style blocks must go before the
        // generic tag strippers would remove only their delimiters.
        var junk = [
            /<!--[\s\S]*?-->/g, // Comments (also empty ones)
            /<title\b[^>]*>[\s\S]*?<\/title\s*>/gi, // Title
            /<style\b[^>]*>[\s\S]*?<\/style\s*>/gi, // Style info
            // Unnecessary tags. The \b stops "head" from eating <header> and
            // "tr" from eating <track>; "![" covers <![if ...]> / <![endif]>.
            /<\/?(?:(?:meta|link|style|div|head|html|body|span|table|colgroup|col|tbody|thead|tfoot|tr|td)\b|!\[)[^>]*>/gi,
            /<[^>\s]*:[^>]*>/g, // Namespaced elements
            /<\?[^>]*>/g, // Processing instructions
            /<[^>]*\?>/g, // Processing instructions
            // Attribute names may not contain whitespace, '=' or '>', so a stray
            // " v:" in running text can no longer swallow a later attribute.
            / v:[^\s=>]*=(?:"[^"]*"|'[^']*')/gi, // v:* attributes
            / style=(?:"[^"]*"|'[^']*')/gi, // Styles
            / class=(?:"[^"]*"|'[^']*'|[^\s>]+)/gi // Classes (quoted or bare)
        ];

        for (var i = 0; i < junk.length; i++) {
            html = html.replace(junk[i], '');
        }

        // Redundant &nbsp;s: replace with a space instead of deleting them,
        // otherwise the words on either side would be glued together.
        html = html.replace(/(?:&nbsp;){2,}/g, ' ');

        return html.replace(/\s+/g, ' ');
    }
});
