Remove MS Word formatting on paste

4,228 views
Skip to first unread message

Ove Ranheim

unread,
May 14, 2012, 3:10:58 PM5/14/12
to cleditor
Hi,

Is there a way to have MS Word Formatting removed on paste? When
content is pasted from any source, I only want it to be inserted as
plain text.

Any other options?

Regards,
Ove

Mike McKee

unread,
May 14, 2012, 8:36:30 PM5/14/12
to cled...@googlegroups.com

You have to write that code yourself, and it's not easy. You can look at how the plugin for NicEdit or TinyMCE does this and reimplement it in CLEditor.

lowpass

unread,
May 16, 2012, 8:35:02 PM5/16/12
to cled...@googlegroups.com
I've been looking for a decent solution to this for years. I've yet to
get anything working that completely removes all the formatting every
time. The problem seems to be that each new version of MSWord adds its
own unique crap. Whatever JS misses gets cleaned out server-side using
HTMLPurifier. That's not ideal for a WYSIWYG widget, though.

Keith Mashinter

unread,
May 19, 2012, 8:28:44 AM5/19/12
to cled...@googlegroups.com
Below is an extension I use to the updateTextArea that includes filtering of Microsoft word if / endif garbage and normalizing the tags to a combination of xhtml / html5 friendly ones: br|b|del|ins|i|li|ol|p|ul
 
(function($) {
 $.cleditor.defaultOptions.docType = "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Transitional//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'>";
 $.cleditor.defaultOptions.docCSSFile = AkmeWindowUtil.CONTEXT_PATH+"/embed/cleditor/jquery.cleditor.iframe.css";

 // A whole HTML comment, up to its first "-->" (or to the end of input when
 // it is never closed). This covers both Microsoft conditional forms:
 //   <!--[if gte mso 9]> ... <![endif]-->      (content hidden inside one comment)
 //   <!--[if !supportLists]--> ... <!--[endif]-->  (two self-closed comments)
 // so only the comments are dropped, never the text that follows them.
 var commentRE = /<!--[\s\S]*?(?:-->|$)/g;

 // Exact tag spellings rewritten to their normalized equivalents.
 var search = ["strong>","em>","strike>","u>","br>"];
 var replace = ["b>","i>","del>","ins>","br/>"];

 // <span ...> / </span> wrappers are unwrapped (tag removed, text kept).
 var spellcheckerRE = /^\/?span[^\/>]*\/?>/;

 // An allowed tag, with any attributes. The lookahead requires the tag name
 // to end there, so "img", "body", "pre", "link" ... are not mistaken for
 // "i", "b", "p", "li". Attributes may contain "/" (e.g. URLs).
 var cleanupRE = /^(\/?)(br|b|del|ins|i|li|ol|p|ul)(?![\w:-])[^>]*?(\/?)>/;

 // Leading whitespace / empty paragraphs / line breaks. Deliberately NOT
 // multiline: "^" must only match the very start of the document.
 var leadingRE = /^(\s+|&nbsp;|<br\/?>|<p>(&nbsp;)*<\/p>)+/;

 /**
  * Normalizes editor HTML to a small xhtml/html5-friendly tag set and strips
  * Microsoft Word paste residue. Installed as the default `updateTextArea`
  * option (presumably invoked by CLEditor with the iframe HTML when it syncs
  * to the textarea -- confirm against the CLEditor docs).
  *
  * - HTML comments (including Word conditional comments) are removed.
  * - strong/em/strike/u/br are rewritten to b/i/del/ins/br/.
  * - Only br|b|del|ins|i|li|ol|p|ul survive, with attributes dropped;
  *   <p> is removed and </p> (or <p/>) becomes <br/>.
  * - span tags and every other tag are removed, keeping their text.
  * - &nbsp; is removed and doubled newlines are collapsed in tag segments.
  *
  * @param {string} html Markup to clean.
  * @returns {string} Cleaned markup, or "" when it has no non-whitespace text.
  */
 $.cleditor.defaultOptions.updateTextArea = function(html) {
  html = html.replace(commentRE, "");

  // Each element after the first is "tagbody>trailing text".
  // ary[0] is plain text preceding the first tag, so it is left untouched.
  var ary = html.split("<");
  for (var i=1; i<ary.length; i++) {
   var end = ary[i].indexOf(">");
   if (end == -1) continue;
   // Lower-case the tag part only; the text after ">" keeps its case.
   ary[i] = ary[i].substring(0,end).toLowerCase()+ary[i].substring(end);
   var slash = ary[i].charAt(0) == "/" ? 1 : 0; // closing tag?
   for (var j=0; j<search.length; j++) {
    if (ary[i].substring(slash, slash+search[j].length) == search[j]) {
     ary[i] = (slash ? "/" : "")+ replace[j] +ary[i].substring(slash+search[j].length);
    }
   }
   if (spellcheckerRE.test(ary[i])) {
    ary[i] = ary[i].replace(spellcheckerRE, "");
   } else if (cleanupRE.test(ary[i])) {
    ary[i] = ary[i].replace(cleanupRE, "<$1$2$3>");
    // Paragraphs are flattened: opening <p> vanishes, closing one is a break.
    ary[i] = ary[i].replace(/^<p>/, "");
    ary[i] = ary[i].replace(/^<\/?p\/?>/, "<br/>");
   } else {
    // Unknown tag: drop it, keep the text that followed it.
    ary[i] = ary[i].substring(end+1);
   }
   ary[i] = ary[i].replace(/&nbsp;/gm, "");
   ary[i] = ary[i].replace(/\n\n/gm, "\n");
  }
  html = ary.join("");

  // Trim leading whitespace.
  html = html.replace(leadingRE, "");

  // Test if there is any actual non-whitespace text content.
  var body = document.getElementsByTagName("body")[0];
  var div = document.createElement("div");
  div.style.display = "none";
  body.appendChild(div);
  div.innerHTML = html;
  var text = div.innerText || div.textContent;
  body.removeChild(div);
  if (!/\S/.test(text)) html = "";
  return html;
 };
})(jQuery);
 

Ove Ranheim

unread,
May 19, 2012, 10:03:24 AM5/19/12
to cled...@googlegroups.com
Hi Keith,

Thanks a lot for your helpful script!

This did work out for me, though it's cleaning up a bit too much. It seems to remove p and br tags, so paragraphs get lost.

Such a feature should be default in CLEditor. MS Word content breaks my web page when used in my CMS impl.

Best regards,
Ove

On May 19, 2012, at 2:28 PM, Keith Mashinter wrote:

Below is an extension I use to the updateTextArea that includes filtering of Microsoft word if / endif garbage and normalizing the tags to a combination of xhtml / html5 friendly ones: [..cut..]
 

jonee

unread,
Jan 23, 2013, 3:10:15 AM1/23/13
to cled...@googlegroups.com, keith.m...@frameworks.ca
I know this is at least 8 months old, but this is great code.

Only thing I noticed is that the iframe is not updated. For example you paste in something with a big font. The iframe still shows a big font yet viewing the source revealed a cleaned up html.

How do you add an update to the iframe for this please?

Regards!
 
Message has been deleted
Message has been deleted
Message has been deleted
Message has been deleted

Carlos Sanchez

unread,
May 31, 2013, 1:16:06 PM5/31/13
to cled...@googlegroups.com
Use this to paste while updating the Iframe:
    var editor = $(".editor").cleditor();
    $(".cleditorMain iframe").each(function(index,
obj){

     $(obj).contents().find('body').bind('paste', function () {

        setTimeout(function () {
            editor[index].updateTextArea(editor[index]);
            $(editor[index].doc.body).html(editor[index].$area.val());

        }, 200);


    });
    });


It forces the data from the textarea into the iframe element. This block goes on the page (or in an external JS file) ALONG with your CLEditor box. It does not go in the CLEditor JS file.


On Friday, May 31, 2013 1:08:41 PM UTC-4, Carlos Sanchez wrote:
Use this to update the Iframe upon Pasting



On Friday, May 31, 2013 8:58:44 AM UTC-4, Carlos Sanchez wrote:
I would also like a way to update the iframe. This solution was gold nonetheless!
Reply all
Reply to author
Forward
0 new messages