Knol will be unavailable during scheduled maintenance starting at Mon, 09 Nov 2009 18:30:00 GMT. We expect the maintenance to be completed at Mon, 09 Nov 2009 20:00:00 GMT.
Version: Baidi441

JavaScript: HTML format links in text

dynamic link creation from plaintext

/*
* hyperlink replaces all text links within a passed text string to html links
* Note: it's better to do this as a one time filter when passing the data into the database
* rather than filtering every time the UI is rendered. But here it is:
* @param {string} txt, the text to format
* @return {string} the newly formatted text
*/


/**
 * hyperlink wraps every plain-text URL found in the passed text in an HTML
 * anchor tag. URLs that are already part of a tag (preceded by a double quote,
 * '>' or '/') are left untouched.
 * Note: it's better to do this as a one time filter when passing the data into
 * the database rather than filtering every time the UI is rendered.
 *
 * Relies on two helpers that must be in scope at call time:
 *   - stripMal(txt): filters script-injection markers out of the href/label
 *   - ellipsis(txt, max): called with max = 45 to keep the visible link short
 *     (defined elsewhere; its exact output format is not visible here)
 *
 * @param {string} txt the text to format; null/undefined is treated as ''
 * @return {string} the newly formatted text
 */
function hyperlink(txt) {
    if (txt == null) // nothing to format
        return '';

    // Regex breakdown for regUrl (global, case-insensitive):
    //   $1 (^|[^>"\/])          start of text, or one character that is not a
    //                           double quote, '>' or '/'. This avoids matching
    //                           <a href="http://...">http://...</a> and avoids
    //                           matching the 'www.' inside 'http://www.' twice.
    //   $2 (?:https?:\/\/|www\.) the URL must begin with 'http://', 'https://'
    //                           or 'www.'
    //      [^\s"<>]+            followed by at least one character that is not
    //                           whitespace, a double quote or an angle bracket,
    //                           so the URL can never close the href attribute
    //                           or swallow a neighbouring tag.
    // The whitespace after the URL is NOT consumed, so a second URL separated
    // from the first by a single space is still matched.
    var regUrl = /(^|[^>"\/])((?:https?:\/\/|www\.)[^\s"<>]+)/gi;
    // trailing sentence punctuation that was picked up as part of the URL
    var regUrlTail = /[!)\]\.?]+$/;
    // a usable link still has at least one character after its prefix
    var regHasBody = /^(?:https?:\/\/|www\.)\S/i;
    var regScheme = /^https?:\/\//i;

    // replace callback arguments: full match, $1, $2
    return String(txt).replace(regUrl, function (match, start, url) {
        var tailMatch = url.match(regUrlTail);
        var tail = tailMatch ? tailMatch[0] : '';
        // strip the trailing punctuation from the link itself
        var href = tail ? url.slice(0, url.length - tail.length) : url;

        if (!regHasBody.test(href)) // only punctuation followed the prefix, e.g. 'http://...'
            return match; // leave the text as it was
        if (!regScheme.test(href)) // 'www.' links need a scheme to be absolute
            href = 'http://' + href;

        var label = ellipsis(href, 45); // keep visible link short
        // add the start character and the stripped punctuation back on
        return start + '<a href="' + stripMal(href) + '">' + stripMal(label) + '</a>' + tail;
    });
}

/**
 * stripMal strips script-injection scheme markers from a text string.
 * Removes every case-insensitive occurrence of 'script:' or 'data:',
 * optionally prefixed with 'java' or 'vb' ('javascript:', 'vbscript:', ...).
 *
 * Removal is repeated until the text stops changing, because deleting one
 * marker can splice the surrounding characters into a new one
 * (e.g. 'javascrdata:ipt:' becomes 'javascript:' after a single pass).
 *
 * @param {string} txt the text to filter; null/undefined is treated as ''
 * @return {string} text stripped of any malicious script declaration
 */
function stripMal(txt) {
    var regMal = /(?:java|vb)?(?:script|data):/gi;
    var current = txt == null ? '' : String(txt);
    var previous;
    // each pass either shortens the string or leaves it unchanged, so this terminates
    do {
        previous = current;
        current = previous.replace(regMal, '');
    } while (current !== previous);
    return current;
}

Comments

Adam Eivy
Adam Eivy
Web Development at ShadowPuppet, LLC
Seattle
Article rating:
Your rating:

Activity for this knol

This week:

16 pageviews

Totals:

663 pageviews