关联链接
关联链接是指:在文章内容中找到“关联链接名称”,并为它加上指向“关联链接网址”的超链接。
如,我们添加一个“xtgxiso”关联到“http://www.xtgxiso.cn”,在发布文章的时候如果出现“xtgxiso”这个关联词,就替换成 <a target="_blank" href="http://www.xtgxiso.cn">xtgxiso</a>
一般的思路应该是这样的:
1:后台来管理这个关联链接
2:前台在发布或显示的时候,根据关联链接和内容逐个匹配,然后返回生成后的内容。
关键是:如何高效的匹配关联链接。
先来说说phpcms的方法:
1:所有的关联链接在后台中管理。且数据在缓存文件中有一份。
2:前台匹配
<?php
/**
 * Converts stored model-field values into their front-end display form.
 *
 * Each field of the model is dispatched, by its configured "formtype", to the
 * method of the same name on this class (editor, box, images, datetime,
 * linkage). Fields whose formtype has no matching method are passed through
 * unchanged.
 *
 * Relies on the project helpers getcache() and string2array() and on the
 * SYS_TIME constant, all defined outside this file.
 */
class member_output {
    var $fields;
    var $data;
    // Declared explicitly: the constructor and get() assign these, and
    // undeclared (dynamic) properties are deprecated in recent PHP versions.
    var $modelid;
    var $catid;
    var $categorys;
    var $id;

    /**
     * @param mixed $modelid   Model identifier; also selects the field cache 'model_field_<modelid>'.
     * @param mixed $catid     Category identifier (stored only).
     * @param array $categorys Category list (stored only).
     */
    function __construct($modelid, $catid = 0, $categorys = array()) {
        $this->modelid = $modelid;
        $this->catid = $catid;
        $this->categorys = $categorys;
        $this->fields = getcache('model_field_'.$modelid, 'model');
    }

    /**
     * Build the display values for one record.
     *
     * @param array $data Raw record, keyed by field name.
     * @return array Field name => display value. Fields missing from $data are
     *               skipped, as are fields whose formatter returns false.
     */
    function get($data) {
        $this->data = $data;
        $this->id = isset($data['id']) ? $data['id'] : null;
        $info = array();
        // The field cache may be missing; iterate only over a real array.
        if(!is_array($this->fields)) {
            return $info;
        }
        foreach($this->fields as $field => $v) {
            if(!isset($data[$field])) continue;
            $func = $v['formtype'];
            $result = method_exists($this, $func) ? $this->$func($field, $data[$field]) : $data[$field];
            if($result !== false) $info[$field] = $result;
        }
        return $info;
    }

    /**
     * Rich-text field: optionally turn keywords into links.
     *
     * @return string The linked text when 'enablekeylink' is set for the field,
     *                otherwise the value unchanged (the original returned an
     *                undefined variable in that case).
     */
    function editor($field, $value) {
        $setting = string2array($this->fields[$field]['setting']);
        if(!empty($setting['enablekeylink'])) {
            $replacenum = isset($setting['replacenum']) ? $setting['replacenum'] : '';
            $link_mode = isset($setting['link_mode']) ? $setting['link_mode'] : 1;
            return $this->_keylinks($value, $replacenum, $link_mode);
        }
        return $value;
    }

    /** Return $t followed by the base64-encoded $str wrapped in double quotes. */
    function _base64_encode($t, $str) {
        return $t."\"".base64_encode($str)."\"";
    }

    /** Return $t followed by the base64-decoded $str wrapped in double quotes. */
    function _base64_decode($t, $str) {
        return $t."\"".base64_decode($str)."\"";
    }

    /** preg_replace_callback adapter: hide an alt/title attribute value. */
    function _encode_attr_match($m) {
        return $this->_base64_encode($m[1], $m[2]);
    }

    /** preg_replace_callback adapter: restore an alt/title attribute value. */
    function _decode_attr_match($m) {
        return $this->_base64_decode($m[1], $m[2]);
    }

    /**
     * Replace keywords in $txt with links.
     *
     * Steps: (1) base64-encode every alt="..." / title="..." value so keywords
     * inside them are not touched, (2) build the search/replacement lists,
     * (3) replace, (4) decode the protected attribute values again.
     *
     * @param string     $txt        HTML text to process.
     * @param string|int $replacenum Maximum replacements per keyword; '' means unlimited.
     * @param mixed      $link_mode  Truthy: link the record's own keywords
     *                               ($this->data['keywords']); otherwise, or when
     *                               there are no keywords, use the built-in link table.
     * @return string
     */
    function _keylinks($txt, $replacenum = '', $link_mode = 1) {
        // The original used the /e modifier (removed in PHP 7) together with
        // addslashes()/stripslashes(); callbacks need neither, and without the
        // added backslashes the pattern can actually see the quote after '='.
        $search = "/(alt\s*=\s*|title\s*=\s*)[\"|\'](.+?)[\"|\']/is";
        $protected = preg_replace_callback($search, array($this, '_encode_attr_match'), $txt);
        if($protected === null) {
            // PCRE failure: leave the text untouched rather than returning null.
            return $txt;
        }

        $keywords = isset($this->data['keywords']) ? $this->data['keywords'] : '';
        if($keywords) {
            // strpos(haystack, needle): the original had the arguments swapped,
            // so comma-separated keyword lists were never split on commas.
            $keywords = strpos($keywords, ',') === false ? explode(' ', $keywords) : explode(',', $keywords);
            // Drop blank entries: an empty keyword would produce the pattern '//'.
            $keywords = array_values(array_filter(array_map('trim', $keywords), 'strlen'));
        }

        $use_keywords = $link_mode && !empty($keywords);
        if($use_keywords) {
            $linkdatas = $keywords;
        } else {
            //TODO
            $linkdatas = array(
                0 => array(0=>'网站',1=>'http://www.phpip.com'),
                1 => array(0=>'百度',1=>'http://www.baidu.com'),
            );
        }

        $word1 = $word2 = $replacement = array();
        foreach($linkdatas as $v) {
            if($use_keywords) {
                $word1[] = '/'.preg_quote($v, '/').'/';
                $word2[] = $v;
                $replacement[] = '<a href="javascript:;" class="keylink">'.$v.'</a>';
            } else {
                $word1[] = '/'.preg_quote($v[0], '/').'/';
                $word2[] = $v[0];
                $replacement[] = '<a href="'.$v[1].'" target="_blank" class="keylink">'.$v[0].'</a>';
            }
        }

        if($replacenum != '') {
            $linked = preg_replace($word1, $replacement, $protected, (int)$replacenum);
            if($linked === null) {
                $linked = $protected;
            }
        } else {
            $linked = str_replace($word2, $replacement, $protected);
        }

        $restored = preg_replace_callback($search, array($this, '_decode_attr_match'), $linked);
        return $restored === null ? $txt : $restored;
    }

    /**
     * Option field (radio / checkbox / select / multiple).
     *
     * @return mixed The raw value when the field's 'outputtype' setting is
     *               truthy; otherwise the option label(s) for the value.
     */
    function box($field, $value) {
        // Read the one setting that is needed instead of extract()-ing the
        // whole array, which could overwrite $field and $value.
        $setting = string2array($this->fields[$field]['setting']);
        if(!empty($setting['outputtype'])) {
            return $value;
        }

        // Each option line has the form "label|key".
        $option = array();
        foreach(explode("\n", $this->fields[$field]['options']) as $_k) {
            $v = explode("|", $_k);
            $k = isset($v[1]) ? trim($v[1]) : '';
            $option[$k] = $v[0];
        }

        $string = '';
        switch($this->fields[$field]['boxtype']) {
            case 'radio':
            case 'select':
                $string = isset($option[$value]) ? $option[$value] : '';
                break;
            case 'checkbox':
            case 'multiple':
                foreach(explode(',', $value) as $_v) {
                    if($_v) $string .= (isset($option[$_v]) ? $option[$_v] : '').' 、';
                }
                break;
        }
        return $string;
    }

    /** Image-list field: decode the stored string with string2array(). */
    function images($field, $value) {
        return string2array($value);
    }

    /**
     * Date/time field: format a timestamp.
     *
     * Uses 'Y-m-d' for fieldtype 'date', 'Y-m-d H:i:s' for 'datetime', and the
     * field's own 'format' setting otherwise. An empty value is replaced by
     * SYS_TIME before formatting.
     */
    function datetime($field, $value) {
        $setting = string2array($this->fields[$field]['setting']);
        $fieldtype = isset($setting['fieldtype']) ? $setting['fieldtype'] : '';
        if($fieldtype == 'date') {
            $format_txt = 'Y-m-d';
        } elseif($fieldtype == 'datetime') {
            $format_txt = 'Y-m-d H:i:s';
        } else {
            $format_txt = isset($setting['format']) ? $setting['format'] : '';
        }
        if(!$value) $value = SYS_TIME;
        return date($format_txt, $value);
    }

    /**
     * Linkage (cascading menu) field.
     *
     * showtype 1: full parent path joined by the 'space' setting;
     * showtype 2: the raw value; anything else: the name of the selected entry.
     */
    function linkage($field, $value) {
        $setting = string2array($this->fields[$field]['setting']);
        $datas = getcache($setting['linkageid'], 'linkage');
        $infos = $datas['data'];
        if($setting['showtype'] == 1) {
            $result = $this->_get_parent($value, $setting['linkageid'], $setting['space']);
        } elseif($setting['showtype'] == 2) {
            $result = $value;
        } else {
            $result = isset($infos[$value]['name']) ? $infos[$value]['name'] : null;
        }
        return $result;
    }

    /**
     * Walk up the linkage tree from $linkageid and return the names from the
     * root down to the entry, joined by $space.
     *
     * @param mixed  $linkageid Entry to start from.
     * @param mixed  $keyid     Linkage cache id, used to load $infos when not supplied.
     * @param string $space     Separator; '>' when empty.
     * @param array  $result    Names collected so far (recursion state).
     * @param array  $infos     Linkage entries (recursion state).
     * @return string|array The joined path, or the empty array when nothing matched.
     */
    function _get_parent($linkageid, $keyid, $space = '>', $result = array(), $infos = array()) {
        if($space == '' || !isset($space)) $space = '>';
        if(!$infos) {
            $datas = getcache($keyid, 'linkage');
            $infos = $datas['data'];
        }
        if(array_key_exists($linkageid, $infos)) {
            $result[] = $infos[$linkageid]['name'];
            return $this->_get_parent($infos[$linkageid]['parentid'], $keyid, $space, $result, $infos);
        }
        if(count($result) > 0) {
            // Names were collected child-first; reverse by key to get root-first order.
            krsort($result);
            return implode($space, $result);
        }
        return $result;
    }
}
?>
在方法 _keylinks 中可以明显看到。思想就是
1:先将不应该替换的替换成编码后的内容
2:遍历关联链接生成要替换的相应数据。
3:执行替换,返回数据。
再来说说discuz的方法:
1:和phpcms一样,所有的关联链接在后台中管理。且数据在缓存文件中有一份。
2:前台匹配
<?php

/**
 *      [Discuz!] (C)2001-2099 Comsenz Inc.
 *      This is NOT a freeware, use is subject to license terms
 *
 *      $Id: helper_seo.php 32836 2013-03-14 08:10:02Z zhangguosheng $
 */

if(!defined('IN_DISCUZ')) {
    exit('Access Denied');
}

/**
 * SEO helpers: page title/description/keywords templates and automatic
 * "related link" insertion.
 *
 * Relies on the global $_G array and on loadcache(), lang(), daddslashes(),
 * CURSCRIPT and IS_ROBOT, all defined outside this file.
 */
class helper_seo {

    /**
     * Expand the SEO templates for a page.
     *
     * Placeholders of the form {name} are replaced by strip_tags($data['name']);
     * {bbname} is replaced by $_G['setting']['bbname'].
     *
     * @param string $page   Page key into $_G['setting']['seotitle'|'seodescription'|'seokeywords'].
     * @param array  $data   Placeholder values.
     * @param array  $defset Optional overrides: 'seotitle', 'seodescription', 'seokeywords'.
     * @return array array($seotitle, $seodescription, $seokeywords)
     */
    public static function get_seosetting($page, $data = array(), $defset = array()) {
        global $_G;
        $searchs = array('{bbname}');
        $replaces = array($_G['setting']['bbname']);

        $seotitle = $seodescription = $seokeywords = '';
        $texts = array();
        foreach(array('seotitle', 'seodescription', 'seokeywords') as $key) {
            if(!empty($defset[$key])) {
                $texts[$key] = $defset[$key];
            } else {
                $texts[$key] = isset($_G['setting'][$key][$page]) ? $_G['setting'][$key][$page] : '';
            }
        }
        $titletext = $texts['seotitle'];
        $descriptiontext = $texts['seodescription'];
        $keywordstext = $texts['seokeywords'];

        preg_match_all("/\{([a-z0-9_-]+?)\}/", $titletext.$descriptiontext.$keywordstext, $pageparams);
        if($pageparams) {
            // De-duplicate: a placeholder used in several templates must be
            // resolved once, otherwise {page} would be formatted a second time
            // from its already formatted value.
            foreach(array_unique($pageparams[1]) as $var) {
                $searchs[] = '{'.$var.'}';
                if($var == 'page') {
                    $data['page'] = (isset($data['page']) && $data['page'] > 1) ? lang('core', 'page', array('page' => $data['page'])) : '';
                }
                $replaces[] = !empty($data[$var]) ? strip_tags($data[$var]) : '';
            }
            if($titletext) {
                $seotitle = self::strreplace_strip_split($searchs, $replaces, $titletext);
            }
            // Description and keywords are only expanded for static generation,
            // the forum script, robots, or adminid 1.
            $expand_meta = isset($_G['makehtml']) || CURSCRIPT == 'forum' || IS_ROBOT || $_G['adminid'] == 1;
            if($descriptiontext && $expand_meta) {
                $seodescription = self::strreplace_strip_split($searchs, $replaces, $descriptiontext);
            }
            if($keywordstext && $expand_meta) {
                $seokeywords = self::strreplace_strip_split($searchs, $replaces, $keywordstext);
            }
        }
        return array($seotitle, $seodescription, $seokeywords);
    }

    /**
     * Apply the placeholder replacements, collapse runs of separators
     * (- , | tab _) together with surrounding whitespace into a single
     * separator, and trim separators from both ends.
     */
    public static function strreplace_strip_split($searchs, $replaces, $str) {
        // Note: these patterns use '(' and ')' as the regex delimiters.
        $searchspace = array('((\s*\-\s*)+)', '((\s*\,\s*)+)', '((\s*\|\s*)+)', '((\s*\t\s*)+)', '((\s*_\s*)+)');
        $replacespace = array('-', ',', '|', ' ', '_');
        return trim(preg_replace($searchspace, $replacespace, str_replace($searchs, $replaces, $str)), ' ,-|_');
    }

    /** Append " - <page label>" to the title for pages after the first. */
    public static function get_title_page($navtitle, $page){
        if($page > 1) {
            $navtitle .= ' - '.lang('core', 'page', array('page' => $page));
        }
        return $navtitle;
    }

    /**
     * Return the related links enabled for $extent, reverse-sorted and passed
     * through daddslashes().
     *
     * @param string $extent One of 'article', 'forum', 'group', 'blog'.
     * @return array
     */
    public static function get_related_link($extent) {
        global $_G;
        loadcache('relatedlink');
        $allextent = array('article' => 0, 'forum' => 1, 'group' => 2, 'blog' => 3);
        $links = array();
        if($_G['cache']['relatedlink'] && isset($allextent[$extent])) {
            foreach($_G['cache']['relatedlink'] as $link) {
                // 'extent' is a bit mask; as a 4-character binary string, the
                // character at the extent's index says whether the link applies.
                $link['extent'] = sprintf('%04b', $link['extent']);
                if($link['extent'][$allextent[$extent]] && $link['name'] && $link['url']) {
                    $links[] = daddslashes($link);
                }
            }
        }
        rsort($links);
        return $links;
    }

    /**
     * Replace the first occurrence of each related-link name in $content with
     * an <a class="relatedlink"> element.
     *
     * Existing <script>, <a>, <img> elements and [attach]N[/attach] codes are
     * swapped for <relatedlink>N</relatedlink> placeholders first so that names
     * inside them are not linked, and are restored afterwards.
     *
     * NOTE(review): links inserted by an earlier name are not protected from
     * later names, so a name occurring inside another link's URL or text can
     * still be linked. This matches the original behaviour.
     *
     * @param string $content HTML content.
     * @param string $extent  One of 'article', 'forum', 'group', 'blog'.
     * @return string
     */
    public static function parse_related_link($content, $extent) {
        global $_G;
        loadcache('relatedlink');
        $allextent = array('article' => 0, 'forum' => 1, 'group' => 2, 'blog' => 3);
        if($_G['cache']['relatedlink'] && isset($allextent[$extent])) {
            $searcharray = $replacearray = array();
            foreach($_G['cache']['relatedlink'] as $link) {
                $link['extent'] = sprintf('%04b', $link['extent']);
                if($link['extent'][$allextent[$extent]] && $link['name'] && $link['url']) {
                    // Keys are quoted (a bare name is an undefined constant) and
                    // the delimiter is passed to preg_quote so names containing
                    // '/' do not break the pattern.
                    $searcharray[$link['name']] = '/('.preg_quote($link['name'], '/').')/i';
                    $replacearray[$link['name']] = "<a href=\"$link[url]\" target=\"_blank\" class=\"relatedlink\">$link[name]</a>";
                }
            }
            if($searcharray && $replacearray) {
                $_G['trunsform_tmp'] = array();
                // preg_replace_callback replaces the /e modifier, which was
                // removed in PHP 7.
                $work = preg_replace_callback("/(<script\s+.*?>.*?<\/script>)|(<a\s+.*?>.*?<\/a>)|(<img\s+.*?[\/]?>)|(\[attach\](\d+)\[\/attach\])/is", array('helper_seo', 'protect_segment'), $content);
                if($work !== null) {
                    $work = preg_replace($searcharray, $replacearray, $work, 1);
                }
                if($work !== null) {
                    $work = preg_replace_callback("/<relatedlink>(.*?)<\/relatedlink>/is", array('helper_seo', 'restore_segment'), $work);
                }
                // On any PCRE failure keep the original content.
                if($work !== null) {
                    $content = $work;
                }
            }
        }
        return $content;
    }

    /** Callback: stash a matched segment and return its numbered placeholder. */
    private static function protect_segment($match) {
        global $_G;
        $_G['trunsform_tmp'][] = base64_encode($match[0]);
        return '<relatedlink>'.(count($_G['trunsform_tmp']) - 1).'</relatedlink>';
    }

    /** Callback: replace a numbered placeholder with the segment it stands for. */
    private static function restore_segment($match) {
        global $_G;
        // Unknown index (e.g. a literal <relatedlink> tag in the content): leave as is.
        return isset($_G['trunsform_tmp'][$match[1]]) ? base64_decode($_G['trunsform_tmp'][$match[1]]) : $match[0];
    }

    /**
     * Kept for backward compatibility with external callers; no longer used by
     * parse_related_link().
     *
     * 'encode': stores base64($string) (after turning \" back into ") in
     * $_G['trunsform_tmp'] and returns $prefix.<index>.$suffix.
     * 'decode': returns $prefix.<stored segment at index $string>.$suffix.
     */
    public static function base64_transform($type, $prefix, $string, $suffix) {
        global $_G;
        if($type == 'encode') {
            $_G['trunsform_tmp'][] = base64_encode(str_replace("\\\"", "\"", $string));
            return $prefix.(count($_G['trunsform_tmp']) - 1).$suffix;
        } elseif($type == 'decode') {
            return $prefix.base64_decode($_G['trunsform_tmp'][$string]).$suffix;
        }
    }
}
?>
在代码中我们再次看到,思想都是一样的,只是正则稍微不同而已。以上两种都没有考虑到关联链接很多时的性能问题。discuz的思路是可以在客户端生成来缓解服务器的压力,但量大的话,客户端同样也会卡死的。
再来说说自己的小思路,问题的关键是如何快速的匹配出一个内容里究竟包含几个关联链接
现提供一个小sql就可以轻松解决,测试十几W条关联链接没什么大问题,是一个小而快的解决方案,大家有更好的方法欢迎交流.
select id,keyword,url from fmb_keywordlink where INSTR('张玉珊修身堂(长宁龙之梦店)aaaBodyConcept普拉提',keyword);
including hip tunics
snooki weight loss Fashion and Makeup Tips for Brunette Girls
gay pornStarting Your Clothing Line Business in 8 Steps