关联链接
关联链接是指在文章内容中找到“关联链接名称”加上“关联链接网址”.
如,我们添加一个“xtgxiso”关联到“http://www.xtgxiso.cn”,在发布文章的时候如果出现“xtgxiso”这个关联词,就替换成 <a target="_blank" href="http://www.xtgxiso.cn">xtgxiso</a>
一般的思路应该是这样的:
1:后台来管理这个关联链接
2:前台在发布或显示的时候,根据关联链接和内容逐个匹配,然后返回生成后的内容。
关键是:如何高效的匹配关联链接。
先来说说phpcms的方法:
1:所有的关联链接在后台中管理。且数据在缓存文件中有一份。
2:前台匹配
<?php
/**
 * Converts stored model field values into their display form.
 *
 * get() walks the cached field definitions of one model and, for every field
 * present in the record, calls the method named after the field's "formtype"
 * (editor, box, images, datetime, linkage) when such a method exists.
 *
 * Relies on project helpers getcache() and string2array() and on the SYS_TIME
 * constant, all defined outside this class.
 */
class member_output {
    /** Field definitions of the model, as returned by getcache('model_field_<id>', 'model'). */
    var $fields;
    /** The record most recently passed to get(); _keylinks() reads its 'keywords' entry. */
    var $data;
    // Declared explicitly because creating undeclared (dynamic) properties is
    // deprecated as of PHP 8.2.
    var $modelid;
    var $catid;
    var $categorys;
    var $id;

    /**
     * @param mixed $modelid   Model identifier; used to build the field-cache name.
     * @param mixed $catid     Category identifier (stored only).
     * @param array $categorys Category data (stored only).
     */
    function __construct($modelid, $catid = 0, $categorys = array()) {
        $this->modelid = $modelid;
        $this->catid = $catid;
        $this->categorys = $categorys;
        $this->fields = getcache('model_field_'.$modelid, 'model');
    }

    /**
     * Formats every known field of a record.
     *
     * @param array $data Raw record keyed by field name.
     * @return array Formatted values keyed by field name. Fields missing from
     *               $data, and fields whose formatter returned false, are omitted.
     */
    function get($data) {
        $this->data = $data;
        $this->id = isset($data['id']) ? $data['id'] : null;
        $info = array();
        // The field cache may be missing; iterate only over a real array.
        if (!is_array($this->fields)) {
            return $info;
        }
        foreach ($this->fields as $field => $v) {
            if (!isset($data[$field])) {
                continue;
            }
            $func = $v['formtype'];
            $result = method_exists($this, $func) ? $this->$func($field, $data[$field]) : $data[$field];
            if ($result !== false) {
                $info[$field] = $result;
            }
        }
        return $info;
    }

    /**
     * Formats an editor (rich text) field, optionally linking keywords.
     *
     * @param string $field Field name.
     * @param string $value Stored HTML.
     * @return string The HTML, with keyword links when 'enablekeylink' is set;
     *                otherwise the value unchanged (previously an undefined
     *                variable, i.e. null, was returned in that case).
     */
    function editor($field, $value) {
        $setting = string2array($this->fields[$field]['setting']);
        if (empty($setting['enablekeylink'])) {
            return $value;
        }
        $replacenum = isset($setting['replacenum']) ? $setting['replacenum'] : '';
        // A missing link_mode stays falsy, as it was when read directly from the array.
        $link_mode = isset($setting['link_mode']) ? $setting['link_mode'] : 0;
        return $this->_keylinks($value, $replacenum, $link_mode);
    }

    /** Returns $t followed by the base64-encoded $str in double quotes. */
    function _base64_encode($t, $str) {
        return $t."\"".base64_encode($str)."\"";
    }

    /** Returns $t followed by the base64-decoded $str in double quotes. */
    function _base64_decode($t, $str) {
        return $t."\"".base64_decode($str)."\"";
    }

    /**
     * preg_replace_callback handler: base64-encodes the value of an alt/title
     * attribute so keyword replacement cannot touch it. Keeps the original quote.
     */
    function _protect_attr($m) {
        return $m[1].$m[2].base64_encode($m[3]).$m[2];
    }

    /** preg_replace_callback handler: inverse of _protect_attr(). */
    function _restore_attr($m) {
        return $m[1].$m[2].base64_decode($m[3]).$m[2];
    }

    /**
     * Wraps keywords found in $txt in anchor tags.
     *
     * Steps: (1) hide alt/title attribute values by base64-encoding them,
     * (2) build the search/replacement lists, (3) replace, (4) restore the
     * hidden attribute values.
     *
     * Known limitation: a keyword that happens to occur inside a base64-encoded
     * attribute value, or inside an anchor inserted for an earlier keyword,
     * is still replaced.
     *
     * @param string     $txt        HTML to process.
     * @param int|string $replacenum Maximum replacements per keyword; '' or any
     *                               value that is not a positive integer means
     *                               "replace every occurrence".
     * @param mixed      $link_mode  Truthy: link the record's own keywords
     *                               (from $this->data['keywords']). Falsy, or no
     *                               keywords available: use the built-in list.
     * @return string
     */
    function _keylinks($txt, $replacenum = '', $link_mode = 1) {
        // The /e modifier used previously was removed in PHP 7; callbacks replace
        // it. The addslashes()/stripslashes() pair is gone too: it only existed to
        // cope with /e quoting and it prevented this pattern from matching quotes.
        $search = '/((?:alt|title)\s*=\s*)(["\'])(.*?)\2/is';
        $protected = preg_replace_callback($search, array($this, '_protect_attr'), $txt);
        // preg_* returns null on failure; in that case carry on unprotected and
        // skip the restore step so untouched attributes are not "decoded".
        $is_protected = ($protected !== null);
        if ($is_protected) {
            $txt = $protected;
        }

        $keywords = array();
        if (!empty($this->data['keywords'])) {
            $raw = $this->data['keywords'];
            // Comma-separated when a comma is present, otherwise space-separated.
            // (The haystack/needle arguments of strpos() were previously swapped.)
            $separator = strpos($raw, ',') === false ? ' ' : ',';
            foreach (explode($separator, $raw) as $keyword) {
                $keyword = trim($keyword);
                // An empty keyword would yield the pattern "//", which matches everywhere.
                if ($keyword !== '') {
                    $keywords[] = $keyword;
                }
            }
        }

        $use_keywords = $link_mode && $keywords;
        if ($use_keywords) {
            $linkdatas = $keywords;
        } else {
            //TODO: load the managed related links instead of this sample list
            $linkdatas = array(
                0 => array(0=>'网站',1=>'http://www.phpip.com'),
                1 => array(0=>'百度',1=>'http://www.baidu.com'),
            );
        }

        $patterns = $needles = $replacement = array();
        foreach ($linkdatas as $v) {
            if ($use_keywords) {
                $name = $v;
                $html = '<a href="javascript:;" class="keylink">'.$v.'</a>';
            } else {
                $name = $v[0];
                $html = '<a href="'.$v[1].'" target="_blank" class="keylink">'.$v[0].'</a>';
            }
            $patterns[] = '/'.preg_quote($name, '/').'/';
            $needles[] = $name;
            $replacement[] = $html;
        }

        $limit = (int)$replacenum;
        if ($limit > 0) {
            // "$" and "\" are special in a preg replacement string; escape them so
            // a keyword or URL containing them is inserted literally.
            $escaped = array();
            foreach ($replacement as $html) {
                $escaped[] = addcslashes($html, '\\$');
            }
            $limited = preg_replace($patterns, $escaped, $txt, $limit);
            if ($limited !== null) {
                $txt = $limited;
            }
        } else {
            $txt = str_replace($needles, $replacement, $txt);
        }

        if ($is_protected) {
            $restored = preg_replace_callback($search, array($this, '_restore_attr'), $txt);
            if ($restored !== null) {
                $txt = $restored;
            }
        }
        return $txt;
    }

    /**
     * Formats a box field (radio, checkbox, select, multiple).
     *
     * Options are stored one per line as "label|value".
     *
     * @param string $field Field name.
     * @param string $value Stored value; comma-separated for checkbox/multiple.
     * @return string The raw value when the 'outputtype' setting is truthy,
     *                otherwise the label(s). Multi-value labels are each followed
     *                by ' 、' (including the last one, as before).
     */
    function box($field, $value) {
        // Read the one setting needed instead of extract()-ing the whole array,
        // which could overwrite $field or $value.
        $setting = string2array($this->fields[$field]['setting']);
        if (!empty($setting['outputtype'])) {
            return $value;
        }

        $option = array();
        foreach (explode("\n", $this->fields[$field]['options']) as $line) {
            $pair = explode("|", $line);
            // A line without "|" maps the empty key to the label, as before.
            $key = isset($pair[1]) ? trim($pair[1]) : '';
            $option[$key] = $pair[0];
        }

        $string = '';
        switch ($this->fields[$field]['boxtype']) {
            case 'radio':
            case 'select':
                $string = isset($option[$value]) ? $option[$value] : '';
                break;
            case 'checkbox':
            case 'multiple':
                foreach (explode(',', $value) as $item) {
                    if ($item) {
                        $string .= (isset($option[$item]) ? $option[$item] : '').' 、';
                    }
                }
                break;
        }
        return $string;
    }

    /**
     * Formats an images field.
     *
     * @return mixed Whatever string2array() yields for the stored value.
     */
    function images($field, $value) {
        return string2array($value);
    }

    /**
     * Formats a date/time field with date().
     *
     * @param string $field Field name.
     * @param mixed  $value Timestamp; SYS_TIME is used when falsy.
     * @return string
     */
    function datetime($field, $value) {
        $setting = string2array($this->fields[$field]['setting']);
        $fieldtype = isset($setting['fieldtype']) ? $setting['fieldtype'] : '';
        if ($fieldtype == 'date') {
            $format_txt = 'Y-m-d';
        } elseif ($fieldtype == 'datetime') {
            $format_txt = 'Y-m-d H:i:s';
        } else {
            // Custom format from the field settings; empty when none is configured.
            $format_txt = isset($setting['format']) ? $setting['format'] : '';
        }
        if (!$value) {
            $value = SYS_TIME;
        }
        return date($format_txt, $value);
    }

    /**
     * Formats a linkage (cascading menu) field.
     *
     * @param string $field Field name.
     * @param mixed  $value Stored linkage id.
     * @return mixed showtype 1: the path of names from _get_parent();
     *               showtype 2: the raw value; otherwise the entry's name
     *               (null when the id is not in the cache).
     */
    function linkage($field, $value) {
        $setting = string2array($this->fields[$field]['setting']);
        $datas = getcache($setting['linkageid'], 'linkage');
        $infos = $datas['data'];
        if ($setting['showtype'] == 1) {
            return $this->_get_parent($value, $setting['linkageid'], $setting['space']);
        }
        if ($setting['showtype'] == 2) {
            return $value;
        }
        return isset($infos[$value]['name']) ? $infos[$value]['name'] : null;
    }

    /**
     * Builds the name path from the top-level ancestor down to $linkageid.
     *
     * @param mixed  $linkageid Id of the entry to start from.
     * @param mixed  $keyid     Cache key of the linkage menu.
     * @param string $space     Separator; '>' when empty or null.
     * @param array  $result    Names already collected (child first).
     * @param array  $infos     Menu entries; loaded from the cache when empty.
     * @return string|array The joined path, or $result unchanged (an empty
     *                      array by default) when nothing was collected.
     */
    function _get_parent($linkageid, $keyid, $space = '>', $result = array(), $infos = array()) {
        if ($space === null || $space == '') {
            $space = '>';
        }
        if (!$infos) {
            $datas = getcache($keyid, 'linkage');
            $infos = $datas['data'];
        }
        if (!is_array($infos)) {
            $infos = array();
        }

        // Walk up the parent chain iteratively; $visited stops a cyclic
        // parentid chain, which previously recursed without end.
        $visited = array();
        while (array_key_exists($linkageid, $infos) && !isset($visited[$linkageid])) {
            $visited[$linkageid] = true;
            $result[] = $infos[$linkageid]['name'];
            $linkageid = $infos[$linkageid]['parentid'];
        }

        if (count($result) > 0) {
            // Names were appended child-first; reverse so the root comes first.
            krsort($result);
            return implode($space, $result);
        }
        return $result;
    }
}
?>
在方法 _keylinks 中可以明显看到。思想就是
1:先将不应该替换的替换成编码后的内容
2:遍历关联链接生成要替换的相应数据。
3:执行替换,返回数据。
再来说说discuz的方法:
1:和phpcms一样,所有的关联链接在后台中管理。且数据在缓存文件中有一份。
2:前台匹配
<?php
/**
* [Discuz!] (C)2001-2099 Comsenz Inc.
* This is NOT a freeware, use is subject to license terms
*
* $Id: helper_seo.php 32836 2013-03-14 08:10:02Z zhangguosheng $
*/
// Stop with "Access Denied" unless the IN_DISCUZ constant has been defined
// (presumably by the application's entry script — confirm against the caller).
if(!defined('IN_DISCUZ')) {
exit('Access Denied');
}
/**
 * SEO helpers: builds title/description/keywords strings from templates and
 * turns configured "related link" names found in content into anchors.
 *
 * Relies on the global $_G array and on project functions lang(), loadcache()
 * and daddslashes(), plus the CURSCRIPT and IS_ROBOT constants, all defined
 * outside this class.
 */
class helper_seo {

    /**
     * Builds the SEO title, description and keywords for a page type.
     *
     * Templates come from $defset when given, else from $_G['setting'].
     * Every "{name}" placeholder is replaced by strip_tags($data['name']) (empty
     * when absent); "{bbname}" is replaced by $_G['setting']['bbname'] and
     * "{page}" by the localized page label when the page number is above 1.
     *
     * @param string $page   Page type key used to look up the templates.
     * @param array  $data   Placeholder values keyed by placeholder name.
     * @param array  $defset Optional overrides: seotitle, seodescription, seokeywords.
     * @return array array($seotitle, $seodescription, $seokeywords)
     */
    public static function get_seosetting($page, $data = array(), $defset = array()) {
        global $_G;
        $searchs = array('{bbname}');
        $replaces = array(isset($_G['setting']['bbname']) ? $_G['setting']['bbname'] : '');
        $seotitle = $seodescription = $seokeywords = '';

        $titletext = self::_seo_template('seotitle', $page, $defset);
        $descriptiontext = self::_seo_template('seodescription', $page, $defset);
        $keywordstext = self::_seo_template('seokeywords', $page, $defset);

        preg_match_all("/\{([a-z0-9_-]+?)\}/", $titletext.$descriptiontext.$keywordstext, $pageparams);
        $names = isset($pageparams[1]) ? $pageparams[1] : array();
        foreach ($names as $var) {
            $searchs[] = '{'.$var.'}';
            if ($var == 'page') {
                // Plain "> 1" comparison. The garbled "--> 1" parsed as a
                // post-decrement, handing lang() a page number that was one too low.
                $pageno = isset($data['page']) ? $data['page'] : 0;
                $data['page'] = $pageno > 1 ? lang('core', 'page', array('page' => $pageno)) : '';
            }
            $replaces[] = !empty($data[$var]) ? strip_tags($data[$var]) : '';
        }

        if ($titletext) {
            $seotitle = self::strreplace_strip_split($searchs, $replaces, $titletext);
        }
        // Description and keywords are only produced for these requests; the
        // constants are evaluated only when there is something to build.
        if ($descriptiontext || $keywordstext) {
            $with_meta = isset($_G['makehtml']) || CURSCRIPT == 'forum' || IS_ROBOT || $_G['adminid'] == 1;
            if ($descriptiontext && $with_meta) {
                $seodescription = self::strreplace_strip_split($searchs, $replaces, $descriptiontext);
            }
            if ($keywordstext && $with_meta) {
                $seokeywords = self::strreplace_strip_split($searchs, $replaces, $keywordstext);
            }
        }
        return array($seotitle, $seodescription, $seokeywords);
    }

    /** Picks one SEO template: the $defset override when non-empty, else the site setting, else ''. */
    protected static function _seo_template($key, $page, $defset) {
        global $_G;
        if (!empty($defset[$key])) {
            return $defset[$key];
        }
        return isset($_G['setting'][$key][$page]) ? $_G['setting'][$key][$page] : '';
    }

    /**
     * Replaces placeholders, then collapses runs of one separator type
     * (- , | tab _) and surrounding whitespace into a single separator and
     * trims separators from both ends.
     *
     * @param array  $searchs  Placeholders.
     * @param array  $replaces Replacement values, parallel to $searchs.
     * @param string $str      Template.
     * @return string
     */
    public static function strreplace_strip_split($searchs, $replaces, $str) {
        // Each pattern uses "(" and ")" as its regex delimiters.
        $searchspace = array('((\s*\-\s*)+)', '((\s*\,\s*)+)', '((\s*\|\s*)+)', '((\s*\t\s*)+)', '((\s*_\s*)+)');
        $replacespace = array('-', ',', '|', ' ', '_');
        return trim(preg_replace($searchspace, $replacespace, str_replace($searchs, $replaces, $str)), ' ,-|_');
    }

    /**
     * Appends " - <page label>" to a title for pages after the first.
     *
     * @param string $navtitle Title.
     * @param int    $page     Page number.
     * @return string
     */
    public static function get_title_page($navtitle, $page){
        if ($page > 1) {
            $navtitle .= ' - '.lang('core', 'page', array('page' => $page));
        }
        return $navtitle;
    }

    /**
     * Returns the cached related links enabled for $extent that have both a
     * name and a URL. 'extent' on each returned link is the 4-character binary
     * string; position 0..3 corresponds to article, forum, group, blog.
     *
     * @param string $extent One of article, forum, group, blog.
     * @return array List of link arrays; empty for an unknown extent or empty cache.
     */
    protected static function _links_for_extent($extent) {
        global $_G;
        loadcache('relatedlink');
        $allextent = array('article' => 0, 'forum' => 1, 'group' => 2, 'blog' => 3);
        $links = array();
        if (empty($_G['cache']['relatedlink']) || !isset($allextent[$extent])) {
            return $links;
        }
        foreach ($_G['cache']['relatedlink'] as $link) {
            $link['extent'] = sprintf('%04b', $link['extent']);
            if ($link['extent'][$allextent[$extent]] && $link['name'] && $link['url']) {
                $links[] = $link;
            }
        }
        return $links;
    }

    /**
     * Lists the related links enabled for $extent, passed through
     * daddslashes() and sorted in reverse order with rsort().
     *
     * @param string $extent One of article, forum, group, blog.
     * @return array
     */
    public static function get_related_link($extent) {
        $links = array();
        foreach (self::_links_for_extent($extent) as $link) {
            $links[] = daddslashes($link);
        }
        rsort($links);
        return $links;
    }

    /**
     * Replaces the first occurrence of each related-link name in $content with
     * an anchor to its URL (case-insensitive match).
     *
     * Existing script blocks, anchors, img tags and [attach]N[/attach] codes are
     * swapped for numbered <relatedlink>N</relatedlink> markers first, so names
     * inside them are left alone, and are put back afterwards.
     *
     * @param string $content HTML/BBCode content.
     * @param string $extent  One of article, forum, group, blog.
     * @return string The processed content; the input unchanged when there are
     *                no applicable links or a regex step fails.
     */
    public static function parse_related_link($content, $extent) {
        global $_G;
        $links = self::_links_for_extent($extent);
        if (!$links) {
            return $content;
        }

        $searcharray = $replacearray = array();
        foreach ($links as $link) {
            // Keyed by name so a name configured twice is only applied once.
            $searcharray[$link['name']] = '/('.preg_quote($link['name'], '/').')/i';
            // "$" and "\" are special in a preg replacement string; escape them
            // so a URL or name containing them is inserted literally.
            $replacearray[$link['name']] = addcslashes(
                "<a href=\"{$link['url']}\" target=\"_blank\" class=\"relatedlink\">{$link['name']}</a>",
                '\\$'
            );
        }

        // The /e modifier was removed in PHP 7; callbacks do the stash/restore.
        $_G['trunsform_tmp'] = array();
        $protected = preg_replace_callback(
            "/(<script\s+.*?>.*?<\/script>)|(<a\s+.*?>.*?<\/a>)|(<img\s+.*?[\/]?>)|(\[attach\](\d+)\[\/attach\])/is",
            array(__CLASS__, '_stash_fragment'),
            $content
        );
        if ($protected === null) {
            return $content;
        }

        $linked = preg_replace($searcharray, $replacearray, $protected, 1);
        if ($linked === null) {
            $linked = $protected;
        }

        $restored = preg_replace_callback(
            "/<relatedlink>(\d+)<\/relatedlink>/",
            array(__CLASS__, '_restore_fragment'),
            $linked
        );
        return $restored === null ? $content : $restored;
    }

    /** preg_replace_callback handler: stores the matched fragment and returns its numbered marker. */
    public static function _stash_fragment($m) {
        global $_G;
        $_G['trunsform_tmp'][] = base64_encode($m[0]);
        return '<relatedlink>'.(count($_G['trunsform_tmp']) - 1).'</relatedlink>';
    }

    /** preg_replace_callback handler: returns the fragment stored under the marker's number. */
    public static function _restore_fragment($m) {
        global $_G;
        // Leave a marker that was never stashed exactly as it is.
        if (!isset($_G['trunsform_tmp'][$m[1]])) {
            return $m[0];
        }
        return base64_decode($_G['trunsform_tmp'][$m[1]]);
    }

    /**
     * Stores a string in, or fetches one from, $_G['trunsform_tmp'].
     *
     * Kept for existing callers; parse_related_link() no longer uses it.
     *
     * @param string $type   'encode' stores $string and returns its index;
     *                       'decode' returns the string stored at index $string.
     * @param string $prefix Text placed before the returned value.
     * @param string $string Text to store, or the index to fetch.
     * @param string $suffix Text placed after the returned value.
     * @return string|null Null for any other $type.
     */
    public static function base64_transform($type, $prefix, $string, $suffix) {
        global $_G;
        if ($type == 'encode') {
            // Turns \" back into " before storing (presumably undoing the
            // quoting that the old /e replacement applied — confirm).
            $_G['trunsform_tmp'][] = base64_encode(str_replace("\\\"", "\"", $string));
            return $prefix.(count($_G['trunsform_tmp']) - 1).$suffix;
        } elseif ($type == 'decode') {
            return $prefix.base64_decode($_G['trunsform_tmp'][$string]).$suffix;
        }
    }
}
?>
在代码中我们再次看到,思想都是一样的,只是正则稍微不同而已。以上两种都没有考虑到关联链接很多时的性能问题。discuz的思路是可以在客户端生成来缓解服务器的压力,但量大的话,客户端同样也会卡死。
再来说说自己的小思路,问题的关键是如何快速的匹配出一个内容里究竟包含几个关联链接
现提供一个小sql就可以轻松解决,测试十几W条关联链接没什么大问题,是一个小而快的解决方案,大家有更好的方法欢迎交流。
select id,keyword,url from fmb_keywordlink where INSTR('张玉珊修身堂(长宁龙之梦店)aaaBodyConcept普拉提', keyword) > 0;
including hip tunics
snooki weight loss Fashion and Makeup Tips for Brunette Girls
gay pornStarting Your Clothing Line Business in 8 Steps
