golang全文搜索--使用sphinx
不多廢話,測試環境 `ubuntu 13.10`
## 安裝
sudo apt-get install sphinxsearch
## 配置
nano /etc/sphinxsearch/sphinx.conf
# 數據源配置
source default
{
type = xmlpipe2
xmlpipe_command = /path/xmlpipe2
xmlpipe_fixup_utf8 = 1
}
# 索引配置
index default
{
type = plain
source = default
# 索引文件路徑
path = /path/filename
# 不存儲文檔信息
docinfo = none
#最小索引詞長度
min_word_len = 2
charset_type = utf-8
# 指定utf-8的編碼表
charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
# 簡單分詞,只支持0和1,如果要搜索中文,請指定為1
ngram_len = 1
# 需要分詞的字符,如果要搜索中文,去掉前面的注釋
ngram_chars = U+3000..U+2FA1F
}
## xmlpipe2的格式
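<?xml version="1.0" encoding="utf-8"?>
<sphinx:docset>
<sphinx:schema>
<sphinx:field name="content"/>
</sphinx:schema>
<sphinx:document id="233221">
<content>55</content>
</sphinx:document>
</sphinx:docset>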
只要配置文件中 xmlpipe_command 字段配置的可執行文件輸出為相應的xml流即可,這樣幾乎適配了所有數據源
## 生成索引
$ indexer default
Sphinx 2.0.4-release (r3135)
Copyright (c) 2001-2012, Andrew Aksyonoff
Copyright (c) 2008-2012, Sphinx Technologies Inc (http://sphinxsearch.com)
using config file '/etc/sphinxsearch/sphinx.conf'...
indexing index 'default'...
WARNING: Attribute count is 0: switching to none docinfo
WARNING: collect_hits: mem_limit=0 kb too low, increasing to 12288 kb
collected 4 docs, 0.0 MB
sorted 0.0 Mhits, 100.0% done
total 4 docs, 47 bytes
total 0.000 sec, 54970 bytes/sec, 4678.36 docs/sec
total 2 reads, 0.000 sec, 0.0 kb/call avg, 0.0 msec/call avg
total 6 writes, 0.000 sec, 0.0 kb/call avg, 0.0 msec/call avg
## 查詢
$ search 55
Sphinx 2.0.4-release (r3135)
Copyright (c) 2001-2012, Andrew Aksyonoff
Copyright (c) 2008-2012, Sphinx Technologies Inc (http://sphinxsearch.com)
using config file '/etc/sphinxsearch/sphinx.conf'...
index 'default': query '55 ': returned 1 matches of 1 total in 0.000 sec
displaying matches:
1. document=233221, weight=1695
words:
1. '55': 1 documents, 1 hits
## 配置searchd
編輯sphinx配置文件,添加:
## 監聽地址
searchd
{
# 監聽地址(Unix socket)
listen = /var/log/searchd.sock
# 日志文件
log = /var/log/searchd.log
# 查詢日志
query_log = /var/log/query.log
# 客戶端讀取超時時間
read_timeout = 5
# 客戶端請求超時時間
client_timeout = 3000
# PID 文件
pid_file = /var/log/searchd.pid
}
## 運行searchd
$ sudo searchd
Sphinx 2.0.4-release (r3135)
Copyright (c) 2001-2012, Andrew Aksyonoff
Copyright (c) 2008-2012, Sphinx Technologies Inc (http://sphinxsearch.com)
using config file '/etc/sphinxsearch/sphinx.conf'...
WARNING: compat_sphinxql_magics=1 is deprecated; please update your application and config
listening on UNIX socket /var/log/searchd.sock
precaching index 'default'
precached 1 indexes in 0.000 sec
驗證一下狀態
$ searchd --status
Sphinx 2.0.4-release (r3135)
Copyright (c) 2001-2012, Andrew Aksyonoff
Copyright (c) 2008-2012, Sphinx Technologies Inc (http://sphinxsearch.com)
using config file '/etc/sphinxsearch/sphinx.conf'...
searchd status
--------------
uptime: 7
connections: 1
maxed_out: 0
## golang客戶端
package main
import (
"github.com/yunge/sphinx"
"log"
)
// main runs the demo and terminates the process with a logged error if any
// step fails.
func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run connects to searchd over its Unix socket, logs the daemon status
// counters, and then issues one sample query against the "default" index.
//
// It returns the first error encountered. Keeping the work in a helper that
// returns an error (rather than calling log.Fatal inline) lets the deferred
// Close run on every exit path.
func run() error {
	// Connection parameters: Socket is the same path as the `listen`
	// setting in the searchd section of sphinx.conf.
	opts := &sphinx.Options{
		Socket:  "/var/log/searchd.sock",
		Timeout: 5000,
	}

	// Create the client.
	spClient := &sphinx.Client{Options: opts}
	if err := spClient.Error(); err != nil {
		return err
	}

	// Open the connection and make sure it is released when we are done.
	if err := spClient.Open(); err != nil {
		return err
	}
	defer spClient.Close()

	// Fetch the daemon status; each row is printed as a name/value pair.
	status, err := spClient.Status()
	if err != nil {
		return err
	}
	for _, row := range status {
		log.Printf("%20s:\t%s\n", row[0], row[1])
	}

	// Run a query for "33" against the "default" index; the third argument
	// is passed through as the query comment.
	res, err := spClient.Query("33", "default", "Test Query()")
	if err != nil {
		return err
	}
	log.Println(res)
	return nil
}
輸出:
2013/12/05 01:14:55 uptime: 148
2013/12/05 01:14:55 connections: 2
2013/12/05 01:14:55 maxed_out: 0
2013/12/05 01:14:55 command_search: 0
2013/12/05 01:14:55 command_excerpt: 0
2013/12/05 01:14:55 command_update: 0
2013/12/05 01:14:55 command_keywords: 0
2013/12/05 01:14:55 command_persist: 2
2013/12/05 01:14:55 command_status: 2
2013/12/05 01:14:55 command_flushattrs: 0
2013/12/05 01:14:55 agent_connect: 0
2013/12/05 01:14:55 agent_retry: 0
2013/12/05 01:14:55 queries: 0
2013/12/05 01:14:55 dist_queries: 0
2013/12/05 01:14:55 query_wall: 0.000
2013/12/05 01:14:55 query_cpu: OFF
2013/12/05 01:14:55 dist_wall: 0.000
2013/12/05 01:14:55 dist_local: 0.000
2013/12/05 01:14:55 dist_wait: 0.000
2013/12/05 01:14:55 query_reads: OFF
2013/12/05 01:14:55 query_readkb: OFF
2013/12/05 01:14:55 query_readtime: OFF
2013/12/05 01:14:55 avg_query_wall: 0.000
2013/12/05 01:14:55 avg_query_cpu: OFF
2013/12/05 01:14:55 avg_dist_wall: 0.000
2013/12/05 01:14:55 avg_dist_local: 0.000
2013/12/05 01:14:55 avg_dist_wait: 0.000
2013/12/05 01:14:55 avg_query_reads: OFF
2013/12/05 01:14:55 avg_query_readkb: OFF
2013/12/05 01:14:55 avg_query_readtime: OFF
2013/12/05 01:14:55 &{[content] [] [] [] 1 1 0.001 [{33 1 1}] 0}
api參考 http://gowalker.org/github.com/yunge/sphinx
sphinx配置參考 http://www.coreseek.cn/docs/coreseek_4.1-sphinx_2.0.1-beta.html
浙公網安備 33010602011771號