[学习笔记]IK分词器的学习
IK分词器有几种模式
# 测试分词器
POST /_analyze
{
"text":"黑马程序员学习java太棒了",
"analyzer": "standard"
}
# 测试分词器
POST /_analyze
{
"text":"黑马程序员学习java太棒了",
"analyzer": "ik_max_word"
}
# 测试分词器
POST /_analyze
{
"text":"黑马程序员学习java太棒了",
"analyzer": "ik_smart"
}
总结
ik_max_word最细切分, 占用内存多,但是分词多
ik_smart最少切分,占用内存少,但是分词少
standard中文没用
IK分词器如何拓展词条?如何停用词条
配置你的ext.dic和stopword.dic
ext.dic
mapping属性
文档操作
分词规则总结
CREATE TABLE `tb_hotel` (
`id` bigint(20) NOT NULL COMMENT '酒店id',
`name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '酒店名称',
`address` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '酒店地址',
`price` int(10) NOT NULL COMMENT '酒店价格',
`score` int(2) NOT NULL COMMENT '酒店评分',
`brand` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '酒店品牌',
`city` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '所在城市',
`star_name` varchar(16) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '酒店星级,1星到5星,1钻到5钻',
`business` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '商圈',
`latitude` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '纬度',
`longitude` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '经度',
`pic` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '酒店图片',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Compact;
# 酒店的mapping
PUT /hotel
{
"mappings": {
"properties": {
"id":{
"type": "keyword"
},
"name":{
"type": "text",
"analyzer": "ik_max_word",
"copy_to": "all"
},
"address":{
"type":"keyword",
"index": false
},
"price":{
"type":"integer"
},
"score":{
"type":"integer"
},
"brand":{
"type": "keyword"
},
"city":{
"type":"keyword"
},
"star_name":{
"type": "keyword"
},
"business":{
"type": "keyword",
"copy_to": "all"
},
"location":{
"type":"geo_point"
},
"pic":{
"type": "keyword",
"index": false
},
"all":{
"type": "text",
"analyzer": "ik_max_word"
}
}
}
Java的测试代码
package cn.itcast.hotel.constants;
public class HotelIndexConstants {
public static final String MAPPING_TEMPLATE = "{\n" +
" \"mappings\": {\n" +
" \"properties\": {\n" +
" \"id\":{\n" +
" \"type\": \"keyword\"\n" +
" },\n" +
" \"name\":{\n" +
" \"type\": \"text\",\n" +
" \"analyzer\": \"ik_max_word\",\n" +
" \"copy_to\": \"all\"\n" +
" },\n" +
" \"address\":{\n" +
" \"type\":\"keyword\",\n" +
" \"index\": false\n" +
" },\n" +
" \"price\":{\n" +
" \"type\":\"integer\"\n" +
" },\n" +
" \"score\":{\n" +
" \"type\":\"integer\"\n" +
" },\n" +
" \"brand\":{\n" +
" \"type\": \"keyword\"\n" +
" },\n" +
" \"city\":{\n" +
" \"type\":\"keyword\"\n" +
" },\n" +
" \"star_name\":{\n" +
" \"type\": \"keyword\"\n" +
" },\n" +
" \"business\":{\n" +
" \"type\": \"keyword\",\n" +
" \"copy_to\": \"all\"\n" +
" },\n" +
" \"location\":{\n" +
" \"type\":\"geo_point\"\n" +
" },\n" +
" \"pic\":{\n" +
" \"type\": \"keyword\",\n" +
" \"index\": false\n" +
" },\n" +
" \"all\":{\n" +
" \"type\": \"text\",\n" +
" \"analyzer\": \"ik_max_word\"\n" +
" }\n" +
" }\n" +
" }\n" +
" ";
}
package cn.itcast.hotel;
import cn.itcast.hotel.pojo.Hotel;
import cn.itcast.hotel.pojo.HotelDoc;
import cn.itcast.hotel.service.impl.HotelService;
import com.alibaba.fastjson.JSON;
import org.apache.http.HttpHost;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.common.xcontent.XContentType;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import java.io.IOException;
import static cn.itcast.hotel.constants.HotelIndexConstants.MAPPING_TEMPLATE;
/**
* @author lst
* @date 2023年11月23日 13:38
*/
@SpringBootTest
public class HotelT {
private RestHighLevelClient restHighLevelClient;
@Autowired
HotelService hotelService;
@BeforeEach
public void before() {
restHighLevelClient = new RestHighLevelClient(RestClient.builder(HttpHost.create("http://localhost:9200")));
}
@AfterEach
void tearDown() throws IOException {
restHighLevelClient.close();
}
@Test
public void testCreateIndex() throws IOException {
CreateIndexRequest request = new CreateIndexRequest("hotel");
request.source(MAPPING_TEMPLATE, XContentType.JSON);
restHighLevelClient.indices().create(request, RequestOptions.DEFAULT);
}
@Test
public void testDeleteIndex() throws IOException {
DeleteIndexRequest request = new DeleteIndexRequest("hotel");
restHighLevelClient.indices().delete(request, RequestOptions.DEFAULT);
}
}