Lucene的使用方法与Luke工具(2)
文章目录
第2章 Lucene快速入门
2.1 项目搭建
2.1.1 SQL语句
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;

-- ----------------------------
-- Table structure for goods
-- (dropped and recreated so the script can be re-run from scratch)
-- ----------------------------
DROP TABLE IF EXISTS `goods`;
CREATE TABLE `goods` (
    `id`    INT(11)        NOT NULL AUTO_INCREMENT COMMENT '主键',
    `name`  VARCHAR(255)   CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL COMMENT '商品名称',
    `title` VARCHAR(255)   CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL COMMENT '标题',
    `price` DECIMAL(10, 2) DEFAULT NULL COMMENT '价格',
    `pic`   VARCHAR(255)   CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL COMMENT '图片',
    PRIMARY KEY (`id`) USING BTREE
)
    ENGINE = InnoDB
    AUTO_INCREMENT = 29
    CHARACTER SET = utf8
    COLLATE = utf8_general_ci
    ROW_FORMAT = DYNAMIC;
-- ----------------------------
-- Records of goods
-- The column list is spelled out on every INSERT so the seed data does not
-- depend on the physical column order of the table.
-- ----------------------------
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (1, '华为nova 5i Pro', '华为 HUAWEI nova 5i Pro 前置3200万人像超级夜景4800万AI四摄极点全面屏6GB+128GB翡冷翠全网通双4G手机', 2199.00, 'https://img12.360buyimg.com/n5/jfs/t1/57784/26/5843/534057/5d39087fEb9cd66b7/d66c941633b410dd.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (2, '华为荣耀20', ' 荣耀20 4800万超广角AI四摄 3200W美颜自拍 麒麟Kirin980全网通版8GB+128GB 冰岛白 移动联通电信4G全面屏', 2699.00, 'https://img12.360buyimg.com/n5/s54x54_jfs/t28603/102/1236695962/227407/29d12d49/5ce41500N146e357e.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (3, '华为 HUAWEI nova 5 Pro', '华为 HUAWEI nova 5 Pro 前置3200万人像超级夜景4800万AI四摄麒麟980芯片8GB+128GB绮境森林全网通双4G手机', 2999.00, 'https://img13.360buyimg.com/n5/s54x54_jfs/t1/47193/2/3369/231278/5d11cb39Ef3674059/ba3c0a1d956429e2.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (4, '小米 CC 9e', '小米 CC 9e屏幕指纹 3200万美颜自拍 4800万超广角三摄 6GB+64GB 深蓝星球 全网通 水滴全面屏游戏拍照手机', 1349.00, 'https://img10.360buyimg.com/n5/s54x54_jfs/t1/40290/27/11774/242367/5d2d4b7dE2e3fac58/4006d5155fec96cd.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (5, '小米红米Note7', '小米 红米Redmi Note7 幻彩渐变AI双摄 6GB+64GB 梦幻蓝 全网通4G 双卡双待 水滴全面屏拍照游戏智能手机', 1299.00, 'https://img11.360buyimg.com/n5/s54x54_jfs/t1/25067/14/4953/146200/5c371c8dE0999a312/d44bdb1c5e5f709a.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (6, '小米 红米Redmi Note7Pro', '小米 红米Redmi Note7Pro AI双摄 6GB+128GB 梦幻蓝 全网通4G 双卡双待 水滴屏拍照游戏手机', 1499.00, 'https://img10.360buyimg.com/n5/s54x54_jfs/t1/22746/35/11162/104085/5c8b6a19Eb8e8f34e/9cd57e3a481c7160.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (7, '小米9 SE', '小米9 SE 4800万超广角三摄 骁龙712 水滴全面屏 游戏智能拍照手机 8GB+128GB 全息幻彩蓝 全网通4G双卡双待', 2249.00, 'https://img12.360buyimg.com/n5/s54x54_jfs/t1/42543/32/5265/77884/5cebcaf1Ec3ac2ab6/1fd1f6fab7811447.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (8, '魅族 16s', '魅族 16s 骁龙855全面屏拍照游戏手机 6GB+128GB 碳纤黑 全网通移动联通电信4G手机 双卡双待', 2699.00, 'https://img13.360buyimg.com/n5/s54x54_jfs/t1/32804/25/14881/277224/5cbf0a08Ecfe815f9/2351cebef4cbe443.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (9, '魅族16Xs', '魅族16Xs 全面屏三摄拍照游戏手机 6GB+128GB骑士黑 4000mAh大电池全网通移动联通电信4G智能手机 双卡双待', 1699.00, 'https://img14.360buyimg.com/n5/s54x54_jfs/t1/47393/33/1179/77607/5cef4f50E2e0fd9e6/2835154527d724e7.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (10, '魅族 Note8', '魅族 Note8 全面屏手机 4GB+64GB 曜黑 全网通移动联通电信4G手机 双卡双待', 899.00, 'https://img12.360buyimg.com/n5/s54x54_jfs/t1/2481/15/12216/274259/5bd1af8bE2de8c15f/c56a6788061f4d46.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (11, '神舟战神', '神舟(HASEE)战神Z7-CT5NA 英特尔酷睿i5-9300H GTX1660Ti 6G独显15.6英寸窄边框游戏笔记本电脑(8G 512G SSD)', 5989.00, 'https://img12.360buyimg.com/n5/s54x54_jfs/t28834/286/1327928640/227342/e6558c29/5cdd0695Nb1405cc3.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (12, '神舟精盾', '神舟(HASEE)精盾U45A1畅玩版 英特尔酷睿i5-8265U 14英寸窄边框轻薄笔记本电脑8G 512GPCIe SSD MX250 2G IPS', 3989.00, 'https://img10.360buyimg.com/n5/s54x54_jfs/t1/53197/35/2666/156443/5d07d3f8E3b647e9a/3aa8dcb79c66c33d.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (13, '神舟(HASEE) 优雅XS-3000S1/X4-KL7S1 I7-7500U X3G1商务本 X1 X4-KL7S1I7-7500U/16/256+1', '神舟(HASEE) 优雅XS-3000S1/X4-KL7S1 I7-7500U X3G1商务本 X1 X4-KL7S1I7-7500U/16/256+1', 5199.00, 'https://img12.360buyimg.com/n5/jfs/t1/44399/21/3002/73822/5ccd2f9aE69ddb0b9/c6dbb67d5aa0ce3a.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (14, '神舟战神', '神舟(HASEE)战神G8-CT7NK 英特尔酷睿i7-9750H RTX2060 72%色域144Hz17.3英寸游戏笔记本电脑16G 256GSSD+1T', 8489.00, 'https://img11.360buyimg.com/n5/s54x54_jfs/t29797/23/1432190332/154288/3b194518/5cdf7017N86940fe7.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (15, '神舟战神Z7M', '神舟(HASEE)战神Z7M-KP7GZ 英特尔酷睿i7-8750H GTX1050Ti 15.6英寸72%色域商务设计师游戏本(8G 128G+1T)', 5389.00, 'https://img11.360buyimg.com/n5/s54x54_jfs/t1/32189/20/11013/145627/5cb40149E7b953a44/4e64910f84c4648e.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (16, '华为MateBook X Pro', '华为HUAWEI MateBook X Pro 2019款 英特尔酷睿i7 13.9英寸全面屏轻薄笔记本(i7 16G 1T MX250 3K触控) 灰', 13999.00, 'https://img12.360buyimg.com/n5/s54x54_jfs/t1/38530/28/1693/191138/5cbdc69aE65d57b0e/7748fdd0fcd0563f.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (17, '华为MateBook 14', '华为HUAWEI MateBook 14 全面屏轻薄性能笔记本电脑(英特尔酷睿i5 8G 512G MX250 office 2K 一碰传)灰', 5999.00, 'https://img10.360buyimg.com/n5/jfs/t1/16018/40/13941/131110/5ca3286cE7bbb1c23/2c0a1de00945af08.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (18, '小米pro', '小米(MI)Pro 2019款 15.6英寸金属轻薄笔记本(第八代英特尔酷睿i7-8550U 16G 512GSSD MX250 2G独显) 深空灰', 7199.00, 'https://img11.360buyimg.com/n5/s54x54_jfs/t1/83950/36/557/45350/5cebfa41E02cee56c/8bc3f60029d6c5a0.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (19, '小米(MI)RedmiBook 14英寸MX250 2G独显 全金属超轻薄便携红米游戏本笔记本电脑 i5-8265U 8G 512G MX250独显', '小米(MI)RedmiBook 14英寸MX250 2G独显 全金属超轻薄便携红米游戏本笔记本电脑 i5-8265U 8G 512G MX250独显', 3999.00, 'https://img11.360buyimg.com/n5/jfs/t1/63604/40/808/33300/5cf087e6E4edd35f3/24a842fd24539ef4.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (20, '联想小新Air14英寸轻薄超极笔记本电脑满血版四核办公游戏本 i7-8565 20G 1TB MX250 定制银 正版win10+office', '联想小新Air14英寸轻薄超极笔记本电脑满血版四核办公游戏本 i7-8565 20G 1TB MX250 定制银 正版win10+office', 7099.00, 'https://img14.360buyimg.com/n5/jfs/t1/82062/36/2718/34167/5d0f2cacE0586f769/e70326356821455f.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (21, '联想(Lenovo)小新air', '联想轻薄本小新Air14超薄笔记本电脑窄边框镜面屏带指纹背光学生办公设计独显超极本air pro升级 15.6英寸尊贵银 镜面屏 定制i7-8550U 16G 1T固态', 8999.00, 'https://img10.360buyimg.com/n5/jfs/t1/31412/29/6259/54365/5c8cbd9eEfa4efb6f/fa51739fc476d503.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (22, '华硕(ASUS)灵耀Deluxe14 14.0英寸 92%全面屏 轻薄笔记本电脑超薄笔记本 冰钻银 i7-8565U 8G 512G固态 92%屏占比', '华硕(ASUS)灵耀Deluxe14 14.0英寸 92%全面屏 轻薄笔记本电脑超薄笔记本 冰钻银 i7-8565U 8G 512G固态 92%屏占比', 7599.00, 'https://img10.360buyimg.com/n5/jfs/t1/14412/14/13290/64393/5c9e39e1E725e62ea/ff96d311347a88d6.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (23, '华为荣耀笔记本MagicBook 2019超薄本14英寸轻薄本学生商务办公便携手提笔记本电脑超极本 2019版R7+8G+512G PCIE银', '华为荣耀笔记本MagicBook 2019超薄本14英寸轻薄本学生商务办公便携手提笔记本电脑超极本 2019版R7+8G+512G PCIE银', 4999.00, 'https://img13.360buyimg.com/n5/jfs/t1/27680/33/15329/41379/5cb05173E9805dccb/eb8ae8ff323e2020.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (24, '惠普(HP)星14青春版 笔记本超轻薄微窄边框锐龙R7游戏学生本金属商务办公手提电脑【官方新品】 星空银:R7-3700U【14寸 IPS高清屏】 配置五:8G/512G固态+1T', '惠普(HP)星14青春版 笔记本超轻薄微窄边框锐龙R7游戏学生本金属商务办公手提电脑【官方新品】 星空银:R7-3700U【14寸 IPS高清屏】 配置五:8G/512G固态+1T', 4699.00, 'https://img14.360buyimg.com/n5/jfs/t1/19237/21/13795/81184/5ca21fb5E5e21d713/c23f316c1f24db77.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (25, 'ThinkPad联想游侠E485(0HCD) 14英寸轻薄商务办公锐龙7笔记本 升配(16G 256G固态+1T双硬盘 R7-2700u FHD屏 office)', 'ThinkPad联想游侠E485(0HCD) 14英寸轻薄商务办公锐龙7笔记本 升配(16G 256G固态+1T双硬盘 R7-2700u FHD屏 office)', 6999.00, 'https://img14.360buyimg.com/n5/jfs/t1/15146/30/9562/83903/5c80d3f8Eb1cee110/720a999eedd964a2.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (26, '戴尔G3', '戴尔DELL游匣G3 15.6英寸轻薄游戏笔记本电脑(九代i7-9750H 8G双通道 128GSSD 1T GTX1660TiMax-Q 6G 72色域)', 7099.00, 'https://img12.360buyimg.com/n5/s54x54_jfs/t1/71785/30/5471/166703/5d39293bEd8fce664/48f30488a9ec0ad4.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (27, '机械革命轻薄游戏笔记本电脑', '机械革命(MECHREVO)Z2 Air i7 15.6英寸轻薄游戏笔记本电脑(i7-9750H 8G 512G SSD GTX1650 72%高色域)', 6089.00, 'https://img11.360buyimg.com/n5/s54x54_jfs/t1/61544/27/2452/146827/5d0b55feE229d5224/c7d799230f50f3be.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (28, '东标方准笔记本电脑', '东标牌商务笔记本,价格实惠!', 9888.00, 'http://www.dongfangbiaozhun.com/picture/logo.gif');
SET FOREIGN_KEY_CHECKS = 1;
2.1.2 maven依赖
<dependencies>
    <!-- Lucene core: IndexWriter, IndexSearcher, Directory, Document, ...
         Declared explicitly because the demo code imports these classes directly. -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-core</artifactId>
        <version>8.0.0</version>
    </dependency>
    <!-- Lucene common analyzers -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-analyzers-common</artifactId>
        <version>8.0.0</version>
    </dependency>
    <!-- Lucene classic query parser (org.apache.lucene.queryparser.classic.QueryParser),
         used by the search demo -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-queryparser</artifactId>
        <version>8.0.0</version>
    </dependency>
    <!-- IK analyzer (Chinese word segmentation) -->
    <dependency>
        <groupId>com.github.magese</groupId>
        <artifactId>ik-analyzer</artifactId>
        <version>8.0.0</version>
    </dependency>
    <!-- MySQL JDBC driver -->
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.47</version>
    </dependency>
    <!-- Unit testing -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
    </dependency>
    <!-- Lombok: only needed at compile time, so it is kept out of the runtime classpath -->
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <version>1.18.18</version>
        <scope>provided</scope>
    </dependency>
</dependencies>
2.1.3 实体类:
package com.dfbz.entity;
@AllArgsConstructor
@NoArgsConstructor
@Data
public class Goods {
private Integer id;
private String name;
private String title;
private Double price;
private String pic;
}
2.1.4 编写DAO:
package com.dfbz.dao;
import com.dfbz.entity.Goods;
import java.sql.*;
import java.util.ArrayList;
import java.util.List;
/**
 * Reads the {@code goods} table over plain JDBC.
 */
public class GoodsDao {
    /**
     * Loads every row of the {@code goods} table.
     *
     * @return all rows mapped to {@link Goods}, or {@code null} if the driver could not be
     *         loaded or any JDBC call failed (the exception is printed to stderr)
     */
    public List<Goods> findAll() {
        try {
            Class.forName("com.mysql.jdbc.Driver");
            // try-with-resources closes ResultSet, Statement and Connection (in that order)
            // on every path, including when the query or the row mapping throws.
            try (Connection conn = DriverManager.getConnection("jdbc:mysql:///lucene_db", "root", "admin");
                 Statement st = conn.createStatement();
                 ResultSet rs = st.executeQuery("select id, name, title, price, pic from goods")) {
                List<Goods> goodsList = new ArrayList<>();
                while (rs.next()) {
                    Goods goods = new Goods();
                    goods.setId(rs.getInt("id"));
                    goods.setName(rs.getString("name"));
                    goods.setTitle(rs.getString("title"));
                    goods.setPrice(rs.getDouble("price"));
                    goods.setPic(rs.getString("pic"));
                    goodsList.add(goods);
                }
                return goodsList;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        // Failure is reported to callers as null.
        return null;
    }
}
2.2 建立索引
2.2.1 步骤:
- 读取原始数据(从数据库读取)
- 创建文档对象(Document)、域对象(Field)。并把域对象添加到文档对象中
- 创建分析器(Analyzer),用于分词
- 创建索引库配置对象(IndexWriterConfig),配置索引库(传入分析器)
- 设置索引库打开方式(OpenMode)
- 创建索引库目录对象(Directory),指定索引库的目录
- 创建索引库操作对象(IndexWriter),用于把文档写入索引库中
- 释放资源(close)
2.2.2 实现代码:
package com.dfbz.demo01_lucene入门;
import com.dfbz.dao.GoodsDao;
import com.dfbz.entity.Goods;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
/**
 * Builds the Lucene index in {@code D:/index} from the rows returned by {@link GoodsDao#findAll()}.
 *
 * @author lscl
 * @version 1.0
 */
public class Demo01_createIndex {
    GoodsDao goodsDao = new GoodsDao();

    /**
     * Converts every {@link Goods} row into a Lucene {@link Document} and writes them all
     * into a freshly created index.
     *
     * @throws Exception if the goods cannot be loaded or the index cannot be written
     */
    @Test
    public void test1() throws Exception {
        List<Goods> goodsList = goodsDao.findAll();
        if (goodsList == null) {
            // findAll() signals a database failure with null; fail with a clear message
            // instead of a NullPointerException in the loop below.
            throw new IllegalStateException("GoodsDao.findAll() returned null - could not load goods from the database");
        }

        List<Document> docs = new ArrayList<>();
        for (Goods goods : goodsList) {
            // One document per goods row
            Document doc = new Document();
            double price = goods.getPrice();
            // Add the fields
            doc.add(new StringField("id", goods.getId() + "", Field.Store.YES));
            doc.add(new TextField("name", goods.getName(), Field.Store.YES));
            doc.add(new TextField("title", goods.getTitle(), Field.Store.YES));
            // DoublePoint only indexes the value (for exact/range queries); it is not stored.
            doc.add(new DoublePoint("price", price));
            // Store the price as well so it can be read back from search hits.
            doc.add(new StoredField("price", price));
            doc.add(new StoredField("pic", goods.getPic()));
            docs.add(doc);
        }

        // Analyzer used to tokenize the text fields
        Analyzer analyzer = new IKAnalyzer();
        // Index writer configuration
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        /*
            CREATE: the existing index is removed on every run
            APPEND: the existing index is kept and this run's documents are appended
            CREATE_OR_APPEND: create the index if it does not exist, otherwise append to it
         */
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        // Open the index directory and the writer; try-with-resources closes the writer first
        // and then the directory, also when adding documents fails.
        try (FSDirectory dir = FSDirectory.open(Paths.get("D:/index"));
             IndexWriter indexWriter = new IndexWriter(dir, config)) {
            // Add the documents
            indexWriter.addDocuments(docs);
        }
    }
}
- 查看索引目录:
2.3 Luke工具
Luke是一个用于Lucene搜索引擎的,方便开发和诊断的第三方工具,它可以访问现有Lucene的索引,并允许您显示和修改。
Luke的Github官网:https://github.com/DmitryKey/luke
Luke下载地址:https://github.com/DmitryKey/luke/tags
注意:Luke的版本必须和Lucene的版本一致!我们本次采用的版本是8.0.0
2.3.1 运行界面介绍:
1)主界面
2)文档界面:
3)搜索界面:
4)文本分析界面
2.3.2 添加扩展词
扩展IK词:“东标方准”:
将数据库中的数据减少,方便观察效果:
运行代码,未扩展词库之前:
扩展词库之后:
2.3.3 添加停用词
在数据库中添加一条记录:
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (29, '是很好的手机啊', '是很好的手机啊', 9999.9, 'https://www.baidu.com/favicon.ico');
添加停用词,运行测试代码;
Tips:停用词文件中的第一行不会被Lucene读取到,我们自己的停用词从第二行开始编写
未添加前:啊、是、的
添加停用词之后:
2.4 检索索引
2.4.1 步骤:
- 创建索引库目录对象(Directory),指定索引库目录
- 创建索引库读取对象(IndexReader),指定把索引库数据读取到内存中
- 创建索引库搜索对象(IndexSearcher),用于搜索索引库
- 创建分词器(Analyzer),用于搜索条件分词
- 创建查询解析器(QueryParser),传入分词器并指定查询的域
- 创建查询对象(Query),指定查询条件
- 使用索引库搜索对象(IndexSearcher)执行搜索,返回搜索结果(TopDocs)
- 处理结果集
- 关闭资源(close)
2.4.2 代码实现
package com.dfbz.demo01;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;
import java.nio.file.Paths;
/**
 * Searches the Lucene index in {@code D:/index} and prints the matching documents.
 *
 * @author lscl
 * @version 1.0
 */
public class Demo02_QueryIndex {
    /**
     * Parses the query text against the {@code title} field, fetches the top 10 hits and
     * prints the score and the stored fields of each hit.
     *
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    @Test
    public void test1() throws Exception {
        // 1. Open the index directory, 2. open a reader on it.
        // try-with-resources closes the reader before the directory it reads from,
        // and does so even when the search throws.
        try (FSDirectory dir = FSDirectory.open(Paths.get("D:/index"));
             IndexReader reader = DirectoryReader.open(dir)) {
            // 3. Create the searcher
            IndexSearcher searcher = new IndexSearcher(reader);
            // 4. Create the analyzer used to tokenize the query text
            Analyzer analyzer = new IKAnalyzer();
            // 5. Create the query parser; "title" is the default field
            QueryParser queryParser = new QueryParser("title", analyzer);
            // 6. Parse the query text into a Query
            Query query = queryParser.parse("梦幻蓝");
            // 7. Run the search, keeping at most 10 hits
            TopDocs topDocs = searcher.search(query, 10);

            // Number of matching documents
            TotalHits totalHits = topDocs.totalHits;
            System.out.println("共查询到【" + totalHits.value + "】篇文档");

            // Each ScoreDoc carries the document id and its score
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            for (ScoreDoc scoreDoc : scoreDocs) {
                // Document id
                int docId = scoreDoc.doc;
                // Document score
                float score = scoreDoc.score;

                Document doc = searcher.doc(docId);
                String id = doc.get("id");
                String name = doc.get("name");
                String title = doc.get("title");
                // NOTE: only stored fields can be read back; this is null when "price" was
                // indexed solely as a DoublePoint without an accompanying StoredField.
                String price = doc.get("price");
                String pic = doc.get("pic");

                System.out.println("匹配分值: " + score);
                System.out.println("商品id: " + id);
                System.out.println("商品名称: " + name);
                System.out.println("商品标题: " + title);
                System.out.println("商品价格: " + price);
                System.out.println("商品图片: " + pic);
                System.out.println("-------------------");
            }
        }
    }
}