前言
在项目开发过程中遇到了这样的业务需求。在网上找了许多资料,但都比较复杂,需要花时间去理解;用各种方法踩坑之后,又请教了大佬一些ES方面的知识。最主要的原因还是刚接触ES不久,对ES的用法、数据结构都不太熟悉,所以花了比较长的时间才实现这个需求。现在把这个聚合搜索的具体实现代码列出来,供大家参考。
正文
ES索引的Mapping
{
"mappings": {
"properties": {
"aid": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"content": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"tagCode": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"tagValue": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"createDateTime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fromModule": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fromWeb": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"html": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"publisher": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"releaseDateTime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"title": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"updateDateTime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"url": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"webCla": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
刚开始使用ES的时候,并不知道ES的精确匹配(term、wildcard等不分词的查询)要求对应字段的类型是keyword:text类型的字段在写入时会被分词,查询时匹配的是分词后的词项,而不是完整的原始字符串。一开始建索引时字段都用了text类型,所以做模糊查询匹配时,一直查出不符合预期的数据。
在需要做时间范围筛选或者排序的字段上,记得使用date类型,并且加上format;多个格式之间用||分隔,这样多种格式的日期字符串都能被自动解析。
JAVA代码
pom依赖
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
Entity 类
package com.crawler.service.docment;
import com.baomidou.mybatisplus.annotation.TableField;
import com.crawler.common.base.IdEntity;
import com.crawler.common.constant.GlobalConstant;
import lombok.Data;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Field;
import org.springframework.data.elasticsearch.annotations.FieldType;
import java.util.List;
/**
 * Elasticsearch document for a crawled news article, stored in the {@code idx_article} index.
 *
 * <p>Fields queried with exact or wildcard matching are mapped as {@code keyword}; the remaining
 * free-text fields are mapped as {@code text}. The date fields are held as strings on the Java side.
 */
@Data
@Document(indexName = "idx_article", type = GlobalConstant.TYPE_OR_FAMILY)
public class Article extends IdEntity {

    /** Article identifier. */
    @Field(type = FieldType.Keyword)
    private String aid;

    /** Article title; keyword-typed so wildcard queries run against the whole value. */
    @Field(type = FieldType.Keyword)
    private String title;

    /**
     * Article body; keyword-typed so wildcard queries run against the whole value.
     * Keyword fields are not analyzed, so no analyzer/searchAnalyzer is declared here:
     * Elasticsearch rejects those parameters on a keyword mapping.
     */
    @Field(type = FieldType.Keyword)
    private String content;

    /** Publisher name. */
    @Field(type = FieldType.Text)
    private String publisher;

    /**
     * Release timestamp, used for range filtering and sorting.
     * {@code fielddata} is a text-only mapping parameter and is therefore not set on this date field.
     */
    // NOTE(review): no date format is declared on the annotation; confirm it agrees with the
    // format configured on the index mapping actually in use.
    @Field(type = FieldType.Date)
    private String releaseDateTime;

    /** Source URL of the article. */
    @Field(type = FieldType.Text)
    private String url;

    /** Raw HTML of the article, analyzed with the {@code ik_max_word} analyzer. */
    @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_max_word")
    private String html;

    /** Identifier of the source web site. */
    @Field(type = FieldType.Keyword)
    private String fromWebId;

    /** Name of the source web site. */
    @Field(type = FieldType.Text)
    private String fromWeb;

    /** Identifier of the source module. */
    @Field(type = FieldType.Keyword)
    private String fromModuleId;

    /** Name of the source module. */
    @Field(type = FieldType.Text)
    private String fromModule;

    /** Creation timestamp of the record. */
    @Field(type = FieldType.Date)
    private String createDateTime;

    /** Last-update timestamp of the record. */
    @Field(type = FieldType.Date)
    private String updateDateTime;

    /** Web classification code; matched exactly when filtering. */
    @Field(type = FieldType.Keyword)
    private String webCla;

    /** Tag code. */
    @Field(type = FieldType.Keyword)
    private String tagCode;

    /** Tag value. */
    @Field(type = FieldType.Keyword)
    private String tagValue;

    /** Classification codes to filter by; excluded from the MyBatis-Plus table mapping. */
    @TableField(exist = false)
    private List<String> webClasses;
}
Repository类
就简单的继承了ElasticsearchRepository父类
package com.crawler.service.repository;
import com.crawler.service.docment.Article;
import com.crawler.service.entity.Company;
import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;
import org.springframework.stereotype.Repository;
/**
 * Spring Data repository for {@link Article} documents, keyed by a {@code String} id.
 *
 * <p>Declares no methods of its own; all operations are inherited from
 * {@link ElasticsearchRepository}.
 */
@Repository
public interface ArticleRepository extends ElasticsearchRepository<Article, String> {
}
Controller
/**
 * Returns one page of news articles matching the filter carried in the request body.
 *
 * @param article request payload; handed unchanged to {@code iArticleService.listByKeyword}
 * @return a success {@code CommonResult} wrapping the resulting page
 */
// The notes text previously described a company-info listing (copy-paste leftover); it now
// describes this endpoint.
@ApiOperation(value = "获取舆情新闻列表", notes = "根据关键字、网站分类和时间范围分页获取舆情新闻列表")
@RequestMapping(value = "/listByKeyword", method = RequestMethod.POST)
@ResponseBody
public CommonResult listByKeyword(@RequestBody Article article) {
    Page<Article> page = iArticleService.listByKeyword(article);
    return CommonResult.success(page);
}
Impl实现类
主要的逻辑都在这里实现,我写的比较简单,坑都是在这边踩完了,具体的说明都写在了注释里面。
package com.crawler.service.service.impl;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.crawler.service.docment.Article;
import com.crawler.service.mapper.ArticleMapper;
import com.crawler.service.repository.ArticleRepository;
import com.crawler.service.service.IArticleService;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.data.domain.Sort;
import org.springframework.stereotype.Service;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
/**
 * Article service that runs the keyword / classification / time-window search against
 * Elasticsearch through {@link ArticleRepository}.
 */
@Service
@Slf4j
public class IArticleServiceImpl extends ServiceImpl<ArticleMapper, Article> implements IArticleService {

    @Autowired
    private ArticleRepository articleRepository;

    /**
     * Searches articles with up to three optional conditions, all of which must hold:
     * <ul>
     *   <li>keyword: title OR content contains the keyword (skipped when blank);</li>
     *   <li>webClasses: {@code webCla} equals any of the given values (skipped when null/empty);</li>
     *   <li>timeLimit: {@code releaseDateTime} lies between the start of the day
     *       {@code timeLimit} days ago and now (skipped when not positive).</li>
     * </ul>
     * Results are sorted by {@code releaseDateTime} descending.
     *
     * @param article carries the search parameters: keyword, webClasses, timeLimit, and the
     *                page index ({@code start}) and page size ({@code limit})
     * @return the requested page of matching articles
     */
    @Override
    public Page<Article> listByKeyword(Article article) {
        BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery();

        if (StringUtils.isNotBlank(article.getKeyword())) {
            queryBuilder.must(containsKeyword(article.getKeyword()));
        }

        if (article.getWebClasses() != null && !article.getWebClasses().isEmpty()) {
            // webCla is a keyword field, so one terms clause expresses "equals any of these"
            // directly; it goes in filter context because relevance scoring is not needed here.
            queryBuilder.filter(QueryBuilders.termsQuery("webCla", article.getWebClasses()));
        }

        if (article.getTimeLimit() > 0) {
            queryBuilder.filter(releasedWithinDays(article.getTimeLimit()));
        }

        Pageable pageable = PageRequest.of(
                article.getStart(), article.getLimit(), Sort.Direction.DESC, "releaseDateTime");
        return articleRepository.search(queryBuilder, pageable);
    }

    /** Builds "title contains keyword OR content contains keyword" as a bool/should query. */
    private static QueryBuilder containsKeyword(String keyword) {
        // Escape wildcard metacharacters so user input is matched literally instead of being
        // interpreted as a pattern.
        String pattern = "*" + escapeWildcard(keyword) + "*";
        return QueryBuilders.boolQuery()
                .should(QueryBuilders.wildcardQuery("title", pattern))
                .should(QueryBuilders.wildcardQuery("content", pattern));
    }

    /** Builds the range query from the start of the day {@code days} days ago up to now. */
    private static QueryBuilder releasedWithinDays(int days) {
        Calendar from = Calendar.getInstance();
        from.add(Calendar.DAY_OF_YEAR, -days);
        // The lower bound is formatted as a date only, so it starts at the beginning of that
        // day; the upper bound is the current instant to the second.
        return QueryBuilders.rangeQuery("releaseDateTime")
                .gte(new SimpleDateFormat("yyyy-MM-dd").format(from.getTime()))
                .lte(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()));
    }

    /** Prefixes the wildcard metacharacters {@code \}, {@code *} and {@code ?} with a backslash. */
    private static String escapeWildcard(String raw) {
        StringBuilder escaped = new StringBuilder(raw.length() + 8);
        for (int i = 0; i < raw.length(); i++) {
            char c = raw.charAt(i);
            if (c == '\\' || c == '*' || c == '?') {
                escaped.append('\\');
            }
            escaped.append(c);
        }
        return escaped.toString();
    }
}
整个查询嵌套下来,相当于如下SQL:select * from table where (title like concat('%', keyword, '%') or content like concat('%', keyword, '%')) and (webCla = '1' or webCla = '2' …) and (releaseDateTime >= 'xx' and releaseDateTime <= 'xx') order by releaseDateTime desc; 其中时间条件也可以写成 between … and,是一个意思。
聚合查询结果

这个是分页的数据,总共hits是44条,我limit了1,所以展示1条

希望这篇文章能帮助到有这方面业务需求的coder~