本文共 1597 字，大约阅读时间需要 5 分钟。
public class FormatUtil { /** * 去除停用词 * @param oldString:原中文文本 * @return 去除停用词之后的中文文本 * @throws IOException */ public static String RemovalOfStopWords(String oldString) throws IOException { String newString = oldString; List termList = HanLP.segment(newString); System.out.println(termList); String filePath = "F:\\主文件夹\\知识图谱\\工具资源\\停用词.txt"; File file = new File(filePath); BufferedReader bufferedReader = new BufferedReader(new FileReader(file)); List stopWords = new ArrayList<>(); String temp = null; while ((temp = bufferedReader.readLine()) != null) { stopWords.add(temp.trim()); } List termStringList = new ArrayList<>(); for (Term term : termList) { termStringList.add(term.word); } termStringList.removeAll(stopWords); newString = ""; for (String string : termStringList) { newString += string; } return newString; }} 转载地址:http://epkzz.baihongyu.com/