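/*
 * Use the Scanner class and regular expressions to count the words in an English text.
 * Requirements:
 * 1. Report how many words occur in total.
 * 2. Report how many distinct words there are.
 * 3. Print the words ordered by how frequently they occur.
 */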
package 第七次;
import java.util.*;
import java.io.*;
import java.util.regex.*;
/**
 * Counts the words of an English text file and prints, in order: the number of
 * distinct words, the total number of words, and every word with its share of
 * the total (as a percentage), most frequent first.
 */
public class word {

    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_INPUT_FILE = "src/java线上作业/第三章/crossion.txt";

    /**
     * A word is one or more letter runs joined by single hyphens ("well-known").
     * Compiled once; a trailing or doubled hyphen is not treated as part of the word.
     */
    private static final Pattern WORD_PATTERN = Pattern.compile("[A-Za-z]+(?:-[A-Za-z]+)*");

    /**
     * Reads the input file, tallies its words case-insensitively and prints the statistics.
     *
     * @param args optional; {@code args[0]} overrides the default input file path
     */
    public static void main(String[] args) {
        String inputFile = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_FILE;

        Map<String, Integer> wordIndex;
        try (Scanner scanner = new Scanner(new File(inputFile), "UTF-8")) {
            wordIndex = countWords(scanner);
            // Scanner swallows read errors; surface them instead of reporting partial counts.
            IOException readError = scanner.ioException();
            if (readError != null) {
                throw readError;
            }
        } catch (IOException e) {
            System.err.println("读取文件失败: " + inputFile + " (" + e.getMessage() + ")");
            return;
        }

        int total = 0;
        for (int occurrences : wordIndex.values()) {
            total += occurrences;
        }

        System.out.println("不相同的单词次数:" + wordIndex.size());
        System.out.println("单词出现的数量:" + total);

        // Most frequent first; equal counts are ordered alphabetically so output is deterministic.
        List<Map.Entry<String, Integer>> sortedWords = new ArrayList<>(wordIndex.entrySet());
        sortedWords.sort(Map.Entry.<String, Integer>comparingByValue().reversed()
                .thenComparing(Map.Entry.<String, Integer>comparingByKey()));

        for (Map.Entry<String, Integer> entry : sortedWords) {
            // total > 0 here: the list is non-empty only if at least one word was counted.
            double percent = entry.getValue() * 100.0 / total;
            System.out.println("单词出现的频率:" + entry.getKey() + ": "
                    + String.format(Locale.ROOT, "%.2f", percent) + "%");
        }
    }

    /** Tallies every lower-cased word found in the scanner's remaining lines. */
    private static Map<String, Integer> countWords(Scanner scanner) {
        Map<String, Integer> counts = new HashMap<>();
        while (scanner.hasNextLine()) {
            Matcher matcher = WORD_PATTERN.matcher(scanner.nextLine());
            while (matcher.find()) {
                // Locale.ROOT keeps lower-casing independent of the machine's default locale.
                counts.merge(matcher.group().toLowerCase(Locale.ROOT), 1, Integer::sum);
            }
        }
        return counts;
    }
}