WordCounter

题目:给出一个包含英文小说的文本文件,统计其中每个英文单词出现的次数,并按出现次数从高到低(降序)打印。

完整实现如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/**
 * Counts how often each word occurs in a text file and prints the words from the most
 * frequent to the least frequent, one {@code word:count} pair per line.
 */
public class WordCounter {
    /** File that is analysed when no path is passed on the command line. */
    private static final String DEFAULT_FILE_NAME = "test.txt";

    /**
     * One or more consecutive separator characters: space, {@code '|'}, {@code ','},
     * {@code '.'} and {@code '!'}. Compiled once instead of once per input line.
     */
    private static final Pattern DELIMITERS = Pattern.compile("[ |,.!]+");

    /**
     * Prints the word frequencies of a file to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse. When it is
     *             missing, {@code test.txt} in the working directory is used.
     */
    public static void main(String[] args) {
        String fileName = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE_NAME;
        try {
            List<WordOccurrence> list = WordCounter.doCount(fileName);
            for (WordOccurrence e : list) {
                System.out.println(e.word + ":" + e.count);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * The word count container.
     *
     * <p>Note: the natural ordering looks at {@code count} only, so it is not consistent
     * with {@code equals}.
     */
    static class WordOccurrence implements Comparable<WordOccurrence> {
        String word;
        int count;

        public WordOccurrence(String word, int count) {
            this.word = word;
            this.count = count;
        }

        /**
         * Orders occurrences by descending count, so that sorting puts the most frequent
         * word first.
         *
         * @param o the occurrence to compare with
         * @return a negative value if this count is larger than {@code o.count}, a positive
         *         value if it is smaller, and zero if both counts are equal
         */
        @Override
        public int compareTo(WordOccurrence o) {
            // Arguments are swapped on purpose: larger counts must sort first.
            return Integer.compare(o.count, this.count);
        }
    }

    /**
     * Count the occurrences of each word, and sort them in descending order.
     *
     * <p>Lines are split on {@link #DELIMITERS}; empty tokens (produced by a leading
     * separator or a blank line) are ignored. Words are compared exactly as written, so
     * matching is case-sensitive. Words with the same count are returned in ascending
     * {@link String} order, which makes the result deterministic.
     *
     * @param fileName the name of the file to be parsed
     * @return the word list in descending order of occurrence
     * @throws IOException if the file cannot be opened or read
     */
    private static List<WordOccurrence> doCount(String fileName) throws IOException {
        Map<String, Integer> wordMap = new HashMap<String, Integer>();

        // try-with-resources closes the readers even when reading fails half-way.
        try (Reader fileReader = new FileReader(new File(fileName));
                BufferedReader bufferedReader = new BufferedReader(fileReader)) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                for (String word : DELIMITERS.split(line)) {
                    if (word.isEmpty()) {
                        // A leading separator or a blank line yields "", which is not a word.
                        continue;
                    }
                    Integer previous = wordMap.get(word);
                    wordMap.put(word, previous == null ? 1 : previous + 1);
                }
            }
        }

        // Visit the words alphabetically first: Collections.sort is stable, so the count
        // sort below keeps this order among words that occur equally often.
        List<String> words = new ArrayList<String>(wordMap.keySet());
        Collections.sort(words);

        List<WordOccurrence> wordList = new ArrayList<WordOccurrence>(words.size());
        for (String word : words) {
            wordList.add(new WordOccurrence(word, wordMap.get(word)));
        }
        Collections.sort(wordList);
        return wordList;
    }
}