import java.util.*;
import java.util.regex.*;
import java.io.*;
/**
 * Collect and print Web statistics.
 * Reads a Web server access log, counts the accesses per host, hour,
 * request URL and referrer URL, and prints each set of counts ordered
 * by decreasing count.
 * @author D. Spinellis
 */
class WebStats {
    /**
     * Increment the integer value of map's member by 1.
     * The member is obtained by using the matcher to extract
     * the specified group from the string s.
     * A member that is not yet in the map is entered with a count of 1.
     *
     * @param map the counters, keyed by member
     * @param s the string the matcher was applied to
     * @param m a matcher that has successfully matched s
     * @param group the number of the capturing group naming the member
     */
    static void increment(Map<String, Integer> map, String s, Matcher m, int group) {
        String member = s.substring(m.start(group), m.end(group));
        Integer count = map.get(member);
        map.put(member, count == null ? 1 : count + 1);
    }

    /**
     * List the contents of the given map, in the map's iteration order.
     *
     * @param title the heading printed (after an empty line) before the entries
     * @param map the counters to print, one "count member" line per entry
     */
    static void list(String title, Map<String, Integer> map) {
        System.out.println("\n" + title);
        for (Map.Entry<String, Integer> e : map.entrySet())
            System.out.println(e.getValue() + " " + e.getKey());
    }

    /**
     * List the contents of the given map ordered by their values,
     * largest value first. Members with equal values are all listed,
     * ordered among themselves by their key.
     *
     * @param title the heading printed (after an empty line) before the entries
     * @param map the counters to print, one "count member" line per entry;
     *            keys and values must not be null
     */
    static void sortedList(String title, Map<String, Integer> map) {
        System.out.println("\n" + title);
        // Sort a copy of the entries rather than collecting them in a set
        // ordered by value: a set would treat entries with the same count
        // as duplicates and silently drop all but one of them.
        List<Map.Entry<String, Integer>> valueOrder =
            new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
        Collections.sort(valueOrder,
            new Comparator<Map.Entry<String, Integer>>() {
                public int compare(Map.Entry<String, Integer> a,
                                   Map.Entry<String, Integer> b) {
                    // Descending by count
                    int byCount = b.getValue().compareTo(a.getValue());
                    if (byCount != 0)
                        return byCount;
                    // Ascending by key, to make the order of ties predictable
                    return a.getKey().compareTo(b.getKey());
                }
            }
        );
        for (Map.Entry<String, Integer> e : valueOrder)
            System.out.println(e.getValue() + " " + e.getKey());
    }

    /**
     * Read the log file named by the single command-line argument and
     * print its access statistics on the standard output.
     * Lines that do not match the expected format are reported on the
     * standard output and otherwise ignored.
     * The program exits with status 1 on a usage error, when the file
     * cannot be opened, or when reading it fails.
     *
     * @param args the command-line arguments: the log file's name
     */
    public static void main(String args[]) {
        if (args.length != 1) {
            System.err.println("Usage: WebStats file");
            System.exit(1);
        }
        String fileName = args[0];

        Pattern cre = null;             // Compiled RE
        try {
            // A standard log line is a line like:
            // 192.168.136.16 - - [26/Jan/2004:19:45:48 +0200] "GET /c136.html HTTP/1.1" 200 1674 "http://office/c120.html" "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.5) Gecko/20031007"
            cre = Pattern.compile(
                "([-\\w.]+)\\s+" +      // 1. Host
                "([-\\w]+)\\s+" +       // 2. Logname
                "([-\\w]+)\\s+" +       // 3. User
                "\\[(\\d+)/" +          // 4. Day of month
                "(\\w+)/" +             // 5. Month
                "(\\d+):" +             // 6. Year
                "(\\d+):" +             // 7. Hour
                "(\\d+)" +              // 8. Minute
                "([^]]+?)\\]\\s+" +     // 9. Rest of time
                "\"([-\\w]+)\\s*" +     // 10. Request verb
                "([^\\s]*)" +           // 11. Request URL
                "([^\"]*?)\"\\s+" +     // 12. Request protocol etc.
                "(\\d+)\\s+" +          // 13. Status
                "([-\\d]+)\\s+" +       // 14. Bytes
                "\"([^\"]*)\"\\s+" +    // 15. Referrer URL
                "\"([^\"]*)\""          // 16. Client
            );
        } catch (PatternSyntaxException e) {
            System.err.println("Invalid RE syntax: " + e.getDescription());
            System.exit(1);
        }

        BufferedReader in = null;
        try {
            in = new BufferedReader(new InputStreamReader(new FileInputStream(fileName)));
        } catch (FileNotFoundException e) {
            System.err.println("Unable to open file " + fileName + ": " + e.getMessage());
            System.exit(1);
        }

        Map<String, Integer> host = new HashMap<String, Integer>();
        Map<String, Integer> hour = new HashMap<String, Integer>();
        Map<String, Integer> request = new HashMap<String, Integer>();
        Map<String, Integer> referrer = new HashMap<String, Integer>();
        try {
            String s;
            while ((s = in.readLine()) != null) {
                Matcher m = cre.matcher(s);
                if (!m.matches())
                    System.out.println("Invalid line: " + s);
                else {
                    increment(host, s, m, 1);
                    increment(hour, s, m, 7);
                    increment(request, s, m, 11);
                    increment(referrer, s, m, 15);
                }
            }
        } catch (IOException e) {
            System.err.println("Error reading line: " + e.getMessage());
            System.exit(1);
        } finally {
            try {
                in.close();
            } catch (IOException ignored) {
                // The file was only read; a failure to close it loses nothing
            }
        }

        sortedList("Host Access Counts", host);
        sortedList("Hourly Access Counts", hour);
        sortedList("Request URL Access Counts", request);
        sortedList("Referrer URL Access Counts", referrer);
    }
}