年底了能买到火车票是非常幸运的事儿, 比如我同事, 通过电话就订到了车票, 而我死活都没打进那个电话.
于是用 Groovy 写了个程序, 用来抓取火车票信息. 网上相关的程序还不少, 我只是用 Groovy 来练练手而已. 本来还可以再完善一下: 像这个( )可以从多个网站抓取, 像这个( )可以定时抓取. 我原本也想做成定时抓取后发消息提醒, 后来搞到了票, 就这样吧.
/**
 * Scrapes the 58.com (Hangzhou) train-ticket listing page and prints every offer that
 * departs on or after {@link #earliest}, latest departure first.
 *
 * The page is read line by line. A line holding a listing link starts a new offer; the
 * lines that follow it supply, in order, the route, the train number, the seat type and
 * finally the "count date" cell.
 */
class GetTicket {
    /** Listing page to scrape; the query string carries the %u-escaped start and end stations. */
    final static String host = "http://hz.58.com/huochepiao/?StartStation=%25u676D%25u5DDE&EndStation=%25u5B9C%25u660C"

    /** Earliest departure date to keep, encoded as month * 100 + day (120 = January 20). */
    final static int earliest = 120

    /** Listings already confirmed to have no ticket left; they are skipped. */
    final static List filterList = [
        "http://hz.58.com/huochepiao/4538967059457x.shtml",
        "http://hz.58.com/huochepiao/4536633437697x.shtml"
    ]

    /**
     * Downloads the listing page, extracts the offers and prints one tab-separated line
     * per offer (date, count, seat type, train number, route, url).
     */
    void get() {
        def htmlSource = new Http().get(host).source.toString()
        // 0 = looking for a link line; 1..4 = which field the next line supplies.
        int step = 0
        // Offer under construction. It is only added to the result once its date has been
        // parsed, so a row cut short by end-of-input or a malformed cell never reaches sort().
        Entry current = null
        List<Entry> entries = []

        htmlSource.eachLine { String line ->
            if (step > 0 && step <= 4) {
                switch (step) {
                    case 1:
                        current.location = line.trim()
                        break
                    case 2:
                        current.number = line.trim()
                        break
                    case 3:
                        current.type = line.trim()
                        break
                    case 4:
                        Integer key = readCountAndDate(current, line)
                        if (key != null && key >= earliest) {
                            entries << current
                        }
                        current = null
                        break
                }
                step++
                return
            }
            step = 0

            if (line ==~ /^ +<a href="http:\/\/hz\.58\.com\/huochepiao.+/) {
                def link = line =~ /"(http:\/\/hz\.58\.com\/huochepiao.+?)"/
                if (!link.find()) {
                    // href without a closing quote: nothing usable on this line.
                    return
                }
                def url = link.group(1).trim()
                if (filterList.contains(url)) {
                    return
                }
                current = new Entry()
                current.url = url
                step = 1

                // When text follows the last '>' on the link line, that text is the route,
                // so the separate route line is skipped.
                def inline = line =~ /.+>(.+)$/
                if (inline.matches()) {
                    current.location = inline.group(1).trim()
                    step = 2
                }
            }
        }

        // Entry's natural ordering puts the latest departure first.
        entries.sort()

        entries.each { println "${it.date}\t${it.count}\t ${it.type}\t ${it.number}\t ${it.location}\t ${it.url}" }
    }

    /**
     * Fills {@code count} and {@code date} of the offer from the "count date&lt;/a&gt;" cell.
     *
     * @return the date key (month * 100 + day), or null when the cell is malformed
     */
    private static Integer readCountAndDate(Entry offer, String line) {
        def cell = line.trim() =~ /(.+)<\/a>/
        if (!cell.find()) {
            return null
        }
        String[] pair = cell.group(1).split(" ")
        if (pair.length < 2) {
            return null
        }
        Integer key = dateKey(pair[1])
        if (key == null) {
            return null
        }
        offer.count = pair[0]
        offer.date = key.toString()
        return key
    }

    /**
     * Converts a date such as "1月31日" into month * 100 + day (131). Merely deleting the
     * 月/日 characters would turn "2月1日" into 21, which compares as earlier than 120.
     *
     * @return the numeric key, or null when the text holds no usable date
     */
    private static Integer dateKey(String text) {
        def monthDay = text =~ /(\d{1,2})月(\d{1,2})/
        if (monthDay.find()) {
            return Integer.parseInt(monthDay.group(1)) * 100 + Integer.parseInt(monthDay.group(2))
        }
        // No month/day markers: accept a plain number as-is.
        String digits = text.replaceAll(/月|日/, "")
        return digits.isInteger() ? Integer.valueOf(digits) : null
    }
}
/**
 * One ticket offer taken from the listing page.
 *
 * Natural ordering is by {@code date} compared numerically, largest (latest) first.
 * Entries whose date is missing or not a number sort after all dated entries.
 */
class Entry implements Comparable<Entry> {
    /** Link to the offer's detail page. */
    def url
    /** Route text (start and end station). */
    def location
    /** Train number. */
    def number
    /** Seat type. */
    def type
    /** Number of tickets offered. */
    def count
    /** Departure date as a string of digits. */
    def date

    /**
     * Orders entries by date, descending.
     *
     * @param other the entry to compare against
     * @return negative when this entry's date is later than {@code other}'s, positive when
     *         it is earlier, zero when both dates are equal
     */
    @Override
    int compareTo(Entry other) {
        return sortKey(other.date) <=> sortKey(date)
    }

    /** Numeric value of a date field; Integer.MIN_VALUE when it is null or not an integer. */
    private static int sortKey(def rawDate) {
        String text = rawDate?.toString()?.trim()
        return text?.isInteger() ? text.toInteger() : Integer.MIN_VALUE
    }

    @Override
    public String toString() {
        return ToStringBuilder.reflectionToString(this);
    }
}
/**
 * Scrapes the 58.com (Hangzhou) train-ticket listing page and prints every offer that
 * departs on or after {@link #earliest}, latest departure first.
 *
 * The page is read line by line. A line holding a listing link starts a new offer; the
 * lines that follow it supply, in order, the route, the train number, the seat type and
 * finally the "count date" cell.
 */
class GetTicket {
    /** Listing page to scrape; the query string carries the %u-escaped start and end stations. */
    final static String host = "http://hz.58.com/huochepiao/?StartStation=%25u676D%25u5DDE&EndStation=%25u5B9C%25u660C"

    // Earliest departure date, encoded as month * 100 + day (120 = January 20).
    final static int earliest = 120

    // Listings already confirmed to have no ticket left are filtered out.
    final static List filterList = [
        "http://hz.58.com/huochepiao/4538967059457x.shtml",
        "http://hz.58.com/huochepiao/4536633437697x.shtml"
    ]

    /**
     * Downloads the listing page, extracts the offers and prints one tab-separated line
     * per offer (date, count, seat type, train number, route, url).
     */
    void get() {
        def htmlSource = new Http().get(host).source.toString()
        // 0 = looking for a link line; 1..4 = which field the next line supplies.
        int step = 0
        // Offer under construction. It is only added to the result once its date has been
        // parsed, so a row cut short by end-of-input or a malformed cell never reaches sort().
        Entry current = null
        List<Entry> entries = []

        htmlSource.eachLine { String line ->
            if (step > 0 && step <= 4) {
                switch (step) {
                    case 1:
                        current.location = line.trim()
                        break
                    case 2:
                        current.number = line.trim()
                        break
                    case 3:
                        current.type = line.trim()
                        break
                    case 4:
                        Integer key = readCountAndDate(current, line)
                        if (key != null && key >= earliest) {
                            entries << current
                        }
                        current = null
                        break
                }
                step++
                return
            }
            step = 0

            if (line ==~ /^ +<a href="http:\/\/hz\.58\.com\/huochepiao.+/) {
                def link = line =~ /"(http:\/\/hz\.58\.com\/huochepiao.+?)"/
                if (!link.find()) {
                    // href without a closing quote: nothing usable on this line.
                    return
                }
                def url = link.group(1).trim()
                if (filterList.contains(url)) {
                    return
                }
                current = new Entry()
                current.url = url
                step = 1

                // When text follows the last '>' on the link line, that text is the route,
                // so the separate route line is skipped.
                def inline = line =~ /.+>(.+)$/
                if (inline.matches()) {
                    current.location = inline.group(1).trim()
                    step = 2
                }
            }
        }

        // Entry's natural ordering puts the latest departure first.
        entries.sort()

        entries.each { println "${it.date}\t${it.count}\t ${it.type}\t ${it.number}\t ${it.location}\t ${it.url}" }
    }

    /**
     * Fills {@code count} and {@code date} of the offer from the "count date&lt;/a&gt;" cell.
     *
     * @return the date key (month * 100 + day), or null when the cell is malformed
     */
    private static Integer readCountAndDate(Entry offer, String line) {
        def cell = line.trim() =~ /(.+)<\/a>/
        if (!cell.find()) {
            return null
        }
        String[] pair = cell.group(1).split(" ")
        if (pair.length < 2) {
            return null
        }
        Integer key = dateKey(pair[1])
        if (key == null) {
            return null
        }
        offer.count = pair[0]
        offer.date = key.toString()
        return key
    }

    /**
     * Converts a date such as "1月31日" into month * 100 + day (131). Merely deleting the
     * 月/日 characters would turn "2月1日" into 21, which compares as earlier than 120.
     *
     * @return the numeric key, or null when the text holds no usable date
     */
    private static Integer dateKey(String text) {
        def monthDay = text =~ /(\d{1,2})月(\d{1,2})/
        if (monthDay.find()) {
            return Integer.parseInt(monthDay.group(1)) * 100 + Integer.parseInt(monthDay.group(2))
        }
        // No month/day markers: accept a plain number as-is.
        String digits = text.replaceAll(/月|日/, "")
        return digits.isInteger() ? Integer.valueOf(digits) : null
    }
}

/**
 * One ticket offer taken from the listing page.
 *
 * Natural ordering is by {@code date} compared numerically, largest (latest) first.
 * Entries whose date is missing or not a number sort after all dated entries.
 */
class Entry implements Comparable<Entry> {
    /** Link to the offer's detail page. */
    def url
    /** Route text (start and end station). */
    def location
    /** Train number. */
    def number
    /** Seat type. */
    def type
    /** Number of tickets offered. */
    def count
    /** Departure date as a string of digits. */
    def date

    /**
     * Orders entries by date, descending.
     *
     * @param other the entry to compare against
     * @return negative when this entry's date is later than {@code other}'s, positive when
     *         it is earlier, zero when both dates are equal
     */
    @Override
    int compareTo(Entry other) {
        return sortKey(other.date) <=> sortKey(date)
    }

    /** Numeric value of a date field; Integer.MIN_VALUE when it is null or not an integer. */
    private static int sortKey(def rawDate) {
        String text = rawDate?.toString()?.trim()
        return text?.isInteger() ? text.toInteger() : Integer.MIN_VALUE
    }

    @Override
    public String toString() {
        return ToStringBuilder.reflectionToString(this);
    }
}
注: 抓取网页用到了 com.googlecode.groovyhttp.Http 这个库 (http://code.google.com/p/groovy-http/), 页面内容则是用正则表达式逐行解析的.
输出结果:
131 1张 硬卧 K529 杭州 - 恩施 http://hz.58.com/huochepiao/4555300451203x.shtml
129 1张 硬卧 K529 杭州 - 荆门 http://hz.58.com/huochepiao/4547173468803x.shtml
128 1张 K529 杭州 - 恩施 http://hz.58.com/huochepiao/4547524308355x.shtml
124 1张 硬座 K253 杭州 - 宜昌 http://hz.58.com/huochepiao/4557214747137x.shtml
123 1张 硬卧 K253 杭州南 - 宜昌 http://hz.58.com/huochepiao/4557440945411x.shtml
123 1张 站票 K529 杭州 - 宜昌 http://hz.58.com/huochepiao/4532977245187x.shtml
122 1张 硬座 K253 杭州南 - 宜昌 http://hz.58.com/huochepiao/4557377085571x.shtml
122 1张 硬座 K253 杭州南 - 宜昌 http://hz.58.com/huochepiao/4544944871170x.shtml