自己亂寫的一個檢視網站原始檔的小程式
實在是因為雞仔對Spider很感興趣
但功力不夠
需要慢慢研究
蠻好玩的說
尤其是用 Eclipse 寫 (以前只會用筆記本....)
越用越順手 XD
import java.io.*;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.*;
/**
 * Minimal "spider" that downloads one web page, extracts every
 * {@code <a ...>...</a>} element with a regular expression, echoes each match
 * to standard output and saves the matches to a text file.
 *
 * <p>Usage: {@code java JavaSpider [url] [outputFile]}. Both arguments are
 * optional and default to {@code http://www.yzu.edu.tw} and {@code yzu.txt}.
 */
public class JavaSpider {

    /** Page fetched when no URL is given on the command line. */
    private static final String DEFAULT_URL = "http://www.yzu.edu.tw";

    /** File written when no output path is given on the command line. */
    private static final String DEFAULT_OUTPUT = "yzu.txt";

    /** Matches an opening {@code <a ...>} tag up to the nearest {@code </a>}; compiled once. */
    private static final Pattern ANCHOR = Pattern.compile("<a .*?>.*?</a>");

    /**
     * Fetches the page, extracts its anchor elements, prints them and writes
     * them to the output file, one per line terminated by CRLF.
     *
     * @param args optional: {@code args[0]} is the URL to fetch,
     *             {@code args[1]} is the output file path
     * @throws Exception if the URL is malformed, the page cannot be read or
     *                   the output file cannot be written
     */
    public static void main(String[] args) throws Exception {
        String pageUrl = args.length > 0 ? args[0] : DEFAULT_URL;
        String outputFile = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        String page;
        // openStream() always yields an InputStream, whereas getContent() may
        // return some other object depending on the content type.
        try (Reader in = new InputStreamReader(
                new URL(pageUrl).openStream(), StandardCharsets.UTF_8)) {
            page = readPage(in);
        }

        // Write with the same charset the page was decoded with, instead of
        // the platform default, so non-ASCII link text survives the round trip.
        try (Writer out = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(outputFile), StandardCharsets.UTF_8))) {
            for (String anchor : findAnchors(page)) {
                System.out.println(anchor);
                out.write(anchor + "\r\n");
            }
        }
    }

    /**
     * Reads all text from {@code in} and collapses it onto a single line.
     *
     * <p>Line breaks are replaced by one space: the anchor pattern's
     * {@code .} does not match line terminators, so the text must be on one
     * line, but simply gluing lines together would fuse tokens such as
     * {@code <a} and {@code href} when a tag is wrapped across lines.
     *
     * @param in source of the page text; not closed by this method
     * @return the page text with line breaks replaced by single spaces
     * @throws IOException if reading fails
     */
    static String readPage(Reader in) throws IOException {
        BufferedReader reader = new BufferedReader(in);
        StringBuilder page = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            if (page.length() > 0) {
                page.append(' ');
            }
            page.append(line);
        }
        return page.toString();
    }

    /**
     * Returns every anchor element found in {@code html}, in document order.
     *
     * @param html page text to scan
     * @return the matched {@code <a ...>...</a>} substrings; empty if none
     */
    static List<String> findAnchors(String html) {
        List<String> anchors = new ArrayList<String>();
        Matcher matcher = ANCHOR.matcher(html);
        while (matcher.find()) {
            anchors.add(matcher.group());
        }
        return anchors;
    }
}