import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.junit.Test;
/**
 * Small demo "spider": reads a web page line by line and prints every
 * e-mail-like token it finds.
 */
public class SpiderDemo {

    /**
     * Matches e-mail-like tokens such as {@code asw@zjut.edu.cn}: one or more
     * word characters, an {@code @}, then a host made of word characters with
     * at least one dot-separated suffix. Compiled once because the pattern
     * never changes between calls.
     */
    private static final Pattern MAIL_PATTERN = Pattern.compile("\\w+@\\w+(\\.\\w+)+");

    /**
     * Crawls the web for e-mail addresses. Compared with the previous
     * (file-based) version, only the I/O source had to change.
     *
     * <p>Opens {@code http://www.sina.com}, scans each line of the response
     * and prints every match of {@link #MAIL_PATTERN} to standard output,
     * one match per line.
     *
     * @throws IOException if the URL cannot be opened or reading the response fails
     */
    @Test
    public void getMailsNet() throws IOException {
        URL url = new URL("http://www.sina.com");
        // try-with-resources closes the reader (and the underlying network
        // stream) even when readLine() throws.
        // The charset is fixed to UTF-8 so decoding does not depend on the
        // platform default; the pattern only targets ASCII word characters.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(url.openStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                Matcher m = MAIL_PATTERN.matcher(line);
                // A single line may contain several addresses; report each one.
                while (m.find()) {
                    // group() returns the substring that matched the pattern.
                    System.out.println(m.group());
                }
            }
        }
    }
}
// When reposting, please cite the original source: https://ju.6miu.com/read-1300104.html