本程序用 Java 编写,用于抓取网页中的邮箱地址。其中匹配邮箱的正则表达式只是随手写的,还需要优化,仅供娱乐。
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Toy crawler that downloads one page and prints every substring that looks
 * like an e-mail address, one per line, to standard output.
 *
 * <p>The address pattern is deliberately simple: 6-12 ASCII letters/digits,
 * an {@code @}, a 2-8 letter domain label, and one or two dot-separated
 * suffixes of 2-3 letters. Because the pattern is not anchored, a longer
 * local part is reported as its last 12 characters, and addresses containing
 * dots, dashes or underscores are not matched as a whole.
 */
public class spider {

    /** Placeholder target used by {@link #getMail()}; replace it with a real URL. */
    private static final String DEFAULT_PAGE_URL = "需要爬邮箱的网址";

    /** Compiled once; {@code Pattern} is immutable, so it is reused for every line. */
    private static final Pattern MAIL_PATTERN =
            Pattern.compile("[a-zA-Z0-9]{6,12}@[a-zA-Z]{2,8}(\\.[a-zA-Z]{2,3}){1,2}");

    /**
     * Entry point. Crawls the URL given as the first argument, or the built-in
     * placeholder URL when no argument is supplied. Any failure is reported
     * with a stack trace instead of being propagated.
     *
     * @param args optional; {@code args[0]} is the URL of the page to scan
     */
    public static void main(String[] args) {
        try {
            if (args != null && args.length > 0) {
                getMail(args[0]);
            } else {
                getMail();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Scans the built-in placeholder URL.
     *
     * @throws Exception if the URL is malformed or the page cannot be read
     */
    public static void getMail() throws Exception {
        getMail(DEFAULT_PAGE_URL);
    }

    /**
     * Reads the page at {@code pageUrl} line by line and prints every match of
     * the address pattern to standard output, in order of appearance.
     *
     * @param pageUrl URL of the page to scan (any protocol {@link URL} supports)
     * @throws Exception if the URL is malformed or the page cannot be read
     */
    public static void getMail(String pageUrl) throws Exception {
        URLConnection conn = new URL(pageUrl).openConnection();
        // The pattern only matches ASCII characters, so decoding as UTF-8 gives
        // the same matches for any ASCII-compatible page encoding while making
        // the result independent of the platform default charset.
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = in.readLine()) != null) {
                Matcher m = MAIL_PATTERN.matcher(line);
                while (m.find()) {
                    System.out.println(m.group());
                }
            }
        }
    }
}