-
一 01
-
一个简单的 Java 文字与图片采集类，用于从远程网站获取文本或图片。
import java.io.BufferedReader; import java.io.ByteArrayOutputStream; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.net.URLConnection; public class CollectData { //获取文本,返回字符串 start-开始字符串 end-结束字符串 //结果不包含开始串,不包含结束串 public String getText(String textUrl, String start,String end,String charset) { if(charset == null || charset.length() < 1) charset = "GBK"; try { URL url = new URL(textUrl); URLConnection con = url.openConnection(); InputStream is = con.getInputStream(); BufferedReader br = new BufferedReader( new InputStreamReader(is,charset)); String s; StringBuffer sb = new StringBuffer(); while((s = br.readLine())!=null) { sb.append(s); } br.close(); String htmlContent = sb.toString(); if(htmlContent.indexOf(start) != -1 && htmlContent.lastIndexOf(end) != -1) { String content = htmlContent.substring( htmlContent.indexOf(start) +start.length(),htmlContent.lastIndexOf(end)); return content; } }catch(Exception e) { e.printStackTrace(); } return ""; } //获取图片,返回字节数组 public byte [] getImage(String imgUrl) { try { URL url = new URL(imgUrl); URLConnection con = url.openConnection(); InputStream is = con.getInputStream(); ByteArrayOutputStream baos = new ByteArrayOutputStream(); byte [] b = new byte[1024]; int length = 0; while((length = is.read(b)) > 0) { baos.write(b,0,length); } return baos.toByteArray(); }catch(Exception e) { e.printStackTrace(); return null; } } public static void main(String [] args) { CollectData c = new CollectData(); String result = c.getText("http://www.baidu.com", "","","GBK"); System.out.println(result); c.getImage("http://www.google.cn/logos/newyear09.gif"); } }