-
一 01
-
一个简单的 Java 文字与图片采集类，用于从远程网站获取文本或图片。
-
import java.io.BufferedReader;
-
import java.io.ByteArrayOutputStream;
-
import java.io.InputStream;
-
import java.io.InputStreamReader;
-
import java.net.URL;
-
import java.net.URLConnection;
-
-
/**
 * Small scraping helper that downloads text or binary content from a URL.
 *
 * <p>Neither public method throws: failures are reported by printing the stack
 * trace and returning a sentinel value (an empty string from
 * {@link #getText}, {@code null} from {@link #getImage}).
 */
public class CollectData
{
    /** Charset used by {@link #getText} when the caller supplies none. */
    private static final String DEFAULT_CHARSET = "GBK";

    /** Size, in bytes, of the copy buffer used by {@link #getImage}. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Downloads the document at {@code textUrl} and returns the text found
     * between the first occurrence of {@code start} and the last occurrence
     * of {@code end}. Neither marker is included in the result.
     *
     * <p>The document is read line by line and the lines are concatenated
     * without their line terminators, so the markers are matched against
     * that single-line form.
     *
     * @param textUrl address of the document to download
     * @param start   marker preceding the wanted text
     * @param end     marker following the wanted text
     * @param charset name of the charset used to decode the document;
     *                {@code null} or empty selects GBK
     * @return the text between the markers, or an empty string when a marker
     *         is {@code null} or missing, when the last {@code end} lies
     *         before the end of the first {@code start}, or when the
     *         download fails
     */
    public String getText(String textUrl,
            String start, String end, String charset)
    {
        // Without both markers there is nothing to extract; skip the download.
        if (start == null || end == null)
        {
            return "";
        }
        if (charset == null || charset.length() < 1)
        {
            charset = DEFAULT_CHARSET;
        }

        InputStream is = null;
        try
        {
            URL url = new URL(textUrl);
            URLConnection con = url.openConnection();
            is = con.getInputStream();
            BufferedReader br = new BufferedReader(
                    new InputStreamReader(is, charset));

            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null)
            {
                sb.append(line);
            }
            return extractBetween(sb.toString(), start, end);
        }
        catch (Exception e)
        {
            // Deliberately broad: callers rely on this method never throwing.
            e.printStackTrace();
            return "";
        }
        finally
        {
            // Closing the underlying stream releases the connection even when
            // reading or decoding failed part-way through.
            closeQuietly(is);
        }
    }

    /**
     * Downloads the resource at {@code imgUrl} and returns its raw bytes.
     *
     * @param imgUrl address of the resource to download
     * @return the downloaded bytes, or {@code null} when the download fails
     */
    public byte [] getImage(String imgUrl)
    {
        InputStream is = null;
        try
        {
            URL url = new URL(imgUrl);
            URLConnection con = url.openConnection();
            is = con.getInputStream();

            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            byte [] buffer = new byte[BUFFER_SIZE];
            int length;
            // read() signals end of stream with -1.
            while ((length = is.read(buffer)) != -1)
            {
                baos.write(buffer, 0, length);
            }
            return baos.toByteArray();
        }
        catch (Exception e)
        {
            // Deliberately broad: callers rely on this method never throwing.
            e.printStackTrace();
            return null;
        }
        finally
        {
            closeQuietly(is);
        }
    }

    /**
     * Returns the part of {@code content} between the first {@code start}
     * and the last {@code end}, or an empty string when either marker is
     * absent or the last {@code end} begins before the first {@code start}
     * has finished.
     */
    private static String extractBetween(String content, String start, String end)
    {
        int startIndex = content.indexOf(start);
        int endIndex = content.lastIndexOf(end);
        if (startIndex == -1 || endIndex == -1)
        {
            return "";
        }
        int contentStart = startIndex + start.length();
        // Guard the substring call instead of relying on it to throw.
        if (endIndex < contentStart)
        {
            return "";
        }
        return content.substring(contentStart, endIndex);
    }

    /** Closes {@code stream} if it was opened, ignoring any failure to close. */
    private static void closeQuietly(InputStream stream)
    {
        if (stream == null)
        {
            return;
        }
        try
        {
            stream.close();
        }
        catch (java.io.IOException ignored)
        {
            // Nothing useful can be done if close fails; the data was
            // already read (or the read failure was already reported).
        }
    }

    /** Demo entry point: fetches a page and an image from fixed URLs. */
    public static void main(String [] args)
    {
        CollectData c = new CollectData();
        String result = c.getText("http://www.baidu.com",
                "<html>", "</html>", "GBK");
        System.out.println(result);
        c.getImage("http://www.google.cn/logos/newyear09.gif");
    }
}
-
1 楼 glamey
Post: 2010-06-06 8:00 上午
写的还真够呛啊,需要继续努力了。