当前位置:  编程技术>java/j2ee

Java获取网络文件并插入数据库的代码

    来源: 互联网  发布时间:2014-10-21

    本文导语:  获取百度的歌曲名,歌手和链接!! 代码如下: package webTools; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URL; import java.util....

获取百度的歌曲名、歌手和链接,并插入数据库。
代码如下:

package webTools;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import dbTools.DBTools;
public class IOTOWeb {
public String getHtmlContent(String htmlURL) {
URL url = null;
String rowContent = "";
StringBuffer htmlContent = new StringBuffer();
try {
url = new URL(/tech-java/htmlURL/index.html);
BufferedReader in = new BufferedReader(new InputStreamReader(url
.openStream(), "gb2312"));
while ((rowContent = in.readLine()) != null) {
htmlContent.append(rowContent);
}
in.close();
} catch (MalformedURLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (UnsupportedEncodingException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return htmlContent.toString();
}
public List getLink(String htmlContent) {
ArrayList listLink = new ArrayList();
String regex = "]*>[\(]*]*href=("([^"]*)"|'([^']*)'|([^\s>]*))[^>]*>(.*?)[\)]*[\s]*";
Pattern pattern = Pattern.compile(regex, Pattern.DOTALL);
Matcher matcher = pattern.matcher(htmlContent);
while (matcher.find()) {
listLink.add(matcher.group());
}
return listLink;
}
public List getHref(String htmlContent) {
String regex;
List listtHref = new ArrayList();
regex = "href=("([^"]*)"|'([^']*)'|([^\s>]*))"";
Pattern pa = Pattern.compile(regex, Pattern.DOTALL);
Matcher ma = pa.matcher(htmlContent);
while (ma.find()) {
listtHref.add(ma.group().replaceFirst("href="", "").replace(""",
""));
}
return listtHref;
}
public List getPerson(String htmlContent) {
String regex;
List list = new ArrayList();
regex = "\(]*href=("([^"]*)"|'([^']*)'|([^\s>]*))[^>]*>(.*?)\)";
Pattern pa = Pattern.compile(regex, Pattern.DOTALL);
Matcher ma = pa.matcher(htmlContent);
while (ma.find()) {
list.add(ma.group().replaceFirst("href="", "").replace(""", ""));
}
return list;
}
public List getSongName(String htmlContent) {
String regex;
List listPerson = new ArrayList();
regex = "]*href=("([^"]*)"|'([^']*)'|([^\s>]*))[^>]*>(.*?)\s";
Pattern pa = Pattern.compile(regex, Pattern.DOTALL);
Matcher ma = pa.matcher(htmlContent);
while (ma.find()) {
listPerson.add(ma.group());
}
return listPerson;
}
public String getMainContent(String htmlContent) {
String regex = "(.*?)";
StringBuffer mainContent = new StringBuffer();
Pattern pattern = Pattern.compile(regex, Pattern.DOTALL);
Matcher matcher = pattern.matcher(htmlContent);
while (matcher.find()) {
mainContent.append(matcher.group());
}
return mainContent.toString();
}
public String outTag(final String s) {
return s.replaceAll("", "");
}
DBTools dbTools = new DBTools();
public void getFromBaiduMap3(String htmlURL) throws Throwable {
HashMap htmlContentMap = new HashMap();
String htmlContent = getHtmlContent(htmlURL);
String mainContent = getMainContent(htmlContent);
List listLink = getLink(mainContent);
for (int j = 0; j < listLink.size(); j++) {
String tdTag = listLink.get(j).toString();
List songNameList = getSongName(tdTag);
String songName = outTag(songNameList.get(0).toString());
List personList = getPerson(tdTag);
String songPerson = "";
if (personList.size() != 0) {
for (int n = 0; n < personList.size(); n++) {
// System.out.println(personList.get(n).toString());
songPerson = outTag(personList.get(n).toString());
}
} else {
songPerson = "无";
}
// System.out.print(songNameList.get(0).toString());
List hrefList = getHref(songNameList.get(0).toString());
String songHref = hrefList.get(0).toString();
System.out.println();
String sql = "insert into song(songName,songPerson,songHref) values(?,?,?)";
ArrayList list_values = new ArrayList();
list_values.add(songName);
list_values.add(songPerson);
list_values.add(songHref);
dbTools.update(sql, list_values);
}
}
}

DBTools数据库链接类:
代码如下:

package dbTools;
import java.util.ArrayList;
import java.sql.*;
/**
 * Minimal JDBC helper that opens one MySQL connection at construction time and
 * runs parameterised queries and updates on it.
 *
 * <p>Each call uses its own statement and result set and closes them before
 * returning, including when the call fails.
 */
public class DBTools {

    /** Connection opened by the constructor; stays {@code null} if opening failed. */
    private Connection connection;

    /**
     * Loads the MySQL driver and opens the connection.
     *
     * <p>Failures are printed to standard error rather than thrown, so
     * construction always succeeds; a later {@link #query} or {@link #update}
     * then fails with an {@link SQLException}.
     */
    public DBTools() {
        try {
            Class.forName("com.mysql.jdbc.Driver");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        try {
            // NOTE(review): URL and credentials are hard-coded; consider moving
            // them to configuration.
            connection = DriverManager.getConnection(
                    "jdbc:mysql://localhost:3306/TestURL", "root", "zhuyi");
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs a parameterised SELECT and returns every row.
     *
     * @param sql SQL text with {@code ?} placeholders
     * @param list_values values bound to the placeholders in order; may be
     *        {@code null} when there are none
     * @return a list of {@code String[]}, one array per row with one element
     *         per column
     * @throws Throwable an {@link SQLException} if there is no connection or
     *         the statement fails
     */
    public ArrayList query(String sql, ArrayList list_values) throws Throwable {
        ArrayList<String[]> listRows = new ArrayList<String[]>();
        PreparedStatement statement = prepare(sql, list_values);
        try {
            ResultSet rows = statement.executeQuery();
            try {
                // The column count is fixed for the whole result set.
                int columnCount = rows.getMetaData().getColumnCount();
                while (rows.next()) {
                    String[] rowinfo = new String[columnCount];
                    for (int i = 0; i < columnCount; i++) {
                        rowinfo[i] = rows.getString(i + 1);
                    }
                    listRows.add(rowinfo);
                }
            } finally {
                rows.close();
            }
        } finally {
            statement.close();
        }
        return listRows;
    }

    /**
     * Runs a parameterised INSERT, UPDATE or DELETE.
     *
     * @param sql SQL text with {@code ?} placeholders
     * @param list_values values bound to the placeholders in order; may be
     *        {@code null} when there are none
     * @throws Throwable an {@link SQLException} if there is no connection or
     *         the statement fails
     */
    public void update(String sql, ArrayList list_values) throws Throwable {
        PreparedStatement statement = prepare(sql, list_values);
        try {
            statement.executeUpdate();
        } finally {
            statement.close();
        }
    }

    /** Prepares {@code sql} and binds the values, closing the statement if binding fails. */
    private PreparedStatement prepare(String sql, ArrayList list_values) throws SQLException {
        if (connection == null) {
            throw new SQLException(
                    "No database connection is available; see the error reported when DBTools was constructed");
        }
        PreparedStatement statement = connection.prepareStatement(sql);
        boolean bound = false;
        try {
            if (list_values != null) {
                for (int i = 0; i < list_values.size(); i++) {
                    statement.setObject(i + 1, list_values.get(i));
                }
            }
            bound = true;
            return statement;
        } finally {
            if (!bound) {
                statement.close();
            }
        }
    }
}

Servlet调用:
代码如下:

package controller;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.List;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import webTools.IOTOWeb;
/**
 * Servlet that triggers the song chart import on GET and prints a short
 * status page on POST.
 */
public class TestURL extends HttpServlet {

    /** Creates the servlet. */
    public TestURL() {
        super();
    }

    /** Releases the servlet; delegates to the superclass only. */
    public void destroy() {
        super.destroy();
    }

    /**
     * Runs the song chart import.
     *
     * <p>A failed import is reported to the container with its cause attached
     * instead of being printed and then treated as success.
     *
     * @param request the request sent by the client
     * @param response the response sent back to the client
     * @throws ServletException if the import fails for a reason other than I/O
     * @throws IOException if the import fails with an I/O error
     */
    public void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        try {
            IOTOWeb iotoWeb = new IOTOWeb();
            iotoWeb.getFromBaiduMap3("http://list.mp3.baidu.com/topso/mp3topsong.html?id=1?top2");
        } catch (Throwable e) {
            // getFromBaiduMap3 declares Throwable, so sort out what was caught.
            if (e instanceof Error) {
                throw (Error) e;
            }
            if (e instanceof IOException) {
                throw (IOException) e;
            }
            throw new ServletException("Importing the song chart failed", e);
        }
    }

    /**
     * Writes a short page naming this servlet class.
     *
     * <p>NOTE(review): the printed strings look like a servlet template whose
     * HTML tags were lost; they are kept exactly as found.
     *
     * @param request the request sent by the client
     * @param response the response sent back to the client
     * @throws ServletException if an error occurred
     * @throws IOException if writing the response fails
     */
    public void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        response.setContentType("text/html");
        PrintWriter out = response.getWriter();
        out.println("");
        out.println("");
        out.println(" A Servlet");
        out.println(" ");
        out.print(" This is ");
        out.print(this.getClass());
        out.println(", using the POST method");
        out.println(" ");
        out.println("");
        out.flush();
        out.close();
    }

    /**
     * Initialises the servlet; nothing to do.
     *
     * @throws ServletException if an error occurs
     */
    public void init() throws ServletException {
        // No initialisation required.
    }
}

获取金书网的图书名:
代码如下:

package webTools;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import dbTools.DBTools;
public class GetBook {
public String getHtmlContent(String htmlURL) throws Throwable {
URL url = null;
String rowContent = "";
StringBuffer htmlContent = new StringBuffer();
url = new URL(/tech-java/htmlURL/index.html);
BufferedReader in = new BufferedReader(new InputStreamReader(url
.openStream(), "gb2312"));
while ((rowContent = in.readLine()) != null) {
htmlContent.append(rowContent);
}
in.close();
return htmlContent.toString();
}
public String getBookName(String htmlContent) {
String bookName = "";
String regex = "[^>]*";
Pattern pattern = Pattern.compile(regex, Pattern.DOTALL);
Matcher matcher = pattern.matcher(htmlContent);
if (matcher.find()) {
bookName = matcher.group();
}
return bookName;
}
public String outTag(final String s) {
return s.replaceAll("", "");
}
DBTools dbtools = new DBTools();
public void getFromJINSHU(String htmlURL) throws Throwable {
String htmlContent = getHtmlContent(htmlURL);
String bookName = outTag(getBookName(htmlContent));
if (bookName != null && !"".equals(bookName)) {
System.out.println(bookName);
String sql = "insert into bookinfo(bookName) values(?)";
ArrayList list_values = new ArrayList();
list_values.add(bookName);
dbtools.update(sql, list_values);
}
}
}

调用Servlet:
代码如下:

package controller;
import java.io.IOException;
import java.io.PrintWriter;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import webTools.GetBook;
/**
 * Servlet that walks the numbered book pages and imports each book name.
 * GET and POST behave identically.
 */
public class TestBook extends HttpServlet {

    /**
     * Number of the next book page to import. It is an instance field, so the
     * position survives across requests handled by this servlet instance:
     * once a crawl has reached the end, later requests find nothing left to do.
     * NOTE(review): access is not synchronised; confirm concurrent requests
     * are not expected.
     */
    int i = 1;

    /** Creates the servlet. */
    public TestBook() {
        super();
    }

    /** Releases the servlet; delegates to the superclass only. */
    public void destroy() {
        super.destroy();
    }

    /**
     * Imports every remaining book page up to number 9999.
     *
     * <p>A page that fails is logged and skipped. The original listing instead
     * had {@code doGet} and {@code doPost} call each other from their catch
     * blocks, which deepened the call stack and created a fresh
     * {@code GetBook} on every failing page.
     *
     * @param request the request sent by the client
     * @param response the response sent back to the client
     * @throws ServletException if an error occurred
     * @throws IOException if an error occurred
     */
    public void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        GetBook bookinfo = new GetBook();
        for (; i < 10000; i++) {
            String bookURL = "http://www.golden-book.com/booksinfo/12/" + i
                    + ".html";
            try {
                bookinfo.getFromJINSHU(bookURL);
            } catch (Throwable e) {
                // getFromJINSHU declares Throwable; JVM errors are not ours to hide.
                if (e instanceof Error) {
                    throw (Error) e;
                }
                log("Skipping book page " + bookURL, e);
            }
        }
    }

    /**
     * Same as {@link #doGet}.
     *
     * @param request the request sent by the client
     * @param response the response sent back to the client
     * @throws ServletException if an error occurred
     * @throws IOException if an error occurred
     */
    public void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        doGet(request, response);
    }

    /**
     * Initialises the servlet; nothing to do.
     *
     * @throws ServletException if an error occurs
     */
    public void init() throws ServletException {
        // No initialisation required.
    }
}

每种功能的实现方法都有很多,希望各位可以交流不同的思路和方法。可以加 QQ:412546724。呵呵

    
 
 

您可能感兴趣的文章:

  • java 对树的操作,TreeSet,能否插入相同的数据,如果相同,如何解决
  • 急问题:在java中嵌入sql的插入语句,插入成功,但是出现异常
  • JAVA连数据库并插入数据的问题?
  • java二分查找插入法
  • java插入排序 Insert sort实例
  • 有关java.sql.ResultSet 利用SetDate往 oracle 中插入时间和日期的问题!!
  • Java获取最后插入MySQL记录的自增ID值的3种方法
  • JAVA算法起步之插入排序实例
  • java操作mysql入门代码实例(含插入、更新和查询)
  • java直接插入排序示例
  • java 下执行mysql 批量插入的几种方法及用时
  • java实现插入mysql二进制文件,blob类型,遇到问题及解决办法
  • Java直接插入排序算法实现
  • java 合并排序算法、冒泡排序算法、选择排序算法、插入排序算法、快速排序算法的描述
  • java Servlet获取和设置cookie实例代码
  • java获取系统路径字体、得到某个目录下的所有文件名、获取当前路径
  • java获取时间的方法总结
  • ***在线等待!!!***Java中怎样获取,应用图片?
  • 听说Java有获取硬件信息的类,那位知道是什么?
  • 如何获取Java 文件 编译时打印在控制台的错误信息.
  • java直接插入排序示例 iis7站长之家
  • java获取日期的方法
  • 怎样在JAVA里实现获取指定服务器上所有可用数据库的列表??
  • 在执行java程序时,如何获取当前.class文件的路径
  • unix环境如何通过shell或java获取windows机器上的csv文件?
  • 菜鸟问题:如何让jsp来获取与之关联的java 数据(赚分了)
  • hp_ux下如何使用java获取top信息
  • java从输入流中获取数据并返回字节数组示例
  • java获取当前日期使用实例
  • java 获取项目文件路径实现方法
  • java正则表达式获取url的host示例
  •  
    本站(169IT)旨在分享和传播互联网科技相关的资讯和技术,将尽最大努力为读者提供更好的信息聚合和浏览方式。
    本站(169IT)站内文章除注明原创外,均为转载、整理或搜集自网络。欢迎任何形式的转载,转载请注明出处。












  • 相关文章推荐
  • java操作excel2007文档介绍及代码例子
  • 寻找<<java2图形设计卷2SWING>>一书源代码和<<java网络高级编程>>一书源代码
  • java实现判断字符串是否全是数字的四种方法代码举例
  • 怎样将标准的C++代码转换成JAVA代码??
  • andriod下java socket网络编程:java socket客户端服务端代码示例
  • 哪位会使用代码保护工具WingGuard来保护java代码?
  • java Servlet实现Session创建存取以及url重写代码示例
  • Java代码分享工具 Java Gems
  • 各路JAVA高手们,能否给我一个用JAVA写的简单聊天室代码?
  • 你最喜欢去的JAVA网站或JAVA源代码下载网站是哪里???
  • JAVA APPLET与JSP有什么区别?好像都是把JAVA代码嵌到网页中。
  • java里有什么函数可以检查 java 代码并执行它?
  • 谁有Java源代码中floatToIntBits,intBitsToFloat的源代码?
  • 怎样看到java程序经过编译后的代码内容(bytecode的)或者在bytecode在JVM执行时JVM所解析的代码
  • 大哥大姐们小弟刚学JAVA,对它没点头绪啊!能告诉我JAVA在什么环境下编写代码和编译吗??
  • java与js代码互调示例代码
  • java文件复制代码片断(java实现文件拷贝)
  • 你认为最好的中文JAVA网站或有大量优秀JAVA源代码免费下载的网站是哪里???送分!!!
  • 有没有这样的软件:把一个不标准格式的JAVA原代码转换为具有标准(或比较标准)编码规范的代码。
  • 请问在java多线程中,是只有run(){}内的代码运行在一个新线程下呢?还是这个类中的代码都运行在一个新线程下?
  • 决定学Java,有没有小而好用的Java编辑器写代码,就象TurboC一样?
  • java命名空间java.sql类types的类成员方法: java_object定义及介绍
  • 我想学JAVA ,是买THINK IN JAVA 还是JAVA2核心技术:卷1 好???
  • java命名空间java.awt.datatransfer类dataflavor的类成员方法: imageflavor定义及介绍
  • 请问Java高手,Java的优势在那里??,Java主要适合于开发哪类应用程序
  • java命名空间java.lang.management类managementfactory的类成员方法: getcompilationmxbean定义及介绍
  • 如何将java.util.Date转化为java.sql.Date?数据库中Date类型对应于java的哪个Date呢
  • java命名空间java.lang.management接口runtimemxbean的类成员方法: getlibrarypath定义及介绍
  • 谁有电子版的《Java编程思想第二版(Thinking in java second)》和《Java2编程详解(special edition java2)》?得到给分
  • java命名空间java.lang.management接口runtimemxbean的类成员方法: getstarttime定义及介绍
  • 本人想学java,请问java程序员的待遇如何,和java主要有几个比较强的方向


  • 站内导航:


    特别声明:169IT网站部分信息来自互联网,如果侵犯您的权利,请及时告知,本站将立即删除!

    ©2012-2021,,E-mail:www_#163.com(请将#改为@)

    浙ICP备11055608号-3