【Java爬虫】006
【Java爬虫】006
最后更新:2020年8月1日11:42:00 一、概述 URLConnection是java.net包中的一个抽象类,其主要用于实现应用程序与URL之间的通信;HttpURLConnection继承自URLConnection,也是抽象类;在网络爬虫中,可以使用URLConnection或HttpURLConnection请求URL获取流数据,通过对流数据的操作,获取具体的实体内容;
【Java爬虫】006
最后更新:2020年8月1日11:42:00
URLConnection是java.net包中的一个抽象类,其主要用于实现应用程序与URL之间的通信;
HttpURLConnection继承自URLConnection,也是抽象类;
在网络爬虫中,可以使用URLConnection或HttpURLConnection请求URL获取流数据,通过对流数据的操作,获取具体的实体内容;
1、说明
URLConnection与HttpURLConnection都是抽象类,无法直接创建实例化对象,但可以通过java.net包URL类中的openConnection()方法创建URLConnection与HttpURLConnection实例;
2、代码示例
代码语言:javascript代码运行次数:0运行复制package com.zb.;
import java.io.IOException;
import java.HttpURLConnection;
import java.URL;
import java.URLConnection;
/**
 * Shows how to obtain {@link URLConnection} and {@link HttpURLConnection} instances.
 *
 * <p>Both types are abstract, so instances are created through
 * {@link URL#openConnection()} instead of a constructor.
 */
public class Main {
    /**
     * Creates one generic and one HTTP-specific connection object for the same URL.
     *
     * @param args unused
     * @throws IOException if the URL cannot be parsed or a connection object cannot be created
     */
    public static void main(String[] args) throws IOException {
        // NOTE(review): this literal appears to have lost its scheme and host; new URL("/")
        // throws MalformedURLException ("no protocol"), so substitute the absolute URL to use.
        URL url = new URL("/");
        // openConnection() only builds the connection object; no network traffic happens yet.
        URLConnection urlConnection = url.openConnection();
        // The cast is valid only when the URL uses the http or https scheme.
        HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection();
    }
}
package com.zb.;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.HttpURLConnection;
import java.URL;
import StandardCharsets;
/**
 * Sends an HTTP GET request with {@link HttpURLConnection} and prints the response body.
 */
public class Main {
    /**
     * Opens a connection, issues a GET request and, when the status is 200, prints the body.
     *
     * @param args unused
     * @throws IOException if the URL cannot be parsed or the request fails
     */
    public static void main(String[] args) throws IOException {
        // NOTE(review): this literal appears to have lost its scheme and host; new URL("/")
        // throws MalformedURLException ("no protocol"), so substitute the absolute URL to fetch.
        URL url = new URL("/");
        // Create the connection object.
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        // Allow reading the response.
        connection.setDoInput(true);
        // Use the GET request method.
        connection.setRequestMethod("GET");
        // Open the actual network connection.
        connection.connect();
        // Only read the body when the response code is 200.
        if (HttpURLConnection.HTTP_OK == connection.getResponseCode()) {
            StringBuilder response = new StringBuilder();
            // try-with-resources closes the reader even if reading throws.
            try (BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
                String readLine;
                // readLine() strips line terminators, so the lines are joined without separators.
                while (null != (readLine = bufferedReader.readLine())) {
                    response.append(readLine);
                }
            }
            System.out.println(response.toString());
        }
    }
}
package com.zb.;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.HttpURLConnection;
import java.URL;
import StandardCharsets;
//模拟提交表单(POST请求)
/**
 * Simulates submitting an HTML form by sending a POST request with {@link HttpURLConnection}.
 */
public class PostSubmitForm {
    /**
     * Posts the form fields {@code wen} and {@code action} and prints the response body.
     *
     * @param args unused
     * @throws IOException if the URL cannot be parsed or the request fails
     */
    public static void main(String[] args) throws IOException {
        // Form field values to submit.
        String wen = "EH629625211CS";
        String action = "ajax";
        // Target URL; the host is masked as "***" and must be replaced with a real one.
        URL url = new URL("http://www.***/ems.php");
        // Create the connection object.
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        // Allow writing a request body.
        connection.setDoOutput(true);
        connection.setRequestMethod("POST");
        // Build the form body; the charset is explicit so the bytes do not depend on the platform.
        byte[] bytes = ("wen" + "=" + wen + "&" + "action" + "=" + action)
                .getBytes(StandardCharsets.UTF_8);
        // Write the body and close the stream before reading the response.
        try (java.io.OutputStream requestBody = connection.getOutputStream()) {
            requestBody.write(bytes);
        }
        StringBuilder html = new StringBuilder();
        // try-with-resources closes the reader even if reading throws.
        try (BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            // readLine() strips line terminators, so the lines are joined without separators.
            while ((line = bufferedReader.readLine()) != null) {
                html.append(line);
            }
        }
        System.out.println(html.toString());
    }
}
package com.zb.;
import java.io.IOException;
import java.URL;
import java.URLConnection;
//设置头信息
/**
 * Shows how to set request headers on a {@link URLConnection} before connecting.
 */
public class SetHeader {
    /**
     * Creates a connection, adds several request headers and then connects.
     *
     * @param args unused
     * @throws IOException if the URL cannot be parsed or the connection cannot be opened
     */
    public static void main(String[] args) throws IOException {
        // Target URL; the host is masked as "***" and must be replaced with a real one.
        URL url = new URL("http://www.***/b.asp");
        URLConnection connection = url.openConnection();
        // Request headers must be set before connect(); afterwards they can no longer be changed.
        connection.setRequestProperty("Accept", "text/html");
        connection.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.5");
        connection.setRequestProperty("Host", "www.***");
        connection.setRequestProperty("Cache-Control", "max-age=0");
        connection.setRequestProperty("User-Agent",
                "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                        + "Chrome/78.0.3904.108 Safari/537.36");
        connection.connect();
        // Further operations (for example reading the response) can follow here.
    }
}
1、概述
使用URLConnection或HttpURLConnection时,可以设置两种超时时间,分别是连接超时时间(ConnectTimeout)和读取超时时间(ReadTimeout);
2、代码演示
代码语言:javascript代码运行次数:0运行复制package com.zb.;
import java.io.IOException;
import java.URL;
import java.URLConnection;
//设置超时时间
/**
 * Shows how to configure the connect and read timeouts of a {@link URLConnection}.
 */
public class SetTimeout {
    /**
     * Creates a connection object and sets both timeouts to 10000 milliseconds.
     *
     * @param args unused
     * @throws IOException if the URL cannot be parsed or the connection object cannot be created
     */
    public static void main(String[] args) throws IOException {
        // Target URL; the host is masked as "***" and must be replaced with a real one.
        URL url = new URL("http://www.***/b.asp");
        URLConnection connection = url.openConnection();
        // Timeouts are given in milliseconds.
        connection.setConnectTimeout(10000); // connect timeout
        connection.setReadTimeout(10000); // read timeout
        // Further operations can follow here.
    }
}
package com.zb.;
import java.io.IOException;
import java.InetSocketAddress;
import java.Proxy;
import java.URL;
import java.URLConnection;
//代理服务器的设置
/**
 * Shows how to route a {@link URLConnection} through an HTTP proxy server.
 */
public class SetProxy {
    /**
     * Creates a connection that uses an HTTP proxy and then connects.
     *
     * @param args unused
     * @throws IOException if the URL cannot be parsed or the connection cannot be opened
     */
    public static void main(String[] args) throws IOException {
        // Describe the proxy server by type, address and port (sample values).
        Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("171.97.67.160", 128));
        // Target URL; the host is masked as "***" and must be replaced with a real one.
        URL url = new URL("http://www.***/b.asp");
        // Passing the proxy to openConnection makes this connection go through it.
        URLConnection connection = url.openConnection(proxy);
        // Open the actual network connection.
        connection.connect();
    }
}
package ;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.HttpURLConnection;
import java.URL;
import java.X509Certificate;
import javax.ssl.HttpsURLConnection;
import javax.ssl.SSLContext;
import javax.ssl.SSLSocketFactory;
import javax.ssl.TrustManager;
import javax.ssl.X509TrustManager;
/**
 * Requests an HTTPS URL with {@link HttpURLConnection} after installing a trust manager
 * that accepts every certificate.
 *
 * <p><strong>Warning:</strong> accepting every certificate removes the protection that
 * certificate validation gives against man-in-the-middle attacks. Use this only for
 * testing or for crawling hosts whose identity does not matter.
 */
public class URLConnectionSSL {
    /**
     * Installs the trust-all socket factory, fetches the URL and prints the response body.
     * Prints {@code null} when the response code is not 200.
     *
     * @param args unused
     * @throws IOException if the URL cannot be parsed or the request fails
     */
    public static void main(String[] args) throws IOException {
        initUnSecureTSL();
        // NOTE(review): this literal appears to have lost its scheme and host; new URL("/")
        // throws MalformedURLException ("no protocol"), so substitute the absolute URL to fetch.
        URL url = new URL("/");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        int statusCode = conn.getResponseCode(); // response status code
        String responseBody = null;
        // Only read the body when the response code is 200.
        if (HttpURLConnection.HTTP_OK == statusCode) {
            StringBuilder response = new StringBuilder();
            // try-with-resources closes the reader even if reading throws.
            try (BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), "utf-8"))) {
                String readLine;
                // readLine() strips line terminators, so the lines are joined without separators.
                while (null != (readLine = bufferedReader.readLine())) {
                    response.append(readLine);
                }
            }
            responseBody = response.toString();
        }
        System.out.println(responseBody);
    }

    /**
     * Makes a trust-all {@link SSLSocketFactory} the default for every
     * {@link HttpsURLConnection} created afterwards.
     */
    private static void initUnSecureTSL() {
        // Trust manager that performs no certificate checks.
        final TrustManager[] trustAllCerts = new TrustManager[]{new X509TrustManager() {
            @Override
            public void checkClientTrusted(final X509Certificate[] chain, final String authType) {
                // Intentionally empty: accept any client certificate.
            }

            @Override
            public void checkServerTrusted(final X509Certificate[] chain, final String authType) {
                // Intentionally empty: accept any server certificate.
            }

            @Override
            public X509Certificate[] getAcceptedIssuers() {
                // The interface documents a non-null (possibly empty) array, so do not return null.
                return new X509Certificate[0];
            }
        }};
        try {
            // Create the SSLContext and initialise it with the trust-all manager.
            SSLContext sslContext = SSLContext.getInstance("SSL");
            sslContext.init(null, trustAllCerts, new java.security.SecureRandom());
            // Build the socket factory backed by that context.
            SSLSocketFactory sslSocketFactory = sslContext.getSocketFactory();
            // Make it the default factory for HttpsURLConnection.
            HttpsURLConnection.setDefaultSSLSocketFactory(sslSocketFactory);
        } catch (java.security.GeneralSecurityException e) {
            // Best effort: on failure the default socket factory is left unchanged.
            e.printStackTrace();
        }
    }
}
本文参与 腾讯云自媒体同步曝光计划,分享自作者个人站点/博客。 原始发表:2025-01-06,如有侵权请联系 cloudcommunity@tencent 删除爬虫httpurlconnectionimport连接java #感谢您对电脑配置推荐网 - 最新i3 i5 i7组装电脑配置单推荐报价格的认可,转载请说明来源于"电脑配置推荐网 - 最新i3 i5 i7组装电脑配置单推荐报价格
上传时间: 2025-07-23 14:17:01
上一篇:【Java爬虫】007
下一篇:【Java爬虫】005
推荐阅读
留言与评论(共有 7 条评论) |
本站网友 云南白药喷雾剂的功效 | 10分钟前 发表 |
www.***/ems.php"); //创建连接 HttpURLConnection connection = (HttpURLConnection)(); //允许Output connection.setDoOutput(true); connection.setRequestMethod("POST"); //拼接请求参数 byte[] bytes = ("wen" + "=" + wen + "&" + "action" + "=" + action).getBytes(); //在连接中添加参数 connection.getOutputStream().write(bytes); //定义BufferedReader输入流来读取URL的响应 | |
本站网友 甲油胶 | 25分钟前 发表 |
无法直接创建实例化对象 | |
本站网友 雪在飞 | 24分钟前 发表 |
代码演示代码语言:javascript代码运行次数:0运行复制package com.zb.; import java.io.IOException; import java.URL; import java.URLConnection; //设置超时时间 public class SetTimeout { public static void main(String[] args) throws IOException { //初始化URL URL url = new URL("http | |
本站网友 达仁堂阿魏化痞膏 | 27分钟前 发表 |
代码演示代码语言:javascript代码运行次数:0运行复制package com.zb.; import java.io.IOException; import java.URL; import java.URLConnection; //设置超时时间 public class SetTimeout { public static void main(String[] args) throws IOException { //初始化URL URL url = new URL("http | |
本站网友 葛薯 | 18分钟前 发表 |
"text/html"); connection.setRequestProperty("Accept-Language" | |
本站网友 1岁宝宝体重 | 18分钟前 发表 |
这里设置编码 BufferedReader bufferedReader = new BufferedReader( new InputStreamReader(conn.getInputStream() |