Python中requests
库
flyfish
Python requests
库概述
1. 简介
- 什么是 requests?
  - requests 是一个简单易用的 HTTP 库,用于发送所有类型的 HTTP 请求。
  - 它简化了与 Web 服务的交互,支持会话维持、文件上传等复杂操作。
- 安装 requests
pip install requests
2. 基础使用
-
GET请求
import requests

# Always pass a timeout: by default requests waits indefinitely when the
# server never answers.
response = requests.get('https://api.github.com', timeout=10)
print(response.status_code)
print(response.text)
-
POST请求
payload = {'key1': 'value1', 'key2': 'value2'}
# timeout keeps the call from blocking forever on an unresponsive server.
response = requests.post('https://httpbin.org/post', data=payload, timeout=10)
print(response.text)
-
其他HTTP方法
- PUT, DELETE, HEAD, OPTIONS等方法的使用方式类似GET和POST。
3. 参数处理
-
URL参数
# The dict passed as params= is sent as the URL query parameters.
payload = {'key1': 'value1', 'key2': 'value2'}
response = requests.get('https://httpbin.org/get', params=payload, timeout=10)
-
表单数据
# The dict passed as data= is sent as the form body of the POST.
payload = {'key1': 'value1', 'key2': 'value2'}
response = requests.post('https://httpbin.org/post', data=payload, timeout=10)
-
JSON数据
# No `import json` is needed here: the json= argument serializes the payload
# itself, so the dict is passed as-is.
payload = {'key1': 'value1', 'key2': 'value2'}
response = requests.post('https://httpbin.org/post', json=payload, timeout=10)
4. 响应处理
-
状态码
# Map the status codes we care about to a message; any other code prints nothing.
status_messages = {200: "Success!", 404: "Not Found."}
message = status_messages.get(response.status_code)
if message is not None:
    print(message)
-
响应头
# Read the Content-Type response header, then print it.
content_type = response.headers['Content-Type']
print(content_type)
-
响应体
print(response.text)     # body as text
print(response.content)  # body as bytes
print(response.json())   # body parsed as JSON
5. 错误与异常处理
- 常见错误类型
try:
    # Without a timeout the Timeout handler below would never be reached.
    response = requests.get('https://nonexistentwebsite.com', timeout=10)
    response.raise_for_status()  # raises HTTPError for error status codes
except requests.exceptions.HTTPError as errh:
    print("Http Error:", errh)
except requests.exceptions.Timeout as errt:
    # Listed before ConnectionError: ConnectTimeout derives from both
    # classes and should be reported as a timeout.
    print("Timeout Error:", errt)
except requests.exceptions.ConnectionError as errc:
    print("Error Connecting:", errc)
except requests.exceptions.RequestException as err:
    # Base class of all requests exceptions: catches whatever is left.
    print("OOps: Something Else", err)
6. 高级功能
-
会话对象
# Use the session as a context manager so it is closed when the block exits.
with requests.Session() as session:
    # Headers set on the session are sent with every request made through it.
    session.headers.update({'User-Agent': 'Custom User Agent'})
    response = session.get('https://example.com', timeout=10)
-
Cookie管理
# Cookies to send along with the request.
cookies = {'cookies_are': 'working'}
response = requests.get('https://httpbin.org/cookies', cookies=cookies, timeout=10)
-
文件上传
# Open the file in a with-block so the handle is closed once the upload
# finishes (or fails) instead of being leaked.
with open('report.xls', 'rb') as report:
    files = {'file': report}
    response = requests.post('https://httpbin.org/post', files=files, timeout=10)
-
SSL验证
# WARNING: verify=False skips SSL certificate verification, so the server's
# identity is not checked. Use it only against test hosts such as this one,
# never for production traffic.
response = requests.get('https://expired.badssl.com/', verify=False, timeout=10)
7. 实战案例
-
API调用
- 使用GitHub API获取用户信息。
response = requests.get('https://api.github.com/users/octocat', timeout=10)
# Fail loudly on an error status instead of treating an error payload as
# user data.
response.raise_for_status()
user_info = response.json()
print(user_info)
-
网页抓取
- 使用BeautifulSoup结合requests进行网页抓取。
from bs4 import BeautifulSoup

# timeout keeps the scrape from hanging on an unresponsive site.
response = requests.get('https://example.com', timeout=10)
# Parse the fetched HTML and print it re-indented.
soup = BeautifulSoup(response.text, 'html.parser')
print(soup.prettify())
8. 性能优化
- 并发请求
- 使用 concurrent.futures 或 asyncio 进行并发请求以提高效率。注意:requests 本身是同步阻塞的,与 asyncio 配合时需要借助线程池(例如 loop.run_in_executor)。
from concurrent.futures import ThreadPoolExecutor

urls = ['https://example.com', 'https://example.org']

with ThreadPoolExecutor(max_workers=5) as executor:
    # submit() forwards keyword arguments, so every request gets a timeout.
    futures = [executor.submit(requests.get, url, timeout=10) for url in urls]
    # Results are read in submission order; a failed request is reported
    # without aborting the remaining ones.
    for url, future in zip(urls, futures):
        try:
            print(future.result().status_code)
        except requests.exceptions.RequestException as err:
            print(f"Request to {url} failed: {err}")
- 使用
9. 最佳实践
-
重试机制
- 在网络不稳定的情况下,设置合理的重试策略。
from requests.adapters import HTTPAdapter

# The with-block closes the session once the request is done.
with requests.Session() as s:
    # Retry up to 3 times for every https:// URL requested via this session.
    # NOTE(review): per the requests docs, max_retries covers failed
    # connections only, not requests that already reached the server.
    s.mount('https://', HTTPAdapter(max_retries=3))
    # A timeout bounds each attempt so retries cannot hang forever.
    response = s.get('https://example.com', timeout=10)
-
日志记录
- 记录请求和响应以便调试。
import logging

logging.basicConfig(level=logging.DEBUG)
# The request/connection log lines are emitted by the "urllib3" logger at
# DEBUG level; raising it to WARNING would hide exactly the output this
# example is meant to show.
logging.getLogger("urllib3").setLevel(logging.DEBUG)
response = requests.get('https://example.com', timeout=10)
通过Python脚本调用一个基于HTTP的服务来处理文本和图像数据
import requests
import base64
import json
import logging# 设置日志
logging.basicConfig(level=logging.INFO)

# NOTE(review): 0.0.0.0 is normally a listen address; whether a client can
# connect to it is platform-dependent. Confirm the intended host.
SERVER_URL = "http://0.0.0.0:37914/predict"  # consider loading from an environment variable


def encode_image(image_path):
    """Return the file at image_path encoded as a base64 string.

    Returns None (after logging the error) when the file is missing or
    cannot be read/encoded.
    """
    try:
        with open(image_path, "rb") as image_file:
            image_data = image_file.read()
        return base64.b64encode(image_data).decode("utf-8")
    except FileNotFoundError:
        logging.error(f"Image file {image_path} not found.")
        return None
    except Exception as e:
        # Deliberate best-effort: any other failure is logged and skipped.
        logging.error(f"Error encoding image {image_path}: {e}")
        return None


def send_request(text, images=None, model_name="deepseek-ai/deepseek-vl2-small", timeout=10):
    """POST text (and optional images) to SERVER_URL and return the JSON reply.

    Args:
        text: Prompt text, sent under the "text" key.
        images: Optional iterable of image file paths; each one is base64
            encoded and sent under the "images" key.
        model_name: Value sent under the "model_name" key.
        timeout: Timeout in seconds passed to requests.post.

    Returns:
        The decoded JSON response, or None when images were given but none
        could be encoded, or when the request or JSON decoding failed.
    """
    data = {"model_name": model_name, "text": text}

    if images:
        # Keep only the images that were encoded successfully.
        images_data = [encoded for encoded in map(encode_image, images) if encoded]
        if not images_data:
            logging.warning("No valid images to send.")
            return None
        data["images"] = images_data

    logging.info("Sending request data")
    try:
        response = requests.post(SERVER_URL, json=data, timeout=timeout)
        response.raise_for_status()  # raise HTTPError on an error status
        return response.json()
    except requests.exceptions.HTTPError as http_err:
        logging.error(f"HTTP error occurred: {http_err}")
    except requests.exceptions.Timeout as timeout_err:
        # Checked before ConnectionError: ConnectTimeout derives from both
        # and should be reported as a timeout.
        logging.error(f"Timeout error occurred: {timeout_err}")
    except requests.exceptions.ConnectionError as conn_err:
        logging.error(f"Connection error occurred: {conn_err}")
    except requests.exceptions.RequestException as err:
        logging.error(f"An error occurred: {err}")
    except ValueError as json_err:
        # Older requests versions raise a plain ValueError for a non-JSON
        # body. Kept last because several RequestException subclasses
        # (e.g. InvalidURL) are ValueErrors too.
        logging.error(f"Invalid JSON in response: {json_err}")
    return None


def main():
    """Send one sample prompt with one image and log the server's reply."""
    text = "请描述这张图。"
    images = ["/path/to/your/image.jpg"]  # make sure this path is correct
    response = send_request(text=text, images=images)
    if response:
        logging.info("Response from server:")
        logging.info(json.dumps(response, ensure_ascii=False, indent=4))
    else:
        logging.error("No valid response from server.")


if __name__ == "__main__":
    main()
从本地图片文件夹中读取图像,并将这些图像与给定的文本一起发送到远程服务器进行处理的例子
import requests
import base64
import json
import os
import time
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm# 设置日志配置,包括日志级别和格式
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Configuration is read from environment variables, falling back to the
# local service address, model name and timeout (seconds) below.
SERVER_URL = os.getenv('SERVER_URL', "http://0.0.0.0:37914/predict")
MODEL_NAME = os.getenv('MODEL_NAME', "deepseek-ai/deepseek-vl2-small")
TIMEOUT = int(os.getenv('TIMEOUT', 10))


class ImageEncoder:
    """Namespace for image-encoding helpers."""

    @staticmethod
    def encode_image(image_path):
        """Return the file at image_path encoded as a base64 string.

        Returns None (after logging the error) when the file is missing or
        cannot be read/encoded.
        """
        try:
            with open(image_path, "rb") as image_file:
                image_data = image_file.read()
            return base64.b64encode(image_data).decode("utf-8")
        except FileNotFoundError:
            logging.error(f"Image file {image_path} not found.")
            return None
        except Exception as e:
            # Deliberate best-effort: any other failure is logged and skipped.
            logging.error(f"Error encoding image {image_path}: {e}")
            return None


def send_request(text, images=None, model_name=MODEL_NAME, timeout=TIMEOUT):
    """POST text (and optional images) to SERVER_URL and return the JSON reply.

    Args:
        text: Prompt text, sent under the "text" key.
        images: Optional iterable of image file paths; each one is base64
            encoded and sent under the "images" key.
        model_name: Value sent under the "model_name" key.
        timeout: Timeout in seconds passed to requests.post.

    Returns:
        The decoded JSON response, or None when images were given but none
        could be encoded, or when the request or JSON decoding failed.
    """
    data = {
        "model_name": model_name,
        "text": text,
    }

    if images:
        # Keep only the images that were encoded successfully.
        images_data = [
            encoded
            for encoded in map(ImageEncoder.encode_image, images)
            if encoded
        ]
        if not images_data:
            logging.warning("No valid images to send.")
            return None
        data["images"] = images_data

    logging.info("Sending request data")
    try:
        response = requests.post(SERVER_URL, json=data, timeout=timeout)
        response.raise_for_status()  # raise HTTPError on an error status
        return response.json()
    except requests.exceptions.HTTPError as http_err:
        logging.error(f"HTTP error occurred: {http_err}")
    except requests.exceptions.Timeout as timeout_err:
        # Checked before ConnectionError: ConnectTimeout derives from both
        # and should be reported as a timeout.
        logging.error(f"Timeout error occurred: {timeout_err}")
    except requests.exceptions.ConnectionError as conn_err:
        logging.error(f"Connection error occurred: {conn_err}")
    except requests.exceptions.RequestException as err:
        logging.error(f"An error occurred: {err}")
    except ValueError as json_err:
        # Older requests versions raise a plain ValueError for a non-JSON
        # body. Kept last because several RequestException subclasses
        # (e.g. InvalidURL) are ValueErrors too.
        logging.error(f"Invalid JSON in response: {json_err}")
    return None


def process_image(filename, text, result_folder, images_folder="images"):
    """Send one image with the prompt and save the reply as a JSON text file.

    Args:
        filename: Image file name inside images_folder.
        text: Prompt text sent with the image.
        result_folder: Existing directory that receives "<stem>.txt".
        images_folder: Directory holding the image; defaults to "images".

    The elapsed time of the send_request call is stored in the response
    under "inference_time" before it is written.
    NOTE: images that differ only by extension (a.png / a.jpg) map to the
    same result file.
    """
    image_path = os.path.join(images_folder, filename)

    # perf_counter is a monotonic clock, so the measured duration cannot be
    # skewed by system clock adjustments.
    start_time = time.perf_counter()
    response = send_request(text, [image_path])
    inference_time = time.perf_counter() - start_time

    if not response:
        logging.error(f"No valid response from server for {filename}, Inference time: {inference_time:.4f} seconds")
        return

    response["inference_time"] = inference_time
    result_filename = os.path.splitext(filename)[0] + ".txt"
    result_path = os.path.join(result_folder, result_filename)
    with open(result_path, "w", encoding="utf-8") as f:
        json.dump(response, f, ensure_ascii=False, indent=4)
    logging.info(f"Result saved to {result_path}, Inference time: {inference_time:.4f} seconds")


def main():
    """Process every image in ./images concurrently using the prompt in prompt.txt."""
    prompt_file = "prompt.txt"
    try:
        with open(prompt_file, "r", encoding="utf-8") as f:
            text = f.read().strip()
    except FileNotFoundError:
        logging.error(f"Error: prompt file {prompt_file} not found.")
        return

    images_folder = "images"
    result_folder = "result"

    # A missing or unreadable images folder is reported the same way as a
    # missing prompt file instead of crashing with a traceback.
    try:
        filenames = os.listdir(images_folder)
    except OSError as err:
        logging.error(f"Cannot list images folder {images_folder}: {err}")
        return

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(result_folder, exist_ok=True)

    with ThreadPoolExecutor(max_workers=5) as executor:
        # Map each future to its file name so failures can be attributed.
        futures = {}
        for filename in filenames:
            if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
                future = executor.submit(
                    process_image, filename, text, result_folder, images_folder
                )
                futures[future] = filename
            else:
                logging.warning(f"Skipping non-image file: {filename}")

        # tqdm shows a progress bar as tasks complete.
        for future in tqdm(as_completed(futures), total=len(futures), desc="Processing Images"):
            try:
                future.result()
            except Exception:
                # Task boundary: log the traceback and keep processing the
                # remaining images rather than aborting the whole run.
                logging.exception(f"Processing failed for {futures[future]}")


if __name__ == "__main__":
    main()