# python-note

**Repository Path**: zhanghuan97/python-note

## Basic Information

- **Project Name**: python-note
- **Description**: No description available
- **Primary Language**: Unknown
- **License**: Not specified
- **Default Branch**: master
- **Homepage**: None
- **GVP Project**: No

## Statistics

- **Stars**: 0
- **Forks**: 0
- **Created**: 2024-12-10
- **Last Updated**: 2024-12-10

## Categories & Tags

**Categories**: Uncategorized

**Tags**: None

## README

# python-note

## 爬虫

#### 1.发送请求

```python
# pip install requests
import requests

# 模拟浏览器
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36'
}
url = ''
# 发送请求
response = requests.get(url, headers=headers)
# 设置编码
response.encoding = 'utf-8'
```

#### 2.响应结果

```python
response.text  # 返回响应内容的文本形式(html源码)
response.json()  # 解析JSON格式的响应内容,并返回一个包含解析结果的Python对象
response.content  # 返回响应内容的二进制形式。它以字节的形式返回响应内容,适用于处理非文本类型的响应,如图像或文件。
```

#### 3.解析数据

```python
"""
方法一:css选择器
"""
import parsel

# 获取响应文本数据(网页源代码)
html_text = response.text
# 把获取到html字符串数据,转化成可解析的对象
selector = parsel.Selector(html_text)
# 提取30个房源数据对应div标签
divs = selector.css('.sellListContent li .info')
for div in divs:
    """
    提取具体数据内容
    """
    title = div.css('.title a::text').get()  # 提取标题
    area_list = div.css('.positionInfo a::text').getall()
    area = area_list[0]  # 小区
    area_1 = area_list[1]  # 区域
```

```python
"""
方法二:xpath选择器
"""
# pip install lxml
from lxml import etree

e = etree.HTML(response.text)
info = '\n'.join(e.xpath('//div[@class = "m-post"]/p/text()'))
title = e.xpath('//h1/text()')[0]
url = f'https://www.85xs.cc{e.xpath("//tr/td[2]/a/@href")[0]}'

# 例2
# 创建可以提取数据的对象
e = etree.HTML(response.text)
# 提取ip
ips = e.xpath('//div[1]/table/tr/td[1]/text()')
# 提取端口
ports = e.xpath('//div[1]/table/tr/td[2]/text()')
# 提取地址
address = e.xpath('//div[1]/table/tr/td[3]/text()')
```

```python
"""
方法三:正则
"""
#导入正则模块
import re

html_data = response.text
# 提取标题
title = re.findall('

(.*?)

', html_data)[0] # 提取内容 content = re.findall('
(.*?)

', html_data, re.S)[0].replace('

', '\n')
```

## 自动化办公

#### 1. Excel处理

1.1使用xlrd

```python
# 读取Excel模块 pip install xlrd 现在只支持.xls
import xlrd

data = xlrd.open_workbook('./data/名单.xls')
# 获取sheet1页
sheet = data.sheets()[0]
for i in range(sheet.nrows):
    # 获取i行第二列
    name = sheet.cell_value(i, 1)
    # 获取i行第三列
    old_money = sheet.cell_value(i, 2)
```

1.2使用pandas

```python
# pip install pandas
import pandas as pd

dict = {
    '姓名': ['张三', '李四', '王五'],
    '年龄': [18, 19, 20],
    '性别': ['男', '女', '男'],
    '身高': [170, 165, 180]
}
df = pd.DataFrame(dict)
# 写入文件
df.to_excel('./data/test.xlsx', index=False)
print(df)
# 读取文件
data = pd.read_excel('./data/每月物料表.xlsx')
```

#### Word处理

```python
```

#### 邮件处理

```python
# 登录邮箱,发邮件
import smtplib
from email.mime.text import MIMEText  # 用来包装内容
from email.header import Header  # 包装头部信息

# 发送者
from_addr = '2310545976@qq.com'
# 发给谁
to_person = '10545976@qq.com'
server = smtplib.SMTP('smtp.qq.com', 25)
# 登录密码
server.login(from_addr, '13232322')
# 主题 需要包装
subject = Header('邮件主题', 'utf-8')
sender = Header('百战程序员', 'utf-8')
# 邮件内容
msg = MIMEText('邮件内容', 'html', 'utf-8')
msg['From'] = sender
msg['Subject'] = subject
server.sendmail(from_addr, to_person, msg.as_string())
```

## 数据可视化

## web应用

```python
# pip install flask
from flask import Flask, render_template, request

data = {}
app = Flask(__name__)


@app.route('/index', methods=['POST'])
def index():
    # post请求获取用户名
    # username = request.args.get('username')
    username = request.form.get('username')
    # print(username)
    password = request.args.get('password')
    return render_template('index.html', data=data)
```