【学习笔记】关于匿名函数 λ 的几个例子
本帖最后由 whyerect 于 2023-3-23 19:20 编辑
例一:把列表中的学生按年龄排序
# Demo: sort a list of student records in place by age, using a lambda as the key.
students = [
    {"姓名": "张三", "年龄": 18},
    {"姓名": "李四", "年龄": 7},
    {"姓名": "王五", "年龄": 99},
    {"姓名": "赵六", "年龄": 30},
    {"姓名": "田七", "年龄": 22},
]
print("排序前:", students)
# list.sort() reorders the list itself; the lambda picks the age out of each record.
students.sort(key=lambda student: student["年龄"])
print("排序后:", students)
如果不用匿名函数lambda,用普通函数怎么做?
def sort_func(item):
    """Return the value stored under the "年龄" (age) key of ``item``.

    Passed as ``key=`` to ``list.sort`` below, so records are ordered by age.
    """
    return item["年龄"]


# Same data as the lambda version; only the key function differs.
students = [
    {"姓名": "张三", "年龄": 18},
    {"姓名": "李四", "年龄": 7},
    {"姓名": "王五", "年龄": 99},
    {"姓名": "赵六", "年龄": 30},
    {"姓名": "田七", "年龄": 22},
]
print("排序前:", students)
students.sort(key=sort_func)
print("排序后:", students)
输出结果:
排序前: [{'姓名': '张三', '年龄': 18}, {'姓名': '李四', '年龄': 7}, {'姓名': '王五', '年龄': 99}, {'姓名': '赵六', '年龄': 30}, {'姓名': '田七', '年龄': 22}]
排序后: [{'姓名': '李四', '年龄': 7}, {'姓名': '张三', '年龄': 18}, {'姓名': '田七', '年龄': 22}, {'姓名': '赵六', '年龄': 30}, {'姓名': '王五', '年龄': 99}]
https://static.52pojie.cn/static/image/hrline/1.gif
例二:sorted 函数可以用 key=函数名 对排序的列表先进行函数变换再排序
# NOTE(review): the original list literal was lost when the post was extracted;
# these sample values are a reconstruction — replace with the author's data if known.
num_list = [3, -7, 1, -4, 9, -2]
# Sort by absolute value: abs() is applied to each element to obtain its sort key,
# while the elements themselves are returned unchanged.
num_list_1 = sorted(num_list, key=abs)
print(num_list_1)  # prints [1, -2, 3, -4, -7, 9] for the sample values above
输出结果:
my_list = ["abcd", "a", "ab", "abc", "12345", "1"]
# Order the strings by their length; the lambda supplies len(text) as the sort key.
my_list_1 = sorted(my_list, key=lambda text: len(text))
print(my_list_1)
输出结果:
['a', '1', 'ab', 'abc', 'abcd', '12345']
https://static.52pojie.cn/static/image/hrline/1.gif
例三:min 函数可以用 key=函数名 对求最小值的列表先进行函数变换再求最小值
有一个列表,比如 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],现在的需求是:用户输入一个小数,比如:3.14,要求判断输入的小数距离列表中哪个数最近,如何实现?
# Candidates are the integers 1..10.
my_list = list(range(1, 11))
my_num = float(input("请输入1-10之间的小数:"))
# The key is each candidate's distance to the input, so min() returns the closest one.
result = min(my_list, key=lambda candidate: abs(candidate - my_num))
print(result)
用多线程采集阿里招聘的数据
本帖最后由 whyerect 于 2023-4-1 21:28 编辑
获取阿里巴巴的招聘数据,需要的字段:标题、更新时间、地址,通过多任务的方式获取10页数据
网址:https://talent.alibaba.com/off-campus/position-list?lang=zh
import pymongo
from queue import Queue
import requests
from threading import Thread
import time
class Alibaba:
    """Collect job postings from Alibaba's recruitment site with a thread pipeline.

    Three queues connect the stages: request payloads (``page_queue``) ->
    decoded JSON responses (``json_queue``) -> parsed items (``parsed_queue``),
    which are finally inserted into the MongoDB collection ``spider.alibaba``.
    """

    def __init__(self):
        self.client = pymongo.MongoClient()
        self.collection = self.client['spider']['alibaba']
        # NOTE(review): the _csrf token in the URL and the cookie below are
        # hard-coded session values — presumably they expire; confirm before reuse.
        self.url = 'https://talent.alibaba.com/position/search?_csrf=b8ec86e6-d9fc-4eb5-8870-b43f9639c5b4'
        # Template for the POST body; get_pages() copies it once per page.
        self.data = {
            "channel": "group_official_site",
            "language": "zh",
            "batchId": "",
            "categories": "",
            "deptCodes": [],
            "key": "",
            "pageIndex": 1,
            "pageSize": 10,
            "regions": "",
            "subCategories": ""
        }
        self.headers = {
            'cookie': 'prefered-lang=zh; cna=vOuhHKfdx1QCART5EuvFqCw6; XSRF-TOKEN=b8ec86e6-d9fc-4eb5-8870-b43f9639c5b4; SESSION=QzVGRTFCMjEyRkU2OEI3MDFEODgxRERENDJGMEZFMkM=; xlly_s=1; l=fBrKCclVNEWXfjn8BO5Bhurza77T4IOfGlVzaNbMiIEGa61P1HHx8NCsVs79RdtjgTf4setr24w81dhM8b4U-x9_PwJbsiooVxJ6-bpU-L5..; tfstk=cVgRBPGAdKvlxU93uzKcL-Zg5rgdZ5_8p_wNJVc0hoLVEA7di_uiWS_kV5eaeIC..; isg=BLW1ZBPfPVEEo1nXeZpCfsU_xDFvMmlEavcOvjfbKixwDtEA_4ONFSLIWNI4dYH8',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
        }
        self.page_queue = Queue()
        self.json_queue = Queue()
        self.parsed_queue = Queue()

    def get_pages(self):
        """Queue one request payload per page, for pages 1 through 10."""
        for i in range(1, 11):
            # Queue a fresh dict for every page. Putting self.data itself would
            # enqueue ten references to ONE dict, so workers could all see the
            # last pageIndex written instead of their own page.
            self.page_queue.put({**self.data, 'pageIndex': i})

    def get_data(self):
        """Worker loop: POST each queued payload and queue the decoded JSON reply."""
        while True:
            data = self.page_queue.get()
            try:
                response = requests.post(url=self.url, json=data, headers=self.headers)
                self.json_queue.put(response.json())
            except (requests.RequestException, ValueError) as exc:
                # Report and keep the worker alive; a dead worker could leave
                # pages unprocessed and make page_queue.join() wait forever.
                print(f"request for page {data.get('pageIndex')} failed: {exc!r}")
            finally:
                # Acknowledge only after the result is in the next queue, so
                # page_queue.join() really means "every page has been fetched".
                self.page_queue.task_done()

    def parse_data(self):
        """Worker loop: extract name, update date and locations from each response."""
        while True:
            data = self.json_queue.get()
            try:
                for i in data['content']['datas']:
                    item = {
                        'name': i['name'],
                        'update_time': self.timestamp_to_date(i['modifyTime']),
                        'workLocations': '/'.join(i['workLocations'])
                    }
                    self.parsed_queue.put(item)
            except (KeyError, TypeError) as exc:
                print(f"unexpected response structure, page skipped: {exc!r}")
            finally:
                self.json_queue.task_done()

    def save_data(self):
        """Worker loop: print each parsed item with a running count and store it."""
        i = 0
        while True:
            item = self.parsed_queue.get()
            try:
                i += 1
                print(i, item)
                self.collection.insert_one(item)
            except pymongo.errors.PyMongoError as exc:
                print(f"failed to save item {i}: {exc!r}")
            finally:
                self.parsed_queue.task_done()

    @staticmethod
    def timestamp_to_date(timestamp_13):
        """Convert a 13-digit (millisecond) timestamp to a 'Y年M月D日' string in local time."""
        date = time.localtime(timestamp_13 / 1000.0)
        return f'{date.tm_year}年{date.tm_mon}月{date.tm_mday}日'

    def main(self):
        """Run the pipeline and return once every item has been saved."""
        # Fill the page queue before any join(): if the producer ran in its own
        # thread, page_queue.join() could return before anything was queued.
        self.get_pages()

        workers = [Thread(target=self.get_data) for _ in range(6)]
        workers += [Thread(target=self.parse_data) for _ in range(8)]
        workers.append(Thread(target=self.save_data))
        for t in workers:
            # The worker loops never break; as daemon threads they are simply
            # abandoned when the main thread finishes.
            t.daemon = True
            t.start()

        # Author's lesson, kept as a comment: q.join() only waits until every
        # get() has been matched by a task_done(). When task_done() is called
        # right after get(), "queue drained" and "work finished" are different
        # moments, and the main thread may exit (killing the daemon threads)
        # while an item is still being saved — which previously required a
        # time.sleep(5) here. Each worker now calls task_done() after it has
        # finished the item and passed its result on, so joining the queues in
        # pipeline order is sufficient and no sleep is needed.
        for q in (self.page_queue, self.json_queue, self.parsed_queue):
            q.join()
if __name__ == '__main__':
    # Run the crawler once and report the wall-clock time the whole run took.
    start_time = time.time()
    alibaba = Alibaba()
    alibaba.main()
    print("__main__总共花费时间:", time.time()-start_time)
多进程方式采集腾讯视频的电视剧介绍
import requests
import pymongo
from multiprocessing import Process
from multiprocessing import JoinableQueue as Queue
import time
class Tengxun():
    """Collect listing data from Tencent Video with a multi-process pipeline.

    Request payloads flow through ``data_queue`` -> ``json_queue`` ->
    ``content_queue``; parsed items end up in the MongoDB collection
    ``spider.tengxun``.
    """

    # Kept as class attributes for compatibility, but no longer connected at
    # class-definition time: a MongoClient must not be shared between a parent
    # and its child processes, so save_data() creates its own client instead.
    client = None
    collection = None

    def __init__(self):
        self.headers = {
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"
        }
        self.url = 'https://pbaccess.video.qq.com/trpc.vector_layout.page_view.PageService/getPage?video_appid=3000010'
        # JSON payloads waiting to be sent
        self.data_queue = Queue()
        # decoded responses waiting to be parsed
        self.json_queue = Queue()
        # parsed items waiting to be stored
        self.content_queue = Queue()

    @staticmethod
    def _build_payload(page):
        """Build the JSON request body for result page number ``page``."""
        page_no = str(page)
        page_params = {
            "page_id": "channel_list_second_page",
            "page_type": "operation",
            "channel_id": "100113",
            "filter_params": "ifeature=-1&iarea=-1&iyear=-1&ipay=-1&sort=75",
            "page": page_no
        }
        return {
            "page_context": {
                "page_index": page_no
            },
            "page_params": page_params,
            "page_bypass_params": {
                # Same paging fields as page_params, plus the caller metadata.
                "params": {
                    **page_params,
                    "caller_id": "3000010",
                    "platform_id": "2",
                    "data_mode": "default",
                    "user_mode": "default"
                },
                "scene": "operation",
                "abtest_bypass_id": "ddf6b146f5be0ab4"
            }
        }

    def get_post_data(self):
        """Queue one request payload per page, for pages 1 through 10."""
        for i in range(1, 11):
            self.data_queue.put(self._build_payload(i))

    def get_data(self):
        """Worker loop: send each queued payload and queue the decoded JSON reply."""
        while True:
            data = self.data_queue.get()
            try:
                # Form data is passed as a dict (data=); a request payload is
                # passed as JSON (json=). headers must be given by keyword:
                # the second positional argument of requests.get is `params`.
                # NOTE(review): a JSON body is normally sent with POST —
                # confirm which verb this endpoint expects.
                response = requests.get(self.url, headers=self.headers, json=data)
                self.json_queue.put(response.json())
            except (requests.RequestException, ValueError) as exc:
                print(f"request failed: {exc!r}")
            finally:
                # Acknowledge after the hand-off so join() means "work done".
                self.data_queue.task_done()

    def parse_data(self):
        """Worker loop: pull title, subtitle and duration out of each card."""
        while True:
            data = self.json_queue.get()
            try:
                for video in data['data']['CardList']['children_list']['list']['cards']:
                    params = video['params']
                    item = {
                        'title': params['title'],
                        'second_title': params['second_title'],
                        # A missing or empty duration is stored as the string 'NULL'.
                        'timelong': params.get('timelong') or 'NULL'
                    }
                    print(item)
                    self.content_queue.put(item)
            except (KeyError, TypeError) as exc:
                print(f"unexpected response structure, page skipped: {exc!r}")
            finally:
                self.json_queue.task_done()

    def save_data(self):
        """Worker loop: insert every parsed item into MongoDB."""
        # Connect inside the worker process rather than inheriting a client
        # that was created in the parent.
        self.client = pymongo.MongoClient()
        self.collection = self.client['spider']['tengxun']
        while True:
            item = self.content_queue.get()
            try:
                self.collection.insert_one(item)
            except pymongo.errors.PyMongoError as exc:
                print(f"failed to save item: {exc!r}")
            finally:
                self.content_queue.task_done()

    def main(self):
        """Run the pipeline and return once every item has been stored."""
        # Queue all payloads up front; with a separate producer process the
        # first join() could run before anything had been queued (the old
        # code slept for two seconds to paper over that).
        self.get_post_data()

        for target in (self.get_data, self.parse_data, self.save_data):
            p = Process(target=target)
            # The worker loops never return; daemon processes are terminated
            # when the main process exits.
            p.daemon = True
            p.start()

        # Each worker acknowledges an item only after finishing it, so joining
        # the queues in pipeline order waits for the whole run.
        for q in (self.data_queue, self.json_queue, self.content_queue):
            q.join()
if __name__ == '__main__':
    # Entry point: build the crawler and run the whole pipeline once.
    tx = Tengxun()
    tx.main()
感谢分享!!!! 感谢分享!!!! 支持支持一下
关于“对象关联”的几个例子
本帖最后由 whyerect 于 2023-3-24 13:11 编辑
Python中一切皆对象,面向对象的三大特性:封装、继承、多态。听老师上课举了几个“多态”的例子,全部用到【对象关联】,下面是对象关联的几个例子,一个是不同的犬吠;一个是迷你操作系统安装app;一个是学生与班级;一个是迭代器。这些都是对象关联的例子。
例一:众狗汪汪汪
class Dog(object):
    """Base dog; subclasses may override bark()."""

    def bark(self):
        print("狗汪汪叫...")


class LangDog(Dog):
    """Dog subclass that overrides bark() with its own message."""

    def bark(self):
        print("狼狗震耳欲聋的叫...")


class ZangAo(Dog):
    """Dog subclass that overrides nothing, so it barks like the base Dog."""
    pass


class Person(object):
    """Someone who can confront any object that provides a bark() method."""

    def pk_dog(self, dog_obj):
        """Print the attack message, then let ``dog_obj`` respond via its own bark()."""
        print("人用力的向狗进行了攻击...")
        # Which bark() runs depends on the object passed in, not on this method.
        dog_obj.bark()


anna = Person()
dog1 = Dog()
dog2 = LangDog()
dog3 = ZangAo()
anna.pk_dog(dog1)
anna.pk_dog(dog2)
anna.pk_dog(dog3)
输出结果:
人用力的向狗进行了攻击...
狗汪汪叫...
人用力的向狗进行了攻击...
狼狗震耳欲聋的叫...
人用力的向狗进行了攻击...
狗汪汪叫...
例二:APP装装装
class MiniOS(object):
    """A miniature operating system that keeps track of installed apps by name."""

    def __init__(self, name):
        self.name = name
        self.apps = []  # names of the applications installed so far

    def __str__(self):
        return "%s 安装的软件列表为 %s" % (self.name, str(self.apps))

    def install_app(self, app_obj):
        """Install ``app_obj`` unless an app with the same name is already recorded."""
        if app_obj.name in self.apps:
            print("已经安装了 %s,无需再次安装" % app_obj.name)
            return
        # The app object performs its own installation steps; the OS only
        # records the name afterwards.
        app_obj.install()
        self.apps.append(app_obj.name)


class App(object):
    """Base application with a name, a version and a description."""

    def __init__(self, name, version, desc):
        self.name = name
        self.version = version
        self.desc = desc

    def __str__(self):
        return "%s 的当前版本是 %s - %s" % (self.name, self.version, self.desc)

    def install(self):
        """Print the default installation step."""
        print("将 %s [%s] 的执行程序复制到程序目录..." % (self.name, self.version))


class PyCharm(App):
    """App subclass that uses the default installation."""
    pass


class Chrome(App):
    """App subclass that prints an extra step before the default installation."""

    def install(self):
        print("正在解压缩安装程序...")
        super().install()


linux = MiniOS("Linux")
print(linux)
pycharm = PyCharm("PyCharm", "1.0", "python开发的IDE环境")
chrome = Chrome("Chrome", "2.0", "谷歌浏览器")
linux.install_app(pycharm)
linux.install_app(chrome)
# Installing the same app a second time only prints the "already installed" notice.
linux.install_app(chrome)
print(linux)
例三:学生在班里
class Classroom(object):
    """A classroom that holds references to its student objects."""

    def __init__(self, name):
        self.classroom_name = name
        # Instance attributes used by other methods of the class are normally
        # defined in __init__.
        self.stus = []

    def add_new_stu(self, stu_obj):
        """Associate ``stu_obj`` with this classroom by appending it to ``stus``."""
        self.stus.append(stu_obj)


class Student(object):
    """A student identified by name."""

    def __init__(self, name):
        self.student_name = name


# Create one classroom object
class205 = Classroom("205班")
# Create several student objects
stu01 = Student("学生1")
stu02 = Student("学生2")
stu03 = Student("学生3")
# Add the students to the classroom
class205.add_new_stu(stu01)
class205.add_new_stu(stu02)
class205.add_new_stu(stu03)
# Read each student's name: classroom.list[index].name
# (the [index] subscripts were missing; `stus` is a list and has no student_name)
print(class205.stus[0].student_name)
print(class205.stus[1].student_name)
print(class205.stus[2].student_name)
例四:自制迭代器
class MyList(object):
    """A custom iterable that stores its values in a plain list."""

    def __init__(self):
        self.items = []

    def add(self, val):
        """Append ``val`` to the stored items."""
        self.items.append(val)

    def __iter__(self):
        # Each call hands out a new MyIterator bound to this object.
        myiterator = MyIterator(self)
        return myiterator
class MyIterator(object):
    """Iterator over the ``items`` list of the object it was created for."""

    def __init__(self, mylist_obj):
        self.mylist = mylist_obj
        # current records the position of the next item to hand out
        self.current = 0

    def __next__(self):
        """Return the item at the current position and advance.

        Raises:
            StopIteration: when every item has been returned.
        """
        if self.current >= len(self.mylist.items):
            raise StopIteration
        # Index with the current position; returning `items` itself would
        # yield the whole list on every step.
        item = self.mylist.items[self.current]
        self.current += 1
        return item

    def __iter__(self):
        # An iterator is its own iterable.
        return self
if __name__ == '__main__':
    # Fill a MyList with 1..5, then let list() consume it through __iter__.
    mylist = MyList()
    mylist.add(1)
    mylist.add(2)
    mylist.add(3)
    mylist.add(4)
    mylist.add(5)
    nums = list(mylist)
print(nums)
我发现我现在 BeautifulSoup 学得入门了:lol
import requests
import os
from bs4 import BeautifulSoup
# Make sure the target folder exists.
# NOTE(review): nothing below writes into this folder yet, and "/铃声" is an
# absolute path at the filesystem root — confirm that is the intended location.
path = "/铃声"
if not os.path.exists(path):
    os.mkdir(path)

url = "https://sc.chinaz.com/yinxiao/index.html"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"
}
# Decode the raw bytes explicitly as UTF-8 before parsing.
response = requests.get(url, headers=headers).content.decode("utf-8")
soup = BeautifulSoup(response, "lxml")
div_list = soup.select("#AudioList .container .audio-item")
for div in div_list:
    # select() returns a list of matches, so take the first element before
    # calling get_text(); entries without a ".name" element are skipped.
    name_tags = div.select(".name")
    if not name_tags:
        continue
    name = name_tags[0].get_text()
    print(name.strip())
感谢分享!!!!
用lxml解析数据,然后,保存为JSON格式的文件
本帖最后由 whyerect 于 2023-3-25 14:50 编辑
import json
import requests
from lxml import etree
url = "https://www.4399.com/flash/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"
}
response = requests.get(url=url, headers=headers)
# Set the encoding before reading .text so the page is decoded as gb2312.
response.encoding = "gb2312"
html = etree.HTML(response.text)
li_list = html.xpath('//ul[@class="n-game cf"]/li')
data_list = list()
for li in li_list:
    # xpath() returns a list of matches; keep the first match as a plain
    # string (None when there is none) instead of storing the list itself.
    # NOTE(review): the original subscripts appear to have been lost in
    # extraction — confirm that the first match is what was intended.
    names = li.xpath("./a/b/text()")
    hrefs = li.xpath("./a/@href")
    item = {
        "title": names[0] if names else None,
        "href": hrefs[0] if hrefs else None,
    }
    data_list.append(item)
with open("games.json", "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps the Chinese titles readable in the file.
    f.write(json.dumps(data_list, ensure_ascii=False, indent=2))
CSV模块的使用
本帖最后由 whyerect 于 2023-3-25 15:49 编辑
import csv
# newline="" lets the csv module control line endings itself.
with open("data.csv", "w", encoding="gbk", newline="") as f:
    writer = csv.writer(f)
    # Header row first, then one row per person.
    writer.writerow(["姓名", "年龄", "性别"])
    writer.writerow(["张三", "16", "男"])
    writer.writerow(["李四", "39", "女"])
    writer.writerow(["王五", "58", "女"])
    writer.writerow(["赵六", "89", "LGBT"])
当然不导入csv模块也是可以的,可以直接写:
# Write the same table by hand, one comma-separated line per row.
# Note that the space after each comma is written to the file as part of the
# following field.
with open("data1.csv", "w", encoding="gbk") as f:
    f.write("姓名, 年龄, 性别\n")
    f.write("张三, 16, 男\n")
    f.write("李四, 39, 女\n")
    f.write("王五, 58, 女\n")
    f.write("赵六, 89, LGBT\n")
### 关于匿名函数 lambda 的几个例子
#### 例一:sorted 函数可以用 `key=函数名` 对排序的列表先进行函数变换再排序
```python
# NOTE(review): the original list literal was lost when the post was extracted;
# these sample values are a reconstruction — replace with the author's data if known.
num_list = [3, -7, 1, -4, 9, -2]
# Sort by absolute value: abs() is applied to each element to obtain its sort key,
# while the elements themselves are returned unchanged.
num_list_1 = sorted(num_list, key=abs)
print(num_list_1)  # prints [1, -2, 3, -4, -7, 9] for the sample values above
```
> **输出结果:**
>
```python
my_list = ["abcd", "a", "ab", "abc", "12345", "1"]
# Order the strings by their length; the lambda supplies len(text) as the sort key.
my_list_1 = sorted(my_list, key=lambda text: len(text))
print(my_list_1)
```
> **输出结果:**
> ['a', '1', 'ab', 'abc', 'abcd', '12345']
#### 例二:min 函数可以用 `key=函数名` 对求最小值的列表先进行函数变换再求最小值
有一个列表,比如 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],现在的需求是:用户输入一个小数,比如:3.14,要求判断输入的小数距离列表中哪个数最近,如何实现?
```python
# Candidates are the integers 1..10.
my_list = list(range(1, 11))
my_num = float(input("请输入1-10之间的小数:"))
# The key is each candidate's distance to the input, so min() returns the closest one.
result = min(my_list, key=lambda candidate: abs(candidate - my_num))
print(result)
```
#### 例三:`顾安老师上课讲的例子:`把列表中的学生按年龄排序
```python
# Demo: sort a list of student records in place by age, using a lambda as the key.
students = [
    {"姓名": "张三", "年龄": 18},
    {"姓名": "李四", "年龄": 7},
    {"姓名": "王五", "年龄": 99},
    {"姓名": "赵六", "年龄": 30},
    {"姓名": "田七", "年龄": 22},
]
print("排序前:", students)
# list.sort() reorders the list itself; the lambda picks the age out of each record.
students.sort(key=lambda student: student["年龄"])
print("排序后:", students)
```
如果不用匿名函数lambda,用普通函数怎么做?
```python
def sort_func(item):
    """Return the value stored under the "年龄" (age) key of ``item``.

    Passed as ``key=`` to ``list.sort`` below, so records are ordered by age.
    """
    return item["年龄"]


# Same data as the lambda version; only the key function differs.
students = [
    {"姓名": "张三", "年龄": 18},
    {"姓名": "李四", "年龄": 7},
    {"姓名": "王五", "年龄": 99},
    {"姓名": "赵六", "年龄": 30},
    {"姓名": "田七", "年龄": 22},
]
print("排序前:", students)
students.sort(key=sort_func)
print("排序后:", students)
```
> **输出结果:**
> 排序前: [{'姓名': '张三', '年龄': 18}, {'姓名': '李四', '年龄': 7}, {'姓名': '王五', '年龄': 99}, {'姓名': '赵六', '年龄': 30}, {'姓名': '田七', '年龄': 22}]
> 排序后: [{'姓名': '李四', '年龄': 7}, {'姓名': '张三', '年龄': 18}, {'姓名': '田七', '年龄': 22}, {'姓名': '赵六', '年龄': 30}, {'姓名': '王五', '年龄': 99}]
页:
[1]
2