动态注入

def import_object(name: str):
    """Import and return the object identified by a dotted path string.

    A name without dots (e.g. ``"requests"``) is imported as a module and
    returned directly. For a dotted name (e.g. ``"os.path.join"``) everything
    before the last dot is imported as a module and the final component is
    then looked up on that module as an attribute.

    Args:
        name: Module name, or dotted ``module.attribute`` path.

    Returns:
        The imported module, or the attribute found on the parent module.

    Raises:
        ImportError: If the parent module cannot be imported, or if it has
            no attribute named by the final path component.
    """
    if "." not in name:
        return __import__(name)

    module_path, _, attr_name = name.rpartition(".")
    # A non-empty fromlist makes __import__ return the module named by
    # module_path itself instead of its top-level package.
    module = __import__(module_path, fromlist=[attr_name])
    try:
        return getattr(module, attr_name)
    except AttributeError as exc:
        # Report both halves of the path: the parent module was found, it is
        # the final component that is missing.
        raise ImportError(
            "Cannot import name %r from %r" % (attr_name, module_path),
            name=module_path,
        ) from exc


# Resolve the ``requests`` module by name at runtime and fetch a page with it.
# A timeout is passed so an unresponsive server cannot block the script forever.
res = import_object('requests').get("https://www.baidu.com", timeout=10)
print(res.text)

多线程封装

import time
from concurrent.futures import ThreadPoolExecutor, as_completed, ProcessPoolExecutor


class ThreadPoolSpider:
    """Demo crawler that runs simulated requests on a shared thread pool."""

    # One pool of 8 worker threads, created when the class body executes and
    # shared by every instance. NOTE(review): the original snippet carried a
    # commented-out ProcessPoolExecutor(max_workers=8) as an alternative.
    executor = ThreadPoolExecutor(max_workers=8)

    def http_request(self, url, second):
        """Simulate a request by sleeping, then echo the arguments back.

        Args:
            url: Identifier of the simulated request; returned unchanged.
            second: Number of seconds to sleep before returning.

        Returns:
            The tuple ``(url, second)``.
        """
        time.sleep(second)
        return url, second

    def crawl(self, task_count=100, second=2):
        """Submit simulated requests to the pool and print each result.

        Args:
            task_count: How many tasks to submit; task ``i`` is given ``i``
                as its ``url`` argument. Defaults to 100.
            second: Sleep duration passed to every task. Defaults to 2.

        Returns:
            A list of the ``(url, second)`` tuples in the order the tasks
            finished, which may differ from submission order.
        """
        pending = [
            self.executor.submit(self.http_request, index, second)
            for index in range(task_count)
        ]

        results = []
        # as_completed yields each future as soon as it finishes.
        for future in as_completed(pending):
            data = future.result()
            print(data)
            results.append(data)
        return results


if __name__ == "__main__":
    # Run the demo crawl only when this file is executed as a script.
    ThreadPoolSpider().crawl()