
的微博:还记得和宝宝陪着@Dear-迪丽热巴 走过的花路吗?谢谢阿丝们一直以来的陪伴
的微博:去年和亲善大使热巴@Dear-迪丽热巴 的特别回忆
转发了 @WCS野生生物保护学会
的微博:去年和亲善大使热巴@Dear-迪丽热巴 的特别回忆
的微博:去年和亲善大使热巴@Dear-迪丽热巴 的特别回忆

的微博:#幸福触手可及##幸福触手可及定档0519# 从没有一个时刻,幸福如此靠近,只因有你在身边
的微博:【爱豆喊你来助力#北京2022#】
的微博:【想看看战疫一线医护人员们的脸!#极限挑战致敬医护人员#】脱下防疫服,援鄂人员们原来是这个模样。八位医护人员集体分享支援一线的故事,是他们为后方的我们竖起了最坚实的屏障,感谢这群医护天使的负重前行,致敬!@央视网青年 @雷佳音 @岳云鹏 @演员王迅 @贾乃亮 @努力努力再努力x @Dear-迪丽热巴...全文 赞[364004] 原文转发[1056354] 原文评论[3645]
的微博:#五四致敬战疫青年# #青春万岁#各地应急响应级别陆续下调,我们正在走向痊愈。回望这些年轻医务人员的脸,不应忘记,正是他们在危难之下,白衣执甲,毅然逆行,为我们筑起血肉长城。感恩提灯天使,致敬最可爱的人!春暖花开,等到疫情完全解除,无论你是从医还是就医,请记住医患之间的休戚与共、唇齿...全文 [组图共12张]
的微博:鸡条君目睹了vivo#极限挑战#第六季首发阵容@雷佳音 @岳云鹏 @演员王迅 @贾乃亮 @努力努力再努力x @Dear-迪丽热巴 @郭京飞 @邓伦 集结的整个过程,这就是欢迎新人的方式
+ pic_list = self.selector.xpath('//div[@class="c"]//img/@src')
+ for i, pic in enumerate(pic_list):
+ if "?" in pic:
+ pic = pic[:pic.index("?")]
+ pic_list[i] = pic
+ return pic_list
diff --git a/weibo_spider/parser/comment_parser.py b/weibo_spider/parser/comment_parser.py
new file mode 100644
index 00000000..c0117d80
--- /dev/null
+++ b/weibo_spider/parser/comment_parser.py
@@ -0,0 +1,69 @@
+import logging
+import random
+import requests
+import re
+from time import sleep
+from lxml.html import tostring
+from lxml.html import fromstring
+from lxml import etree
+from .parser import Parser
+from .util import handle_garbled, handle_html
+
+logger = logging.getLogger('spider.comment_parser')
+
+
+class CommentParser(Parser):
+    def __init__(self, cookie, weibo_id):
+        """Fetch and parse the comment page of one weibo.
+
+        Args:
+            cookie: Cookie value passed on to ``handle_html``.
+            weibo_id: Id string appended to the comment-page address.
+        """
+        self.cookie = cookie
+        comment_page = 'https://weibo.cn/comment/' + weibo_id
+        self.url = comment_page
+        self.selector = handle_html(cookie, comment_page)
+
+ def get_long_weibo(self):
+ """获取长原创微博"""
+ try:
+ for i in range(5):
+ self.selector = handle_html(self.cookie, self.url)
+ if self.selector is not None:
+ info_div = self.selector.xpath("//div[@class='c' and @id='M_']")[0]
+ info_span = info_div.xpath("//span[@class='ctt']")[0]
+ # 1. 获取 info_span 中的所有 HTML 代码作为字符串
+ html_string = etree.tostring(info_span, encoding='unicode', method='html')
+ # 2. 将
+ result = self.selector.xpath('//img[@alt="头像相册"]/../@href')
+ if len(result) > 0:
+ return "https://weibo.cn" + result[0]
+ else:
+ return "https://weibo.cn/" + str(self.user_id) + "/avatar?rl=0"
diff --git a/weibo_spider/parser/util.py b/weibo_spider/parser/util.py
new file mode 100644
index 00000000..b7c95736
--- /dev/null
+++ b/weibo_spider/parser/util.py
@@ -0,0 +1,150 @@
+import hashlib
+import json
+import logging
+import sys
+
+import requests
+from lxml import etree
+
+# Set GENERATE_TEST_DATA to True when generating test data.
+GENERATE_TEST_DATA = False
+TEST_DATA_DIR = 'tests/testdata'
+URL_MAP_FILE = 'url_map.json'
+logger = logging.getLogger('spider.util')
+
+# 全局代理配置,由 spider.py 初始化
+_proxies = None
+
+
+def set_proxies(proxy_url):
+    """Install a module-wide proxy used for both http and https requests.
+
+    A falsy ``proxy_url`` leaves the current proxy configuration untouched.
+    """
+    global _proxies
+    if not proxy_url:
+        return
+    _proxies = dict.fromkeys(('http', 'https'), proxy_url)
+    logger.info(u'已启用代理: %s', proxy_url)
+
+
+def get_proxies():
+    """Return the module-wide proxy mapping, or ``None`` if none was set."""
+    return _proxies
+
+
+def hash_url(url):
+    """Return the SHA-224 hex digest of ``url`` encoded as UTF-8."""
+    digest = hashlib.sha224()
+    digest.update(url.encode('utf8'))
+    return digest.hexdigest()
+
+
+DEFAULT_UA = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
+ 'AppleWebKit/537.36 (KHTML, like Gecko) '
+ 'Chrome/133.0.0.0 Safari/537.36')
+
+
+def handle_html(cookie, url):
+    """Download ``url`` and parse the response body into an lxml HTML tree.
+
+    The request is attempted up to five times with a linearly growing
+    back-off: 300s * attempt after an HTTP 403, 60s * attempt after any other
+    unsuccessful response or after an exception. Nothing is slept once the
+    final attempt has failed, so the failure is reported immediately.
+
+    Args:
+        cookie: Value sent in the ``Cookie`` request header.
+        url: Address of the page to fetch.
+
+    Returns:
+        The tree built by ``etree.HTML`` for a non-empty HTTP 200 response,
+        or ``None`` when the server answers 432 or every attempt fails.
+    """
+    from time import sleep
+
+    max_attempts = 5
+    headers = {'User-Agent': DEFAULT_UA, 'Cookie': cookie}
+    for attempt in range(max_attempts):
+        try:
+            resp = requests.get(url, headers=headers, timeout=10,
+                                proxies=_proxies)
+            if resp.status_code == 200 and len(resp.content) > 0:
+                return etree.HTML(resp.content)
+            if resp.status_code == 432:
+                # Retrying cannot help here: the request is given up at once.
+                logger.error(u'432 User-Agent被拒绝,请更新UA')
+                return None
+            if resp.status_code == 403:
+                wait = 300 * (attempt + 1)
+                logger.warning(u'403 IP被限制,等待%d秒后重试(第%d次)',
+                               wait, attempt + 1)
+            else:
+                wait = 60 * (attempt + 1)
+                logger.warning(u'请求返回状态码%d,等待%d秒后重试(第%d次)',
+                               resp.status_code, wait, attempt + 1)
+        except Exception as e:
+            wait = 60 * (attempt + 1)
+            logger.warning(u'请求异常,等待%d秒后重试(第%d次): %s',
+                           wait, attempt + 1, str(e))
+        # Sleeping after the last attempt would only delay the error below.
+        if attempt < max_attempts - 1:
+            sleep(wait)
+    logger.error(u'请求%s失败,已重试5次', url)
+    return None
+
+
+def handle_garbled(info):
+    """Return ``info`` as text that the console encoding can represent.
+
+    Zero-width spaces (U+200B) are removed and characters that cannot be
+    encoded with the stdout encoding are dropped.
+
+    Args:
+        info: An object exposing ``xpath`` (its ``string(.)`` value is used)
+            or anything else, which is converted with ``str``.
+
+    Returns:
+        The cleaned string, or u'无' if the conversion raised.
+    """
+    try:
+        if hasattr(info, 'xpath'):
+            info_str = info.xpath('string(.)')
+        else:
+            info_str = str(info)
+
+        # stdout may be replaced by an object whose encoding is None (for
+        # example io.StringIO); fall back to UTF-8 instead of failing and
+        # returning the placeholder for every input.
+        encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
+        return info_str.replace(u'\u200b', '').encode(
+            encoding, 'ignore').decode(encoding)
+    except Exception as e:
+        logger.exception(e)
+        return u'无'
+
+
+def bid2mid(bid):
+    """Convert a base-62 ``bid`` string to its decimal ``mid`` string.
+
+    The bid is split into a leading chunk of ``len(bid) % 4`` characters
+    (omitted when that is zero) followed by consecutive 4-character chunks.
+    Each chunk is decoded as a base-62 number; every chunk except the first
+    is left-padded with zeros to 7 digits, and the pieces are concatenated.
+
+    Args:
+        bid: String made of the characters 0-9, a-z, A-Z.
+
+    Returns:
+        The decimal mid as a string ('' for an empty bid).
+
+    Raises:
+        ValueError: If ``bid`` contains a character outside the alphabet.
+    """
+    alphabet = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+    base = len(alphabet)
+    head = len(bid) % 4
+    # Each following chunk is exactly four characters wide; slicing with a
+    # growing width would make chunks overlap on bids longer than 9 chars.
+    chunks = [bid[:head]] if head else []
+    chunks.extend(bid[start:start + 4] for start in range(head, len(bid), 4))
+    mid = ''
+    for position, chunk in enumerate(chunks):
+        num = 0
+        for char in chunk:
+            num = num * base + alphabet.index(char)
+        strnum = str(num)
+        if position > 0:
+            # Only non-leading groups carry significant leading zeros.
+            strnum = strnum.zfill(7)
+        mid += strnum
+    return mid
+
+
+def to_video_download_url(cookie, video_page_url):
+    """Resolve a weibo video page address to a downloadable stream address.
+
+    Args:
+        cookie: Value sent in the ``Cookie`` request header.
+        video_page_url: Address of the video page; '' means "no video".
+
+    Returns:
+        The ``hd_url`` of the stream when present, otherwise its ``url``;
+        '' when the page url is empty, when neither address is set, or when
+        the response body is not valid JSON.
+    """
+    if video_page_url == '':
+        return ''
+
+    video_object_url = video_page_url.replace('m.weibo.cn/s/video/show',
+                                              'm.weibo.cn/s/video/object')
+    # Bound before the request so the JSON-error path below still has a
+    # value to return instead of raising UnboundLocalError.
+    video_url = ''
+    try:
+        headers = {'User-Agent': DEFAULT_UA, 'Cookie': cookie}
+        wb_info = requests.get(video_object_url, headers=headers,
+                               proxies=_proxies).json()
+        stream = wb_info['data']['object']['stream']
+        # An empty result after both lookups means the video is a live stream
+        # (per the original comment), for which '' is returned.
+        video_url = stream.get('hd_url') or stream['url'] or ''
+    except json.decoder.JSONDecodeError:
+        logger.warning(u'当前账号没有浏览该视频的权限')
+
+    return video_url
+
+
+def string_to_int(string):
+    """Convert a count such as '123', '3万', '1.5万+' or '2亿' to an int.
+
+    Args:
+        string: An int (returned unchanged) or a string, optionally ending
+            in '万' / '万+' (times 10,000) or '亿' (times 100,000,000).
+
+    Returns:
+        The integer value; 0 for an empty string.
+
+    Raises:
+        ValueError: If the numeric part cannot be parsed.
+    """
+    # Checked first: len() is not defined for int, so the emptiness test
+    # below must never see one.
+    if isinstance(string, int):
+        return string
+    if len(string) == 0:
+        logger.warning("string to int, the input string is empty!")
+        return 0
+    if string.endswith(u'万+'):
+        number, multiplier = string[:-2], 10000
+    elif string.endswith(u'万'):
+        number, multiplier = string[:-1], 10000
+    elif string.endswith(u'亿'):
+        number, multiplier = string[:-1], 100000000
+    else:
+        return int(string)
+    # The numeric part may be fractional ('1.5万+'); round() guards against
+    # binary floating point landing just below the intended integer.
+    return int(round(float(number) * multiplier))
diff --git a/weibo_spider/spider.py b/weibo_spider/spider.py
new file mode 100644
index 00000000..09125233
--- /dev/null
+++ b/weibo_spider/spider.py
@@ -0,0 +1,400 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+import json
+import logging
+import logging.config
+import os
+import random
+import shutil
+import sys
+from datetime import date, datetime, timedelta
+from time import sleep
+
+from absl import app, flags
+from tqdm import tqdm
+
+from . import config_util, datetime_util
+from .downloader import AvatarPictureDownloader
+from .parser import AlbumParser, IndexParser, PageParser, PhotoParser
+from .user import User
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string('config_path', None, 'The path to config.json.')
+flags.DEFINE_string('u', None, 'The user_id we want to input.')
+flags.DEFINE_string('user_id_list', None, 'The path to user_id_list.txt.')
+flags.DEFINE_string('output_dir', None, 'The dir path to store results.')
+
+logging_path = os.path.split(
+ os.path.realpath(__file__))[0] + os.sep + 'logging.conf'
+logging.config.fileConfig(logging_path)
+logger = logging.getLogger('spider')
+
+
+class Spider:
+ def __init__(self, config):
+ """Initialise the spider from the parsed configuration dictionary.
+
+ Reads the crawl options from ``config``, resolves the list of users to
+ crawl (from the config, the --user_id_list flag or the --u flag) and
+ resets the per-run counters.
+ """
+ self.filter = config[
+ 'filter'] # 0 or 1 (program default 0): 0 crawls all of a user's weibos, 1 only the original ones
+ since_date = config['since_date']
+ if isinstance(since_date, int):
+ # an integer is taken as a number of days before today
+ since_date = date.today() - timedelta(since_date)
+ self.since_date = str(
+ since_date) # start time: weibos published from this value up to the end time are crawled; format yyyy-mm-dd
+ self.end_date = config[
+ 'end_date'] # end time: weibos published from the start time up to this value are crawled; format yyyy-mm-dd, the special value "now" means the present
+ random_wait_pages = config['random_wait_pages']
+ self.random_wait_pages = [
+ min(random_wait_pages),
+ max(random_wait_pages)
+ ] # random wait frequency, i.e. pause once every this many pages
+ random_wait_seconds = config['random_wait_seconds']
+ self.random_wait_seconds = [
+ min(random_wait_seconds),
+ max(random_wait_seconds)
+ ] # random wait duration, i.e. how many seconds each pause sleeps
+ self.global_wait = config['global_wait'] # global wait settings, e.g. wait 3600 seconds after every 1000 pages
+ self.page_count = 0 # pages crawled since the last global wait; once it meets the global wait threshold the next global wait starts
+ self.write_mode = config[
+ 'write_mode'] # output types as a list; per the original note it may contain txt, csv, json, mongo and mysql
+ self.pic_download = config[
+ 'pic_download'] # 0 or 1 (program default 0): 0 does not download original weibo pictures, 1 does
+ self.video_download = config[
+ 'video_download'] # 0 or 1 (program default 0): 0 does not download weibo videos, 1 does
+ self.file_download_timeout = config.get(
+ 'file_download_timeout',
+ [5, 5, 10
+ ]) # handling of file download timeouts: a list of three numbers, in order the max retries on timeout, the max connect time and the max read time
+ self.result_dir_name = config.get(
+ 'result_dir_name', 0) # result directory name, 0 or 1: whether results go into a folder named after the user's nickname or the user's id
+ self.cookie = config['cookie']
+ self.mysql_config = config.get('mysql_config') # MySQL connection settings, optional
+
+ self.sqlite_config = config.get('sqlite_config')
+ self.kafka_config = config.get('kafka_config')
+ self.mongo_config = config.get('mongo_config')
+ self.post_config = config.get('post_config')
+ self.user_config_file_path = ''
+ user_id_list = config['user_id_list']
+ # the --user_id_list flag takes precedence over the config entry
+ if FLAGS.user_id_list:
+ user_id_list = FLAGS.user_id_list
+ if not isinstance(user_id_list, list):
+ # a non-list value is treated as a path; relative paths are resolved against the working directory
+ if not os.path.isabs(user_id_list):
+ user_id_list = os.getcwd() + os.sep + user_id_list
+ if not os.path.isfile(user_id_list):
+ logger.warning('不存在%s文件', user_id_list)
+ sys.exit()
+ self.user_config_file_path = user_id_list
+ if FLAGS.u:
+ # the --u flag holds comma-separated user ids
+ user_id_list = FLAGS.u.split(',')
+ if isinstance(user_id_list, list):
+ # The first part handles dict entries
+ # The second part handles every other entry type and de-duplicates them
+ user_config_list = list(
+ map(
+ lambda x: {
+ 'user_uri': x['id'],
+ 'since_date': x.get('since_date', self.since_date),
+ 'end_date': x.get('end_date', self.end_date),
+ }, [
+ user_id for user_id in user_id_list
+ if isinstance(user_id, dict)
+ ])) + list(
+ map(
+ lambda x: {
+ 'user_uri': x,
+ 'since_date': self.since_date,
+ 'end_date': self.end_date
+ },
+ set([
+ user_id for user_id in user_id_list
+ if not isinstance(user_id, dict)
+ ])))
+ if FLAGS.u:
+ config_util.add_user_uri_list(self.user_config_file_path,
+ user_id_list)
+ else:
+ user_config_list = config_util.get_user_config_list(
+ user_id_list, self.since_date)
+ for user_config in user_config_list:
+ user_config['end_date'] = self.end_date
+ self.user_config_list = user_config_list # list of user_config entries for the weibo users to crawl
+ self.user_config = {} # current user's config, holding the user id and since_date
+ self.new_since_date = '' # new since_date generated for a user once that user has been crawled
+ self.user = User() # the crawled user information
+ self.got_num = 0 # number of weibos crawled so far
+ self.weibo_id_list = [] # ids of all weibos crawled so far
+
+    def write_weibo(self, weibos):
+        """Pass a batch of weibos to every writer, then to every downloader."""
+        for sink in self.writers:
+            sink.write_weibo(weibos)
+        for fetcher in self.downloaders:
+            fetcher.download_files(weibos)
+
+    def write_user(self, user):
+        """Pass the user's information to every configured writer."""
+        for sink in self.writers:
+            sink.write_user(user)
+
+    def get_user_info(self, user_uri):
+        """Load the user's information and count the page fetched for it."""
+        index_parser = IndexParser(self.cookie, user_uri)
+        self.user = index_parser.get_user()
+        self.page_count += 1
+
+    def download_user_avatar(self, user_uri):
+        """Download the pictures found in the user's avatar album."""
+        album_url = PhotoParser(self.cookie,
+                                user_uri).extract_avatar_album_url()
+        picture_urls = AlbumParser(self.cookie, album_url).extract_pic_urls()
+        downloader = AvatarPictureDownloader(self._get_filepath('img'),
+                                             self.file_download_timeout)
+        downloader.handle_download(picture_urls)
+
+ def get_weibo_info(self):
+ """Yield the current user's weibos one page at a time.
+
+ Generator: each non-empty page returned by ``PageParser`` is yielded as
+ a batch. Random pauses and the configured global waits are applied
+ between pages. Any exception is logged instead of being propagated.
+ """
+ try:
+ since_date = datetime_util.str_to_time(
+ self.user_config['since_date'])
+ now = datetime.now()
+ if since_date <= now:
+ page_num = IndexParser(
+ self.cookie,
+ self.user_config['user_uri']).get_page_num() # total number of weibo pages
+ self.page_count += 1
+ # global_wait[0] is read as a (page threshold, seconds) pair; the wait below is
+ # scaled by the share of the threshold already used, capped at the full wait
+ if self.page_count > 2 and (self.page_count +
+ page_num) > self.global_wait[0][0]:
+ wait_seconds = int(
+ self.global_wait[0][1] *
+ min(1, self.page_count / self.global_wait[0][0]))
+ logger.info(u'即将进入全局等待时间,%d秒后程序继续执行' % wait_seconds)
+ for i in tqdm(range(wait_seconds)):
+ sleep(1)
+ self.page_count = 0
+ # rotate the global wait settings so the next entry is used next time
+ self.global_wait.append(self.global_wait.pop(0))
+ page1 = 0
+ random_pages = random.randint(*self.random_wait_pages)
+ for page in tqdm(range(1, page_num + 1), desc='Progress'):
+ weibos, self.weibo_id_list, to_continue = PageParser(
+ self.cookie,
+ self.user_config, page, self.filter).get_one_page(
+ self.weibo_id_list) # all weibos on page number `page`
+ logger.info(
+ u'%s已获取%s(%s)的第%d页微博%s',
+ '-' * 30,
+ self.user.nickname,
+ self.user.id,
+ page,
+ '-' * 30,
+ )
+ self.page_count += 1
+ if weibos:
+ yield weibos
+ if not to_continue:
+ break
+
+ # Random waits lower the risk of being rate limited. Crawling too fast tends
+ # to get restricted by the site (the restriction lifts by itself after a
+ # while); random pauses imitate a human. Per the original note the default is
+ # a 6-10 second pause every 1-5 pages; raise the sleep time if still limited.
+ if (page - page1) % random_pages == 0 and page < page_num:
+ sleep(random.randint(*self.random_wait_seconds))
+ page1 = page
+ random_pages = random.randint(*self.random_wait_pages)
+
+ if self.page_count >= self.global_wait[0][0]:
+ logger.info(u'即将进入全局等待时间,%d秒后程序继续执行' %
+ self.global_wait[0][1])
+ for i in tqdm(range(self.global_wait[0][1])):
+ sleep(1)
+ self.page_count = 0
+ self.global_wait.append(self.global_wait.pop(0))
+
+ # update this user's since_date in user_id_list.txt
+ if self.user_config_file_path or FLAGS.u:
+ config_util.update_user_config_file(
+ self.user_config_file_path,
+ self.user_config['user_uri'],
+ self.user.nickname,
+ self.new_since_date,
+ )
+ except Exception as e:
+ logger.exception(e)
+
+    def _get_filepath(self, type):
+        """Return where results of kind ``type`` are stored, creating the folder.
+
+        For 'img' and 'video' the returned value is a directory; for any
+        other ``type`` it is a file path ``<user id>.<type>`` inside the
+        user's folder. Errors are logged and ``None`` is returned implicitly.
+        """
+        try:
+            if self.result_dir_name:
+                folder_name = self.user.id
+            else:
+                folder_name = self.user.nickname
+            if FLAGS.output_dir is None:
+                base_dir = os.getcwd() + os.sep + 'weibo'
+            else:
+                base_dir = FLAGS.output_dir
+            target_dir = base_dir + os.sep + folder_name
+            is_media = type in ('img', 'video')
+            if is_media:
+                target_dir = target_dir + os.sep + type
+            if not os.path.isdir(target_dir):
+                os.makedirs(target_dir)
+            if is_media:
+                return target_dir
+            return target_dir + os.sep + self.user.id + '.' + type
+        except Exception as e:
+            logger.exception(e)
+
+ def initialize_info(self, user_config):
+ """Reset per-user crawl state and build the writers and downloaders.
+
+ One writer is created for each recognised entry of ``self.write_mode``
+ and one downloader for each enabled download option; the writer and
+ downloader classes are imported only when they are needed.
+ """
+ self.got_num = 0
+ self.user_config = user_config
+ self.weibo_id_list = []
+ # "now" is replaced by the current time, formatted to the minute
+ if self.end_date == 'now':
+ self.new_since_date = datetime.now().strftime('%Y-%m-%d %H:%M')
+ else:
+ self.new_since_date = self.end_date
+ self.writers = []
+ if 'csv' in self.write_mode:
+ from .writer import CsvWriter
+
+ self.writers.append(
+ CsvWriter(self._get_filepath('csv'), self.filter))
+ if 'txt' in self.write_mode:
+ from .writer import TxtWriter
+
+ self.writers.append(
+ TxtWriter(self._get_filepath('txt'), self.filter))
+ if 'json' in self.write_mode:
+ from .writer import JsonWriter
+
+ self.writers.append(JsonWriter(self._get_filepath('json')))
+ if 'mysql' in self.write_mode:
+ from .writer import MySqlWriter
+
+ self.writers.append(MySqlWriter(self.mysql_config))
+ if 'mongo' in self.write_mode:
+ from .writer import MongoWriter
+
+ self.writers.append(MongoWriter(self.mongo_config))
+ if 'sqlite' in self.write_mode:
+ from .writer import SqliteWriter
+
+ self.writers.append(SqliteWriter(self.sqlite_config))
+
+ if 'kafka' in self.write_mode:
+ from .writer import KafkaWriter
+
+ self.writers.append(KafkaWriter(self.kafka_config))
+
+ if 'post' in self.write_mode:
+ from .writer import PostWriter
+
+ self.writers.append(PostWriter(self.post_config))
+
+ self.downloaders = []
+ if self.pic_download == 1:
+ from .downloader import (OriginPictureDownloader,
+ RetweetPictureDownloader)
+
+ self.downloaders.append(
+ OriginPictureDownloader(self._get_filepath('img'),
+ self.file_download_timeout))
+ # NOTE(review): RetweetPictureDownloader is imported only under the
+ # `pic_download == 1` check above - confirm this condition cannot hold without it
+ if self.pic_download and not self.filter:
+ self.downloaders.append(
+ RetweetPictureDownloader(self._get_filepath('img'),
+ self.file_download_timeout))
+ if self.video_download == 1:
+ from .downloader import VideoDownloader
+
+ self.downloaders.append(
+ VideoDownloader(self._get_filepath('video'),
+ self.file_download_timeout))
+
+ def get_one_user(self, user_config):
+ """Crawl one user: profile, optional avatar album, then the weibos.
+
+ ``user_config`` must provide the 'user_uri' key. Every batch yielded by
+ ``get_weibo_info`` is written out and counted. Any exception is logged
+ instead of being propagated.
+ """
+ try:
+ self.get_user_info(user_config['user_uri'])
+ logger.info(self.user)
+ logger.info('*' * 100)
+
+ self.initialize_info(user_config)
+ self.write_user(self.user)
+ logger.info('*' * 100)
+
+ # Download the pictures in the user's avatar album.
+ if self.pic_download:
+ self.download_user_avatar(user_config['user_uri'])
+
+ for weibos in self.get_weibo_info():
+ self.write_weibo(weibos)
+ self.got_num += len(weibos)
+ # the message names original weibos when the filter is on
+ if not self.filter:
+ logger.info(u'共爬取' + str(self.got_num) + u'条微博')
+ else:
+ logger.info(u'共爬取' + str(self.got_num) + u'条原创微博')
+ logger.info(u'信息抓取完毕')
+ logger.info('*' * 100)
+ except Exception as e:
+ logger.exception(e)
+
+    def start(self):
+        """Crawl every configured user, pausing at random intervals.
+
+        Returns early (after logging a hint) when no user is configured. Any
+        exception is logged instead of being propagated.
+        """
+        try:
+            if not self.user_config_list:
+                logger.info(
+                    u'没有配置有效的user_id,请通过config.json或user_id_list.txt配置user_id')
+                return
+            last_pause_at = random.randint(*self.random_wait_pages)
+            pause_every = random.randint(*self.random_wait_pages)
+            for index, user_config in enumerate(self.user_config_list):
+                if (index - last_pause_at) % pause_every == 0:
+                    sleep(random.randint(*self.random_wait_seconds))
+                    last_pause_at = index
+                    pause_every = random.randint(*self.random_wait_pages)
+                self.get_one_user(user_config)
+        except Exception as e:
+            logger.exception(e)
+
+
+def _get_config():
+    """Load config.json and return its content as a dict.
+
+    The file named by the --config_path flag is used when given, otherwise
+    ``config.json`` in the working directory. If that default file does not
+    exist, the bundled ``config_sample.json`` is copied there, a hint is
+    logged and the program exits so the user can fill it in.
+
+    Exits the program (``sys.exit``) when the file is not valid JSON.
+    """
+    src = os.path.split(
+        os.path.realpath(__file__))[0] + os.sep + 'config_sample.json'
+    config_path = os.getcwd() + os.sep + 'config.json'
+    if FLAGS.config_path:
+        config_path = FLAGS.config_path
+    elif not os.path.isfile(config_path):
+        shutil.copy(src, config_path)
+        logger.info(u'请先配置当前目录(%s)下的config.json文件,'
+                    u'如果想了解config.json参数的具体意义及配置方法,请访问\n'
+                    u'https://github.com/dataabc/weiboSpider#2程序设置' %
+                    os.getcwd())
+        sys.exit()
+    try:
+        # Read as UTF-8 regardless of the platform default encoding;
+        # "utf-8-sig" also accepts a file saved with a byte-order mark.
+        # A decoding failure is a ValueError and is reported below.
+        with open(config_path, encoding='utf-8-sig') as f:
+            return json.load(f)
+    except ValueError:
+        logger.error(u'config.json 格式不正确,请访问 '
+                     u'https://github.com/dataabc/weiboSpider#2程序设置')
+        sys.exit()
+
+
+def main(_):
+    """Entry point for ``app.run``: load the config and run the spider.
+
+    The positional argument supplied by ``app.run`` is ignored. Any
+    exception is logged instead of being propagated.
+    """
+    try:
+        config = _get_config()
+        config_util.validate_config(config)
+        # Set up the proxy before any request is made
+        proxy_url = config.get('proxy')
+        if proxy_url:
+            from .parser.util import set_proxies
+            set_proxies(proxy_url)
+        spider = Spider(config)
+        spider.start()  # crawl the weibo information
+    except Exception as e:
+        logger.exception(e)
+
+
+if __name__ == '__main__':
+ app.run(main)
diff --git a/weibo_spider/user.py b/weibo_spider/user.py
new file mode 100644
index 00000000..dc135799
--- /dev/null
+++ b/weibo_spider/user.py
@@ -0,0 +1,29 @@
+class User:
+    """Profile information of one weibo user."""
+
+    def __init__(self):
+        """Create a blank profile: text fields empty, counters zero."""
+        # Attribute order is kept because writers serialise ``__dict__``.
+        for text_field in ('id', 'nickname', 'gender', 'location',
+                           'birthday', 'description', 'verified_reason',
+                           'talent', 'education', 'work'):
+            setattr(self, text_field, '')
+        for counter in ('weibo_num', 'following', 'followers'):
+            setattr(self, counter, 0)
+
+    def __str__(self):
+        """Return a printable summary of the user."""
+        lines = [
+            u'用户昵称: %s\n' % self.nickname,
+            u'用户id: %s\n' % self.id,
+            u'微博数: %d\n' % self.weibo_num,
+            u'关注数: %d\n' % self.following,
+            u'粉丝数: %d\n' % self.followers,
+        ]
+        return ''.join(lines)
diff --git a/weibo_spider/user_id_list.txt b/weibo_spider/user_id_list.txt
new file mode 100644
index 00000000..ead74227
--- /dev/null
+++ b/weibo_spider/user_id_list.txt
@@ -0,0 +1,3 @@
+1669879400 Dear-迪丽热巴 2020-01-13 19:18
+1223178222 胡歌 2020-01-13 19:28
+1729370543 郭碧婷 2020-01-13 19:33
\ No newline at end of file
diff --git a/weibo_spider/weibo.py b/weibo_spider/weibo.py
new file mode 100644
index 00000000..54cec7ff
--- /dev/null
+++ b/weibo_spider/weibo.py
@@ -0,0 +1,32 @@
+class Weibo:
+    """One crawled weibo and its statistics."""
+
+    def __init__(self):
+        """Create a blank weibo record."""
+        # Chained assignments bind left to right, which keeps the attribute
+        # order that writers rely on when serialising ``__dict__``.
+        self.id = self.user_id = ''
+        self.content = self.article_url = ''
+        self.original_pictures = []
+        self.retweet_pictures = self.original = None
+        self.video_url = ''
+        self.publish_place = self.publish_time = self.publish_tool = ''
+        self.up_num = self.retweet_num = self.comment_num = 0
+
+    def __str__(self):
+        """Return a printable description of the weibo."""
+        lines = [
+            self.content + '\n',
+            u'微博发布位置:%s\n' % self.publish_place,
+            u'发布时间:%s\n' % self.publish_time,
+            u'发布工具:%s\n' % self.publish_tool,
+            u'点赞数:%d\n' % self.up_num,
+            u'转发数:%d\n' % self.retweet_num,
+            u'评论数:%d\n' % self.comment_num,
+            u'url:https://weibo.cn/comment/%s\n' % self.id,
+        ]
+        return ''.join(lines)
diff --git a/weibo_spider/writer/__init__.py b/weibo_spider/writer/__init__.py
new file mode 100644
index 00000000..f6b24bd6
--- /dev/null
+++ b/weibo_spider/writer/__init__.py
@@ -0,0 +1,10 @@
+from .csv_writer import CsvWriter
+from .json_writer import JsonWriter
+from .mongo_writer import MongoWriter
+from .mysql_writer import MySqlWriter
+from .txt_writer import TxtWriter
+from .sqlite_writer import SqliteWriter
+from .kafka_writer import KafkaWriter
+from .post_writer import PostWriter
+
+# __all__ must list names as strings; class objects here make
+# ``from weibo_spider.writer import *`` raise TypeError.
+__all__ = [
+    'CsvWriter', 'TxtWriter', 'JsonWriter', 'MongoWriter', 'MySqlWriter',
+    'SqliteWriter', 'KafkaWriter', 'PostWriter',
+]
diff --git a/weibo_spider/writer/csv_writer.py b/weibo_spider/writer/csv_writer.py
new file mode 100644
index 00000000..193803da
--- /dev/null
+++ b/weibo_spider/writer/csv_writer.py
@@ -0,0 +1,46 @@
+import csv
+import logging
+
+from .writer import Writer
+
+logger = logging.getLogger('spider.csv_writer')
+
+
+class CsvWriter(Writer):
+    """Append crawled weibos to a CSV file (UTF-8 with BOM)."""
+
+    def __init__(self, file_path, filter):
+        """Choose the columns and make sure the file starts with a title row.
+
+        Args:
+            file_path: Path of the CSV file; it is opened in append mode.
+            filter: When falsy, the retweet-picture and is-original columns
+                are included as well.
+        """
+        import os
+
+        self.file_path = file_path
+
+        # (column title, Weibo attribute name) pairs in output order.
+        self.result_headers = [('微博id', 'id'), ('微博正文', 'content'),
+                               ('头条文章url', 'article_url'),
+                               ('原始图片url', 'original_pictures'),
+                               ('微博视频url', 'video_url'),
+                               ('发布位置', 'publish_place'),
+                               ('发布时间', 'publish_time'),
+                               ('发布工具', 'publish_tool'), ('点赞数', 'up_num'),
+                               ('转发数', 'retweet_num'), ('评论数', 'comment_num')]
+        if not filter:
+            self.result_headers.insert(4, ('被转发微博原始图片url', 'retweet_pictures'))
+            self.result_headers.insert(5, ('是否为原创微博', 'original'))
+        try:
+            # Only a new or empty file needs the title row; appending it on
+            # every construction would repeat it between data rows.
+            needs_header = (not os.path.isfile(self.file_path)
+                            or os.path.getsize(self.file_path) == 0)
+            if needs_header:
+                with open(self.file_path, 'a', encoding='utf-8-sig',
+                          newline='') as f:
+                    csv.writer(f).writerow(
+                        [kv[0] for kv in self.result_headers])
+        except Exception as e:
+            logger.exception(e)
+
+    def write_user(self, user):
+        """Remember the user whose weibos are being written."""
+        self.user = user
+
+    def write_weibo(self, weibos):
+        """Append one row per weibo; errors are logged, not raised."""
+        try:
+            result_data = [[w.__dict__[kv[1]] for kv in self.result_headers]
+                           for w in weibos]
+            with open(self.file_path, 'a', encoding='utf-8-sig',
+                      newline='') as f:
+                writer = csv.writer(f)
+                writer.writerows(result_data)
+            logger.info(u'%d条微博写入csv文件完毕,保存路径:%s', len(weibos), self.file_path)
+        except Exception as e:
+            logger.exception(e)
diff --git a/weibo_spider/writer/json_writer.py b/weibo_spider/writer/json_writer.py
new file mode 100644
index 00000000..bca61c2d
--- /dev/null
+++ b/weibo_spider/writer/json_writer.py
@@ -0,0 +1,52 @@
+import codecs
+import json
+import logging
+import os
+
+from .writer import Writer
+
+logger = logging.getLogger('spider.json_writer')
+
+
+class JsonWriter(Writer):
+    """Keep a user and that user's weibos in one JSON file."""
+
+    def __init__(self, file_path):
+        """Remember the path of the JSON result file."""
+        self.file_path = file_path
+
+    def write_user(self, user):
+        """Remember the user whose weibos are being written."""
+        self.user = user
+
+    def _update_json_data(self, data, weibo_info):
+        """Merge ``weibo_info`` into ``data`` and return ``data``.
+
+        ``data['user']`` is refreshed. If the last new weibo is not stored
+        yet, the whole batch is appended as-is; otherwise every new weibo
+        replaces the stored entry with the same id or is appended when no
+        such entry exists. An empty batch leaves the stored weibos unchanged.
+
+        Args:
+            data: Dict loaded from the result file (may be empty).
+            weibo_info: List of weibo dicts, each with an 'id' key.
+        """
+        data['user'] = self.user.__dict__
+        if not data.get('weibo'):
+            data['weibo'] = weibo_info
+            return data
+        if not weibo_info:
+            # Nothing to merge; indexing the last element would fail.
+            return data
+        stored = data['weibo']
+        # First position of each stored id, so lookups are O(1) instead of
+        # a scan of the stored list for every new weibo.
+        position = {}
+        for index, old in enumerate(stored):
+            position.setdefault(old['id'], index)
+        if weibo_info[-1]['id'] not in position:
+            stored.extend(weibo_info)
+            return data
+        for new in weibo_info:
+            index = position.get(new['id'])
+            if index is None:
+                position[new['id']] = len(stored)
+                stored.append(new)
+            else:
+                stored[index] = new
+        return data
+
+    def write_weibo(self, weibos):
+        """Merge ``weibos`` into the JSON file, creating it when missing."""
+        data = {}
+        if os.path.isfile(self.file_path):
+            with codecs.open(self.file_path, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+        data = self._update_json_data(data, [w.__dict__ for w in weibos])
+        with codecs.open(self.file_path, 'w', encoding='utf-8') as f:
+            f.write(json.dumps(data, indent=4, ensure_ascii=False))
+        logger.info(u'%d条微博写入json文件完毕,保存路径:%s', len(weibos), self.file_path)
diff --git a/weibo_spider/writer/kafka_writer.py b/weibo_spider/writer/kafka_writer.py
new file mode 100644
index 00000000..247fd3a2
--- /dev/null
+++ b/weibo_spider/writer/kafka_writer.py
@@ -0,0 +1,41 @@
+import json
+import logging
+import sys
+
+from .writer import Writer
+
+logger = logging.getLogger('spider.kafka_writer')
+
+
+class KafkaWriter(Writer):
+    """Publish crawled users and weibos to Kafka topics as JSON."""
+
+    def __init__(self, kafka_config):
+        """Create the producer from ``kafka_config``.
+
+        Reads the 'bootstrap-server' (comma-separated), 'weibo_topics' and
+        'user_topics' entries. Exits the program when the kafka package
+        cannot be imported.
+        """
+        try:
+            from kafka import KafkaProducer
+        except ImportError:
+            logger.warning(
+                u'系统中可能没有安装kafka库,请先运行 pip install kafka-python ,再运行程序')
+            sys.exit()
+
+        self.kafka_config = kafka_config
+        self.producer = KafkaProducer(
+            bootstrap_servers=str(kafka_config['bootstrap-server']).split(','),
+            value_serializer=lambda m: json.dumps(m, ensure_ascii=False
+                                                  ).encode('UTF-8'))
+        self.weibo_topics = list(kafka_config['weibo_topics'])
+        self.user_topics = list(kafka_config['user_topics'])
+        # logging formats with %-style placeholders; a '{}' message would be
+        # emitted literally without the configuration.
+        logger.info('%s', kafka_config)
+
+    def write_weibo(self, weibo):
+        """Tag every weibo with the current user id and send it to each topic."""
+        for w in weibo:
+            w.user_id = self.user.id
+            for topic in self.weibo_topics:
+                self.producer.send(topic, value=w.__dict__)
+
+    def write_user(self, user):
+        """Remember ``user`` and send its fields to each user topic."""
+        self.user = user
+
+        for topic in self.user_topics:
+            self.producer.send(topic, value=user.__dict__)
+
+    def __del__(self):
+        """Close the producer if construction got far enough to create it."""
+        producer = getattr(self, 'producer', None)
+        if producer is not None:
+            producer.close()
diff --git a/weibo_spider/writer/mongo_writer.py b/weibo_spider/writer/mongo_writer.py
new file mode 100644
index 00000000..c6c08c5b
--- /dev/null
+++ b/weibo_spider/writer/mongo_writer.py
@@ -0,0 +1,62 @@
+import copy
+import logging
+import sys
+
+from .writer import Writer
+
+logger = logging.getLogger('spider.mongo_writer')
+
+
+class MongoWriter(Writer):
+    """Store crawled users and weibos in the 'weibo' MongoDB database."""
+
+    def __init__(self, mongo_config):
+        """Read 'connection_string' and the optional admin credentials."""
+        self.mongo_config = mongo_config
+        self.connection_string = mongo_config['connection_string']
+        self.dba_name = mongo_config.get('dba_name', None)
+        self.dba_password = mongo_config.get('dba_password', None)
+
+    def _info_to_mongodb(self, collection, info_list):
+        """Insert or update every dict of ``info_list`` by its 'id' field.
+
+        Args:
+            collection: Name of the collection inside the 'weibo' database.
+            info_list: Dicts to store; each must contain an 'id' key.
+
+        Exits the program when pymongo is missing or no server is reachable.
+        """
+        try:
+            import pymongo
+        except ImportError:
+            logger.warning(
+                u'系统中可能没有安装pymongo库,请先运行 pip install pymongo ,再运行程序')
+            sys.exit()
+        client = None
+        try:
+            from pymongo import MongoClient
+
+            if self.dba_name or self.dba_password:
+                # Credentials are given to the client itself because
+                # Database.authenticate() no longer exists in PyMongo 4.
+                client = MongoClient(self.connection_string,
+                                     username=self.dba_name,
+                                     password=self.dba_password,
+                                     authSource='admin',
+                                     authMechanism='SCRAM-SHA-1')
+            else:
+                client = MongoClient(self.connection_string)
+
+            target = client['weibo'][collection]
+            for info in info_list:
+                # One upsert replaces the find-then-insert/update pair and
+                # does not add an '_id' key to the caller's dict.
+                target.update_one({'id': info['id']}, {'$set': info},
+                                  upsert=True)
+        except pymongo.errors.ServerSelectionTimeoutError:
+            logger.warning(
+                u'系统中可能没有安装或启动MongoDB数据库,请先根据系统环境安装或启动MongoDB,再运行程序')
+            sys.exit()
+        finally:
+            # A client is opened per call, so it is released per call too.
+            if client is not None:
+                client.close()
+
+    def write_weibo(self, weibos):
+        """Tag the weibos with the current user id and store them."""
+        weibo_list = []
+        for w in weibos:
+            w.user_id = self.user.id
+            weibo_list.append(w.__dict__)
+        self._info_to_mongodb('weibo', weibo_list)
+        logger.info(u'%d条微博写入MongoDB数据库完毕', len(weibos))
+
+    def write_user(self, user):
+        """Remember ``user`` and store its fields in the 'user' collection."""
+        self.user = user
+        user_list = [user.__dict__]
+        self._info_to_mongodb('user', user_list)
+        logger.info(u'%s信息写入MongoDB数据库完毕', user.nickname)
diff --git a/weibo_spider/writer/mysql_writer.py b/weibo_spider/writer/mysql_writer.py
new file mode 100644
index 00000000..7118c083
--- /dev/null
+++ b/weibo_spider/writer/mysql_writer.py
@@ -0,0 +1,142 @@
+import copy
+import logging
+import sys
+
+from .writer import Writer
+
+logger = logging.getLogger('spider.mysql_writer')
+
+
+class MySqlWriter(Writer):
+    def __init__(self, mysql_config):
+        """Create the 'weibo' database if needed and select it for later use.
+
+        Args:
+            mysql_config: Keyword arguments for ``pymysql.connect``.
+        """
+        # Work on a private copy: the 'db' entry added below must not leak
+        # into the dict owned by the caller.
+        self.mysql_config = dict(mysql_config)
+
+        # Create the 'weibo' database
+        create_database = ('CREATE DATABASE IF NOT EXISTS weibo DEFAULT\n'
+                           ' CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci')
+        self._mysql_create_database(create_database)
+        self.mysql_config['db'] = 'weibo'
+
+    def _mysql_create(self, connection, sql):
+        """Execute one statement, then close ``connection`` in every case."""
+        try:
+            with connection.cursor() as cur:
+                cur.execute(sql)
+        finally:
+            connection.close()
+
+    def _mysql_create_database(self, sql):
+        """Run the database-creating statement on a fresh connection.
+
+        Exits the program when pymysql is missing or the connection raises
+        ``pymysql.OperationalError``.
+        """
+        try:
+            import pymysql
+        except ImportError:
+            logger.warning(
+                u'系统中可能没有安装pymysql库,请先运行 pip install pymysql ,再运行程序')
+            sys.exit()
+        try:
+            self._mysql_create(pymysql.connect(**self.mysql_config), sql)
+        except pymysql.OperationalError:
+            logger.warning(u'系统中可能没有安装或正确配置MySQL数据库,请先根据系统环境安装或配置MySQL,再运行程序')
+            sys.exit()
+
+    def _mysql_create_table(self, sql):
+        """Run a table-creating statement on a fresh connection."""
+        import pymysql
+        self._mysql_create(pymysql.connect(**self.mysql_config), sql)
+
+    def _mysql_insert(self, table, data_list):
+        """Insert the rows of ``data_list`` into ``table``, updating duplicates.
+
+        Values that are ``None`` are left out so the column defaults apply.
+        Rows are grouped by the columns they actually carry and each group
+        is written with its own statement, so rows with different unset
+        fields cannot be bound to the wrong columns. Errors are logged and
+        the transaction is rolled back.
+
+        Args:
+            table: Name of the target table.
+            data_list: List of dicts mapping column names to values.
+        """
+        import pymysql
+        if not data_list:
+            return
+        # column tuple -> list of value tuples sharing exactly those columns
+        batches = {}
+        for data in data_list:
+            row = {k: v for k, v in data.items() if v is not None}
+            batches.setdefault(tuple(row), []).append(tuple(row.values()))
+
+        connection = pymysql.connect(**self.mysql_config)
+        try:
+            with connection.cursor() as cursor:
+                for columns, rows in batches.items():
+                    sql = ('INSERT INTO {table}({keys}) VALUES ({values}) ON\n'
+                           ' DUPLICATE KEY UPDATE').format(
+                               table=table,
+                               keys=', '.join(columns),
+                               values=', '.join(['%s'] * len(columns)))
+                    sql += ','.join(
+                        ' {key} = values({key})'.format(key=key)
+                        for key in columns)
+                    cursor.executemany(sql, rows)
+            connection.commit()
+        except Exception as e:
+            connection.rollback()
+            logger.exception(e)
+        finally:
+            connection.close()
+
+ def write_weibo(self, weibos):
+ """Write the crawled weibos to the MySQL 'weibo' table.
+
+ The table is created when missing. The weibos are deep-copied before
+ the current user's id is set on them, so the caller's objects are not
+ modified. Any exception is logged instead of being propagated.
+ """
+ # Create the 'weibo' table
+ try:
+ create_table = """
+ CREATE TABLE IF NOT EXISTS weibo (
+ id varchar(10) NOT NULL,
+ user_id varchar(12),
+ content varchar(5000),
+ article_url varchar(200),
+ original_pictures varchar(3000),
+ retweet_pictures varchar(3000),
+ original BOOLEAN NOT NULL DEFAULT 1,
+ video_url varchar(300),
+ publish_place varchar(100),
+ publish_time DATETIME NOT NULL,
+ publish_tool varchar(30),
+ up_num INT NOT NULL,
+ retweet_num INT NOT NULL,
+ comment_num INT NOT NULL,
+ PRIMARY KEY (id)
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4"""
+ self._mysql_create_table(create_table)
+ # Insert or update the weibo rows in the 'weibo' table
+ weibo_list = []
+ info_list = copy.deepcopy(weibos)
+ for weibo in info_list:
+ weibo.user_id = self.user.id
+ weibo_list.append(weibo.__dict__)
+ self._mysql_insert('weibo', weibo_list)
+ logger.info(u'%d条微博写入MySQL数据库完毕', len(weibos))
+ except Exception as e:
+ logger.exception(e)
+
+ def write_user(self, user):
+ """Write the crawled user information to the MySQL 'user' table.
+
+ ``user`` is also remembered on the writer for later ``write_weibo``
+ calls. The table is created when missing. Any exception is logged
+ instead of being propagated.
+ """
+ try:
+ self.user = user
+
+ # Create the 'user' table
+ create_table = """
+ CREATE TABLE IF NOT EXISTS user (
+ id varchar(20) NOT NULL,
+ nickname varchar(30),
+ gender varchar(10),
+ location varchar(200),
+ birthday varchar(40),
+ description varchar(400),
+ verified_reason varchar(140),
+ talent varchar(200),
+ education varchar(200),
+ work varchar(200),
+ weibo_num INT,
+ following INT,
+ followers INT,
+ PRIMARY KEY (id)
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4"""
+ self._mysql_create_table(create_table)
+ self._mysql_insert('user', [user.__dict__])
+ logger.info(u'%s信息写入MySQL数据库完毕', user.nickname)
+ except Exception as e:
+ logger.exception(e)
diff --git a/weibo_spider/writer/post_writer.py b/weibo_spider/writer/post_writer.py
new file mode 100644
index 00000000..af536623
--- /dev/null
+++ b/weibo_spider/writer/post_writer.py
@@ -0,0 +1,59 @@
+import codecs
+import json
+import logging
+import os
+import requests
+
+from .writer import Writer
+from time import sleep
+from requests.exceptions import RequestException
+
+logger = logging.getLogger('spider.post_writer')
+
+class PostWriter(Writer):
+    """Send crawled weibos to an HTTP API as a JSON POST request."""
+
+    def __init__(self, post_config):
+        """Read 'api_url' and the optional 'api_token' / 'dba_password'."""
+        self.post_config = post_config
+        self.api_url = post_config['api_url']
+        self.api_token = post_config.get('api_token', None)
+        self.dba_password = post_config.get('dba_password', None)
+
+    def write_user(self, user):
+        """Remember the user whose weibos are being sent."""
+        self.user = user
+
+    def _update_json_data(self, data, weibo_info):
+        """Shape the payload like the JSON output: a user plus a weibo list."""
+        data['user'] = self.user.__dict__
+        if data.get('weibo'):
+            data['weibo'] += weibo_info
+        else:
+            data['weibo'] = weibo_info
+        return data
+
+    def send_post_request_with_token(self, url, data, token, max_retries, backoff_factor):
+        """POST ``data`` as JSON, retrying with a growing delay.
+
+        Args:
+            url: Address of the API endpoint.
+            data: JSON-serialisable payload.
+            token: Value of the 'api-token' request header.
+            max_retries: Number of retries after the first attempt.
+            backoff_factor: Seconds multiplied by the attempt number to get
+                the delay before the next retry.
+
+        Returns:
+            The decoded JSON response, or ``None`` when every attempt failed
+            (the failure is logged).
+        """
+        headers = {
+            'Content-Type': 'application/json',
+            'api-token': f'{token}',
+        }
+        for attempt in range(max_retries + 1):
+            try:
+                response = requests.post(url, json=data, headers=headers)
+                if response.status_code != requests.codes.ok:
+                    raise RequestException(f"Unexpected response status: {response.status_code}")
+                return response.json()
+            # ValueError covers a body that is not JSON on requests versions
+            # whose decode error is not a RequestException.
+            except (RequestException, ValueError) as e:
+                if attempt < max_retries:
+                    # grow the delay step by step to avoid hammering the API
+                    sleep(backoff_factor * (attempt + 1))
+                else:
+                    logger.error(f"在尝试{max_retries}次发出POST连接后,请求失败:{e}")
+        return None
+
+    def write_weibo(self, weibos):
+        """POST the crawled weibos to the API; an empty batch is skipped."""
+        if not weibos:
+            logger.info(u'没有获取到微博,略过API POST')
+            return
+        data = self._update_json_data({}, [w.__dict__ for w in weibos])
+        result = self.send_post_request_with_token(self.api_url, data, self.api_token, 3, 2)
+        # Report success only when the request went through; a failure has
+        # already been logged by the sender.
+        if result is not None:
+            logger.info(u'%d条微博通过POST发送到 %s', len(weibos), self.api_url)
diff --git a/weibo_spider/writer/sqlite_writer.py b/weibo_spider/writer/sqlite_writer.py
new file mode 100644
index 00000000..cea0ccd9
--- /dev/null
+++ b/weibo_spider/writer/sqlite_writer.py
@@ -0,0 +1,128 @@
+import copy
+import logging
+import sys
+
+from .writer import Writer
+
+logger = logging.getLogger('spider.sqlite_writer')
+
+
+class SqliteWriter(Writer):
+    """Writer that stores the crawled user and weibo data in a SQLite database."""
+
+    def __init__(self, sqlite_config):
+        """Keep ``sqlite_config``, the argument later given to ``sqlite3.connect``."""
+        self.sqlite_config = sqlite_config
+
+    def _sqlite_create(self, connection, sql):
+        """Execute the schema statement ``sql`` and close ``connection`` afterwards."""
+        try:
+            cursor = connection.cursor()
+            cursor.execute(sql)
+        finally:
+            connection.close()
+
+    def _sqlite_create_table(self, sql):
+        """Open a connection to the database and run the table-creating ``sql``."""
+        import sqlite3
+        connection = sqlite3.connect(self.sqlite_config)
+        self._sqlite_create(connection, sql)
+
+    def _sqlite_insert(self, table, data_list):
+        """Insert or replace the rows of ``data_list`` in ``table``.
+
+        ``None`` values are dropped from every row so unset fields stay out
+        of the statement. Rows may therefore set different columns, so they
+        are grouped by column set and each group gets its own statement;
+        a single statement built from the first row would bind the other
+        rows to the wrong columns or fail on the parameter count. All
+        groups are written in one transaction, and errors are logged after
+        a rollback instead of being raised.
+        """
+        import sqlite3
+        if not data_list:
+            return
+
+        # Map each column tuple to the value tuples of the rows that use it.
+        batches = {}
+        for data in data_list:
+            row = {k: v for k, v in data.items() if v is not None}
+            batches.setdefault(tuple(row), []).append(tuple(row.values()))
+
+        connection = sqlite3.connect(self.sqlite_config)
+        try:
+            cursor = connection.cursor()
+            for columns, rows in batches.items():
+                sql = """INSERT OR REPLACE INTO {table}({keys}) VALUES ({values})""".format(
+                    table=table,
+                    keys=', '.join(columns),
+                    values=', '.join(['?'] * len(columns)))
+                cursor.executemany(sql, rows)
+            connection.commit()
+        except Exception as e:
+            connection.rollback()
+            logger.exception(e)
+        finally:
+            connection.close()
+
+    def write_weibo(self, weibos):
+        """Write the crawled weibos to the ``weibo`` table.
+
+        ``write_user`` must have been called first, because every row is
+        tagged with ``self.user.id``.
+        """
+        # Create the 'weibo' table if it does not exist yet.
+        create_table = """
+                CREATE TABLE IF NOT EXISTS weibo (
+                id varchar(10) NOT NULL,
+                user_id varchar(12),
+                content varchar(2000),
+                article_url varchar(200),
+                original_pictures varchar(3000),
+                retweet_pictures varchar(3000),
+                original BOOLEAN NOT NULL DEFAULT 1,
+                video_url varchar(300),
+                publish_place varchar(100),
+                publish_time DATETIME NOT NULL,
+                publish_tool varchar(30),
+                up_num INT NOT NULL,
+                retweet_num INT NOT NULL,
+                comment_num INT NOT NULL,
+                PRIMARY KEY (id)
+                )"""
+        self._sqlite_create_table(create_table)
+        # Insert or update the rows; work on deep copies so that adding
+        # user_id does not modify the caller's objects.
+        weibo_list = []
+        info_list = copy.deepcopy(weibos)
+        for weibo in info_list:
+            weibo.user_id = self.user.id
+            weibo_list.append(weibo.__dict__)
+        self._sqlite_insert('weibo', weibo_list)
+        logger.info(u'%d条微博写入sqlite数据库完毕', len(weibos))
+
+    def write_user(self, user):
+        """Remember ``user`` and write it to the ``user`` table."""
+        self.user = user
+
+        # Create the 'user' table if it does not exist yet.
+        create_table = """
+                CREATE TABLE IF NOT EXISTS user (
+                id varchar(20) NOT NULL,
+                nickname varchar(30),
+                gender varchar(10),
+                location varchar(200),
+                birthday varchar(40),
+                description varchar(400),
+                verified_reason varchar(140),
+                talent varchar(200),
+                education varchar(200),
+                work varchar(200),
+                weibo_num INT,
+                following INT,
+                followers INT,
+                PRIMARY KEY (id)
+                )"""
+        self._sqlite_create_table(create_table)
+        self._sqlite_insert('user', [user.__dict__])
+        logger.info(u'%s信息写入sqlite数据库完毕', user.nickname)
diff --git a/weibo_spider/writer/txt_writer.py b/weibo_spider/writer/txt_writer.py
new file mode 100644
index 00000000..6eddd862
--- /dev/null
+++ b/weibo_spider/writer/txt_writer.py
@@ -0,0 +1,79 @@
+import logging
+import sys
+
+from .writer import Writer
+
+logger = logging.getLogger('spider.txt_writer')
+
+
+class TxtWriter(Writer):
+    """Writer that appends the crawled user and weibo data to a text file."""
+
+    def __init__(self, file_path, filter):
+        """Store the output path and set up the section headers and field labels.
+
+        ``filter`` only decides which weibo section header is used.
+        """
+        self.file_path = file_path
+
+        self.user_header = u'用户信息'
+        self.user_desc = [('nickname', '用户昵称'), ('id', '用户id'),
+                          ('weibo_num', '微博数'), ('following', '关注数'),
+                          ('followers', '粉丝数')]
+
+        if filter:
+            self.weibo_header = u'原创微博内容'
+        else:
+            self.weibo_header = u'微博内容'
+        self.weibo_desc = [('publish_place', '微博位置'), ('publish_time', '发布时间'),
+                           ('up_num', '点赞数'), ('retweet_num', '转发数'),
+                           ('comment_num', '评论数'), ('publish_tool', '发布工具')]
+
+    @staticmethod
+    def _output_encoding():
+        """Return the encoding used for the text written to the file.
+
+        This is the encoding of ``sys.stdout``, or ``utf-8`` when stdout is
+        missing or does not report one.
+        """
+        return getattr(sys.stdout, 'encoding', None) or 'utf-8'
+
+    def write_user(self, user):
+        """Remember ``user`` and append its summary block to the txt file."""
+        self.user = user
+        user_info = '\n'.join(
+            [v + ':' + str(self.user.__dict__[k]) for k, v in self.user_desc])
+
+        with open(self.file_path, 'ab') as f:
+            f.write((self.user_header + ':\n' + user_info + '\n\n').encode(
+                self._output_encoding()))
+        logger.info(u'%s信息写入txt文件完毕,保存路径:%s', self.user.nickname,
+                    self.file_path)
+
+    def write_weibo(self, weibo):
+        """Append the given weibos to the txt file.
+
+        The section header is written once, with the first batch that is
+        written successfully. Errors are logged instead of being raised.
+        """
+        weibo_header = ''
+        if self.weibo_header:
+            weibo_header = self.weibo_header + ':\n'
+
+        try:
+            temp_result = []
+            for w in weibo:
+                temp_result.append(w.__dict__['content'] + '\n' + '\n'.join(
+                    [v + ':' + str(w.__dict__[k])
+                     for k, v in self.weibo_desc]))
+            result = '\n\n'.join(temp_result) + '\n\n'
+
+            with open(self.file_path, 'ab') as f:
+                f.write((weibo_header + result).encode(
+                    self._output_encoding()))
+            # Clear the header only after it reached the file, so a failed
+            # write does not lose it for the next batch.
+            self.weibo_header = ''
+            logger.info(u'%d条微博写入txt文件完毕,保存路径:%s', len(weibo), self.file_path)
+        except Exception as e:
+            logger.exception(e)
diff --git a/weibo_spider/writer/writer.py b/weibo_spider/writer/writer.py
new file mode 100644
index 00000000..45366510
--- /dev/null
+++ b/weibo_spider/writer/writer.py
@@ -0,0 +1,16 @@
+from abc import ABC, abstractmethod
+
+
+class Writer(ABC):
+    """Common interface of the output writers."""
+
+    def __init__(self):
+        """Initialise what the writer needs, such as result paths, headers or a database."""
+
+    @abstractmethod
+    def write_weibo(self, weibo):
+        """Write the given weibo information to the matching text file or database."""
+
+    @abstractmethod
+    def write_user(self, user):
+        """Write the given user information to the matching text file or database."""