选择微信好友数据并对其进行数据分析,比如可视化好友男女比例分布、可视化省份来源、可视化签名的情感强度值等等。Tips:GitHub 地址:https://github.com/Mlscoding/Pyhon-wechatDataAnalyse 需要准备微信好友数据文件,格式为 csv。itchat 连接微信不可用,原因是网页版微信 API 被封了,itchat 等基于 Web API 的方案都失效了。可以使用 WechatPCAPI 实现,它是直接调用 PC 版微信客户端的,当然有一点不足之处,就是需要使用指定版本的 Python 和指定版本的 PC 版微信客户端。我并没有使用这种方法,不过多赘述。需要安装的库在代码中能看到。如果 seaborn 库安装不上,可以尝试镜像源:pip install seaborn -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com 复制上述指令,打开 cmd 运行即可。
# 1. Analyze the gender of WeChat friends and visualize it
import csv
import os
from collections import Counter
from typing import Dict, List, Optional, Sequence

# Provinces charted by plot_city_info when the caller does not pass its own.
DEFAULT_PROVINCES = ['山东', '黑龙江', '澳门', '吉林', '江苏', '贵州',
                     '上海', '湖北', '北京', '浙江', '河北', '辽宁']

# Bar colours for the province chart; reused cyclically if there are more
# provinces than colours.
BAR_COLORS = ['lightsteelblue', 'cornflowerblue', 'cadetblue',
              'mediumturquoise', 'paleturquoise', 'powderblue', 'skyblue',
              'lightskyblue', 'lightblue', 'lavender', 'thistle', 'plum']

# Font file the province chart prefers when it is present on the machine.
SIMHEI_FONT_PATH = r"C:\Windows\Fonts\simhei.ttf"


def _read_column(filename: str, index: int,
                 encoding: Optional[str] = None) -> List[str]:
    """Return column *index* from every non-blank row of a CSV file.

    Blank lines are skipped and rows with too few columns contribute an
    empty string, so one ragged line does not abort the read with IndexError.
    """
    values = []
    # newline='' is what the csv module asks for, so that quoted fields
    # containing line breaks are parsed correctly.
    with open(filename, 'r', newline='', encoding=encoding) as fr:
        for row in csv.reader(fr):
            if not row:  # csv.reader yields [] for a blank line
                continue
            values.append(row[index] if index < len(row) else '')
    return values


def _use_chinese_font(plt) -> None:
    """Configure matplotlib so the Chinese labels render correctly."""
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False


def getSex(filename: str, encoding: Optional[str] = None) -> List[str]:
    """Read the sex column (fifth column) of the friends CSV file.

    Args:
        filename: Path of the CSV file.
        encoding: Text encoding of the file; None keeps the platform default.

    Returns:
        One string per non-blank row, first row included.
    """
    return _read_column(filename, 4, encoding)


def count_sexes(lstsex: Sequence[str]) -> Dict[str, int]:
    """Tally friends by sex code ('1' = man, '2' = women, else unknown).

    The first entry is skipped because it is the account owner, not a friend.
    All three keys are always present, even when a count is zero.
    """
    counts = {'man': 0, 'women': 0, 'unknown': 0}
    key_by_code = {'1': 'man', '2': 'women'}
    for code in lstsex[1:]:
        counts[key_by_code.get(code, 'unknown')] += 1
    return counts


def VisualSexpyechart(lstsex: Sequence[str]) -> None:
    """Show a pie chart of the friends' sex ratio.

    Args:
        lstsex: Sex codes as returned by getSex (first entry is skipped).

    Raises:
        ValueError: If there is no friend entry to plot.
    """
    counts = count_sexes(lstsex)
    total = sum(counts.values())
    if total == 0:
        raise ValueError("no friend entries to plot")

    # Imported here so the CSV/counting helpers work without matplotlib.
    from matplotlib import pyplot as plt
    _use_chinese_font(plt)

    plt.figure(figsize=(6, 9))
    labels = [u'男性好友', u'女性好友', u'性别不明']
    sizes = [counts['man'], counts['women'], counts['unknown']]
    colors = ['cornflowerblue', 'lightcoral', 'silver']
    explode = (0.05, 0, 0)
    patches, l_text, p_text = plt.pie(
        sizes, explode=explode, labels=labels, colors=colors,
        labeldistance=1.1, autopct='%3.1f%%', shadow=False,
        startangle=90, pctdistance=0.6)
    for t in l_text:
        t.set_size(30)
    for t in p_text:
        t.set_size(20)
    plt.axis('equal')
    plt.legend()
    plt.show()


# 2. Analyze the friends' provinces of origin and visualize them
def get_city_info(filename: str, encoding: Optional[str] = None) -> List[str]:
    """Read the province column (fourth column) of the friends CSV file.

    Args:
        filename: Path of the CSV file.
        encoding: Text encoding of the file; None keeps the platform default.

    Returns:
        One string per non-blank row, first row included.
    """
    return _read_column(filename, 3, encoding)


def count_provinces(city_info: Sequence[str],
                    provinces: Optional[Sequence[str]] = None) -> Dict[str, int]:
    """Count how many entries of *city_info* fall in each listed province.

    Args:
        city_info: Province name of every friend.
        provinces: Provinces to report; defaults to DEFAULT_PROVINCES.

    Returns:
        Mapping province -> count, in the order of *provinces*.
    """
    if provinces is None:
        provinces = DEFAULT_PROVINCES
    tally = Counter(city_info)  # one pass instead of one list.count per province
    return {province: tally[province] for province in provinces}


def plot_city_info(city_info: Sequence[str],
                   provinces: Optional[Sequence[str]] = None) -> None:
    """Show a bar chart of the number of friends per province.

    Args:
        city_info: Province name of every friend.
        provinces: Provinces to chart; defaults to DEFAULT_PROVINCES.
    """
    # Imported here so the CSV/counting helpers work without matplotlib.
    import matplotlib.pyplot as plt
    from matplotlib.font_manager import FontProperties

    province_counts = count_provinces(city_info, provinces)
    names = list(province_counts)
    y = list(province_counts.values())

    # Prefer the original font file; elsewhere look the font up by family
    # name so the chart still renders instead of failing on a missing path.
    if os.path.isfile(SIMHEI_FONT_PATH):
        font = FontProperties(fname=SIMHEI_FONT_PATH, size=12)
    else:
        font = FontProperties(family='SimHei', size=12)

    plt.style.use('ggplot')

    _, ax = plt.subplots(figsize=(10, 6))
    x = list(range(len(names)))
    color = [BAR_COLORS[i % len(BAR_COLORS)] for i in x]
    ax.bar(x, y, color=color, alpha=0.8)

    # Value label above each bar.
    for i, v in enumerate(y):
        ax.text(i, v + 0.1, str(v), ha='center', fontproperties=font)

    ax.set_xticks(x)
    ax.set_xticklabels(names, fontproperties=font)

    # Leave 20% headroom; fall back to 1 so an all-zero chart keeps a
    # non-degenerate y range.
    y_top = max(y, default=0) * 1.2
    ax.set_ylim(0, y_top if y_top > 0 else 1)
    ax.set_ylabel('好友数量', fontproperties=font)

    ax.set_title('好友所在省份散布状况', fontproperties=font)

    ax.grid(axis='y', linestyle='--', alpha=0.6)

    # Hide the top and right frame lines.
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    plt.show()


# 3. Analyze the sentiment polarity of the friends' signatures and visualize it
def summarize_polarity(polarity_scores: Sequence[float]) -> Dict[str, float]:
    """Summarize polarity scores.

    Returns:
        Dict with the number of 'positive' (> 0), 'negative' (< 0) and
        'neutral' (== 0) scores and their arithmetic 'mean'.

    Raises:
        ValueError: If *polarity_scores* is empty.
    """
    if not polarity_scores:
        raise ValueError("no polarity scores to summarize")
    positive = sum(1 for score in polarity_scores if score > 0)
    negative = sum(1 for score in polarity_scores if score < 0)
    return {
        'positive': positive,
        'negative': negative,
        'neutral': len(polarity_scores) - positive - negative,
        'mean': sum(polarity_scores) / len(polarity_scores),
    }


def plot_signature_sentiment(filename: str = 'english.txt',
                             encoding: Optional[str] = None) -> Dict[str, float]:
    """Plot the polarity distribution of the signatures stored in a file.

    The file holds one signature per line; each line is scored with TextBlob.

    Args:
        filename: Path of the signature text file.
        encoding: Text encoding of the file; None keeps the platform default.

    Returns:
        The summary produced by summarize_polarity.

    Raises:
        ValueError: If the file contains no lines.
    """
    # Imported here so the CSV/counting helpers work without these packages.
    from textblob import TextBlob
    import seaborn as sns
    import matplotlib.pyplot as plt

    sns.set(style='darkgrid', palette='deep')
    # Set after sns.set(), which replaces the font configuration.
    _use_chinese_font(plt)

    with open(filename, 'r', encoding=encoding) as file:
        # splitlines() does not create a phantom empty signature after the
        # final newline, which split('\n') did.
        signatures = file.read().splitlines()

    polarity_scores = [TextBlob(signature).sentiment.polarity
                       for signature in signatures]
    summary = summarize_polarity(polarity_scores)
    mean = summary['mean']

    # Histogram with a KDE curve.
    _, ax = plt.subplots(figsize=(10, 5))
    sns.histplot(polarity_scores, bins=20, color='#4A4A4A', kde=True, ax=ax)

    # Mean line and its label.
    ax.axvline(x=mean, color='r', linestyle='--', linewidth=2, label='Mean')
    ax.text(mean + 0.01, max(ax.get_ylim()) / 2, f"Mean: {mean:.2f}",
            fontsize=12, color='r')
    plt.xlabel('Polarity score')
    plt.ylabel('Frequency')
    plt.title('Sentiment Polarity Distribution')
    plt.legend()
    plt.show()
    return summary


def main() -> None:
    """Run the three analyses in the order they are presented."""
    VisualSexpyechart(getSex("friends_information.csv"))
    plot_city_info(get_city_info('friends_information.csv'))
    plot_signature_sentiment('english.txt')


if __name__ == '__main__':
    main()

# 4. Results showcase: friend gender analysis
...