关于python:Python爬取B站历史观看记录并用Bokeh做数据可视化

4次阅读

共计 4559 个字符,预计需要花费 12 分钟才能阅读完成。

import base64
from math import pi, sin, cos

from bokeh.colors.named import (aquamarine, bisque, crimson, darkolivegreen, firebrick, gainsboro, hotpink, indigo, khaki,
                            mediumvioletred, olivedrab, orchid, paleturquoise, skyblue, seagreen, tomato, orchid, firebrick, lightgray)
from bokeh.document import Document
from bokeh.io import show
from bokeh.models import ColumnDataSource, HoverTool, Plot, Range1d
from bokeh.models.glyphs import Wedge, AnnularWedge, ImageURL, Text
from bokeh.resources import INLINE
from bokeh.sampledata.browsers import browsers_nov_2013, icons
from bokeh.util.browser import view

# Data: aggregate the watch-history frame `df` per (category, author).
# NOTE(review): `df` is not defined in this excerpt; it is assumed to hold
# the columns category, author_name, duration and author_face.
df_ = df.groupby(by=['category', 'author_name']).agg(
    count=('author_name', 'count'),
    duration_avg=('duration', 'mean'),
    duration_sum=('duration', 'sum'),
    author_face=('author_face', 'first'),
)
df_ = df_.reset_index()
# Scale durations by 1/60 (presumably seconds -> minutes; confirm the unit).
df_['duration_avg'] = df_['duration_avg'] / 60
df_['duration_sum'] = df_['duration_sum'] / 60
# Share of each (category, author) row in the total duration, in percent.
df_['duration_percentage'] = df_['duration_sum'] / df_['duration_sum'].sum() * 100

# Both axes span [-2, 2].
xdr = Range1d(start=-2, end=2)
ydr = Range1d(start=-2, end=2)

# Canvas
plot = Plot(x_range=xdr, y_range=ydr, plot_width=800, plot_height=800)

# Custom attributes
# NOTE(review): this title does not describe the watch-history data plotted
# below; confirm the intended text.
plot.title.text = "Web browser market share (November 2013)"

# Hide the toolbar.
plot.toolbar_location = None

# Palette: one named Bokeh color per category.
# Keys must match the `category` values exactly, so they carry no padding
# spaces. "Other" is the bucket that small categories are merged into.
colors = {"动物圈": aquamarine, "动画": bisque, "国创": crimson, "娱乐": darkolivegreen, "影视": firebrick, "时尚": gainsboro,
          "汽车": hotpink, "游戏": indigo, "生存": khaki, "常识": skyblue, "科技": seagreen, "纪录片": tomato, "美食": orchid,
          "舞蹈": paleturquoise, "静止": firebrick, "音乐": olivedrab, "鬼畜": mediumvioletred, "无": lightgray, "Other": lightgray}

# Data preprocessing: collapse the per-author rows into per-category totals.
# The aggregation is passed by name ("sum") rather than as the builtin `sum`.
aggregated = df_.groupby("category").agg("sum")
# Categories below 1% are merged into a single "Other" row.
selected = aggregated[aggregated.duration_percentage >= 1].copy()
selected.loc["Other"] = aggregated[aggregated.duration_percentage < 1].sum()
categorys = selected.index.tolist()


def radians(x):
    """Convert a percentage of the full circle (0-100) to an angle in radians."""
    return 2 * pi * (x / 100)


# The running sum of angles gives each wedge's end angle; every wedge starts
# where the previous one ended, the first one at 0.
angles = selected.duration_percentage.map(radians).cumsum()
end_angles = angles.tolist()
start_angles = [0] + end_angles[:-1]
name_first = selected.index.tolist()
# Convert each wedge's angular width back to a percentage label for the tooltip.
percentages = ['{:.2f}%'.format((y - x) / (2 * pi) * 100) for x, y in zip(start_angles, end_angles)]
categorys_source = ColumnDataSource(dict(
    start=start_angles,
    end=end_angles,
    colors=[colors[category] for category in categorys],
    name_first=name_first,
    percentages=percentages,
))

# Drawing: inner pie, one wedge per category, reading its angles and fill
# color from the columns of `categorys_source`.
glyph = Wedge(x=0, y=0, radius=1, line_color="white",
              line_width=2, start_angle="start", end_angle="end", fill_color="colors")

# Keep the renderer so the hover tool can be attached to this pie only.
glyph_renderer = plot.add_glyph(categorys_source, glyph)

# Add a hover tool showing the category name and its share; `@column`
# references are resolved against the renderer's data source.
tooltips = "@name_first: @percentages"
plot.add_tools(HoverTool(tooltips=tooltips, renderers=[glyph_renderer]))
def polar_to_cartesian(r, start_angles, end_angles):
    """Return the cartesian coordinates of each wedge's mid-angle point.

    Args:
        r: Radius at which the points are placed.
        start_angles: Iterable of wedge start angles, in radians.
        end_angles: Iterable of wedge end angles, in radians, paired
            position by position with ``start_angles``.

    Returns:
        An iterator yielding two tuples: all x coordinates, then all y
        coordinates. It yields nothing when the inputs are empty, so
        ``x, y = polar_to_cartesian(...)`` requires at least one wedge.
    """
    points = []
    for start, end in zip(start_angles, end_angles):
        # Place the point on the bisector of the wedge.
        mid = (end + start) / 2
        points.append((r * cos(mid), r * sin(mid)))
    # Transpose [(x0, y0), (x1, y1), ...] into (x0, x1, ...), (y0, y1, ...).
    return zip(*points)

first = True  # NOTE(review): not read anywhere in the visible code.

# Outer ring: split each category wedge into one annular wedge per author.
for category, start_angle, end_angle in zip(categorys, start_angles, end_angles):
    # Authors of this category whose share is at least 0.1%.
    versions = df_[(df_.category == category) & (df_.duration_percentage >= 0.1)]
    angles = versions.duration_percentage.map(radians).cumsum() + start_angle
    # The trailing segment up to `end_angle` covers whatever the selected
    # authors do not account for.
    end = angles.tolist() + [end_angle]
    start = [start_angle] + end[:-1]
    angle = end[-1] - start[0]
    angle = angle if angle else 1  # avoid dividing by zero for an empty wedge
    name_second = versions['author_name'].tolist() if not versions.empty else ['orthers']
    # Name the trailing remainder segment when it has no author of its own.
    if len(start) > len(name_second):
        name_second += ['orthers']
    # Relative width of each segment inside the category wedge.
    percentages = [(y - x) / angle for x, y in zip(start, end)]
    max_percentage = max(percentages) if max(percentages) else 1
    base_color = colors[category]
    # The widest segment keeps the base color; narrower ones are lightened
    # by up to 0.2.
    fill = [base_color.lighten((1 - i / max_percentage) * 0.2).to_hex() for i in percentages]
    # Tooltip labels: each segment's share of the full circle.
    percentages = ['{:.2f}%'.format((y - x) / (2 * pi) * 100) for x, y in zip(start, end)]
    # Only authors with a share of at least 1% get a text label; the extra
    # empty string belongs to the trailing remainder segment.
    text = [name if share >= 1 else "" for name, share in zip(versions.author_name, versions.duration_percentage)] + [""]
    x, y = polar_to_cartesian(1.25, start, end)
    source = ColumnDataSource(dict(start=start, end=end, fill=fill,
                                   name_second=name_second, percentages=percentages))
    glyph = AnnularWedge(x=0, y=0,
                         inner_radius=1, outer_radius=1.5, start_angle="start", end_angle="end",
                         line_color="white", line_width=2, fill_color="fill")
    glyph_renderer = plot.add_glyph(source, glyph)
    # Add a hover tool for this ring segment group.
    tooltips = "@name_second: @percentages"
    plot.add_tools(HoverTool(tooltips=tooltips, renderers=[glyph_renderer]))
    # Rotate each label along its segment's bisector; labels on the left
    # half are turned by pi so they are not drawn upside down.
    text_angle = [(s + e) / 2 for s, e in zip(start, end)]
    text_angle = [a + pi if pi / 2 < a < 3 * pi / 2 else a for a in text_angle]
    text_source = ColumnDataSource(dict(text=text, x=x, y=y, angle=text_angle))
    glyph = Text(x="x", y="y", text="text", angle="angle",
                 text_align="center", text_baseline="middle", text_font_size="8pt")
    plot.add_glyph(text_source, glyph)

# Percentage labels for the inner pie, placed at radius 0.7 on each wedge's
# bisector.
text = ["%.02f%%" % value for value in selected.duration_percentage]
x, y = polar_to_cartesian(0.7, start_angles, end_angles)
text_source = ColumnDataSource(dict(text=text, x=x, y=y))
# The glyph must name columns that exist in `text_source` ("x" and "y").
glyph = Text(x="x", y="y", text="text", text_align="center", text_baseline="middle")
plot.add_glyph(text_source, glyph)

# Display
show(plot)

正文完
 0