原文链接:http://tecdat.cn/?p=5124
包含术语“生物信息学”的推文示例
第1步: 加载所需的软件包
# load packages
# twitteR: searchTwitter() and the status objects returned by it
# igraph:  graph construction, layout and plotting
# stringr: str_extract_all() for pulling retweet markers out of tweet text
library(twitteR)
library(igraph)
library(stringr)
第2步: 收集关于“生物信息学”的推文
# tweets in english containing "bioinformatics"
# lang = "en" makes the query match the stated intent (English tweets only);
# n is the maximum number of tweets requested.
dm_tweets = searchTwitter("bioinformatics", n = 500, lang = "en")

# get text
# vapply() guarantees a character vector even when no tweets are returned.
dm_txt = vapply(dm_tweets, function(x) x$getText(), character(1))
第3步:识别转发
# regular expression to find retweets: "RT" or "via" followed by one or more
# @mentions, matched case-insensitively
retweet_regex = "(RT|via)((?:\\b\\W*@\\w+)+)"

# show the text of every tweet that matches
# (search the text vector dm_txt, not the list of status objects dm_tweets)
grep(retweet_regex, dm_txt, ignore.case = TRUE, value = TRUE)

# which tweets are retweets (integer positions into dm_txt / dm_tweets)
rt_patterns = grep(retweet_regex, dm_txt, ignore.case = TRUE)

# show retweets (these are the ones we want to focus on)
dm_txt[rt_patterns]
第4步:收集转发者和原发布者
我们将使用这些结果构建边列表(edge list),用于创建图
# create lists to store user names, one slot per retweet
# (empty preallocated lists, so no placeholder values survive unlist())
who_retweet = vector("list", length(rt_patterns))
who_post = vector("list", length(rt_patterns))

# for loop
# seq_along() iterates zero times when rt_patterns is empty,
# whereas 1:length(rt_patterns) would iterate over c(1, 0)
for (i in seq_along(rt_patterns)) {
  # get tweet with retweet entity
  twit = dm_tweets[[rt_patterns[i]]]

  # get retweet source
  poster = str_extract_all(twit$getText(), "(RT|via)((?:\\b\\W*@\\w+)+)")

  # remove ':'
  poster = gsub(":", "", unlist(poster))

  # name of retweeted user
  who_post[[i]] = gsub("(RT @|via @)", "", poster, ignore.case = TRUE)

  # name of retweeting user, repeated once per extracted source so that
  # who_retweet and who_post stay the same length
  who_retweet[[i]] = rep(twit$getScreenName(), length(poster))
}

# unlist: flatten both lists into parallel vectors
who_post = unlist(who_post)
who_retweet = unlist(who_retweet)
第5步: 从边列表创建图
# fail early with a clear message when no retweet edges were collected;
# otherwise the graph constructor below fails with an unrelated error
if (length(who_retweet) == 0 || length(who_post) == 0) {
  stop("No retweet edges found: cannot build the retweet graph.",
       call. = FALSE)
}

# two column matrix of edges: column 1 = retweeting user, column 2 = poster
retweeter_poster = cbind(who_retweet, who_post)

# generate graph
rt_graph = graph.edgelist(retweeter_poster)

# get vertex names (used later as plot labels)
ver_labs = get.vertex.attribute(rt_graph, "name", index = V(rt_graph))
第6步: 让我们绘制图
# choose some layout
# computed once and stored so later plots can reuse the same vertex positions
glay = layout.fruchterman.reingold(rt_graph)

# plot
# dark background and narrow margins for the whole device
par(bg = "gray15", mar = c(1, 1, 1, 1))
plot(
  rt_graph,
  layout = glay,
  vertex.color = "gray25",
  vertex.size = 10,
  vertex.label = ver_labs,
  vertex.label.family = "sans",
  vertex.shape = "none",
  # near-white, half-transparent labels
  vertex.label.color = hsv(h = 0, s = 0, v = .95, alpha = 0.5),
  vertex.label.cex = 0.85,
  edge.arrow.size = 0.8,
  edge.arrow.width = 0.5,
  edge.width = 3,
  # half-transparent edges
  edge.color = hsv(h = .95, s = 1, v = .7, alpha = 0.5)
)

# add title
title("\nTweets with 'bioinformatics': Who retweets whom",
      cex.main = 1, col.main = "gray95")
第7步:让我们试着让它看起来更有生物信息学的风格
# another plot
# same graph and layout (rt_graph, glay, ver_labs come from the earlier steps),
# restyled with a single hue (h = .35) and a monospaced font
par(bg = "gray15", mar = c(1, 1, 1, 1))
plot(
  rt_graph,
  layout = glay,
  # vertex fill and frame share the same, almost transparent colour
  vertex.color = hsv(h = .35, s = 1, v = .7, alpha = 0.1),
  vertex.frame.color = hsv(h = .35, s = 1, v = .7, alpha = 0.1),
  vertex.size = 5,
  vertex.label = ver_labs,
  vertex.label.family = "mono",
  vertex.label.color = hsv(h = 0, s = 0, v = .95, alpha = 0.5),
  vertex.label.cex = 0.85,
  edge.arrow.size = 0.8,
  edge.arrow.width = 0.5,
  edge.width = 3,
  edge.color = hsv(h = .35, s = 1, v = .7, alpha = 0.4)
)

# add title
title("\nTweets with 'bioinformatics': Who retweets whom",
      cex.main = 1, col.main = "gray95", family = "mono")