import pandas as pd
import graph_tool as gt
import graph_tool.draw
import graph_tool.community
import itertools
import collections
import matplotlib
import math
# Load the tab-separated product/vendor listing table.
df = pd.read_csv('/home/aahu/Downloads/evolution/evolution/products_vendors.tsv', sep='\t')

# Discard meta-categories so they never become graph nodes.
meta_cats = ['Other', 'Drugs', 'Guides & Tutorials', 'Fraud Related',
             'Services', 'Digital Goods', 'Electronics', 'Custom Listings']
# .copy() detaches the filtered rows from the frame returned by read_csv, so
# the 'id' column assignment below targets a real frame instead of a slice
# (avoids chained-assignment / SettingWithCopyWarning behaviour).
df = df[~df['category'].isin(meta_cats)].copy()

# Build graph-tool ids: every distinct category gets a consecutive integer,
# in order of first appearance. node_lbs maps category -> id and
# rev_node_lbs maps id -> category.
node_lbs = {}
rev_node_lbs = {}
for idx, category in enumerate(df['category'].drop_duplicates()):
    node_lbs[category] = idx
    rev_node_lbs[idx] = category
df['id'] = df['category'].map(lambda x: node_lbs[x])
# Collect one (low_id, high_id) pair for every two distinct categories that
# the same vendor sells in; a pair appears once per vendor that shares it.
edge_list = []
dfg = df.groupby('vendor')
for _vendor_name, vendor_rows in dfg:
    category_ids = vendor_rows['id'].drop_duplicates()
    edge_list.extend(
        tuple(sorted(pair))
        for pair in itertools.combinations(category_ids, 2)
    )

# Filter edges by number of shared vendors: c counts how many vendors
# contributed each pair, and only pairs reaching the threshold are kept.
MIN_SHARED_VENDORS = 1
c = collections.Counter(edge_list)
edge_list = [pair for pair, shared in c.items() if shared >= MIN_SHARED_VENDORS]
# Build an undirected graph whose vertex indices are the category ids used
# in edge_list.
g = gt.Graph(directed=False)
g.add_edge_list(edge_list)

# Attach the category name to each vertex as an internal 'label' property.
g.vertex_properties['label'] = g.new_vertex_property('string')
vertex_labels = g.vertex_properties['label']
for v in g.vertices():
    vertex_labels[v] = rev_node_lbs[int(v)]

print('g vert/edges: ', g.num_vertices(), g.num_edges())
# Add edge weight and colour properties.
# 'weight' is the number of vendors shared by the two categories of an edge;
# 'color' is a fixed blue whose alpha grows with that weight.
g.edge_properties['weight'] = g.new_edge_property('double')
g.edge_properties['color'] = g.new_edge_property('vector<double>')

# Largest pair count, computed once instead of once per edge. Falls back to
# 1 when the counter is empty so hoisting it cannot raise on an empty graph.
max_shared = max(c.values()) if c else 1

for e in g.edges():
    # The counter is keyed by sorted tuples of integer category ids, so the
    # vertex descriptors are converted to ints explicitly; a failed lookup
    # would otherwise silently yield a weight of 0.
    w = c[tuple(sorted((int(e.source()), int(e.target()))))]
    g.edge_properties['weight'][e] = w
    # The small offset keeps the weakest edges faintly visible; the result
    # is capped at 1.0 so the alpha channel stays within [0, 1].
    alpha = min(1.0, float(w) / max_shared + .025)
    g.edge_properties['color'][e] = [103/255.0, 134/255.0, 239/255.0, alpha]
# Fit a nested block model to the weighted graph (no degree correction).
state = gt.community.minimize_nested_blockmodel_dl(
    g,
    deg_corr=False,
    eweight=g.ep['weight'],
)
bstack = state.get_bstack()

# Lay the hierarchy tree out radially, rooted at the tree's last vertex.
t = gt.community.get_hierarchy_tree(bstack)[0]
tree_root = t.vertex(t.num_vertices() - 1)
tpos = gt.draw.radial_tree_layout(t, tree_root, weighted=True)

# Edge control points are derived from the tree layout; pos re-homes the
# tree positions onto g so they can be indexed by g's vertices.
cts = gt.draw.get_hierarchy_control_points(g, t, tpos, beta=.87)
pos = g.own_property(tpos)

# Block membership at the lowest level of the hierarchy (used for colouring).
b = bstack[0].vp["b"]
# Text rotation: orient each label along the direction from the origin to
# its vertex position.
text_rot = g.new_vertex_property('double')
g.vertex_properties['text_rot'] = text_rot
for v in g.vertices():
    # atan2 yields the same angle (modulo a full turn) as the two-branch
    # atan(y / x) form, but stays defined when x == 0, where the division
    # would raise ZeroDivisionError.
    text_rot[v] = math.atan2(pos[v][1], pos[v][0])
print('saving to disk...')

# The output file name records the shared-vendor threshold that was used.
output_path = '/home/aahu/Desktop/evo_nvends={0}.png'.format(MIN_SHARED_VENDORS)
canvas_side = 1024 * 2

gt.draw.graph_draw(
    g,
    pos=pos,
    # Vertices: fill and outline are both driven by block membership b.
    vertex_fill_color=b,
    vertex_color=b,
    vertex_size=20,
    vertex_anchor=0,
    vcmap=matplotlib.cm.Spectral,
    # Vertex labels: category names, rotated by the precomputed angles.
    vertex_text=g.vertex_properties['label'],
    vertex_text_rotation=g.vertex_properties['text_rot'],
    vertex_text_position=1,
    vertex_font_size=20,
    vertex_font_family='mono',
    # Edges: routed through the hierarchy control points, with per-edge
    # colour vectors from the 'color' property.
    edge_control_points=cts,
    edge_color=g.edge_properties['color'],
    ecmap=matplotlib.cm.Spectral,
    # Canvas and destination.
    bg_color=[0, 0, 0, 1],
    output_size=[canvas_side, canvas_side],
    output=output_path,
)