In [1]:
import sys; sys.path.append(_dh[0].split("knowknow")[0])
from knowknow import *
In [2]:
cntall = get_cnt("sociology-wos-all.doc", ['fy','fj'])
Loaded keys: dict_keys(['fy', 'fj'])
Available keys: ['c', 'c.c', 'c.fj', 'c.fy', 'fj', 'fj.fy', 'fy', 'ty', 'ty.ty']
In [17]:
len(cntall['fj'].values())
Out[17]:
312
In [58]:
print(sorted(set(chain.from_iterable(
    x.split('.')
    for x in ['c', 'c.c', 'c.fj', 'c.fy', 'c.fy.j', 'fa', 'fa.c', 'fa.fj', 'fa.fj.fy', 'fa.fy', 'fj', 'fj.fy', 'fj.ta', 'fj.ty', 'fy', 'fy.ta', 'fy.ty', 'ta', 'ty', 'ty.ty']
))))
['c', 'fa', 'fj', 'fy', 'j', 'ta', 'ty']
In [4]:
cits
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-5acc75617131> in <module>
----> 1 cits

NameError: name 'cits' is not defined
In [59]:
list(ysum.values())[0]
Out[59]:
{'total': 4,
 'maxcount': 2,
 'first': 2008,
 'maxcounty': 2011,
 'death_4': None,
 'death_8': None,
 'maxpropy': 2011,
 'death_9': None,
 'death_max': None,
 'death_7': None,
 'death_last': None,
 'totalprop': 0.0017142717933904491,
 'last': 2011,
 'death_1': None,
 'maxprop': 0.0007550018875047187,
 'death_3': None,
 'death_5': None,
 'death_2': None,
 'death_6': None,
 'name': 'mellor, d'}
In [54]:
print(" ".join("[%s]"%x for x in ['c', 'c.c', 'c.fj', 'c.fy', 'c.fy.j', 'fa', 'fa.c', 'fa.fj', 'fa.fj.fy', 'fa.fy', 'fj', 'fj.fy', 'fj.ta', 'fj.ty', 'fy', 'fy.ta', 'fy.ty', 'ta', 'ty', 'ty.ty']))
[c] [c.c] [c.fj] [c.fy] [c.fy.j] [fa] [fa.c] [fa.fj] [fa.fj.fy] [fa.fy] [fj] [fj.fy] [fj.ta] [fj.ty] [fy] [fy.ta] [fy.ty] [ta] [ty] [ty.ty]
In [3]:
cnti = get_cnt("sociology-wos.ind", ['fy','c.fy','c','c.fj','fj.fy','ta','fa', 'fa.fy','fj'])
cntd = get_cnt("sociology-wos.doc", ['fy','c.fy','c','c.fj','fj.fy','ta','fa', 'fa.fy','fj'])
Loaded keys: dict_keys(['fy', 'c.fy', 'c', 'c.fj', 'fj.fy', 'ta', 'fa', 'fa.fy', 'fj'])
Available keys: ['c', 'c.c', 'c.fj', 'c.fy', 'c.fy.j', 'fa', 'fa.c', 'fa.fj', 'fa.fj.fy', 'fa.fy', 'fj', 'fj.fy', 'fj.ta', 'fj.ty', 'fy', 'fy.ta', 'fy.ty', 'ta', 'ty', 'ty.ty']
Loaded keys: dict_keys(['fy', 'c.fy', 'c', 'c.fj', 'fj.fy', 'ta', 'fa', 'fa.fy', 'fj'])
Available keys: ['a', 'c', 'c.c', 'c.fj', 'c.fy', 'c.fy.j', 'fa', 'fa.c', 'fa.fj', 'fa.fj.fy', 'fa.fy', 'fj', 'fj.fy', 'fj.ta', 'fj.ty', 'fy', 'fy.ta', 'fy.ty', 'ta', 'ty', 'ty.ty']
In [76]:
sum(cntd['c'].values())
Out[76]:
1381075
In [60]:
ysum = load_variable("sociology-wos.c.ysum")
In [61]:
len(ysum)
Out[61]:
106628
In [65]:
len([x for x in ysum.values() if x['pub']<1980 and x['death_0'] is not None])/len([x for x in ysum.values() if x['pub']<1980])
Out[65]:
0.7054974553995862
In [68]:
len([x for x in ysum.values() if 1990<=x['pub']<2000 and x['death_3'] is not None])/len([x for x in ysum.values() if 1990<=x['pub']<2000])
Out[68]:
0.33960621761658033
In [59]:
for typ in ['c','ta','fa']:

    ysum = load_variable("sociology-wos.%s.ysum"%typ)

    cc = Counter([x['death_3'] for x in ysum.values() if x['death_3'] is not None and x['total']>5])
    years = range(1960,2010,1)

    plt.plot(
        years,
        [cc[y] for y in years],
        label=typ
    )
    
plt.legend()
Out[59]:
<matplotlib.legend.Legend at 0x2521150cdc8>
In [17]:
len(set([cross.fa for cross, count in cntd['fa.fy'].items() if 1970 <= cross.fy < 1975 and cross.fa in ysum]))
Out[17]:
511
In [38]:
s = [ len(set(
    cross.fa
    for cross, count in cntd['fa.fy'].items()
    if cntd['fa'][(cross.fa,)] == 1
    and Y <= cross.fy < Y+5
))  / len(set( cross.fa for cross, count in cntd['fa.fy'].items() if Y <= cross.fy < Y+5 ))
     for Y in range(1960,2015,5) ]
In [39]:
s
Out[39]:
[0.25925925925925924,
 0.2676136363636364,
 0.2884486232370719,
 0.29568497995755716,
 0.29310980103168754,
 0.2860098678975012,
 0.3038234891051117,
 0.3172647094357859,
 0.32481407772075,
 0.343436894838764,
 0.40822501120573734]
In [40]:
plt.plot(s)
Out[40]:
[<matplotlib.lines.Line2D at 0x2520f378488>]
In [29]:
37537/61038
Out[29]:
0.614977554965759
In [51]:
 
In [62]:
[x for x in ysum if 'bogue' in x]
Out[62]:
['bogue|skid row am cities',
 'bogue|structure metropolit',
 'bogue|study population',
 'bogue|state ec areas',
 'bogue|comp population urba',
 'bogue|components populatio',
 'bogue|comparative populati',
 'bogue|metropolitan decentr']
In [61]:
ysum['bogue|1959']
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-61-c5bb9cbc0833> in <module>
----> 1 ysum['bogue|1959']

KeyError: 'bogue|1959'

demo authors

In [19]:
ysum = load_variable("sociology-wos.fa.ysum")
In [32]:
list(ysum.values())[0]
Out[32]:
{'total': 4,
 'maxcount': 2,
 'first': 2008,
 'maxcounty': 2011,
 'death_4': None,
 'death_8': None,
 'maxpropy': 2011,
 'death_9': None,
 'death_max': None,
 'death_7': None,
 'death_last': None,
 'totalprop': 0.0017142717933904491,
 'last': 2011,
 'death_1': None,
 'maxprop': 0.0007550018875047187,
 'death_3': None,
 'death_5': None,
 'death_2': None,
 'death_6': None,
 'name': 'mellor, d'}
In [40]:
for x in sorted( ysum.values(), key=lambda x: -(x['death_1'] - x['first']) if x['death_1'] is not None else 0 )[:10]:
    #print(x['total'], x['name'], x['death_1']-x['first'], x['first'], x['death_1'], sum( cntd['fa.fy'][(x['name'], y)] for y in range(2000,2020)))
    print( "%s (%s years, %s publications)" % (
        " ".join(reversed(x['name'].split(","))).title(),
        x['death_1']-x['first'],
        x['total']
    ) )
 H Becker (74 years, 36 publications)
 N Demerath (59 years, 18 publications)
 N Babchuk (52 years, 21 publications)
 I Deutscher (50 years, 10 publications)
 C Coughenour (49 years, 17 publications)
 M Field (49 years, 5 publications)
 A Inkeles (49 years, 14 publications)
 W Firey (49 years, 10 publications)
 P Cutright (48 years, 24 publications)
 A Kerckhoff (48 years, 28 publications)
In [41]:
for x in sorted( ysum.values(), key=lambda x: -(x['death_6'] - x['first']) if x['death_4'] is not None else -(2010-x['first']) )[:10]:
    print(x['name'], x['first'], x['death_6'], sum( cntd['fa.fy'][(x['name'], y)] for y in range(2000,2020)), x['total'])
pearson, r 1926 None 1 3
white, r 1931 None 10 20
bernard, j 1933 None 4 18
smith, t 1936 2008 10 51
hughes, e 1936 None 2 10
davis, k 1937 None 10 25
taylor, c 1938 None 10 18
mills, c 1939 None 12 23
davis, a 1940 None 17 32
schneider, j 1940 None 8 18

demo works

In [3]:
ysum = load_variable("sociology-wos.c.ysum")
In [38]:
sum(cnti['fy'][(y,)] for y in range(1880,1950))
Out[38]:
2904
In [ ]:
cntd['fa'][()]
In [19]:
# what percent are dead1 from before 1980?
100 * len([1 for x in ysum.values() if 2000 > x['first'] >= 1990 and x['death_3'] is not None]) / \
    len([1 for x in ysum.values() if 2000 > x['first'] >= 1990])
Out[19]:
50.71226591260737
In [16]:
# what percent are dead1 from before 1980?
100 * len([1 for x in ysum.values() if x['first'] < 1980 and x['death_last'] is not None]) / \
    len([1 for x in ysum.values() if x['first'] < 1980])
Out[16]:
72.8737934802404
In [ ]:
# what percent are dead1 from before 1980?
100 * len([1 for x in ysum.values() if x['first'] < 1980 and x['death_1'] is not None]) / \
    len([1 for x in ysum.values() if x['first'] < 1980])
In [9]:
# what percent has at least 5 citations
100 * len([ 1 for _,count in cntd['c'].items() if count >= 5 ]) / len(cntd['c'])
Out[9]:
45.5711915794291
In [11]:
mx = max(cntd['c'], key=lambda x:cntd['c'][x])
print(mx, cntd['c'][mx])
c(c='bourdieu|distinctions social') 2178
In [4]:
[x for x in cntd['fa'] if 'blau' in x[0].lower()]
Out[4]:
[fa(fa='blau, j'),
 fa(fa='knoblauch, h'),
 fa(fa='blau, p'),
 fa(fa='blau, f'),
 fa(fa='blau, k'),
 fa(fa='blauner, r'),
 fa(fa='blau, z')]
In [9]:
cntd['c.fy'][("goffman|stigma management sp",2017)]
Out[9]:
47
In [6]:
Counter(dict(cntd['c'].items())).most_common(100)
Out[6]:
[(c(c='bourdieu|distinction a social'), 2178),
 (c(c='glaser|discover grounded th'), 1411),
 (c(c='blau|am occupational stru'), 1272),
 (c(c='berger|social construction'), 1215),
 (c(c='mead|mind self and societ'), 1213),
 (c(c='putnam|bowling alone collap'), 1169),
 (c(c='goffman|stigma management sp'), 1168),
 (c(c='parsons|social system legal'), 1137),
 (c(c='foucault|discipline punish bi'), 1134),
 (c(c='durkheim|elementary formrel'), 1129),
 (c(c='garfinkel|stud ethnomethodol'), 1107),
 (c(c='durkheim|division labor soc'), 1071),
 (c(c='granovetter|1973|am j sociol,v78,p1361'), 1029),
 (c(c='wilson|the truly disadvanta'), 1027),
 (c(c='weber|m weber gesammelte a'), 995),
 (c(c='weber|ec society'), 990),
 (c(c='giddens|modernity self ident'), 987),
 (c(c='bourdieu|outline theory pract'), 926),
 (c(c='coleman|fdn social theory'), 890),
 (c(c='becker|outsiders'), 883),
 (c(c='goffman|asylums essays socia'), 861),
 (c(c='allport|nat prejudice'), 833),
 (c(c='blumer|symb interact'), 817),
 (c(c='goffman|frame analysis'), 810),
 (c(c='giddens|constitution soc out'), 782),
 (c(c='hochschild|managed heart commer'), 745),
 (c(c='strauss|basics qualitative r'), 737),
 (c(c='coleman|1988|am j sociol,v94,ps94'), 729),
 (c(c='becker|treatise family'), 724),
 (c(c='becker|human capital'), 720),
 (c(c='simmel|sociology'), 717),
 (c(c='west|1987|gender soc,v1,p125'), 717),
 (c(c='bourdieu|reproduction'), 714),
 (c(c='massey|am apartheid segreta'), 713),
 (c(c='goffman|interactional ritual'), 713),
 (c(c='mills|sociol imagination'), 706),
 (c(c='hirschi|causes delinquency'), 704),
 (c(c='swidler|1986|am sociol rev,v51,p273'), 694),
 (c(c='braverman|labor monoply capita'), 688),
 (c(c='bourdieu|the logic of practic'), 683),
 (c(c='granovetter|1985|am j sociol,v91,p481'), 654),
 (c(c='homans|social behavior elem'), 644),
 (c(c='parsons|structure social act'), 634),
 (c(c='olsen|logic collective act'), 631),
 (c(c='berger|sacred canopy elemen'), 631),
 (c(c='esping-andersen|1990|3 worlds welfare cap'), 626),
 (c(c='blau|exchange power socia'), 623),
 (c(c='raudenbush|hlm6 hierarchical li'), 619),
 (c(c='dimaggio|1983|am sociol rev,v48,p147'), 618),
 (c(c='weber|theory social ec org'), 617),
 (c(c='willis|learning labour why'), 613),
 (c(c='gordon|assimilation am life'), 612),
 (c(c='beck|risk soc new modernt'), 601),
 (c(c='mcpherson|2001|annu rev sociol,v27,p415'), 593),
 (c(c='blalock|theory minority grou'), 580),
 (c(c='bourdieu|invitation reflectiv'), 559),
 (c(c='freidson|profession med'), 557),
 (c(c='putnam|making democracy wor'), 556),
 (c(c='bellah|habits heart'), 546),
 (c(c='foucault|power knowledge'), 545),
 (c(c='connell|masculinities'), 544),
 (c(c='foucault|1998|hist sexuality,v1'), 540),
 (c(c='durkheim|rules sociological m'), 534),
 (c(c='lareau|unequal childhoods r'), 533),
 (c(c='mcadams|political process de'), 528),
 (c(c='giddens|consequences modern'), 527),
 (c(c='bourdieu|1986|hdb theory res socio,p241'), 516),
 (c(c='blau|inequality heterogen'), 514),
 (c(c='connell|gender short intro'), 512),
 (c(c='gramsci|selection prison not'), 509),
 (c(c='tilly|mobilization revolut'), 509),
 (c(c='polanyi|the great transforma'), 504),
 (c(c='butler|gender trouble'), 501),
 (c(c='bowles|sch capitalist am ed'), 493),
 (c(c='portes|1998|annu rev sociol,v22,p1'), 488),
 (c(c='meyer|1977|am j sociol,v83,p340'), 487),
 (c(c='wallerstein|crises world system'), 485),
 (c(c='charmaz|constructed grounded'), 483),
 (c(c='mccarthy|1977|am j sociol,v82,p1212'), 474),
 (c(c='steensland|2000|soc forces,v79,p291'), 472),
 (c(c='hofstede|culture consequences'), 465),
 (c(c='gilligan|different voice'), 464),
 (c(c='radloff|1977|applied psychological measurement,v1,p385'), 460),
 (c(c='edwards|contested terrain'), 460),
 (c(c='burt|structural holes'), 452),
 (c(c='bollen|structural equation'), 452),
 (c(c='long|regression models ca'), 441),
 (c(c='erikson|constant flux study'), 441),
 (c(c='mills|power elite'), 439),
 (c(c='rogers|diffusion innovation'), 437),
 (c(c='bell|coming post ind soc'), 436),
 (c(c='foucault|m foucault structura'), 436),
 (c(c='festinger|theory cognitive dis'), 436),
 (c(c='snijders|multilevel anal'), 432),
 (c(c='wilson|when work disappears'), 432),
 (c(c='homans|human group'), 432),
 (c(c='myrdal|am dilemma negro pro'), 429),
 (c(c='snow|1986|am sociol rev,v51,p464'), 428),
 (c(c='joreskog|lisrel'), 425),
 (c(c='cohen|delinquent boys'), 418)]
In [41]:
print("num citations per year 1950+", [ cnti['fy'][(y,)] for y in range(1950,2020) ] )
num citations per year 1950+ [336, 513, 531, 539, 534, 555, 599, 596, 608, 751, 1049, 1302, 1618, 1788, 1889, 2122, 3068, 2495, 2111, 4486, 5278, 6020, 6229, 6955, 7394, 8580, 10221, 11622, 13660, 14024, 17294, 15751, 17405, 16907, 19065, 18453, 18903, 21179, 23315, 24740, 24134, 25704, 25770, 26698, 26832, 25176, 26356, 25892, 27329, 26223, 28397, 30243, 31507, 30630, 32199, 31250, 35317, 38471, 40575, 42491, 55494, 59247, 61022, 66011, 66734, 71773, 71027, 67302, 72678, 78048]
In [25]:
Counter(cntd['c'].values()).most_common(10)
Out[25]:
[(2, 34288),
 (3, 24960),
 (4, 18042),
 (1, 17857),
 (5, 13257),
 (6, 9899),
 (7, 7819),
 (8, 6220),
 (9, 4994),
 (10, 4161)]
In [17]:
len([x for x in cnti['c'] if cnti['c'][x] > 0])
Out[17]:
174810
In [19]:
len([x for x in cnti['ta'] if cnti['ta'][x] > 0])
Out[19]:
37370

citations, articles before 1950

In [10]:
cc = defaultdict(int)
cc2 = defaultdict(int)
for (j,y),count in cnti['fj.fy'].items():
    if y < 1950 and count > 0:
        cc[j] += count
        cc2[j] += cntd['fj.fy'][(j,y)]
In [11]:
cc
Out[11]:
defaultdict(int,
            {'american sociological review': 1129,
             'rural sociology': 241,
             'social forces': 765,
             'american journal of sociology': 769})
In [13]:
sum(cc2.values())
Out[13]:
1151

example sequence

In [66]:
name = 'kahl|measurement modernis'
In [70]:
 
Out[70]:
1969
In [71]:
print([cntd['c.fy'][(name,y)] for y in range(ysum[name]['first'], 2019)])
[3, 3, 4, 6, 13, 2, 2, 2, 2, 3, 4, 2, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
In [72]:
a = np.array([cntd['c.fy'][(name,y)] for y in range(ysum[name]['first'], 2019)])
In [74]:
np.max( np.argwhere( a > 0 ) ) + 1 + ysum[name]['first']
Out[74]:
1990
In [79]:
ysum[name]['death_0']
Out[79]:
1990
In [4]:
list(cnt['c'])[:5]
Out[4]:
[c(c='andrews|social indicators we'),
 c(c='andrews|1991|measures personality,v1,p61'),
 c(c='campbell|quality life percept'),
 c(c='costa|revised neo personal'),
 c(c='cummins|2002|universality subject,p7')]
In [16]:
min( y for (c,y), count in cnt['c.fy'].items() if c == name and count > 0 )
Out[16]:
1967
In [10]:
[x for x in cnt['c.fj'].items() if x[0][0] == 'macpherson|1962' and x[0]]
Out[10]:
[(c_fj(c='macpherson|1962', fj='american journal of economics and sociology'),
  3),
 (c_fj(c='macpherson|1962', fj='american journal of sociology'), 5),
 (c_fj(c='macpherson|1962', fj='media culture & society'), 1),
 (c_fj(c='macpherson|1962', fj='sociological quarterly'), 1),
 (c_fj(c='macpherson|1962', fj='journal of law and society'), 1),
 (c_fj(c='macpherson|1962', fj='comparative studies in society and history'),
  6),
 (c_fj(c='macpherson|1962', fj='law & society review'), 2),
 (c_fj(c='macpherson|1962', fj='rural sociology'), 2),
 (c_fj(c='macpherson|1962', fj='social science research'), 2),
 (c_fj(c='macpherson|1962', fj='british journal of sociology'), 7),
 (c_fj(c='macpherson|1962', fj='society'), 1),
 (c_fj(c='macpherson|1962', fj='international sociology'), 1),
 (c_fj(c='macpherson|1962', fj='teaching sociology'), 1),
 (c_fj(c='macpherson|1962', fj='american sociological review'), 6),
 (c_fj(c='macpherson|1962', fj='economy and society'), 1),
 (c_fj(c='macpherson|1962', fj='international journal of intercultural relations'),
  1),
 (c_fj(c='macpherson|1962', fj='politics & society'), 4),
 (c_fj(c='macpherson|1962', fj='sociology-the journal of the british sociological association'),
  3),
 (c_fj(c='macpherson|1962', fj='acta sociologica'), 3),
 (c_fj(c='macpherson|1962', fj='sociologia ruralis'), 2),
 (c_fj(c='macpherson|1962', fj='theory and society'), 4),
 (c_fj(c='macpherson|1962', fj='sociology of education'), 1),
 (c_fj(c='macpherson|1962', fj='social science quarterly'), 2),
 (c_fj(c='macpherson|1962', fj='canadian journal of sociology-cahiers canadiens de sociologie'),
  3),
 (c_fj(c='macpherson|1962', fj='human studies'), 1),
 (c_fj(c='macpherson|1962', fj='social forces'), 1),
 (c_fj(c='macpherson|1962', fj='social indicators research'), 1),
 (c_fj(c='macpherson|1962', fj='sociological inquiry'), 3),
 (c_fj(c='macpherson|1962', fj='review of religious research'), 1),
 (c_fj(c='macpherson|1962', fj='sociological review'), 1),
 (c_fj(c='macpherson|1962', fj='archives europeennes de sociologie'), 1),
 (c_fj(c='macpherson|1962', fj='social problems'), 1),
 (('macpherson|1962', 1967), 0)]
In [ ]: