NLTK Programming Statistics
Some of the statistical programming commands are as follows:
>>> f = [w for w in text1 if len(w) > 14 and w.startswith ('a')]  # words longer than 14 characters that start with 'a'
>>> print(f)
['apprehensiveness', 'authoritatively', 'apprehensiveness', 'archiepiscopacy', 'amphitheatrical', 'apprehensiveness', 'apprehensiveness']
>>> set(f)
{'apprehensiveness', 'amphitheatrical', 'archiepiscopacy', 'authoritatively'}
>>> f.most_common(5)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'list' object has no attribute 'most_common'
>>> f = [w for w in text3 if len(w) > 12 and w.endswith ('ed')]  # words longer than 12 characters that end with 'ed'
>>> set(f)
{'uncircumcised'}
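>>> f = [w for w in text3 if len(w) > 12 and w.istitle]  # note: istitle is missing its call parentheses, so the method object is always truthy and only the length filter applies (hence the lowercase words in the output)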
>>> set(f)
{'plenteousness', 'womenservants', 'uncircumcised', 'EleloheIsrael', 'Zaphnathpaaneah', 'righteousness', 'interpretations', 'sheepshearers', 'interpretation', 'threshingfloor', 'Jegarsahadutha'}
>>> f = [w for w in text3 if 15 >= len(w) > 12 and w.islower()]
>>> set(f)
{'plenteousness', 'womenservants', 'uncircumcised', 'righteousness', 'interpretations', 'sheepshearers', 'interpretation', 'threshingfloor'}
>>> f= FreqDist(len(w) for w in text2 if len(w) > 14)
>>> print(f)
<FreqDist with 3 samples and 29 outcomes>
>>> set(f)
{16, 17, 15}
>>> f.plot()
>>> f.most_common(20)
[(15, 24), (17, 3), (16, 2)]
>>> f= FreqDist(w for w in text2 if len(w) > 14)
>>> print(f)
<FreqDist with 20 samples and 29 outcomes>
>>> set(f)
{'disinterestedness', 'conscientiously', 'disrespectfully', 'congratulations', 'misconstruction', 'representations', 'proportionately', 'companionableness', 'enfranchisement', 'unobtrusiveness', 'inquisitiveness', 'dissatisfaction', 'inconsiderately', 'disappointments', 'acknowledgments', 'disqualifications', 'instantaneously', 'connoisseurship', 'misapprehension', 'incomprehensible'}
>>> f.most_common(20)  # the 20 most common words with their repetition counts
[('misapprehension', 4), ('disappointments', 3), ('acknowledgments', 3), ('incomprehensible', 2), ('congratulations', 2), ('conscientiously', 1), ('representations', 1), ('inconsiderately', 1), ('instantaneously', 1), ('inquisitiveness', 1), ('dissatisfaction', 1), ('disinterestedness', 1), ('companionableness', 1), ('proportionately', 1), ('disqualifications', 1), ('connoisseurship', 1), ('misconstruction', 1), ('enfranchisement', 1), ('disrespectfully', 1), ('unobtrusiveness', 1)]
>>> f.max()  # the most frequent word (the sample with the highest count)
'misapprehension'
>>> f.tabulate()  # tabulate the words with their repetition counts in the text
misapprehension disappointments acknowledgments incomprehensible congratulations conscientiously representations inconsiderately instantaneously inquisitiveness dissatisfaction disinterestedness companionableness proportionately disqualifications connoisseurship misconstruction enfranchisement disrespectfully unobtrusiveness
4 3 3 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
>>> f.plot()
>>> f.plot(cumulative=True)
Some of the statistical programming commands are as follows:
>>> f = [w for w in text1 if len(w) > 14 and w.startswith ('a')]  # words longer than 14 characters that start with 'a'
>>> print(f)
['apprehensiveness', 'authoritatively', 'apprehensiveness', 'archiepiscopacy', 'amphitheatrical', 'apprehensiveness', 'apprehensiveness']
>>> set(f)
{'apprehensiveness', 'amphitheatrical', 'archiepiscopacy', 'authoritatively'}
>>> f.most_common(5)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'list' object has no attribute 'most_common'
>>> f = [w for w in text3 if len(w) > 12 and w.endswith ('ed')]  # words longer than 12 characters that end with 'ed'
>>> set(f)
{'uncircumcised'}
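>>> f = [w for w in text3 if len(w) > 12 and w.istitle]  # note: istitle is missing its call parentheses, so the method object is always truthy and only the length filter applies (hence the lowercase words in the output)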
>>> set(f)
{'plenteousness', 'womenservants', 'uncircumcised', 'EleloheIsrael', 'Zaphnathpaaneah', 'righteousness', 'interpretations', 'sheepshearers', 'interpretation', 'threshingfloor', 'Jegarsahadutha'}
>>> f = [w for w in text3 if 15 >= len(w) > 12 and w.islower()]
>>> set(f)
{'plenteousness', 'womenservants', 'uncircumcised', 'righteousness', 'interpretations', 'sheepshearers', 'interpretation', 'threshingfloor'}
>>> f= FreqDist(len(w) for w in text2 if len(w) > 14)
>>> print(f)
<FreqDist with 3 samples and 29 outcomes>
>>> set(f)
{16, 17, 15}
>>> f.plot()
The following plot depicts the number of words of each length.
>>> f.most_common(20)
[(15, 24), (17, 3), (16, 2)]
>>> f= FreqDist(w for w in text2 if len(w) > 14)
>>> print(f)
<FreqDist with 20 samples and 29 outcomes>
>>> set(f)
{'disinterestedness', 'conscientiously', 'disrespectfully', 'congratulations', 'misconstruction', 'representations', 'proportionately', 'companionableness', 'enfranchisement', 'unobtrusiveness', 'inquisitiveness', 'dissatisfaction', 'inconsiderately', 'disappointments', 'acknowledgments', 'disqualifications', 'instantaneously', 'connoisseurship', 'misapprehension', 'incomprehensible'}
>>> f.most_common(20)  # the 20 most common words with their repetition counts
[('misapprehension', 4), ('disappointments', 3), ('acknowledgments', 3), ('incomprehensible', 2), ('congratulations', 2), ('conscientiously', 1), ('representations', 1), ('inconsiderately', 1), ('instantaneously', 1), ('inquisitiveness', 1), ('dissatisfaction', 1), ('disinterestedness', 1), ('companionableness', 1), ('proportionately', 1), ('disqualifications', 1), ('connoisseurship', 1), ('misconstruction', 1), ('enfranchisement', 1), ('disrespectfully', 1), ('unobtrusiveness', 1)]
>>> f.max()  # the most frequent word (the sample with the highest count)
'misapprehension'
>>> f.tabulate()  # tabulate the words with their repetition counts in the text
misapprehension disappointments acknowledgments incomprehensible congratulations conscientiously representations inconsiderately instantaneously inquisitiveness dissatisfaction disinterestedness companionableness proportionately disqualifications connoisseurship misconstruction enfranchisement disrespectfully unobtrusiveness
4 3 3 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
>>> f.plot()
>>> f.plot(cumulative=True)
No comments:
Post a Comment