Wednesday 6 March 2019

NLTK Programming-Statistics (Part 1)

                       Statistics in NLTK programming using distinct commands

Here, we will use a few commands to perform statistical operations on text. Some examples are as follows:

>>> f = FreqDist(text2)  //Frequency Distribution of the text2//
>>> print(f)
<FreqDist with 6833 samples and 141576 outcomes>

>>> f.most_common(100)  //Most common words as per Frequency Distribution//
[(',', 9397), ('to', 4063), ('.', 3975), ('the', 3861), ('of', 3565), ('and', 3350), ('her', 2436), ('a', 2043), ('I', 2004), ('in', 1904), ('was', 1846), ('it', 1568), ('"', 1506), (';', 1419), ('she', 1333), ('be', 1305), ('that', 1297), ('for', 1234), ('not', 1212), ('as', 1179), ('you', 1037), ('with', 971), ('had', 969), ('his', 941), ('he', 895), ("'", 883), ('have', 807), ('at', 806), ('by', 737), ('is', 728), ('."', 721), ('s', 700), ('Elinor', 684), ('on', 676), ('all', 642), ('him', 633), ('so', 617), ('but', 597), ('which', 592), ('could', 568), ('Marianne', 566), ('my', 551), ('Mrs', 530), ('from', 527), ('would', 507), ('very', 492), ('no', 488), ('their', 463), ('them', 462), ('--', 461), ('been', 440), ('were', 437), ('me', 433), ('they', 428), ('more', 406), ('said', 397), (',"', 396), ('any', 389), ('what', 375), ('this', 372), ('-', 366), ('every', 361), ('than', 360), ('will', 354), ('or', 353), ('your', 347), ('an', 346), ('such', 340), ('one', 304), ('do', 296), ('But', 289), ('!', 289), ('much', 287), ('sister', 282), ('only', 282), ('must', 279), ('own', 271), ('am', 270), ('Edward', 262), ('when', 261), ('who', 260), ('mother', 258), ('She', 258), ('herself', 255), ('Dashwood', 252), ('if', 249), ('The', 243), ('time', 237), ('know', 230), ('Jennings', 230), ('should', 228), ('are', 224), ('might', 215), ('Willoughby', 215), ('?"', 213), ('did', 211), ('now', 210), ('there', 209), ('think', 209), ('Miss', 208)]

>>> f['could']  //Count of word 'could' in text2// 
568
>>> f['to']  //Count of word 'to' in text2// 
4063

>>> V = [w for w in set(text2) if len(w) > 12]  //finding list of words with letter sizes >12//
>>> sorted(V)
['Disappointment', 'INconvenience', 'Prescriptions', 'Somersetshire', 'Unaccountable', 'accommodating', 'accommodation', 'accommodations', 'accomplishment', 'acknowledging', 'acknowledgment', 'acknowledgments', 'acquaintances', 'administering', 'affectionately', 'aggrandizement', 'anticipations', 'apprehensions', 'circumspection', 'circumstanced', 'circumstances', 'commiseration', 'communicating', 'communication', 'communicative', 'companionableness', 'comparatively', 'compassionate', 'comprehension', 'congratulated', 'congratulating', 'congratulations', 'connoisseurship', 'conscientious', 'conscientiously', 'consciousness', 'considerately', 'consideration', 'considerations', 'consternation', 'constitutional', 'contemptuously', 'contradictory', 'conversations', 'correspondence', 'demonstrations', 'determination', 'disagreements', 'disappointing', 'disappointment', 'disappointments', 'disapprobation', 'discrimination', 'disengagement', 'disinclination', 'disinterested', 'disinterestedness', 'disproportion', 'disqualifications', 'disrespectfully', 'dissatisfaction', 'distinguished', 'distinguishing', 'embarrassment', 'embellishment', 'embellishments', 'encouragement', 'encouragements', 'encroachments', 'enfranchisement', 'entertainment', 'establishment', 'expeditiously', 'expensiveness', 'extraordinary', 'felicitations', 'gentlemanlike', 'gratification', 'imperfections', 'impertinently', 'impossibility', 'impoverishing', 'impracticable', 'incautiousness', 'incomprehensible', 'inconsiderable', 'inconsiderately', 'inconsistency', 'inconvenience', 'inconveniences', 'indefatigable', 'independently', 'indispensable', 'indispensably', 'indisposition', 'inexperienced', 'inquisitiveness', 'insensibility', 'insignificance', 'instantaneous', 'instantaneously', 'insufficiency', 'insurmountable', 'intentionally', 'intrinsically', 'investigation', 'involuntarily', 'irreconcilable', 'irrepressible', 'irreproachable', 'justification', 'misapprehension', 'misconstruction', 
'misunderstood', 'mortification', 'neighbourhood', 'opportunities', 'overspreading', 'particularity', 'philanthropic', 'prepossessing', 'prepossession', 'probabilities', 'proportionately', 'protestations', 'qualification', 'qualifications', 'reasonableness', 'recommendation', 'reconciliation', 'representation', 'representations', 'reproachfully', 'respectability', 'resuscitation', 'significantly', 'solicitations', 'strengthening', 'superannuated', 'thoughtfulness', 'unaccountable', 'unaccountably', 'unacknowledged', 'uncomfortable', 'unconquerable', 'understanding', 'unembarrassed', 'unexhilarating', 'unfashionable', 'unfortunately', 'unintelligible', 'unintentional', 'uninteresting', 'unjustifiable', 'unobtrusiveness', 'unpleasantest', 'unpleasantness', 'unpremeditated', 'unsuitableness', 'unwillingness']

>>> V = [w for w in set(text2) if len(w) > 15] //finding list of words with letter sizes >15//
>>> sorted(V)
['companionableness', 'disinterestedness', 'disqualifications', 'incomprehensible']

>>> V = [w for w in set(text2) if len(w) > 14] //finding list of words with letter sizes >14//
>>> sorted(V)
['acknowledgments', 'companionableness', 'congratulations', 'connoisseurship', 'conscientiously', 'disappointments', 'disinterestedness', 'disqualifications', 'disrespectfully', 'dissatisfaction', 'enfranchisement', 'incomprehensible', 'inconsiderately', 'inquisitiveness', 'instantaneously', 'misapprehension', 'misconstruction', 'proportionately', 'representations', 'unobtrusiveness']

>>> V = [w for w in set(text3) if len(w) > 14]
>>> sorted(V)
['Zaphnathpaaneah', 'interpretations']

>>> V = [w for w in set(text3) if len(w) < 2]  //finding list of words with letter sizes <2//
>>> sorted(V)
['!', "'", '(', ')', ',', '.', ':', ';', '?', 'A', 'G', 'I', 'O', 'a', 'd', 'e', 'h', 'm', 'n', 'o', 's', 'w', 'y']

>>> V = [w for w in set(text3) if len(w) <= 2] 
>>> sorted(V)
['!', "'", '(', ')', ',', ',)', '.', '.)', ':', ';', ';)', '?', '?)', 'A', 'Am', 'An', 'As', 'At', 'Be', 'By', 'Do', 'En', 'Er', 'Es', 'G', 'Go', 'He', 'I', 'If', 'In', 'Is', 'It', 'LO', 'Lo', 'Me', 'My', 'O', 'Of', 'Oh', 'On', 'Se', 'So', 'To', 'Up', 'Ur', 'Uz', 'We', 'Ye', 'Zo', 'a', 'al', 'am', 'an', 'as', 'at', 'aw', 'be', 'by', 'co', 'd', 'da', 'do', 'e', 'ea', 'ev', 'ey', 'fa', 'fo', 'go', 'gr', 'h', 'ha', 'he', 'if', 'in', 'ir', 'is', 'it', 'ki', 'kn', 'la', 'li', 'lo', 'm', 'ma', 'me', 'mi', 'my', 'n', 'na', 'ne', 'no', 'o', 'oa', 'of', 'on', 'or', 'ou', 'ri', 's', 'sa', 'sh', 'si', 'so', 'th', 'to', 'tr', 'up', 'us', 'w', 'wa', 'we', 'wi', 'wo', 'y', 'ye']

>>> V = [w for w in set(text3) if (2 < len(w) < 4)] //finding list of words with exactly 3 letters (length strictly between 2 and 4)//
>>> sorted(V)
['Abr', 'All', 'And', 'Ard', 'Are', 'Art', 'Ask', 'Bow', 'But', 'Buz', 'Can', 'Dan', 'Day', 'Din', 'Egy', 'Ehi', 'Eno', 'Eri', 'Eve', 'For', 'Gad', 'Get', 'God', 'Hai', 'Ham', 'His', 'How', 'Hul', 'Huz', 'Isa', 'Jac', 'Job', 'Kor', 'Lay', 'Let', 'Lie', 'Lot', 'Lud', 'Luz', 'Mam', 'Man', 'Nay', 'Nod', 'Not', 'Now', 'Our', 'Out', 'Pau', 'Put', 'Reu', 'Say', 'See', 'Set', 'She', 'Sod', 'The', 'Thy', 'Two', 'Who', 'Why', 'Yea', 'Yet', 'Zar', 'add', 'aga', 'age', 'air', 'all', 'alo', 'and', 'any', 'are', 'ark', 'art', 'ash', 'ask', 'ass', 'bad', 'bak', 'bed', 'bou', 'bow', 'bre', 'but', 'buy', 'can', 'chi', 'clo', 'cru', 'cry', 'cup', 'cut', 'day', 'dea', 'dew', 'did', 'die', 'dim', 'doe', 'dry', 'dwe', 'ear', 'eat', 'end', 'ewe', 'fai', 'far', 'fat', 'fed', 'few', 'fie', 'fig', 'fir', 'fle', 'flo', 'fly', 'for', 'fou', 'fro', 'gat', 'get', 'goa', 'got', 'gre', 'gro', 'had', 'han', 'her', 'hid', 'hil', 'him', 'his', 'hor', 'hou', 'how', 'ill', 'inn', 'jud', 'kid', 'lad', 'lan', 'law', 'lay', 'led', 'let', 'lie', 'man', 'may', 'men', 'met', 'mou', 'nig', 'nor', 'not', 'now', 'oak', 'off', 'oil', 'old', 'one', 'oth', 'our', 'out', 'own', 'pea', 'pit', 'pla', 'pow', 'put', 'ram', 'ran', 'red', 'rib', 'rid', 'riv', 'rul', 'run', 'sac', 'sad', 'sat', 'saw', 'say', 'sea', 'see', 'set', 'she', 'sin', 'sir', 'sit', 'six', 'sle', 'sod', 'son', 'sou', 'sow', 'spe', 'spi', 'ste', 'sto', 'sun', 'tak', 'tar', 'ten', 'the', 'thi', 'thy', 'tim', 'too', 'top', 'tru', 'two', 'voi', 'vow', 'war', 'was', 'wat', 'way', 'who', 'why', 'wit', 'wiv', 'wor', 'wot', 'yea', 'yet', 'you']

>>> V = [w for w in set(text3) if len(w) == 6] //finding list of words with letter sizes =6//
>>> sorted(V)
['Abidah', 'Achbor', 'Adbeel', 'Amalek', 'Anamim', 'Ararat', 'Arioch', 'Arkite', 'Ashbel', 'Asshur', 'Becher', 'Behold', 'Beriah', 'Bethel', 'Beware', 'Bilhah', 'Bilhan', 'Birsha', 'Bozrah', 'Cainan', 'Calneh', 'Canaan', 'Cheran', 'Chesed', 'Chezib', 'Cursed', 'Diklah', 'Dishan', 'Dishon', 'Dothan', 'Eldaah', 'Ephron', 'Escape', 'Eshban', 'Eshcol', 'Except', 'Fulfil', 'Galeed', 'Gather', 'Gether', 'Gilead', 'Goshen', 'Hanoch', 'Heaven', 'Hebrew', 'Hebron', 'Hemdan', 'Hereby', 'Hezron', 'Hinder', 'Hivite', 'Horite', 'Huppim', 'Husham', 'Hushim', 'Ishbak', 'Ishuah', 'Israel', 'Ithran', 'Jaalam', 'Jabbok', 'Jachin', 'Jemuel', 'Jimnah', 'Joktan', 'Jordan', 'Joseph', 'Judith', 'Kadesh', 'Kemuel', 'Kittim', 'Kohath', 'Lamech', 'Machir', 'Manass', 'Matred', 'Merari', 'Mibsam', 'Mibzar', 'Midian', 'Milcah', 'Mishma', 'Mizpah', 'Moriah', 'Muppim', 'Naamah', 'Naaman', 'Nahath', 'Nimrod', 'Peniel', 'Penuel', 'Phallu', 'Pharez', 'Phuvah', 'Raamah', 'Rachel', 'Remain', 'Return', 'Reuben', 'Reumah', 'Sabtah', 'Samlah', 'Seeing', 'Sephar', 'Shalem', 'Shaveh', 'Shebah', 'Shelah', 'Shepho', 'Shiloh', 'Shinab', 'Shinar', 'Shobal', 'Should', 'Sichem', 'Siddim', 'Simeon', 'Sinite', 'Sitnah', 'Spirit', 'Surely', 'Syrian', 'Temani', 'Thirty', 'Thorns', 'Timnah', 'Twelve', 'Whence', 'Zaavan', 'Zeboim', 'Zibeon', 'Zillah', 'Zilpah', 'Zimran', 'Zuzims', 'abated', 'abroad', 'absent', 'accept', 'afraid', 'aileth', 'always', 'angels', 'answer', 'appear', 'aprons', 'archer', 'asketh', 'awaked', 'badest', 'bakers', 'barren', 'basket', 'battle', 'beasts', 'became', 'become', 'befall', 'befell', 'before', 'beheld', 'behind', 'behold', 'belong', 'beside', 'better', 'beyond', 'biteth', 'bitter', 'blessi', 'bodies', 'boldly', 'booths', 'border', 'bottle', 'bought', 'bowels', 'bowing', 'breach', 'breath', 'broken', 'bruise', 'budded', 'bundle', 'buried', 'butler', 'butter', 'called', 'camels', 'camest', 'cannot', 'cattle', 'caught', 'caused', 'ceased', 'change', 'charge', 'childr', 'choice', 
'cities', 'cleave', 'closed', 'coffin', 'comest', 'cometh', 'coming', 'crieth', 'cubits', 'cursed', 'custom', 'damsel', 'darkne', 'daught', 'dearth', 'denied', 'depart', 'desire', 'devour', 'digged', 'dipped', 'direct', 'divide', 'divine', 'double', 'dreams', 'driven', 'droves', 'earing', 'eatest', 'eighty', 'either', 'elders', 'eldest', 'eleven', 'embalm', 'endued', 'endure', 'enmity', 'enough', 'envied', 'errand', 'escape', 'espied', 'except', 'failed', 'fallen', 'famine', 'father', 'faults', 'favour', 'feared', 'feeble', 'fellow', 'female', 'fetcht', 'fierce', 'filled', 'finish', 'fishes', 'flocks', 'follow', 'forbid', 'forgat', 'forget', 'formed', 'former', 'fourth', 'freely', 'friend', 'fruits', 'garden', 'garmen', 'gather', 'gavest', 'giants', 'giveth', 'giving', 'golden', 'goodly', 'gopher', 'gotten', 'grapes', 'ground', 'guilty', 'halted', 'handle', 'hanged', 'hardly', 'harlot', 'hasted', 'having', 'healed', 'health', 'hearth', 'hearts', 'heaven', 'heifer', 'height', 'herein', 'hither', 'hollow', 'honour', 'horror', 'horses', 'hunter', 'images', 'indeed', 'itself', 'jewels', 'joined', 'judged', 'keeper', 'killed', 'kindly', 'kissed', 'labour', 'ladder', 'leaped', 'leaves', 'length', 'lesser', 'lifted', 'lights', 'likene', 'little', 'liveth', 'living', 'lodged', 'looked', 'lovest', 'loveth', 'maiden', 'manner', 'master', 'matter', 'mayest', 'meadow', 'messes', 'mighty', 'mocked', 'months', 'morrow', 'morsel', 'morter', 'mother', 'mouths', 'moveth', 'moving', 'myself', 'nation', 'nights', 'ninety', 'nought', 'number', 'obeyed', 'obtain', 'offeri', 'office', 'opened', 'openly', 'parcel', 'parted', 'passed', 'people', 'perish', 'person', 'pieces', 'pigeon', 'pillar', 'pilled', 'placed', 'places', 'plains', 'played', 'pledge', 'plenty', 'pluckt', 'poplar', 'poured', 'praise', 'prayed', 'priest', 'prince', 'prison', 'profit', 'proved', 'pulled', 'pursue', 'quiver', 'rained', 'reason', 'regard', 'remove', 'renown', 'report', 'reproa', 'rested', 'return', 'reward', 
'riches', 'rolled', 'rulers', 'saidst', 'saving', 'savour', 'sawest', 'saying', 'scarce', 'season', 'second', 'secret', 'seeing', 'seemed', 'servan', 'served', 'sevens', 'shadow', 'shamed', 'shaved', 'shekel', 'shewed', 'shield', 'should', 'shrank', 'shrubs', 'signet', 'silver', 'sister', 'smooth', 'softly', 'sorely', 'sorrow', 'sought', 'speckl', 'speech', 'spices', 'spirit', 'spoken', 'spread', 'sprung', 'stayed', 'stolen', 'stones', 'street', 'strife', 'stript', 'strive', 'strong', 'strove', 'subdue', 'submit', 'subtil', 'summer', 'surely', 'surety', 'tabret', 'talked', 'tender', 'terror', 'thence', 'things', 'thirty', 'though', 'thread', 'throne', 'tiller', 'tithes', 'togeth', 'tongue', 'toward', 'tribes', 'trough', 'turned', 'twelve', 'twenty', 'upward', 'utmost', 'valley', 'verily', 'virgin', 'vision', 'wagons', 'waited', 'walked', 'wander', 'washed', 'waters', 'wealth', 'weaned', 'weight', 'westwa', 'whales', 'whence', 'wicked', 'window', 'winged', 'winter', 'within', 'worthy', 'yonder', 'younge']

>>> V = [w for w in set(text3) if len(w) != 2 and len(w) < 4] //finding list of words with letter sizes <4 but not 2, i.e. sizes 1 or 3//
>>> sorted(V)
['!', "'", '(', ')', ',', '.', ':', ';', '?', 'A', 'Abr', 'All', 'And', 'Ard', 'Are', 'Art', 'Ask', 'Bow', 'But', 'Buz', 'Can', 'Dan', 'Day', 'Din', 'Egy', 'Ehi', 'Eno', 'Eri', 'Eve', 'For', 'G', 'Gad', 'Get', 'God', 'Hai', 'Ham', 'His', 'How', 'Hul', 'Huz', 'I', 'Isa', 'Jac', 'Job', 'Kor', 'Lay', 'Let', 'Lie', 'Lot', 'Lud', 'Luz', 'Mam', 'Man', 'Nay', 'Nod', 'Not', 'Now', 'O', 'Our', 'Out', 'Pau', 'Put', 'Reu', 'Say', 'See', 'Set', 'She', 'Sod', 'The', 'Thy', 'Two', 'Who', 'Why', 'Yea', 'Yet', 'Zar', 'a', 'add', 'aga', 'age', 'air', 'all', 'alo', 'and', 'any', 'are', 'ark', 'art', 'ash', 'ask', 'ass', 'bad', 'bak', 'bed', 'bou', 'bow', 'bre', 'but', 'buy', 'can', 'chi', 'clo', 'cru', 'cry', 'cup', 'cut', 'd', 'day', 'dea', 'dew', 'did', 'die', 'dim', 'doe', 'dry', 'dwe', 'e', 'ear', 'eat', 'end', 'ewe', 'fai', 'far', 'fat', 'fed', 'few', 'fie', 'fig', 'fir', 'fle', 'flo', 'fly', 'for', 'fou', 'fro', 'gat', 'get', 'goa', 'got', 'gre', 'gro', 'h', 'had', 'han', 'her', 'hid', 'hil', 'him', 'his', 'hor', 'hou', 'how', 'ill', 'inn', 'jud', 'kid', 'lad', 'lan', 'law', 'lay', 'led', 'let', 'lie', 'm', 'man', 'may', 'men', 'met', 'mou', 'n', 'nig', 'nor', 'not', 'now', 'o', 'oak', 'off', 'oil', 'old', 'one', 'oth', 'our', 'out', 'own', 'pea', 'pit', 'pla', 'pow', 'put', 'ram', 'ran', 'red', 'rib', 'rid', 'riv', 'rul', 'run', 's', 'sac', 'sad', 'sat', 'saw', 'say', 'sea', 'see', 'set', 'she', 'sin', 'sir', 'sit', 'six', 'sle', 'sod', 'son', 'sou', 'sow', 'spe', 'spi', 'ste', 'sto', 'sun', 'tak', 'tar', 'ten', 'the', 'thi', 'thy', 'tim', 'too', 'top', 'tru', 'two', 'voi', 'vow', 'w', 'war', 'was', 'wat', 'way', 'who', 'why', 'wit', 'wiv', 'wor', 'wot', 'y', 'yea', 'yet', 'you']

>>> V = [w for w in set(text3) if 'as' in w] //finding list of words which has 'as' in it//
>>> sorted(V)
['Bashemath', 'Casluhim', 'Cast', 'Damascus', 'Ellasar', 'Forasmuch', 'Girgashites', 'Girgasite', 'Hast', 'Haste', 'Lasha', 'Manass', 'Manasseh', 'Mash', 'Masrekah', 'Massa', 'Pass', 'Pildash', 'Thahash', 'Tiras', 'Whereas', 'appease', 'as', 'ascending', 'ash', 'ashamed', 'ask', 'asked', 'asketh', 'ass', 'assembly', 'asses', 'assigned', 'asswaged', 'basket', 'baskets', 'beast', 'beasts', 'blasted', 'brass', 'breasts', 'carcases', 'cast', 'castles', 'cease', 'ceased', 'compassed', 'compasseth', 'decreased', 'displease', 'displeased', 'east', 'eastward', 'everlasting', 'fashion', 'fast', 'feast', 'grass', 'hast', 'haste', 'hasted', 'hastened', 'hastily', 'increase', 'increased', 'last', 'least', 'mast', 'master', 'measures', 'occasion', 'pass', 'passed', 'past', 'pasture', 'pleasant', 'pleased', 'pleaseth', 'pleasure', 'purchase', 'purchased', 'reason', 'seas', 'season', 'seasons', 'treasure', 'trespass', 'was', 'wash', 'washed', 'wast']

>>> V = [w for w in set(text3) if 'non' in w] //finding list of words which has 'non' in it//
>>> sorted(V)
['Pinon', 'none']

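>>> V = [w for w in set(text3) if 'a' and 'b' in w] //note: this evaluates as 'a' and ('b' in w), so only 'b' is actually tested; write ('a' in w and 'b' in w) to require both letters//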
>>> sorted(V)
['Abel', 'Abelmizraim', 'Abidah', 'Abide', 'Abimael', 'Abimelech', 'Abr', 'Abrah', 'Abraham', 'Abram', 'Achbor', 'Adbeel', 'Aholibamah', 'Allonbachuth', 'Arbah', 'Ashbel', 'Babel', 'Beersheba', 'Cherubims', 'Chezib', 'Deborah', 'Dinhabah', 'Ebal', 'Eber', 'Elbethel', 'Eshban', 'Ezbon', 'Heber', 'Hebrew', 'Hebrews', 'Hebron', 'Hereby', 'Hobah', 'Ishbak', 'Jabal', 'Jabbok', 'Jacob', 'Jebusite', 'Jebusites', 'Job', 'Jobab', 'Jubal', 'Kirjatharba', 'Laban', 'Lehabim', 'Mehetabel', 'Mezahab', 'Mibsam', 'Mibzar', 'Moab', 'Moabites', 'Nebajoth', 'Obal', 'Rebek', 'Rebekah', 'Rehoboth', 'Reub', 'Reuben', 'Sabtah', 'Sabtech', 'Seba', 'Sheba', 'Shebah', 'Shemeber', 'Shinab', 'Shobal', 'Tebah', 'Tubal', 'Tubalcain', 'Unstable', 'Zeboiim', 'Zeboim', 'Zebul', 'Zebulun', 'Zibeon', 'abated', 'abide', 'able', 'abode', 'abomination', 'about', 'above', 'abroad', 'absent', 'abundantly', 'assembly', 'back', 'backward', 'bad', 'bade', 'badest', 'badne', 'bak', 'bake', 'bakemeats', 'baker', 'bakers', 'balm', 'bands', 'bank', 'bare', 'barr', 'barren', 'basket', 'baskets', 'battle', 'bdellium', 'be', 'bear', 'beari', 'bearing', 'beast', 'beasts', 'beautiful', 'became', 'because', 'become', 'bed', 'been', 'befall', 'befell', 'before', 'began', 'begat', 'beget', 'begettest', 'begin', 'beginning', 'begotten', 'beguiled', 'beheld', 'behind', 'behold', 'being', 'believed', 'belly', 'belong', 'beneath', 'bereaved', 'beside', 'besides', 'besought', 'best', 'betimes', 'better', 'between', 'betwixt', 'beyond', 'binding', 'bird', 'birds', 'birthday', 'birthright', 'biteth', 'bitter', 'blame', 'blameless', 'blasted', 'bless', 'blessed', 'blesseth', 'blessi', 'blessing', 'blessings', 'blindness', 'blood', 'blossoms', 'bodies', 'boldly', 'bondman', 'bondmen', 'bondwoman', 'bone', 'bones', 'book', 'booths', 'border', 'borders', 'born', 'bosom', 'both', 'bottle', 'bou', 'boug', 'bough', 'bought', 'bound', 'bow', 'bowed', 'bowels', 'bowing', 'boys', 'bracelets', 'branches', 'brass', 'bre', 'breach', 
'bread', 'breadth', 'break', 'breaketh', 'breaking', 'breasts', 'breath', 'breathed', 'breed', 'brethren', 'brick', 'brimstone', 'bring', 'brink', 'broken', 'brook', 'broth', 'brother', 'brought', 'brown', 'bruise', 'budded', 'build', 'builded', 'built', 'bulls', 'bundle', 'bundles', 'burdens', 'buried', 'burn', 'burning', 'burnt', 'bury', 'buryingplace', 'business', 'but', 'butler', 'butlers', 'butlership', 'butter', 'buy', 'by', 'chamber', 'concubi', 'concubine', 'concubines', 'cubit', 'cubits', 'double', 'doubled', 'doubt', 'embalm', 'embalmed', 'embraced', 'establish', 'established', 'feeble', 'feebler', 'firstborn', 'forbid', 'habitations', 'herb', 'honourable', 'husba', 'husband', 'husbandman', 'inhabitants', 'inhabited', 'labour', 'lamb', 'lambs', 'number', 'numbered', 'numbering', 'obeisance', 'obey', 'obeyed', 'observed', 'obtain', 'peaceable', 'peaceably', 'rebelled', 'rebuked', 'remember', 'remembered', 'rib', 'ribs', 'shrubs', 'subdue', 'submit', 'substance', 'subtil', 'subtilty', 'tabret', 'thereby', 'trembled', 'tribes', 'tribute', 'troubled', 'vagabond', 'whereby', 'womb', 'wombs']

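>>> V = [w for w in set(text3) if 'on' and 'al' in w] //note: this evaluates as 'on' and ('al' in w), so only 'al' is actually tested; write ('on' in w and 'al' in w) to require both//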
>>> sorted(V)
['Amal', 'Amalek', 'Amalekites', 'Baalhanan', 'Calah', 'Calneh', 'Chaldees', 'Ebal', 'Galeed', 'Jaalam', 'Jabal', 'Jubal', 'Mahalaleel', 'Mahalath', 'Malchiel', 'Male', 'Naphtali', 'Obal', 'Phallu', 'Salah', 'Salem', 'Shalem', 'Shall', 'Shalt', 'Shobal', 'Tidal', 'Tubal', 'Tubalcain', 'Uzal', 'al', 'alive', 'all', 'almon', 'alo', 'alone', 'aloud', 'also', 'altar', 'altogether', 'always', 'balm', 'befall', 'calf', 'call', 'called', 'conceal', 'continually', 'dale', 'deal', 'dealt', 'embalm', 'embalmed', 'fall', 'fallen', 'falsely', 'female', 'foal', 'foals', 'half', 'halted', 'healed', 'health', 'male', 'males', 'meal', 'perpetual', 'royal', 'salt', 'salvation', 'shall', 'shalt', 'small', 'stalk', 'steal', 'talked', 'talking', 'vale', 'valley', 'victuals', 'walk', 'walked', 'walketh', 'walking', 'wall', 'wealth', 'whales']

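>>> V = [w for w in text3 if 'on' and 'al' in w] //iterating over text3 without set() keeps duplicate words; note that only 'al' in w is actually tested, because 'on' and 'al' in w evaluates as 'on' and ('al' in w)//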
>>> sorted(V)
['Amal', 'Amalek', 'Amalekites', 'Baalhanan', 'Baalhanan', 'Calah', 'Calah', 'Calneh', 'Chaldees', 'Chaldees', 'Chaldees', 'Ebal', 'Galeed', 'Galeed', 'Jaalam', 'Jaalam', 'Jaalam', 'Jabal', 'Jubal', 'Mahalaleel', 'Mahalaleel', 'Mahalaleel', 'Mahalaleel', 'Mahalaleel', 'Mahalath', 'Malchiel', 'Male', 'Naphtali', 'Naphtali', 'Naphtali', 'Naphtali', 'Obal', 'Phallu', 'Salah', 'Salah', 'Salah', 'Salah', 'Salah', 'Salah', 'Salem', 'Shalem', 'Shall', 'Shall', 'Shall', 'Shall', 'Shall', 'Shall', 'Shalt', 'Shobal', 'Shobal', 'Shobal', 'Tidal', 'Tidal', 'Tubal', 'Tubalcain', 'Tubalcain', 'Uzal', 'al', 'al', 'al', 'alive', 'alive', 'alive', 'alive', 'alive', 'alive', 'alive', 'alive', 'alive', 'alive', 'alive', 'alive', 'all',..............

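>>> V = [w for w in set(text3) if 'on' and 'al' and 'th' in w] //note: only 'th' in w is actually tested, because the non-empty strings 'on' and 'al' are always true; write ('on' in w and 'al' in w and 'th' in w) to require all three//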
>>> sorted(V)
['Ahuzzath', 'Allonbachuth', 'Asenath', 'Ashteroth', 'Avith', 'Bashemath', 'Bethel', 'Bethlehem', 'Bethuel', 'Both', 'Dothan', 'Earth', 'Elbethel', 'Ephrath', 'Ethiopia', 'Gather', 'Gether', 'Hamathite', 'Hazarmaveth', 'Heth', 'Ithran', 'Japheth', 'Jegarsahadutha', 'Jetheth', 'Judith', 'Kiriathaim', 'Kirjatharba', 'Kohath', 'Mahalath', 'Manahath', 'Methusa', 'Methusael', 'Methuselah', 'Nahath', 'Nebajoth', 'Neither', 'Pathrusim', 'Rehoboth', 'Riphath', 'Seth', 'Succoth', 'Timnath', 'With', 'Zaphnathpaaneah', 'aileth', 'altogether', 'anoth', 'another', 'anything', 'asketh', 'beneath', 'birthday', 'birthright', 'biteth', 'blesseth', 'booths', 'both', 'breadth', 'breaketh', 'breath', 'breathed', 'brethren', 'broth', 'brother', 'clothed', 'clothes', 'cometh', 'compasseth', 'creepeth', 'crieth', 'curseth', 'dearth', 'death', 'divineth', 'doeth', 'doth', 'drinketh', 'earth', 'either', 'faileth', 'fath', 'fathe', 'father', 'fathers', 'fifth', 'findeth', 'forth', 'fourteenth', 'fourth', 'gather', 'gathered', 'gathering', 'giveth', 'goeth', 'hath', 'health', 'hearth', 'henceforth', 'hith', 'hither', 'hundredth', 'knoweth', 'length', 'lieth', 'liveth', 'longeth', 'loveth', 'meeteth', 'mirth', 'month', 'months', 'mother', 'mouth', 'mouths', 'moveth', 'needeth', 'neither', 'north', 'northward', 'nothing', 'oath', 'oth', 'other', 'overthrew', 'overthrow', 'path', 'pleaseth', 'proceedeth', 'remaineth', 'repenteth', 'sackcloth', 'saith', 'seeth', 'seventeenth', 'seventh', 'sheddeth', 'sheweth', 'sixth', 'slayeth', 'smooth', 'south', 'southward', 'speaketh', 'strength', 'strengthened', 'teeth', 'tenth', 'th', 'than', 'that', 'the', 'thee', 'their', 'them', 'themselv', 'themselves', 'then', 'thence', 'there', 'thereby', 'therefore', 'therein', 'thereof', 'thereon', 'these', 'they', 'thi', 'thicket', 'thigh', 'thin', 'thine', 'thing', 'things', 'think', 'third', 'thirteen', 'thirteenth', 'thirty', 'this', 'thistles', 'thither', 'thoroughly', 'those', 'thou', 'though', 'thought', 
'thoughts', 'thousand', 'thousands', 'thread', 'three', 'threescore', 'threshingfloor', 'throne', 'through', 'throughout', 'thus', 'thy', 'thyself', 'tithes', 'togeth', 'together', 'toucheth', 'truth', 'twentieth', 'walketh', 'wealth', 'wherewith', 'whether', 'whither', 'with', 'withered', 'withheld', 'withhold', 'within', 'without', 'worth', 'worthy', 'wotteth', 'wrath', 'wroth', 'youth']

>>> V = [w for w in set(text3) if w.endswith ('al')] //finding list of words which ends with 'al'//
>>> sorted(V)
['Amal', 'Ebal', 'Jabal', 'Jubal', 'Obal', 'Shobal', 'Tidal', 'Tubal', 'Uzal', 'al', 'conceal', 'deal', 'foal', 'meal', 'perpetual', 'royal', 'steal']

>>> V = [w for w in set(text3) if w.endswith ('al') or w.endswith ('th')]
>>> sorted(V)
['Ahuzzath', 'Allonbachuth', 'Amal', 'Asenath', 'Ashteroth', 'Avith', 'Bashemath', 'Both', 'Earth', 'Ebal', 'Ephrath', 'Hazarmaveth', 'Heth', 'Jabal', 'Japheth', 'Jetheth', 'Jubal', 'Judith', 'Kohath', 'Mahalath', 'Manahath', 'Nahath', 'Nebajoth', 'Obal', 'Rehoboth', 'Riphath', 'Seth', 'Shobal', 'Succoth', 'Tidal', 'Timnath', 'Tubal', 'Uzal', 'With', 'aileth', 'al', 'anoth', 'asketh', 'beneath', 'biteth', 'blesseth', 'both', 'breadth', 'breaketh', 'breath', 'broth', 'cometh', 'compasseth', 'conceal', 'creepeth', 'crieth', 'curseth', 'deal', 'dearth', 'death', 'divineth', 'doeth', 'doth', 'drinketh', 'earth', 'faileth', 'fath', 'fifth', 'findeth', 'foal', 'forth', 'fourteenth', 'fourth', 'giveth', 'goeth', 'hath', 'health', 'hearth', 'henceforth', 'hith', 'hundredth', 'knoweth', 'length', 'lieth', 'liveth', 'longeth', 'loveth', 'meal', 'meeteth', 'mirth', 'month', 'mouth', 'moveth', 'needeth', 'north', 'oath', 'oth', 'path', 'perpetual', 'pleaseth', 'proceedeth', 'remaineth', 'repenteth', 'royal', 'sackcloth', 'saith', 'seeth', 'seventeenth', 'seventh', 'sheddeth', 'sheweth', 'sixth', 'slayeth', 'smooth', 'south', 'speaketh', 'steal', 'strength', 'teeth', 'tenth', 'th', 'thirteenth', 'togeth', 'toucheth', 'truth', 'twentieth', 'walketh', 'wealth', 'wherewith', 'with', 'worth', 'wotteth', 'wrath', 'wroth', 'youth']

>>> V = [w for w in set(text3) if w.endswith ('al') or w.endswith ('ion') or w.endswith('k')]
>>> sorted(V)
['Amal', 'Amalek', 'Ask', 'Drink', 'Ebal', 'Esek', 'Ishbak', 'Jabal', 'Jabbok', 'Jubal', 'Look', 'Melchizedek', 'Obal', 'Rebek', 'Shobal', 'Speak', 'Tidal', 'Tubal', 'Uzal', 'Ziphion', 'abomination', 'affliction', 'al', 'ark', 'ask', 'back', 'bak', 'bank', 'book', 'break', 'brick', 'brink', 'brook', 'conceal', 'conception', 'dark', 'deal', 'dominion', 'drank', 'drink', 'fashion', 'flock', 'foal', 'folk', 'generation', 'imagination', 'interpretation', 'lack', 'lion', 'look', 'mark', 'meal', 'mention', 'milk', 'mock', 'nation', 'neck', 'oak', 'occasion', 'occupation', 'overtook', 'perpetual', 'portion', 'possession', 'provision', 'rank', 'royal', 'sack', 'salvation', 'seek', 'shrank', 'speak', 'stalk', 'steal', 'stink', 'suck', 'tak', 'think', 'took', 'traffick', 'vision', 'walk', 'week', 'wick', 'work']

>>> V = [w for w in set(text3) if w.endswith ('al') or w.endswith ('k') or w.startswith('bo')]
>>> sorted(V)
['Amal', 'Amalek', 'Ask', 'Drink', 'Ebal', 'Esek', 'Ishbak', 'Jabal', 'Jabbok', 'Jubal', 'Look', 'Melchizedek', 'Obal', 'Rebek', 'Shobal', 'Speak', 'Tidal', 'Tubal', 'Uzal', 'al', 'ark', 'ask', 'back', 'bak', 'bank', 'bodies', 'boldly', 'bondman', 'bondmen', 'bondwoman', 'bone', 'bones', 'book', 'booths', 'border', 'borders', 'born', 'bosom', 'both', 'bottle', 'bou', 'boug', 'bough', 'bought', 'bound', 'bow', 'bowed', 'bowels', 'bowing', 'boys', 'break', 'brick', 'brink', 'brook', 'conceal', 'dark', 'deal', 'drank', 'drink', 'flock', 'foal', 'folk', 'lack', 'look', 'mark', 'meal', 'milk', 'mock', 'neck', 'oak', 'overtook', 'perpetual', 'rank', 'royal', 'sack', 'seek', 'shrank', 'speak', 'stalk', 'steal', 'stink', 'suck', 'tak', 'think', 'took', 'traffick', 'walk', 'week', 'wick', 'work']

>>> V = [w for w in set(text3) if w.endswith ('d') and w.startswith('as')]
>>> sorted(V)
['ashamed', 'asked', 'assigned', 'asswaged']

>>> V = [w for w in set(text3) if w.endswith ('d') and w.startswith('as') and 'h' in w]
>>> sorted(V)
['ashamed']

>>> V = [w for w in set(text3) if w.endswith ('d') and w.startswith('b') and 'o' in w]
>>> sorted(V)
['behold', 'beyond', 'blood', 'bound', 'bowed']

>>> V = [w for w in set(text3) if w.endswith ('d') and w.startswith('as') and 'h' in w and len(w) >3]
>>> sorted(V)
['ashamed']
>>> V = [w for w in set(text3) if w.endswith ('d') and w.startswith('b') and 'o' in w and len(w) > 3]
>>> sorted(V)
['behold', 'beyond', 'blood', 'bound', 'bowed']

>>> V = [w for w in set(text3) if w.istitle()] //finding list of title words in text//
>>> sorted(V)
['A', 'Abel', 'Abelmizraim', 'Abidah', 'Abide', 'Abimael', 'Abimelech', 'Abr', 'Abrah', 'Abraham', 'Abram', 'Accad', 'Achbor', 'Adah', 'Adam', 'Adbeel', 'Admah', 'Adullamite', 'After', 'Aholibamah', 'Ahuzzath', 'Ajah', 'Akan', 'All', 'Allonbachuth', 'Almighty', 'Almodad', 'Also', 'Alvah', 'Alvan', 'Am', 'Amal', 'Amalek', 'Amalekites', 'Ammon', 'Amorite', 'Amorites', 'Amraphel', 'An', 'Anah', 'Anamim', 'And', 'Aner', 'Angel', 'Appoint', 'Aram', 'Aran', 'Ararat', 'Arbah', 'Ard', 'Are', 'Areli', 'Arioch', 'Arise', 'Arkite', 'Arodi', 'Arphaxad', 'Art', 'Arvadite', 'As', 'Asenath', 'Ashbel', 'Asher', 'Ashkenaz', 'Ashteroth', 'Ask', 'Asshur', 'Asshurim', 'Assyr', 'Assyria', 'At', 'Atad', 'Avith', 'Baalhanan', 'Babel', 'Bashemath', 'Be', 'Because', 'Becher', 'Bedad', 'Beeri', 'Beerlahairoi', 'Beersheba', 'Behold', 'Bela', 'Belah', 'Benam', 'Benjamin', 'Beno', 'Beor', 'Bera', 'Bered', 'Beriah', 'Bethel', 'Bethlehem', 'Bethuel', 'Beware', 'Bilhah', 'Bilhan', 'Binding', 'Birsha', 'Bless', 'Blessed', 'Both', 'Bow', 'Bozrah', 'Bring', 'But', 'Buz', 'By', 'Cain', 'Cainan', 'Calah', 'Calneh', 'Can', 'Cana', 'Canaan', 'Canaanite', 'Canaanites', 'Canaanitish', 'Caphtorim', 'Carmi', 'Casluhim', 'Cast', 'Cause', 'Chaldees', ....

>>> V = [w for w in set(text3) if w.istitle() and len(w) == 10]
>>> sorted(V)
['Adullamite', 'Aholibamah', 'Amalekites', 'Canaanites', 'Kadmonites', 'Kenizzites', 'Kiriathaim', 'Mahalaleel', 'Methuselah', 'Midianites', 'Perizzites', 'Potipherah']

>>> V = [w for w in set(text3) if w.isupper()] //finding list of words whose letters are all uppercase//
>>> sorted(V)
['A', 'G', 'I', 'LO', 'LORD', 'O']
>>> V = [w for w in set(text3) if w.isdigit()] 
>>> sorted(V)
[]
>>> V = [w for w in set(text1) if w.isdigit()] //finding list of tokens in text1 that consist only of digits//
>>> sorted(V)
['000', '1', '10', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '11', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '12', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '13', '130', '131', '132', '133', '134', '135', '14', '144', '1492', '15', '150', '16', '1652', '1668', '1671', '1690', '1695', '17', '1726', '1729', '1750', '1772', '1775', '1776', '1778', '1779', '1788', '1791', '1793', '18', '180', '1807', '1819', '1820', '1821', '1825', '1828', '1833', '1836', '1839', '1840', '1842', '1846', '1850', '1851', '19', '2', '20', '2000', '21', '22', '23', '24', '25', '26', '27', '28', '29', '3', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '4', '40', '400', '41', '42', '43', '44', '440', '45', '46', '47', '48', '49', '5', '50', '500', '51', '52', '53', '54', '55', '550', '56', '57', '58', '59', '6', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '7', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '8', '80', '800', '81', '82', '83', '84', '85', '86', '87', '88', '89', '890', '9', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99']

>>> V = [w for w in set(text2) if w.isalnum() and w.isdigit()]
>>> sorted(V)
['1', '10', '11', '12', '13', '14', '15', '16', '17', '18', '1811', '19', '2', '20', '200', '21', '22', '23', '24', '25', '26', '27', '28', '29', '3', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '4', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '5', '50', '6', '7', '8', '9']
>>> V = [w for w in set(text2) if w.isalnum() and w.isdigit() and len(w) > 2]
>>> sorted(V)
['1811', '200']

>>> [len(w) for w in text1 if len(w) > 14]
[16, 15, 15, 17, 15, 15, 15, 15, 17, 15, 15, 15, 15, 17, 15, 15, 15, 17, 16, 15, 15, 15, 15, 16, 15, 15, 16, 15, 15, 15, 15, 17, 15, 15, 15, 15, 17, 16, 15, 15, 15, 17, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 16, 15, 16, 15, 17, 15, 15, 15, 15, 15, 15, 16, 15, 16, 15, 16, 15, 16, 16, 16, 15, 15, 15, 15, 15, 16, 16, 16, 15, 15, 18, 17, 15, 15, 15, 15, 15, 15, 17, 17, 20, 15, 16, 15, 15, 16, 17, 16, 15, 15, 15, 15, 16, 15]

>>> f = [len(w) for w in text1 if len(w) > 14]
>>> print(f)
[16, 15, 15, 17, 15, 15, 15, 15, 17, 15, 15, 15, 15, 17, 15, 15, 15, 17, 16, 15, 15, 15, 15, 16, 15, 15, 16, 15, 15, 15, 15, 17, 15, 15, 15, 15, 17, 16, 15, 15, 15, 17, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 16, 15, 16, 15, 17, 15, 15, 15, 15, 15, 15, 16, 15, 16, 15, 16, 15, 16, 16, 16, 15, 15, 15, 15, 15, 16, 16, 16, 15, 15, 18, 17, 15, 15, 15, 15, 15, 15, 17, 17, 20, 15, 16, 15, 15, 16, 17, 16, 15, 15, 15, 15, 16, 15]
>>> set(f)
{15, 16, 17, 18, 20}

Project 2: Finding negative sentiments in text document

                                         Negative sentiments in texts

Task: Our task is to find words that express negative sentiment in the text document. Negative-sentiment words often start with 'dis', such as 'dissatisfied' or 'disastrous'; other examples include 'shameful' and 'negative'. Here we use Python's regular-expression module (re) to find them.

Start by using steps given below:
Step 1: Finding a particular group of words

>>> f=(w for w in set(text1) if re.search('^a.*(ed)$', w))
>>> f
>>> sorted(f)
['abandoned', 'abased', 'abashed', 'abated', 'abed', 'abhorred', 'abided', 'abominated', 'abounded', 'abridged', 'absorbed', 'abstained', 'abstracted', 'accelerated', 'accommodated', 'accompanied', 'accomplished', 'accosted', 'accounted', 'accumulated', 'accursed', 'accustomed', 'ached', 'achieved', 'acquainted', 'acquiesced', 'acquired', 'acted', 'actuated', 'adapted', 'added', 'addressed', 'administered', 'admitted', 'admonished', 'adopted', 'adorned', 'advanced', 'advertised', 'advised', 'aerated', 'affected', 'affixed', 'afflicted', 'afforded', 'affrighted', 'affronted', 'aged', 'aggregated', 'aggrieved', 'agitated', 'agonized', 'agreed', 'aimed', 'alarmed', 'alleged', 'allotted', 'allowed', 'alluded', 'allured', 'altered', 'amounted', 'amplified', 'amputated', 'analysed', 'anchored', 'animated', 'annihilated', 'announced', 'anointed', 'answered', 'anticipated', 'antlered', 'appalled', 'apparelled', 'appeared', 'applied', 'appointed', 'apportioned', 'apprised', 'approached', 'appropriated', 'approved', 'arched', 'argued', 'armed', 'arranged', 'arrayed', 'arrested', 'arrived', 'articulated', 'ascended', 'ascertained', 'ascribed', 'asked', 'assailed', 'assembled', 'assented', 'asserted', 'assigned', 'assisted', 'associated', 'assumed', 'assured', 'astonished', 'attached', 'attacked', 'attained', 'attended', 'attenuated', 'attested', 'attracted', 'attuned', 'augmented', 'authenticated', 'authorized', 'averred', 'averted', 'awaited', 'awakened', 'awarded', 'awed']

>>> f=(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis.*', w) or re.search('^sh.*', w))
>>> sorted(f)
['dis', 'disable', 'disabled', 'disadvantage', 'disaffection', 'disagreeable', 'disappearance', 'disappeared', 'disappearing', 'disappears', 'disappointed', 'disaster', 'disasters', 'disastrous', 'disbands', 'disbelief', 'discerned', 'discernible', 'discernment', 'discerns', 'discharge', 'discharged', 'discharges', 'discharging', 'disciple', 'disciples', 'discipline', 'disclosed', 'disclosures', 'discolour', 'discoloured', 'discomforts', 'disconnected', 'discount', 'discourse', 'discourseth', 'discoursing', 'discover', 'discovered', 'discoverer', 'discoverers', 'discoveries', 'discovering', 'discovery', 'discreditably', 'discreet', 'discreetly', 'discretion', 'discriminating', 'discrimination', 'disdain', 'disdained', 'disease', 'disembowelled', 'disembowelments', 'disencumber', 'disengaged', 'disentangling', 'disgorge', 'disguise', 'disguisement', 'disguises', 'disgust', 'disgusted', 'dish', 'disheartening', 'dishes', 'dishonour', 'disincline', 'disinfecting', 'disintegrate', 'disinterested', 'disinterred', 'disjointedly', 'disks', 'dislike', 'dislocated', 'dislocation', 'dislodged', 'dismal', 'dismally', 'dismantled', 'dismasted', 'dismasting', 'dismay', 'dismember', 'dismembered', 'dismemberer', 'dismembering', 'dismemberment', 'dismissal', 'dismissed', 'disobedience', 'disobey', 'disobeying', 'disorder', 'disordered', 'disorderliness', 'disorderly', 'disorders', 'disparagement', 'dispel', 'dispensed', 'dispenses', 'dispersed', 'dispirited', 'dispirits', 'displaced', 'display', 'displayed', 'displays', 'disport', 'disposed', 'disposing', 'disposition', 'disproved', 'dispute', 'disputes', 'disputing', 'disquietude', 'disrated', 'disreputable', 'dissatisfaction', 'dissect', 'dissemble', 'dissembling', 'dissent', 'dissertations', 'dissimilar', 'dissociated', 'dissolutions', 'dissolve', 'dissolved', 'distance', 'distances', 'distant', 'distantly', 'distended', 'distension', 'distilled', 'distinct', 'distinction', 'distinctions', 'distinctive', 'distinctly', 
'distinguish', 'distinguished', 'distinguishing', 'distortions', 'distracted', 'distraction', 'distress', ......']

>>> f=FreqDist(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis.*', w) or re.search('^sh.*', w))
>>> f.most_common(20)
[('discernment', 1), ('shallows', 1), ('shingled', 1), ('shutters', 1), ('discrimination', 1), ('disbelief', 1), ('shivered', 1), ('shifting', 1), ('shocked', 1), ('shrouded', 1), ('disrated', 1), ('shadowy', 1), ('discourse', 1), ('disdain', 1), ('shady', 1), ('shining', 1), ('disparagement', 1), ('shared', 1), ('shambling', 1), ('shapes', 1)]

>>> f=FreqDist(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis.*', w) or re.search('^sh.*', w))
>>> f.most_common(20)
[('discernment', 1), ('shallows', 1), ('shingled', 1), ('shutters', 1), ('discrimination', 1), ('disbelief', 1), ('shivered', 1), ('shifting', 1), ('shocked', 1), ('shrouded', 1), ('disrated', 1), ('shadowy', 1), ('discourse', 1), ('disdain', 1), ('shady', 1), ('shining', 1), ('disparagement', 1), ('shared', 1), ('shambling', 1), ('shapes', 1)]

Step 2: Refinement

>>> f=FreqDist(w for w in set(text1) if re.search('^dis[a-s].*', w))
>>> sorted(f)
['disable', 'disabled', 'disadvantage', 'disaffection', 'disagreeable', 'disappearance', 'disappeared', 'disappearing', 'disappears', 'disappointed', 'disaster', 'disasters', 'disastrous', 'disbands', 'disbelief', 'discerned', 'discernible', 'discernment', 'discerns', 'discharge', 'discharged', 'discharges', 'discharging', 'disciple', 'disciples', 'discipline', 'disclosed', 'disclosures', 'discolour', 'discoloured', 'discomforts', 'disconnected', 'discount', 'discourse', 'discourseth', 'discoursing', 'discover', 'discovered', 'discoverer', 'discoverers', 'discoveries', 'discovering', 'discovery', 'discreditably', 'discreet', 'discreetly', 'discretion', 'discriminating', 'discrimination', 'disdain', 'disdained', 'disease', 'disembowelled', 'disembowelments', 'disencumber', 'disengaged', 'disentangling', 'disgorge', 'disguise', 'disguisement', 'disguises', 'disgust', 'disgusted', 'dish', 'disheartening', 'dishes', 'dishonour', 'disincline', 'disinfecting', 'disintegrate', 'disinterested', 'disinterred',.....................]

>>> f=FreqDist(w for w in set(text1) if not 'displ' in w and re.search('^dis[a-s].*', w)) //Here we want all words that start with 'dis' and whose 4th letter is in the range a to s, excluding any word that contains 'displ'//
>>> sorted(f)
['disable', 'disabled', 'disadvantage', 'disaffection', 'disagreeable', 'disappearance', 'disappeared', 'disappearing', 'disappears', 'disappointed', 'disaster', 'disasters', 'disastrous', 'disbands', 'disbelief', 'discerned', 'discernible', 'discernment', 'discerns', 'discharge', 'discharged', 'discharges', 'discharging', 'disciple', 'disciples', 'discipline', 'disclosed', 'disclosures', 'discolour', 'discoloured', 'discomforts', 'disconnected', 'discount', 'discourse', 'discourseth', 'discoursing', 'discover', 'discovered', 'discoverer', 'discoverers', 'discoveries', 'discovering', 'discovery', 'discreditably', 'discreet', 'discreetly', 'discretion', 'discriminating', 'discrimination', 'disdain', 'disdained', 'disease', 'disembowelled', 'disembowelments', 'disencumber', 'disengaged',

Step 3: Further Refinement to look for specific words

>>> f=FreqDist(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis.*', w) or re.search('^sh.*', w) or re.search('neg.*', w)) //Long list: it prints words starting with 'dis' or 'sh', words starting with 'ash' and ending with 'ed', and words containing 'neg' anywhere (the 'neg.*' pattern has no '^' anchor, which is why 'Abednego' appears)//
>>> sorted(f)
['Abednego', 'dis', 'disable', 'disabled', 'disadvantage', 'disaffection', 'disagreeable', 'disappearance', 'disappeared', 'disappearing', 'disappears', 'disappointed', 'disaster', 'disasters', 'disastrous', 'disbands', 'disbelief', 'discerned', 'discernible', 'discernment', 'discerns', 'discharge', 'discharged', 'discharges', 'discharging', 'disciple', 'disciples', 'discipline', 'disclosed', 'disclosures', 'discolour', 'discoloured', 'discomforts', 'disconnected', 'discount', 'discourse', 'discourseth', 'discoursing', 'discover', 'discovered', 'discoverer', 'discoverers', 'discoveries', 'discovering', 'discovery', 'discreditably', 'discreet', 'discreetly', 'discretion', 'discriminating', 'discrimination', 'disdain', 'disdained', 'disease', 'disembowelled', 'disembowelments', 'disencumber', 'disengaged', 'disentangling', 'disgorge', 'disguise', 'disguisement', 'disguises', 'disgust', 'disgusted', 'dish', 'disheartening', 'dishes', 'dishonour', 'disincline', 'disinfecting', 'disintegrate', 'disinterested', 'disinterred', 'disjointedly', 'disks', 'dislike', 'dislocated', 'dislocation', 'dislodged', 'dismal',....................]


>>> f=FreqDist(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis.*', w) or re.search('^sh[o|a].*', w) or re.search('^neg.*', w)) //Anchoring 'neg' with '^' removes 'Abednego', but the result still includes neutral (not negative) words such as disclosures, discharged, etc.//
>>> sorted(f)
['dis', 'disable', 'disabled', 'disadvantage', 'disaffection', 'disagreeable', 'disappearance', 'disappeared', 'disappearing', 'disappears', 'disappointed', 'disaster', 'disasters', 'disastrous', 'disbands', 'disbelief', 'discerned', 'discernible', 'discernment', 'discerns', 'discharge', 'discharged', 'discharges', 'discharging', 'disciple', 'disciples', 'discipline', 'disclosed', 'disclosures', 'discolour', 'discoloured', 'discomforts', 'disconnected', 'discount', 'discourse', 'discourseth', 'discoursing', 'discover', 'discovered', 'discoverer', 'discoverers', 'discoveries', 'discovering', 'discovery', 'discreditably', 'discreet', 'discreetly', 'discretion', 'discriminating', 'discrimination', 'disdain', 'disdained', 'disease', 'disembowelled', 'disembowelments', 'disencumber', 'disengaged', 'disentangling', 'disgorge', 'disguise', 'disguisement', 'disguises',.......


>>> f=FreqDist(w for w in set(text1) if not 'disti' in w or re.search('^ash.*(ed)$', w) or re.search('^dis.*', w) or re.search('^sh[o|a][c|k].*', w) or re.search('^neg.*', w)) //The aim was to drop words containing 'disti'. However, "not 'disti' in w" is joined to the other tests with 'or', so every token that does not contain 'disti' passes the filter (including punctuation and numbers), and the list is very long. Combine the exclusion with 'and' to actually remove those words//
>>> sorted(f)
['!', '!"', '!"--', "!'", '!\'"', '!)', '!)"', '!*', '!--', '!--"', "!--'", '"', '"\'', '"--', '"...', '";', '$', '&', "'", "',", "',--", "'-", "'--", "';", '(', ')', '),', ')--', ').', ').--', '):', ');', ');--', '*', ',', ',"', ',"--', ",'", ",'--", ',)', ',*', ',--', ',--"', ",--'", '-', '--', '--"', "--'", '--\'"', '--(', '---"', '---,', '.', '."', '."*', '."--', ".'", '.\'"', '.)', '.*', '.*--', '.,', '.--', '.--"', '...', '....', '.]', '000', '1', '10', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '11', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '12', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '13', '130', '131', '132', '133', '134', '135', '14', '144', '1492', '15', '150', '15th', '16', '1652', '1668', '1671', '1690', '1695', '16th', '17', '1726', '1729', '1750', '1772', '1775', '1776', '1778', '1779', '1788', '1791', '1793', '18', '180', '1807', '1819', '1820', '1821', '1825', '1828', '1833', '1836', '1839', '1840', '1842', '1846', '1850', '1851', '19', '1ST', '1st', '2', '20', '2000', '200th', '21',........................(very long list)


>>> f=FreqDist(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis[a|c|h|g|l|o|m|t][g|s|p|r|o|u|e|b|a].*', w) or re.search('^dis[m|o][a|b].*', w) or re.search('^sh[o|a][c|k].*', w) or re.search('^neg[a|l].*', w))
>>> sorted(f)
['disable', 'disabled', 'disagreeable', 'disappearance', 'disappeared', 'disappearing', 'disappears', 'disappointed', 'disaster', 'disasters', 'disastrous', 'discerned', 'discernible', 'discernment', 'discerns', 'discolour', 'discoloured', 'discomforts', 'disconnected', 'discount', 'discourse', 'discourseth', 'discoursing', 'discover', 'discovered', 'discoverer', 'discoverers', 'discoveries', 'discovering', 'discovery', 'discreditably', 'discreet', 'discreetly', 'discretion', 'discriminating', 'discrimination', 'disgorge', 'disguise', 'disguisement', 'disguises', 'disgust', 'disgusted', 'disheartening', 'dishes', 'dishonour', 'dislocated', 'dislocation', 'dislodged', 'dismal', 'dismally', 'dismantled', 'dismasted', 'dismasting', 'dismay', 'dismember', 'dismembered', 'dismemberer', 'dismembering', 'dismemberment', 'disobedience', 'disobey', 'disobeying', 'disorder', 'disordered', 'disorderliness', 'disorderly', 'disorders', 'distance', 'distances', 'distant', 'distantly', 'distended', 'distension', 'distortions', 'distracted', 'distraction', 'distress', 'distressed', 'distributed', 'district', 'districts', 'distrust', 'distrusted', 'distrustful', 'distrusting', 'disturb', 'disturbing', 'negations', 'negative', 'negatived', 'negatively', 'neglect', 'neglected', 'shake', 'shaken', 'shakes', 'shaking', 'shock', 'shocked', 'shocking', 'shocks']

>>> f=FreqDist(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis[a|c|o|m][g|s|p|r|o|u|e|b|a].*', w) or re.search('^dist(r)[u|e].*', w) or re.search('^dis[g|h|m|o][a|b|o|u].*', w) or re.search('^sh[o|a][c|k].*', w) or re.search('^neg[a|l].*', w))
>>> sorted(f)
['disable', 'disabled', 'disagreeable', 'disappearance', 'disappeared', 'disappearing', 'disappears', 'disappointed', 'disaster', 'disasters', 'disastrous', 'discerned', 'discernible', 'discernment', 'discerns', 'discolour', 'discoloured', 'discomforts', 'disconnected', 'discount', 'discourse', 'discourseth', 'discoursing', 'discover', 'discovered', 'discoverer', 'discoverers', 'discoveries', 'discovering', 'discovery', 'discreditably', 'discreet', 'discreetly', 'discretion', 'discriminating', 'discrimination', 'disgorge', 'disguise', 'disguisement', 'disguises', 'disgust', 'disgusted', 'dishonour', 'dismal', 'dismally', 'dismantled', 'dismasted', 'dismasting', 'dismay', 'dismember', 'dismembered', 'dismemberer', 'dismembering', 'dismemberment', 'disobedience', 'disobey', 'disobeying', 'disorder', 'disordered', 'disorderliness', 'disorderly', 'disorders', 'distress', 'distressed', 'distrust', 'distrusted', 'distrustful', 'distrusting', 'negations', 'negative', 'negatived', 'negatively', 'neglect', 'neglected', 'shake', 'shaken', 'shakes', 'shaking', 'shock', 'shocked', 'shocking', 'shocks']

>>> f=FreqDist(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis[a][g|p]*(?!o).*', w) or re.search('^dist(r)[u|e].*', w) or re.search('^dis[g|h|m|o][a|b|o|u].*', w) or re.search('^sh[o|a][c|k].*', w) or re.search('^neg[a|l].*(?!o).*', w))
>>> sorted(f)
['disable', 'disabled', 'disadvantage', 'disaffection', 'disagreeable', 'disappearance', 'disappeared', 'disappearing', 'disappears', 'disappointed', 'disaster', 'disasters', 'disastrous', 'disgorge', 'disguise', 'disguisement', 'disguises', 'disgust', 'disgusted', 'dishonour', 'dismal', 'dismally', 'dismantled', 'dismasted', 'dismasting', 'dismay', 'disobedience', 'disobey', 'disobeying', 'distress', 'distressed', 'distrust', 'distrusted', 'distrustful', 'distrusting', 'negations', 'negative', 'negatived', 'negatively', 'neglect', 'neglected', 'shake', 'shaken', 'shakes', 'shaking', 'shock', 'shocked', 'shocking', 'shocks']

>>> f=FreqDist(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis[a][g|p]*(?!pea)(?!o).*', w) or re.search('^dist(r)[u|e].*', w) or re.search('^dis[g|h|m|o][a|b|o|u].*', w) or re.search('^sh[o|a][c|k].*', w) or re.search('^neg[a|l].*(?!o).*', w))
>>> sorted(f)
['disable', 'disabled', 'disadvantage', 'disaffection', 'disagreeable', 'disappearance', 'disappeared', 'disappearing', 'disappears', 'disappointed', 'disaster', 'disasters', 'disastrous', 'disgorge', 'disguise', 'disguisement', 'disguises', 'disgust', 'disgusted', 'dishonour', 'dismal', 'dismally', 'dismantled', 'dismasted', 'dismasting', 'dismay', 'disobedience', 'disobey', 'disobeying', 'distress', 'distressed', 'distrust', 'distrusted', 'distrustful', 'distrusting', 'negations', 'negative', 'negatived', 'negatively', 'neglect', 'neglected', 'shake', 'shaken', 'shakes', 'shaking', 'shock', 'shocked', 'shocking', 'shocks']

>>> f=FreqDist(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis[a][g|p]*(?!(pea))(?!o).*', w) or re.search('^dist(r)[u|e].*', w) or re.search('^dis[g|h|m|o][a|b|o|u].*', w) or re.search('^sh[o|a][c|k].*', w) or re.search('^neg[a|l].*(?!o).*', w))
>>> sorted(f)
['disable', 'disabled', 'disadvantage', 'disaffection', 'disagreeable', 'disappearance', 'disappeared', 'disappearing', 'disappears', 'disappointed', 'disaster', 'disasters', 'disastrous', 'disgorge', 'disguise', 'disguisement', 'disguises', 'disgust', 'disgusted', 'dishonour', 'dismal', 'dismally', 'dismantled', 'dismasted', 'dismasting', 'dismay', 'disobedience', 'disobey', 'disobeying', 'distress', 'distressed', 'distrust', 'distrusted', 'distrustful', 'distrusting', 'negations', 'negative', 'negatived', 'negatively', 'neglect', 'neglected', 'shake', 'shaken', 'shakes', 'shaking', 'shock', 'shocked', 'shocking', 'shocks']

Step 4: Final result

>>> f=FreqDist(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis[a][g|p]*(?!p)(?!e)(?!o).*', w) or re.search('^dist(r)[u|e].*', w) or re.search('^dis[g|h|m|o][a|b|o|u].*', w) or re.search('^sh[o|a][c|k].*', w) or re.search('^neg[a|l].*(?!o).*', w))
>>> sorted(f)
['disable', 'disabled', 'disadvantage', 'disaffection', 'disagreeable', 'disaster', 'disasters', 'disastrous', 'disgorge', 'disguise', 'disguisement', 'disguises', 'disgust', 'disgusted', 'dishonour', 'dismal', 'dismally', 'dismantled', 'dismasted', 'dismasting', 'dismay', 'disobedience', 'disobey', 'disobeying', 'distress', 'distressed', 'distrust', 'distrusted', 'distrustful', 'distrusting', 'negations', 'negative', 'negatived', 'negatively', 'neglect', 'neglected', 'shake', 'shaken', 'shakes', 'shaking', 'shock', 'shocked', 'shocking', 'shocks']