Sentiments in texts using Regular Expressions
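Sentiments are expressed as words in the text.
The following steps in NLTK are used to find specific sentiment words in the text, refining the regular expressions at each step to narrow the matches. The examples assume that `re`, `FreqDist` and `text1` are already available in the session (e.g. via `import re` and `from nltk.book import *`). Note that inside a character class such as `[o|a]` the `|` is a literal character rather than alternation, so `[oa]` would match the same words here.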
Step 1:
>>> f=FreqDist(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis.*', w) or re.search('^sh[o|a][c|k].*', w) or re.search('^neg.*', w))
>>> sorted(f)
['dis', 'disable', 'disabled', 'disadvantage', 'disaffection', 'disagreeable', 'disappearance', 'disappeared', 'disappearing', 'disappears', 'disappointed', 'disaster', 'disasters', 'disastrous', 'disbands', 'disbelief', 'discerned', 'discernible', 'discernment', 'discerns', 'discharge', 'discharged', 'discharges', 'discharging', 'disciple', 'disciples', 'discipline', 'disclosed', 'disclosures', 'discolour', 'discoloured', 'discomforts', 'disconnected', 'discount', 'discourse', 'discourseth', 'discoursing', 'discover', 'discovered', 'discoverer', 'discoverers', 'discoveries', 'discovering', 'discovery', 'discreditably', 'discreet', 'discreetly', 'discretion', 'discriminating', 'discrimination', 'disdain', 'disdained', 'disease', 'disembowelled', 'disembowelments', 'disencumber', 'disengaged', 'disentangling', 'disgorge', 'disguise', 'disguisement', 'disguises', 'disgust', 'disgusted', 'dish', 'disheartening', 'dishes', 'dishonour', 'disincline', 'disinfecting', 'disintegrate', 'disinterested', 'disinterred', 'disjointedly', 'disks', 'dislike', 'dislocated', 'dislocation', 'dislodged', 'dismal', 'dismally', 'dismantled', 'dismasted', 'dismasting', 'dismay', 'dismember', 'dismembered', 'dismemberer', 'dismembering', 'dismemberment', 'dismissal', 'dismissed', 'disobedience', 'disobey', 'disobeying', 'disorder', 'disordered', 'disorderliness', 'disorderly', 'disorders', 'disparagement', 'dispel', 'dispensed', 'dispenses', 'dispersed', 'dispirited', 'dispirits', 'displaced', 'display', 'displayed', 'displays', 'disport', 'disposed', 'disposing', 'disposition', 'disproved', 'dispute', 'disputes', 'disputing', 'disquietude', 'disrated', 'disreputable', 'dissatisfaction', 'dissect', 'dissemble', 'dissembling', 'dissent', 'dissertations', 'dissimilar', 'dissociated', 'dissolutions', 'dissolve', 'dissolved', 'distance', 'distances', 'distant', 'distantly', 'distended', 'distension', 'distilled', 'distinct', 'distinction', 'distinctions', 'distinctive', 'distinctly', 
'distinguish', 'distinguished', 'distinguishing', 'distortions', 'distracted', 'distraction', 'distress', 'distressed', 'distributed', 'district', 'districts', 'distrust', 'distrusted', 'distrustful', 'distrusting', 'disturb', 'disturbing', 'negations', 'negative', 'negatived', 'negatively', 'neglect', 'neglected', 'negro', 'negroes', 'shake', 'shaken', 'shakes', 'shaking', 'shock', 'shocked', 'shocking', 'shocks']
Step 2:
>>> f=FreqDist(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis[a|c|h|g|l|o|m|t][g|s|p|r|o|u|e|b|a].*', w) or re.search('^dis[m|o][a|b].*', w) or re.search('^sh[o|a][c|k].*', w) or re.search('^neg[a|l].*', w))
>>> sorted(f)
['disable', 'disabled', 'disagreeable', 'disappearance', 'disappeared', 'disappearing', 'disappears', 'disappointed', 'disaster', 'disasters', 'disastrous', 'discerned', 'discernible', 'discernment', 'discerns', 'discolour', 'discoloured', 'discomforts', 'disconnected', 'discount', 'discourse', 'discourseth', 'discoursing', 'discover', 'discovered', 'discoverer', 'discoverers', 'discoveries', 'discovering', 'discovery', 'discreditably', 'discreet', 'discreetly', 'discretion', 'discriminating', 'discrimination', 'disgorge', 'disguise', 'disguisement', 'disguises', 'disgust', 'disgusted', 'disheartening', 'dishes', 'dishonour', 'dislocated', 'dislocation', 'dislodged', 'dismal', 'dismally', 'dismantled', 'dismasted', 'dismasting', 'dismay', 'dismember', 'dismembered', 'dismemberer', 'dismembering', 'dismemberment', 'disobedience', 'disobey', 'disobeying', 'disorder', 'disordered', 'disorderliness', 'disorderly', 'disorders', 'distance', 'distances', 'distant', 'distantly', 'distended', 'distension', 'distortions', 'distracted', 'distraction', 'distress', 'distressed', 'distributed', 'district', 'districts', 'distrust', 'distrusted', 'distrustful', 'distrusting', 'disturb', 'disturbing', 'negations', 'negative', 'negatived', 'negatively', 'neglect', 'neglected', 'shake', 'shaken', 'shakes', 'shaking', 'shock', 'shocked', 'shocking', 'shocks']
Step 3:
>>> f=FreqDist(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis[a|c|o|m][g|s|p|r|o|u|e|b|a].*', w) or re.search('^dist(r)[u|e].*', w) or re.search('^dis[g|h|m|o][a|b|o|u].*', w) or re.search('^sh[o|a][c|k].*', w) or re.search('^neg[a|l].*', w))
>>> sorted(f)
['disable', 'disabled', 'disagreeable', 'disappearance', 'disappeared', 'disappearing', 'disappears', 'disappointed', 'disaster', 'disasters', 'disastrous', 'discerned', 'discernible', 'discernment', 'discerns', 'discolour', 'discoloured', 'discomforts', 'disconnected', 'discount', 'discourse', 'discourseth', 'discoursing', 'discover', 'discovered', 'discoverer', 'discoverers', 'discoveries', 'discovering', 'discovery', 'discreditably', 'discreet', 'discreetly', 'discretion', 'discriminating', 'discrimination', 'disgorge', 'disguise', 'disguisement', 'disguises', 'disgust', 'disgusted', 'dishonour', 'dismal', 'dismally', 'dismantled', 'dismasted', 'dismasting', 'dismay', 'dismember', 'dismembered', 'dismemberer', 'dismembering', 'dismemberment', 'disobedience', 'disobey', 'disobeying', 'disorder', 'disordered', 'disorderliness', 'disorderly', 'disorders', 'distress', 'distressed', 'distrust', 'distrusted', 'distrustful', 'distrusting', 'negations', 'negative', 'negatived', 'negatively', 'neglect', 'neglected', 'shake', 'shaken', 'shakes', 'shaking', 'shock', 'shocked', 'shocking', 'shocks']
Step 4:
>>> f=FreqDist(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis[a][g|p]*(?!p)(?!e)(?!o).*', w) or re.search('^dist(r)[u|e].*', w) or re.search('^dis[g|h|m|o][a|b|o|u].*', w) or re.search('^sh[o|a][c|k].*', w) or re.search('^neg[a|l].*(?!o).*', w))
>>> sorted(f)
['disable', 'disabled', 'disadvantage', 'disaffection', 'disagreeable', 'disaster', 'disasters', 'disastrous', 'disgorge', 'disguise', 'disguisement', 'disguises', 'disgust', 'disgusted', 'dishonour', 'dismal', 'dismally', 'dismantled', 'dismasted', 'dismasting', 'dismay', 'disobedience', 'disobey', 'disobeying', 'distress', 'distressed', 'distrust', 'distrusted', 'distrustful', 'distrusting', 'negations', 'negative', 'negatived', 'negatively', 'neglect', 'neglected', 'shake', 'shaken', 'shakes', 'shaking', 'shock', 'shocked', 'shocking', 'shocks']
Sentiments are expressed as words in the text.
The following steps in NLTK are used to find specific sentiment words in the text.
Step 1:
>>> f=FreqDist(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis.*', w) or re.search('^sh[o|a][c|k].*', w) or re.search('^neg.*', w))
>>> sorted(f)
['dis', 'disable', 'disabled', 'disadvantage', 'disaffection', 'disagreeable', 'disappearance', 'disappeared', 'disappearing', 'disappears', 'disappointed', 'disaster', 'disasters', 'disastrous', 'disbands', 'disbelief', 'discerned', 'discernible', 'discernment', 'discerns', 'discharge', 'discharged', 'discharges', 'discharging', 'disciple', 'disciples', 'discipline', 'disclosed', 'disclosures', 'discolour', 'discoloured', 'discomforts', 'disconnected', 'discount', 'discourse', 'discourseth', 'discoursing', 'discover', 'discovered', 'discoverer', 'discoverers', 'discoveries', 'discovering', 'discovery', 'discreditably', 'discreet', 'discreetly', 'discretion', 'discriminating', 'discrimination', 'disdain', 'disdained', 'disease', 'disembowelled', 'disembowelments', 'disencumber', 'disengaged', 'disentangling', 'disgorge', 'disguise', 'disguisement', 'disguises', 'disgust', 'disgusted', 'dish', 'disheartening', 'dishes', 'dishonour', 'disincline', 'disinfecting', 'disintegrate', 'disinterested', 'disinterred', 'disjointedly', 'disks', 'dislike', 'dislocated', 'dislocation', 'dislodged', 'dismal', 'dismally', 'dismantled', 'dismasted', 'dismasting', 'dismay', 'dismember', 'dismembered', 'dismemberer', 'dismembering', 'dismemberment', 'dismissal', 'dismissed', 'disobedience', 'disobey', 'disobeying', 'disorder', 'disordered', 'disorderliness', 'disorderly', 'disorders', 'disparagement', 'dispel', 'dispensed', 'dispenses', 'dispersed', 'dispirited', 'dispirits', 'displaced', 'display', 'displayed', 'displays', 'disport', 'disposed', 'disposing', 'disposition', 'disproved', 'dispute', 'disputes', 'disputing', 'disquietude', 'disrated', 'disreputable', 'dissatisfaction', 'dissect', 'dissemble', 'dissembling', 'dissent', 'dissertations', 'dissimilar', 'dissociated', 'dissolutions', 'dissolve', 'dissolved', 'distance', 'distances', 'distant', 'distantly', 'distended', 'distension', 'distilled', 'distinct', 'distinction', 'distinctions', 'distinctive', 'distinctly', 
'distinguish', 'distinguished', 'distinguishing', 'distortions', 'distracted', 'distraction', 'distress', 'distressed', 'distributed', 'district', 'districts', 'distrust', 'distrusted', 'distrustful', 'distrusting', 'disturb', 'disturbing', 'negations', 'negative', 'negatived', 'negatively', 'neglect', 'neglected', 'negro', 'negroes', 'shake', 'shaken', 'shakes', 'shaking', 'shock', 'shocked', 'shocking', 'shocks']
Step 2:
>>> f=FreqDist(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis[a|c|h|g|l|o|m|t][g|s|p|r|o|u|e|b|a].*', w) or re.search('^dis[m|o][a|b].*', w) or re.search('^sh[o|a][c|k].*', w) or re.search('^neg[a|l].*', w))
>>> sorted(f)
['disable', 'disabled', 'disagreeable', 'disappearance', 'disappeared', 'disappearing', 'disappears', 'disappointed', 'disaster', 'disasters', 'disastrous', 'discerned', 'discernible', 'discernment', 'discerns', 'discolour', 'discoloured', 'discomforts', 'disconnected', 'discount', 'discourse', 'discourseth', 'discoursing', 'discover', 'discovered', 'discoverer', 'discoverers', 'discoveries', 'discovering', 'discovery', 'discreditably', 'discreet', 'discreetly', 'discretion', 'discriminating', 'discrimination', 'disgorge', 'disguise', 'disguisement', 'disguises', 'disgust', 'disgusted', 'disheartening', 'dishes', 'dishonour', 'dislocated', 'dislocation', 'dislodged', 'dismal', 'dismally', 'dismantled', 'dismasted', 'dismasting', 'dismay', 'dismember', 'dismembered', 'dismemberer', 'dismembering', 'dismemberment', 'disobedience', 'disobey', 'disobeying', 'disorder', 'disordered', 'disorderliness', 'disorderly', 'disorders', 'distance', 'distances', 'distant', 'distantly', 'distended', 'distension', 'distortions', 'distracted', 'distraction', 'distress', 'distressed', 'distributed', 'district', 'districts', 'distrust', 'distrusted', 'distrustful', 'distrusting', 'disturb', 'disturbing', 'negations', 'negative', 'negatived', 'negatively', 'neglect', 'neglected', 'shake', 'shaken', 'shakes', 'shaking', 'shock', 'shocked', 'shocking', 'shocks']
Step 3:
>>> f=FreqDist(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis[a|c|o|m][g|s|p|r|o|u|e|b|a].*', w) or re.search('^dist(r)[u|e].*', w) or re.search('^dis[g|h|m|o][a|b|o|u].*', w) or re.search('^sh[o|a][c|k].*', w) or re.search('^neg[a|l].*', w))
>>> sorted(f)
['disable', 'disabled', 'disagreeable', 'disappearance', 'disappeared', 'disappearing', 'disappears', 'disappointed', 'disaster', 'disasters', 'disastrous', 'discerned', 'discernible', 'discernment', 'discerns', 'discolour', 'discoloured', 'discomforts', 'disconnected', 'discount', 'discourse', 'discourseth', 'discoursing', 'discover', 'discovered', 'discoverer', 'discoverers', 'discoveries', 'discovering', 'discovery', 'discreditably', 'discreet', 'discreetly', 'discretion', 'discriminating', 'discrimination', 'disgorge', 'disguise', 'disguisement', 'disguises', 'disgust', 'disgusted', 'dishonour', 'dismal', 'dismally', 'dismantled', 'dismasted', 'dismasting', 'dismay', 'dismember', 'dismembered', 'dismemberer', 'dismembering', 'dismemberment', 'disobedience', 'disobey', 'disobeying', 'disorder', 'disordered', 'disorderliness', 'disorderly', 'disorders', 'distress', 'distressed', 'distrust', 'distrusted', 'distrustful', 'distrusting', 'negations', 'negative', 'negatived', 'negatively', 'neglect', 'neglected', 'shake', 'shaken', 'shakes', 'shaking', 'shock', 'shocked', 'shocking', 'shocks']
Step 4:
>>> f=FreqDist(w for w in set(text1) if re.search('^ash.*(ed)$', w) or re.search('^dis[a][g|p]*(?!p)(?!e)(?!o).*', w) or re.search('^dist(r)[u|e].*', w) or re.search('^dis[g|h|m|o][a|b|o|u].*', w) or re.search('^sh[o|a][c|k].*', w) or re.search('^neg[a|l].*(?!o).*', w))
>>> sorted(f)
['disable', 'disabled', 'disadvantage', 'disaffection', 'disagreeable', 'disaster', 'disasters', 'disastrous', 'disgorge', 'disguise', 'disguisement', 'disguises', 'disgust', 'disgusted', 'dishonour', 'dismal', 'dismally', 'dismantled', 'dismasted', 'dismasting', 'dismay', 'disobedience', 'disobey', 'disobeying', 'distress', 'distressed', 'distrust', 'distrusted', 'distrustful', 'distrusting', 'negations', 'negative', 'negatived', 'negatively', 'neglect', 'neglected', 'shake', 'shaken', 'shakes', 'shaking', 'shock', 'shocked', 'shocking', 'shocks']
No comments:
Post a Comment