diff --git a/bert/__init__.py b/bert/__init__.py
new file mode 100644
index 0000000..bfc661c
--- /dev/null
+++ b/bert/__init__.py
@@ -0,0 +1,2 @@
+from .bert_model import BERTClass
+from .bert_utils import score, process_tweet, load_torch_model, model_path, EMOTIONS
\ No newline at end of file
diff --git a/bert/bert_model.py b/bert/bert_model.py
new file mode 100644
index 0000000..da75d9a
--- /dev/null
+++ b/bert/bert_model.py
@@ -0,0 +1,30 @@
+import torch
+from transformers import RobertaModel
+
+
+class BERTClass(torch.nn.Module):
+    """RoBERTa encoder followed by dropout and a linear head (one output per label)."""
+
+    def __init__(self, num_of_cols, path='roberta-large', do=0.3):
+        """Build the network.
+
+        num_of_cols: number of outputs produced by the linear head.
+        path: name or path handed to RobertaModel.from_pretrained.
+        do: dropout probability applied before the linear head.
+        """
+        super().__init__()
+        self.l1 = RobertaModel.from_pretrained(path)
+        self.l2 = torch.nn.Dropout(do)
+        # Take the input width from the loaded encoder instead of hard-coding
+        # 1024, so a `path` other than roberta-large still produces a valid head.
+        self.l3 = torch.nn.Linear(self.l1.config.hidden_size, num_of_cols)
+
+    def forward(self, ids, mask, token_type_ids):
+        """Return the linear head's output for a batch of encoded inputs."""
+        encoder_out = self.l1(ids, attention_mask=mask, token_type_ids=token_type_ids)
+        # Index rather than tuple-unpack: when the encoder returns a ModelOutput,
+        # unpacking yields its string keys, while [1] works for tuple and ModelOutput.
+        output_1 = encoder_out[1]
+        output_2 = self.l2(output_1)
+        output = self.l3(output_2)
+        return output
diff --git a/bert/bert_utils.py b/bert/bert_utils.py
new file mode 100644
index 0000000..f141517
--- /dev/null
+++ b/bert/bert_utils.py
@@ -0,0 +1,59 @@
+import torch
+
+
+EMOTIONS = ('anger', 'anticipation',
+            'disgust', 'fear', 'joy',
+            'love', 'optimism', 'pessimism',
+            'sadness', 'surprise', 'trust',
+            'positive', 'negative')
+emotion_dict = {w: i for i, w in enumerate(EMOTIONS)}
+model_path = "pytorch_model/roberta.mdl"
+
+
+def load_torch_model(path, map_location=None):
+    """Load a serialized model from *path* with torch.load.
+
+    map_location is forwarded to torch.load so a checkpoint saved on GPU can be
+    opened on a CPU-only host; the default (None) keeps the previous behaviour.
+    """
+    # NOTE(security): torch.load unpickles the file; only load trusted paths.
+    return torch.load(path, map_location=map_location)
+
+
+def process_tweet(tweet_text, tokenizer, max_len):
+    """Encode one tweet into tensors padded/truncated to max_len tokens.
+
+    Returns a dict with 'ids', 'mask' and 'token_type_ids', each a torch.long
+    tensor built from the tokenizer output.
+    """
+    # Collapse every run of whitespace (newlines included) to a single space.
+    tweet_text = " ".join(tweet_text.split())
+
+    inputs = tokenizer.encode_plus(
+        tweet_text,
+        None,
+        add_special_tokens=True,
+        max_length=max_len,
+        # padding='max_length' replaces the deprecated pad_to_max_length=True.
+        padding='max_length',
+        return_token_type_ids=True,
+        truncation=True)
+    ids = torch.tensor(inputs['input_ids'], dtype=torch.long)
+    mask = torch.tensor(inputs['attention_mask'], dtype=torch.long)
+    token_type_ids = torch.tensor(inputs["token_type_ids"], dtype=torch.long)
+    return {'ids': ids, 'mask': mask, 'token_type_ids': token_type_ids}
+
+
+def score(model, processed_tweet, emotions=EMOTIONS):
+    """Run the model on one processed tweet and map emotion names to sigmoid scores.
+
+    NOTE(review): the model's train/eval mode is left as the caller set it, so
+    any dropout stays active unless model.eval() was called beforehand.
+    """
+    # Inference only: no_grad avoids building an autograd graph.
+    with torch.no_grad():
+        scores = model(processed_tweet['ids'].unsqueeze(0),
+                       processed_tweet['mask'].unsqueeze(0),
+                       processed_tweet['token_type_ids'].unsqueeze(0))[0]
+    scores = torch.sigmoid(scores).cpu().detach().numpy()
+    return dict(zip(emotions, scores))
diff --git a/docs/DATA/Alabama.csv b/docs/DATA/Alabama.csv
index 7f91fd8..c311017 100644
--- a/docs/DATA/Alabama.csv
+++ b/docs/DATA/Alabama.csv
@@ -1,6 +1,2 @@
-day,sentiment,size
-2020-07-12,0.05302500000000002,4
-2020-07-13,0.03456184210526316,76
-2020-07-14,0.04700648148148148,108
-2020-07-15,-0.0006421052631578932,57
-2020-07-16,0.066242,50
+day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size
+2020-11-03,0.3015020601451397,0.038985081715509295,0.14878999069333076,0.014827042119577527,0.11390312761068344,0.7505872845649719,0.05936731165274978,0.31447573471814394,0.28431560983881354,0.5255083693191409,0.24883337691426277,0.5445212544873357,0.09429504768922926,4
diff --git a/docs/DATA/Arizona.csv b/docs/DATA/Arizona.csv
index 47fd21a..8aa19d0 100644
--- a/docs/DATA/Arizona.csv
+++ b/docs/DATA/Arizona.csv
@@ -1,6 +1,4 @@
-day,sentiment,size
-2020-07-12,-0.1250777777777778,9
-2020-07-13,-0.09132947368421052,190
-2020-07-14,-0.06046603773584906,265
-2020-07-15,-0.03349191176470588,136
-2020-07-16,-0.1582727272727273,121
+day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size
+2020-11-03,0.23755767598748206,0.023104903213679792,0.14403161693364383,0.0327863041497767,0.12714696507900952,0.6953953826427459,0.08358534058555961,0.2518014424294233,0.07638918530195951,0.42920216083526613,0.19713289745151996,0.47476908430457115,0.12054740577936172,25
+2020-11-04,0.7562837004661559,0.07123123854398727,0.7263697981834412,0.0308438241481781,0.3018142580986023,0.11684727668762207,0.012963643297553062,0.02107349596917629,0.1356756091117859,0.01912453770637512,0.7245832681655884,0.03115087188780308,0.02562497928738594,1 +2020-11-05,0.012433372437953949,0.00312386266887188,0.006566472817212343,0.003944657277315855,0.009704690426588058,0.9803977608680724,0.08267910033464433,0.2563155293464661,0.10452904552221297,0.8552642464637756,0.15587541460990906,0.88493812084198,0.3538698852062225,1 diff --git a/docs/DATA/Arkansas.csv b/docs/DATA/Arkansas.csv index a31a5d1..e552e8f 100644 --- a/docs/DATA/Arkansas.csv +++ b/docs/DATA/Arkansas.csv @@ -1,6 +1,3 @@ -day,sentiment,size -2020-07-12,0.10543999999999998,5 -2020-07-13,-0.06194782608695652,46 -2020-07-14,0.08924912280701756,57 -2020-07-15,-0.041186538461538455,52 -2020-07-16,-0.10162692307692309,26 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.23212537076324224,0.019104840350337327,0.19649475533515215,0.01080608763732016,0.07600330654531717,0.5888017304241657,0.03190354933030903,0.21136209834367037,0.04217066476121545,0.5217698067426682,0.23364664893597364,0.4976955056190491,0.03692121058702469,4 +2020-11-04,0.06386490166187286,0.01563972793519497,0.030792737379670143,0.010021865367889404,0.08188095688819885,0.9552139639854432,0.3341321647167206,0.8910010457038879,0.04730863124132157,0.34613150358200073,0.13874706625938416,0.6023640036582947,0.01418360974639654,1 diff --git a/docs/DATA/California.csv b/docs/DATA/California.csv index cd3edb0..6d55872 100644 --- a/docs/DATA/California.csv +++ b/docs/DATA/California.csv @@ -1,6 +1,5 @@ -day,sentiment,size -2020-07-12,-0.15645111111111107,45 -2020-07-13,-0.02706094276094276,1188 -2020-07-14,-0.012675468043899294,1549 -2020-07-15,0.052086598984771575,985 -2020-07-16,-0.001346081081081085,740 
+day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.32113456363705073,0.05082612788220021,0.22435065616260874,0.04621642149921337,0.20560797614214774,0.6709796903824264,0.07631975741718303,0.21775756830518897,0.08474239847961475,0.449492619889365,0.24511527972803873,0.45308960745619103,0.13540763564577157,110 +2020-11-04,0.30563517639206517,0.03288842463451955,0.2680046359180576,0.09714317762862062,0.1685643567600184,0.6388972691363759,0.04396173699448506,0.17543157997230688,0.06565501934124364,0.41128147289984757,0.1825358991821607,0.4733356758952141,0.1023943962322341,9 +2020-11-05,0.40556070518990356,0.04360106851284703,0.3747098023692767,0.12697675354623547,0.23052359310289225,0.45981765538454056,0.024339797208085656,0.09428844663004081,0.10510303033515811,0.3254749371359746,0.3379930642743905,0.31143613811582327,0.04938292084261775,6 +2020-11-06,0.4733906735976537,0.058563362807035446,0.228560211447378,0.009964610682800412,0.28300620677570504,0.6705215871334076,0.0388451541463534,0.06455952736238639,0.09371282439678907,0.5216292863090833,0.27292581150929135,0.4434216767549515,0.07736938633024693,6 diff --git a/docs/DATA/Colorado.csv b/docs/DATA/Colorado.csv index 02202e2..93d4d3a 100644 --- a/docs/DATA/Colorado.csv +++ b/docs/DATA/Colorado.csv @@ -1,6 +1,4 @@ -day,sentiment,size -2020-07-12,0.5363,4 -2020-07-13,-0.03581914893617022,94 -2020-07-14,0.0813233870967742,124 -2020-07-15,-0.05240933333333334,75 -2020-07-16,-0.037176811594202906,69 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.3669791527790949,0.037398156215203926,0.2675011196406558,0.03726757386903046,0.2377263533999212,0.5593474742490798,0.06194823674741201,0.1790207489975728,0.06874638347653672,0.3027458528522402,0.28660413110628724,0.3268219727324322,0.11798187723616138,16 
+2020-11-04,0.4549795736869176,0.059562074641386666,0.36049628878633183,0.03504542882243792,0.30702362954616547,0.42107434074083966,0.0247716026691099,0.08588385488837957,0.15334681856135526,0.3657170968751113,0.35698168228069943,0.3556275845815738,0.022752274138232078,3 +2020-11-05,0.020194333046674732,0.004727352876216172,0.022838065400719643,0.008183152414858341,0.01522822864353657,0.9875780940055848,0.2379361391067505,0.8477628827095032,0.009316797368228436,0.8541224598884583,0.03594334051012993,0.7409178614616394,0.015243194997310637,1 diff --git a/docs/DATA/Connecticut.csv b/docs/DATA/Connecticut.csv index 405b807..ac5e6ee 100644 --- a/docs/DATA/Connecticut.csv +++ b/docs/DATA/Connecticut.csv @@ -1,6 +1,4 @@ -day,sentiment,size -2020-07-12,-0.04930000000000001,2 -2020-07-13,0.03545000000000001,68 -2020-07-14,-0.04928139534883723,86 -2020-07-15,-0.05538571428571428,49 -2020-07-16,-0.105725,36 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.4249416422098875,0.04288992565125227,0.22110985638573766,0.054275984468404204,0.24531769729219377,0.6392235979437828,0.08527335745748132,0.3633726090192795,0.06873727915808558,0.20901273703202605,0.30076829274185,0.2140060537494719,0.071769725298509,8 +2020-11-04,0.020164553076028824,0.0041690189391374605,0.008491436019539833,0.0025588886346668005,0.008992798626422882,0.975445032119751,0.10016622394323348,0.6018856763839722,0.029166176915168762,0.8317875862121582,0.07176153361797333,0.7508914470672607,0.027514209970831868,1 +2020-11-05,0.3076126277446747,0.017890188843011856,0.18135808408260345,0.014399176463484762,0.08024957776069641,0.9320885539054872,0.0287961196154356,0.237002432346344,0.04590897634625435,0.9208155870437622,0.07397577166557312,0.7859705090522766,0.024026697501540184,1 diff --git a/docs/DATA/Florida.csv b/docs/DATA/Florida.csv index a5fb4a7..5dbd56e 100644 --- a/docs/DATA/Florida.csv +++ b/docs/DATA/Florida.csv @@ -1,6 +1,5 @@ 
-day,sentiment,size -2020-07-12,-0.0807903846153846,52 -2020-07-13,-0.047556652360515025,699 -2020-07-14,-0.012420770877944328,934 -2020-07-15,-0.021688387096774186,465 -2020-07-16,-0.050482203389830504,236 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.25652093219105154,0.028237350488780066,0.18372169153008144,0.02819305231241742,0.1476468457840383,0.7173904466908425,0.0877923269290477,0.28425522230099887,0.068319197758683,0.5142312348471023,0.23402939218794927,0.5237346212379634,0.06526403533644043,32 +2020-11-04,0.5113325210081207,0.06876867243813144,0.3133550054497189,0.09498526766482326,0.3498080393506421,0.4186457594235738,0.03229407841960589,0.12148978209329976,0.07504237970958154,0.2419493802719646,0.36800287663936615,0.24554836336109373,0.05134557601478365,9 +2020-11-05,0.3271783869713545,0.055848425836302326,0.23400724062230438,0.014475754258455709,0.2244352656416595,0.5863748025149107,0.06435455475002527,0.138499541208148,0.08949919661972672,0.4447937519289553,0.3760647177696228,0.4038089578971267,0.11228384915739298,8 +2020-11-06,0.9943521022796632,0.232911616563797,0.9423460364341736,0.4621496796607971,0.9576054215431212,0.04261334612965584,0.012336350977420809,0.018657751381397247,0.027139844372868538,0.017469106242060658,0.07325223833322525,0.01490642689168453,0.010408044792711737,1 diff --git a/docs/DATA/Georgia.csv b/docs/DATA/Georgia.csv index 015e2d4..a28a8a8 100644 --- a/docs/DATA/Georgia.csv +++ b/docs/DATA/Georgia.csv @@ -1,6 +1,3 @@ -day,sentiment,size -2020-07-12,0.1629,8 -2020-07-13,-0.016727510917030567,229 -2020-07-14,-0.0049057692307692274,312 -2020-07-15,0.05097535545023697,211 -2020-07-16,-0.04495,188 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size 
+2020-11-03,0.2133195023983717,0.028559500041107338,0.1663217172647516,0.012946579582057893,0.11400957256555558,0.697607214252154,0.07271318547427655,0.21028837139407794,0.06896446030586958,0.40920254252851007,0.2507905044903358,0.4455710994700591,0.18323441905279955,15 +2020-11-05,0.556139662861824,0.09718435537070036,0.5070638991892338,0.07622809056192636,0.43838514760136604,0.5152828395366669,0.026034176698885858,0.10340303834527731,0.05589510500431061,0.49802897963672876,0.13161974027752876,0.5032134726643562,0.051485702395439155,2 diff --git a/docs/DATA/Hawaii.csv b/docs/DATA/Hawaii.csv index 7c8870e..dc0230f 100644 --- a/docs/DATA/Hawaii.csv +++ b/docs/DATA/Hawaii.csv @@ -1,5 +1,4 @@ -day,sentiment,size -2020-07-13,-0.06683571428571429,28 -2020-07-14,0.10648372093023255,43 -2020-07-15,-0.01919677419354838,31 -2020-07-16,-0.02331666666666667,12 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.1573030687868595,0.03470570268109441,0.06291771866381168,0.004463118898759906,0.10141616656134526,0.8210318634907404,0.06524601144095261,0.15795853237311044,0.05833070445805788,0.6651051988204321,0.271047202249368,0.6588717823227247,0.07717758727570374,6 +2020-11-04,0.7704015374183655,0.1594344526529312,0.4310869574546814,0.011660306714475157,0.5693297982215881,0.14307689666748047,0.014399657025933266,0.011431162245571613,0.12418948113918304,0.01987207494676113,0.7691964507102966,0.048833418637514114,0.039936646819114685,1 +2020-11-05,0.05751895904541016,0.03616471588611603,0.03157311677932738,0.0035738237202167507,0.031843770295381546,0.6872934103012085,0.10445238649845123,0.05700751394033433,0.2872485816478729,0.09620582312345503,0.5332883596420288,0.16125935316085815,0.6454863548278809,1 diff --git a/docs/DATA/Ilinois.csv b/docs/DATA/Ilinois.csv index 07cbd61..5518a5b 100644 --- a/docs/DATA/Ilinois.csv +++ b/docs/DATA/Ilinois.csv @@ -1,6 +1,4 @@ -day,sentiment,size 
-2020-07-12,0.048309090909090906,11 -2020-07-13,0.046224193548387096,186 -2020-07-14,0.0192776,250 -2020-07-15,-0.059910714285714296,140 -2020-07-16,-0.05345247524752475,101 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.26456337949881953,0.03461449459427968,0.20513582825272655,0.037389891513157636,0.17818810433770219,0.7400215755527219,0.05815851166456317,0.17662667653833827,0.06544744937370221,0.5476877698674798,0.2006769668466101,0.5352853536605835,0.1767291680444032,24 +2020-11-04,0.07347320020198822,0.013040672056376934,0.03006547875702381,0.003714586608111858,0.03648651763796806,0.9011728763580322,0.1585478037595749,0.33513158559799194,0.0354321226477623,0.22981394827365875,0.2204464972019196,0.2427978068590164,0.61409592628479,1 +2020-11-05,0.22857878357172012,0.022035093113247837,0.13621810916811228,0.0178099098016641,0.07551754425678935,0.761780994279044,0.060480889997312,0.2285883618252618,0.05150866122650249,0.5426243575555938,0.26374691086156027,0.5285110345908574,0.06989301075892788,7 diff --git a/docs/DATA/Indiana.csv b/docs/DATA/Indiana.csv index 8caff98..bc0e11f 100644 --- a/docs/DATA/Indiana.csv +++ b/docs/DATA/Indiana.csv @@ -1,6 +1,4 @@ -day,sentiment,size -2020-07-12,0.20798571428571425,7 -2020-07-13,0.0626168,125 -2020-07-14,0.04653956834532374,139 -2020-07-15,-0.029557291666666655,96 -2020-07-16,0.003431999999999994,50 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.40088723759566036,0.029188224247523716,0.31971484129982336,0.04136579320766032,0.19722795167139598,0.7072551591055733,0.07441057158367974,0.19973611725228174,0.03291886851989797,0.4464227024997984,0.25497943588665556,0.42587042174168993,0.05141019754643951,7 
+2020-11-04,0.0183711051940918,0.00522207235917449,0.018458722159266472,0.007415824569761753,0.010699091479182243,0.9903537034988404,0.17490682005882266,0.7891470789909363,0.03383982554078102,0.9364686608314514,0.04137440025806427,0.846731424331665,0.022405290976166725,1 +2020-11-05,0.6340523958206177,0.109596349298954,0.4134844169020653,0.036649624817073345,0.3911500871181488,0.3283752351999283,0.02472253516316414,0.04224316589534283,0.09081712365150452,0.12639500619843602,0.42429088056087494,0.21583358570933345,0.03287216555327177,2 diff --git a/docs/DATA/Iowa.csv b/docs/DATA/Iowa.csv index fffd097..cc61b83 100644 --- a/docs/DATA/Iowa.csv +++ b/docs/DATA/Iowa.csv @@ -1,6 +1,4 @@ -day,sentiment,size -2020-07-12,0.0,2 -2020-07-13,0.08905999999999999,50 -2020-07-14,0.028878125000000008,64 -2020-07-15,0.08198787878787879,33 -2020-07-16,-0.020173333333333356,15 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.1333929831162095,0.010108367307111621,0.12354034394957124,0.00610770401544869,0.03913842933252454,0.9034671783447266,0.10632835328578949,0.3261668495833874,0.02478954754769802,0.8451491296291351,0.09548558853566647,0.8203132450580597,0.14623422361910343,2 +2020-11-05,0.1336846798658371,0.010270201601088049,0.016106449067592614,0.0047767567448318005,0.10712303966283797,0.9878392219543456,0.07553289830684662,0.3968021869659424,0.011907636187970638,0.9624367356300354,0.04255806654691696,0.9310423731803894,0.059265483170747764,1 +2020-11-06,0.07083490490913391,0.010544621385633944,0.0905625745654106,0.0062260404229164115,0.05307034030556679,0.6654537916183472,0.03984729573130608,0.1985203921794892,0.11608807742595673,0.2879045605659485,0.23216721415519714,0.3286153078079224,0.19746600091457367,1 diff --git a/docs/DATA/Louisiana.csv b/docs/DATA/Louisiana.csv index 7da9158..313e718 100644 --- a/docs/DATA/Louisiana.csv +++ b/docs/DATA/Louisiana.csv @@ -1,6 +1,3 @@ -day,sentiment,size 
-2020-07-12,-0.04933333333333334,3 -2020-07-13,0.022117757009345793,107 -2020-07-14,0.03703666666666667,120 -2020-07-15,0.06212187499999999,64 -2020-07-16,-0.04449111111111111,45 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.32592846142748993,0.07838294546430309,0.21039214274949497,0.06912533364569147,0.23291946492261356,0.7051056275765101,0.06060621918489536,0.14606421192487082,0.06988834532805616,0.40581800954209435,0.22486290790968472,0.44814464387794334,0.21553140826937225,9 +2020-11-04,0.07402664236724377,0.015195035375654697,0.026809629984200008,0.007222899817861616,0.05041234940290451,0.9041750431060791,0.10825670883059502,0.3854124993085861,0.072608707472682,0.6547429412603378,0.11959738004952669,0.6815503835678101,0.06370295025408268,2 diff --git a/docs/DATA/Massachusetts.csv b/docs/DATA/Massachusetts.csv index e4da2ae..e018843 100644 --- a/docs/DATA/Massachusetts.csv +++ b/docs/DATA/Massachusetts.csv @@ -1,6 +1,4 @@ -day,sentiment,size -2020-07-12,-0.19034999999999996,8 -2020-07-13,-0.02958260869565217,138 -2020-07-14,-0.03279435028248587,177 -2020-07-15,-0.03377916666666666,96 -2020-07-16,0.004066153846153855,65 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.2760397113031811,0.0412726575612194,0.1610362214139766,0.008224335861288838,0.19354534501002896,0.7213294804096222,0.08821553385092153,0.14575265617006355,0.04613760569029384,0.3581150138957633,0.3213547898663415,0.3728710135651959,0.33426356439789134,9 +2020-11-04,0.33149705082178116,0.0717334443082412,0.046096532605588436,0.012021386064589024,0.2883750821153323,0.8981393178304037,0.07525414228439331,0.2542116194963455,0.02893222899486621,0.7707673013210297,0.13590834165612858,0.6713078767061234,0.03437004672984282,3 
+2020-11-05,0.33167946338653564,0.07075827848166227,0.141969982534647,0.006665413035079839,0.2721281051635742,0.4284886568784714,0.03063150867819786,0.07225010171532631,0.11083826795220375,0.08870634250342846,0.5164352804422379,0.1257872022688389,0.12224962003529072,2 diff --git a/docs/DATA/Michigan.csv b/docs/DATA/Michigan.csv index 7b12c99..feb47fb 100644 --- a/docs/DATA/Michigan.csv +++ b/docs/DATA/Michigan.csv @@ -1,6 +1,4 @@ -day,sentiment,size -2020-07-12,0.0667777777777778,9 -2020-07-13,-0.03230817610062892,159 -2020-07-14,0.0150261780104712,191 -2020-07-15,-0.03867570093457942,107 -2020-07-16,-0.07575243902439022,82 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.4306485391436861,0.04044264794972081,0.2923427733162848,0.025948270224034786,0.22295041991254458,0.6207165408592957,0.037508553108916834,0.14210994384036615,0.03827952958929997,0.45841637167793053,0.2934051058613337,0.425356886564539,0.11838568979874253,13 +2020-11-04,0.10762075334787367,0.021486632525920868,0.04377336800098418,0.005826405249536037,0.03948273882269858,0.6882874369621277,0.0559348464012146,0.1524781584739685,0.3170079290866852,0.470561295747757,0.3802679479122162,0.5833770036697388,0.06470614671707152,1 +2020-11-05,0.06687249429523945,0.024004002567380667,0.02126152254641056,0.00329593347851187,0.049873758107423775,0.6786491125822067,0.1144702173769474,0.25795604661107063,0.08090667566284537,0.33164845034480095,0.3867498189210892,0.3290075585246086,0.09980490431189537,2 diff --git a/docs/DATA/Minnesota.csv b/docs/DATA/Minnesota.csv index 46bb0a2..95c679f 100644 --- a/docs/DATA/Minnesota.csv +++ b/docs/DATA/Minnesota.csv @@ -1,6 +1,4 @@ -day,sentiment,size -2020-07-12,0.26844999999999997,2 -2020-07-13,-0.06158235294117648,85 -2020-07-14,-0.007119491525423745,118 -2020-07-15,0.10433866666666666,75 -2020-07-16,0.058630434782608674,46 
+day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.3764048283919692,0.03885433229152113,0.18184639303945005,0.014801646917476322,0.25861726934090257,0.7134370254352689,0.08622947835829109,0.21841106447391212,0.050006947945803404,0.4623800325207412,0.22492930106818676,0.46970861218869686,0.10734344203956424,8 +2020-11-04,0.0413842722773552,0.006018162705004215,0.012336681596934795,0.0058684004470706,0.013203449547290802,0.9906895160675048,0.07817171514034271,0.2898565828800201,0.009088992141187193,0.9703167676925659,0.02682377956807613,0.963237464427948,0.3118612766265869,1 +2020-11-05,0.4990372061729431,0.03115578182041645,0.33225229382514954,0.008486274629831314,0.2646719217300415,0.497512549161911,0.0214279294013977,0.05401114374399185,0.08147967606782912,0.349259614944458,0.2097792625427246,0.3690343201160431,0.03939482942223549,1 diff --git a/docs/DATA/Missouri.csv b/docs/DATA/Missouri.csv index c535963..4b77a9c 100644 --- a/docs/DATA/Missouri.csv +++ b/docs/DATA/Missouri.csv @@ -1,6 +1,3 @@ -day,sentiment,size -2020-07-12,0.04034444444444443,9 -2020-07-13,-0.03743599999999998,100 -2020-07-14,0.10429492753623189,138 -2020-07-15,0.0839808823529412,68 -2020-07-16,-0.1482304347826087,46 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.29501753699566635,0.020211786297815188,0.28876675186412676,0.023534920997917656,0.0985902499939714,0.8240249710423606,0.07762801248048032,0.20218016899057797,0.11708272421466452,0.5037246181496552,0.2685865099940981,0.47153062745928764,0.29214691556990147,7 +2020-11-05,0.029955287463963032,0.004580587497912347,0.017848209012299776,0.004790026694536209,0.012887638993561271,0.9655819237232208,0.03336836397647858,0.2099008709192276,0.02250378392636776,0.902782529592514,0.07028426602482796,0.8176040947437286,0.1407734379172325,2 diff --git a/docs/DATA/Nevada.csv b/docs/DATA/Nevada.csv index 
32b1c65..4228752 100644 --- a/docs/DATA/Nevada.csv +++ b/docs/DATA/Nevada.csv @@ -1,6 +1,4 @@ -day,sentiment,size -2020-07-12,-0.45019999999999993,4 -2020-07-13,-0.11314495412844038,109 -2020-07-14,-0.047435915492957746,142 -2020-07-15,-0.016067889908256885,109 -2020-07-16,-0.21941911764705882,68 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.2866919315420091,0.03543233242817223,0.1433218540623784,0.00928332851617597,0.21587930689565837,0.6842488870024681,0.04898360837250948,0.1355501231737435,0.05455237627029419,0.48317344998940825,0.2736857463605702,0.4426499749533832,0.08139890874736011,8 +2020-11-04,0.3203414771705866,0.038331806659698486,0.2584290988743305,0.012536672409623861,0.16615197248756886,0.6051302552223206,0.04856245033442974,0.12894634809345007,0.21898967027664185,0.5112454257905483,0.3193604424595833,0.494944978505373,0.07979198638349771,2 +2020-11-05,0.07183542102575302,0.008673295378685,0.06411527097225189,0.004961665719747543,0.03368192911148071,0.7362236380577087,0.04359735921025276,0.12372810393571855,0.04634736478328705,0.5472456812858582,0.1576923131942749,0.4461484849452973,0.060406200587749474,1 diff --git a/docs/DATA/New Hampshire.csv b/docs/DATA/New Hampshire.csv index 62abe8e..daeae64 100644 --- a/docs/DATA/New Hampshire.csv +++ b/docs/DATA/New Hampshire.csv @@ -1,6 +1,2 @@ -day,sentiment,size -2020-07-12,-0.34,1 -2020-07-13,-0.05323750000000001,40 -2020-07-14,-0.014979487179487183,39 -2020-07-15,-0.033265,20 -2020-07-16,0.051962499999999995,16 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.42120628866056603,0.06429180324388047,0.35082721694683033,0.05571345488230387,0.2650855545264979,0.5442727555831274,0.06382287984403472,0.06847683417921265,0.1450206326941649,0.28044481109827757,0.2920422585060199,0.3096103662004073,0.3284599802767237,6 diff --git a/docs/DATA/New Jersey.csv 
b/docs/DATA/New Jersey.csv index c24bff6..43e9b60 100644 --- a/docs/DATA/New Jersey.csv +++ b/docs/DATA/New Jersey.csv @@ -1,6 +1,4 @@ -day,sentiment,size -2020-07-12,0.14122499999999996,12 -2020-07-13,-0.006141520467836259,171 -2020-07-14,0.003696190476190476,210 -2020-07-15,0.06935952380952382,126 -2020-07-16,-0.03596052631578947,76 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.2727145543321967,0.028263779434685905,0.21759776049293578,0.05155382334487513,0.12674820066119233,0.7451030984520912,0.05603455472737551,0.22883758383492628,0.06371535593643785,0.5975386809247235,0.22936355415731668,0.5688324157769481,0.0724095778229336,12 +2020-11-04,0.044858310371637344,0.007531887385994195,0.04793759807944298,0.005957072135061026,0.021540002897381786,0.9891403913497924,0.14251956343650818,0.914500892162323,0.045999657362699516,0.8052092790603638,0.03249102085828781,0.7553545236587524,0.0363805927336216,1 +2020-11-05,0.5698960572481155,0.08755353093147279,0.4152383953332901,0.0248777037486434,0.45674876868724823,0.3186882957816124,0.019092564471065998,0.07653854042291641,0.075270751491189,0.07275768741965294,0.46525922417640686,0.07144658174365759,0.04582978133112192,2 diff --git a/docs/DATA/New York.csv b/docs/DATA/New York.csv index 8aed555..149446b 100644 --- a/docs/DATA/New York.csv +++ b/docs/DATA/New York.csv @@ -1,6 +1,5 @@ -day,sentiment,size -2020-07-12,0.10117073170731704,41 -2020-07-13,-0.02221574074074074,540 -2020-07-14,-0.00193256484149856,694 -2020-07-15,0.014899455040871938,367 -2020-07-16,0.08066061946902654,226 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size 
+2020-11-03,0.24949510047833126,0.03303052540868521,0.15077878767624497,0.01683967364175866,0.16039460847775142,0.7458089679479599,0.07936350979531805,0.18538662244876225,0.053916403837502,0.5228315405547619,0.21974518684049446,0.5386026264478763,0.19375461392725507,30 +2020-11-04,0.04001453146338463,0.014478554017841816,0.008262543939054012,0.001996527425944805,0.016820482909679413,0.9044698476791382,0.09862039238214493,0.3235032558441162,0.04438399150967598,0.7344487905502319,0.1258396953344345,0.7646641135215759,0.03336155042052269,1 +2020-11-05,0.1001627625276645,0.015515745927890142,0.061314632184803486,0.005056623990337054,0.05139712647845348,0.7973077297210693,0.10194005196293195,0.25609998653332394,0.04725164951135715,0.6322567947208881,0.20603895466774702,0.6483588640888532,0.1905043963342905,6 +2020-11-06,0.0883687362074852,0.007734755054116248,0.025155620649456967,0.008400487713515759,0.03903059288859368,0.9913604855537416,0.1967044174671173,0.8311027288436891,0.008526196703314781,0.9045591354370116,0.046217843890190125,0.823433518409729,0.02958408929407597,1 diff --git a/docs/DATA/North Carolina.csv b/docs/DATA/North Carolina.csv index 2a06612..35eb176 100644 --- a/docs/DATA/North Carolina.csv +++ b/docs/DATA/North Carolina.csv @@ -1,6 +1,4 @@ -day,sentiment,size -2020-07-12,-0.31113749999999996,8 -2020-07-13,0.018556043956043957,182 -2020-07-14,0.11740041493775934,241 -2020-07-15,0.050519631901840494,163 -2020-07-16,-0.04972000000000001,75 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.5905693223079046,0.06721028468261163,0.44421829003840685,0.020018213234531384,0.4165642677495877,0.30430606628457707,0.04536593371691803,0.1112101503337423,0.08234575157985091,0.11733814577261607,0.4925016363461812,0.1643358919148644,0.04164952722688516,6 
+2020-11-04,0.26366672199219465,0.037413881936421,0.2081147632561624,0.037243140822586916,0.1433820977496604,0.6394386154909929,0.049586998453984656,0.18457482941448689,0.07472855504602194,0.47639329607288045,0.2753028105944395,0.4925689647595088,0.07012381373594205,6 +2020-11-05,0.6522671580314636,0.04367416724562645,0.2640852630138397,0.005667489022016525,0.5057918429374695,0.2937978208065033,0.017641885206103325,0.031892064958810806,0.05995210632681847,0.0432773157954216,0.4050977826118469,0.08679478615522385,0.01255789864808321,1 diff --git a/docs/DATA/Ohio.csv b/docs/DATA/Ohio.csv index 212bb6a..dfc9a8e 100644 --- a/docs/DATA/Ohio.csv +++ b/docs/DATA/Ohio.csv @@ -1,6 +1,4 @@ -day,sentiment,size -2020-07-12,-0.023946666666666665,15 -2020-07-13,-0.007257476635514026,214 -2020-07-14,-0.052589015151515144,264 -2020-07-15,0.0356721052631579,190 -2020-07-16,-0.007229059829059837,117 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.29896986471827736,0.04190591856075281,0.14805281462825157,0.016714217342351647,0.1968802334740758,0.6786946363069795,0.07527456322515552,0.2142172341488979,0.10815718863159418,0.4620605105195533,0.2822632117366249,0.49128105101937597,0.18149132603271442,22 +2020-11-04,0.4322092831134796,0.03905314579606056,0.2705475054681301,0.012336047133430839,0.2725895866751671,0.6413806080818176,0.025032932870090008,0.0694548487663269,0.05737726669758558,0.5076537914574146,0.269286323338747,0.4877282455563545,0.1652972903102636,2 +2020-11-05,0.08354133926331997,0.016096678096801043,0.07360222842544319,0.011594315059483051,0.03736303746700287,0.5954001918435097,0.03782085794955492,0.25659101642668247,0.0954863429069519,0.5169603787362576,0.17400567978620526,0.4947385042905808,0.020105944946408272,2 diff --git a/docs/DATA/Oklahoma.csv b/docs/DATA/Oklahoma.csv index 6103f04..a73fc1e 100644 --- a/docs/DATA/Oklahoma.csv +++ b/docs/DATA/Oklahoma.csv @@ -1,6 +1,3 @@ 
-day,sentiment,size -2020-07-12,-0.49,1 -2020-07-13,-0.03771785714285714,56 -2020-07-14,-0.049616666666666656,114 -2020-07-15,-0.021383098591549306,71 -2020-07-16,0.012051515151515145,66 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.1477663889527321,0.022242305800318718,0.065678964368999,0.004587358882417903,0.1023437436670065,0.8993495255708694,0.06155466847121715,0.0792778106406331,0.028796524740755558,0.4785703159868718,0.2248607985675335,0.5495398715138435,0.4336628336459398,4 +2020-11-05,0.2791905403137207,0.034177649766206734,0.21949520707130432,0.012554931454360485,0.21847625076770785,0.2731767296791077,0.018384028226137158,0.05614161863923073,0.08714407682418822,0.09526090323925017,0.35984158515930176,0.1781417280435562,0.019812509417533875,1 diff --git a/docs/DATA/Oregon.csv b/docs/DATA/Oregon.csv index 995d820..daa96d7 100644 --- a/docs/DATA/Oregon.csv +++ b/docs/DATA/Oregon.csv @@ -1,6 +1,2 @@ -day,sentiment,size -2020-07-12,0.12366666666666666,6 -2020-07-13,-0.004040206185567008,97 -2020-07-14,-0.06045765765765767,111 -2020-07-15,-0.016810280373831782,107 -2020-07-16,-0.06701194029850749,67 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.3103470553954442,0.031520494570334755,0.1853221133351326,0.03437355993729499,0.14988094640688765,0.7411392347680198,0.056507445561389126,0.21119742592175803,0.039042267172286906,0.6057435091998842,0.21578983838359514,0.5689535505241818,0.06967910762048429,9 diff --git a/docs/DATA/Pensilvania.csv b/docs/DATA/Pensilvania.csv index 7ca53cc..0dd808d 100644 --- a/docs/DATA/Pensilvania.csv +++ b/docs/DATA/Pensilvania.csv @@ -1,6 +1,4 @@ -day,sentiment,size -2020-07-12,-0.15682222222222225,9 -2020-07-13,-0.010927741935483869,155 -2020-07-14,-0.03341967213114754,244 -2020-07-15,-0.09797852760736198,163 -2020-07-16,-0.038158823529411766,85 
+day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.39974173402879387,0.047020108468132094,0.34628934442298487,0.026737208128906786,0.20709221909055486,0.49592340807430446,0.09054718402330764,0.1785487028537318,0.09981512930244207,0.2233074092073366,0.3773421938531101,0.26476822048425674,0.1380727599025704,16 +2020-11-04,0.18496670201420784,0.036324450901399054,0.09507363041241963,0.012547585181891918,0.12172836344689131,0.5618181427319845,0.15130366136630377,0.38847072919209796,0.10149402171373367,0.13511928046743074,0.2735749570031961,0.2566513742009799,0.03693141912420591,3 +2020-11-05,0.5001329034566879,0.08236178383231163,0.4152394197881222,0.024301677360199392,0.39646780863404274,0.3770449198782444,0.048789138440042734,0.05356716178357601,0.11404168978333473,0.06494025141000748,0.4531261771917343,0.10511394217610359,0.243684945628047,2 diff --git a/docs/DATA/Rhode Island.csv b/docs/DATA/Rhode Island.csv index 712bb06..1672dc1 100644 --- a/docs/DATA/Rhode Island.csv +++ b/docs/DATA/Rhode Island.csv @@ -1,5 +1,2 @@ -day,sentiment,size -2020-07-13,0.22779285714285713,14 -2020-07-14,-0.11217428571428573,35 -2020-07-15,0.045525,24 -2020-07-16,-0.06339999999999998,10 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.3562235401943326,0.05555573529563844,0.24425851805135607,0.06194998461287469,0.2924958044663072,0.7304949760437012,0.058424109034240244,0.12249785363674164,0.10935683944262564,0.5135649795643985,0.2285862047225237,0.5158318689092993,0.24386627990752457,10 diff --git a/docs/DATA/Tennessee.csv b/docs/DATA/Tennessee.csv index 0474e7e..be86c0b 100644 --- a/docs/DATA/Tennessee.csv +++ b/docs/DATA/Tennessee.csv @@ -1,6 +1,3 @@ -day,sentiment,size -2020-07-12,-0.19394000000000003,10 -2020-07-13,0.06709454545454546,165 -2020-07-14,-0.017867659574468085,235 -2020-07-15,-0.03731363636363637,110 
-2020-07-16,0.0854257731958763,97 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.17734362946992574,0.033298659985038365,0.06120210174809803,0.008209742753851142,0.16288184975697237,0.8775474429130554,0.09817996265536005,0.22298123941502787,0.04112972179427743,0.6445050400427796,0.1503368572077968,0.6153793226588856,0.2045568132942373,11 +2020-11-05,0.014690713025629519,0.006082680076360703,0.016155600547790527,0.003354669548571109,0.011880803853273392,0.9881938695907592,0.09716641157865524,0.3346337378025055,0.007794736418873072,0.9674357175827026,0.04712288454174995,0.905606746673584,0.04178175702691078,1 diff --git a/docs/DATA/Texas.csv b/docs/DATA/Texas.csv index 00ca1f0..994eb5d 100644 --- a/docs/DATA/Texas.csv +++ b/docs/DATA/Texas.csv @@ -1,6 +1,5 @@ -day,sentiment,size -2020-07-12,0.18467500000000003,36 -2020-07-13,-0.018557306590257876,698 -2020-07-14,0.008012913553895417,937 -2020-07-15,0.06737764298093588,577 -2020-07-16,-0.0506274193548387,372 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.31179834039150073,0.05335191594037626,0.22948083943421288,0.03177723999085304,0.2061874216389177,0.6783475199980395,0.06629626680764236,0.21238762489520013,0.08193361912188786,0.48846165425077614,0.2326921086891421,0.479733399797364,0.1185830259429557,56 +2020-11-04,0.4136319164186716,0.04414625125937164,0.29265895979478956,0.09400031429249792,0.27719351565465333,0.6222476921975613,0.06087350235320628,0.27678186222910883,0.08934869132936,0.36064473651349543,0.2340444277971983,0.385146994702518,0.039141710568219426,10 +2020-11-05,0.18780494139840206,0.024710624090706308,0.17613214378555617,0.0770431073421302,0.10939643710541229,0.8185851667076349,0.068519558912764,0.21611599003275236,0.05488298134878278,0.7063199177694818,0.11743727574745814,0.7326980202148358,0.2314596464857459,6 
+2020-11-06,0.08360745012760162,0.016258575022220608,0.04392597824335098,0.0038032419979572296,0.0563390925526619,0.9722236394882202,0.06594023108482361,0.2700243890285492,0.022910742089152336,0.9650841355323792,0.07830429822206497,0.891670823097229,0.04918128252029419,1 diff --git a/docs/DATA/Virginia.csv b/docs/DATA/Virginia.csv index 80502da..b1e004d 100644 --- a/docs/DATA/Virginia.csv +++ b/docs/DATA/Virginia.csv @@ -1,6 +1,5 @@ -day,sentiment,size -2020-07-12,0.1377545454545455,11 -2020-07-13,-0.03370496894409937,161 -2020-07-14,-0.11400969162995594,227 -2020-07-15,-0.0057849624060150396,133 -2020-07-16,0.04900833333333332,84 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.11516190692782402,0.019159846085434157,0.037961460960408054,0.007813693179438511,0.11899128090590239,0.9301064908504486,0.09273471310734749,0.24482717116673788,0.06664697437857588,0.8727378646532694,0.1316689302523931,0.8339523673057556,0.1003842627008756,6 +2020-11-04,0.8148767352104187,0.06754257157444954,0.7631795108318329,0.02877083793282509,0.38489942252635956,0.1922166794538498,0.011953257955610752,0.051746753975749016,0.17784768529236317,0.050618790090084076,0.565017819404602,0.061523297801613815,0.014065748080611229,2 +2020-11-05,0.3575411941856146,0.012291342718526721,0.3410279583185911,0.01842431555269286,0.10208889283239841,0.8913592100143433,0.10103473067283632,0.49559883773326874,0.05168268457055092,0.6472961604595184,0.23622479289770126,0.581752672791481,0.02830171957612038,2 +2020-11-06,0.8636137843132019,0.040007680654525764,0.6578770279884338,0.02305883914232254,0.30819785594940186,0.42112135887146,0.024091046303510662,0.122372567653656,0.08129870891571045,0.2104240357875824,0.6281933784484863,0.1878235042095185,0.031232865527272224,1 diff --git a/docs/DATA/Washington.csv b/docs/DATA/Washington.csv index bf73564..5c12031 100644 --- a/docs/DATA/Washington.csv +++ b/docs/DATA/Washington.csv @@ -1,6 
+1,4 @@ -day,sentiment,size -2020-07-12,-0.0267,6 -2020-07-13,-0.04552896551724137,145 -2020-07-14,-0.021117412935323383,201 -2020-07-15,0.04470708661417324,127 -2020-07-16,-0.04006521739130435,115 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.37925146316626557,0.04638053298306962,0.2874599786958209,0.08541549091158365,0.22812556568533182,0.6707992770329669,0.05400596879836586,0.16988353893436767,0.09240524505299551,0.5325121160389649,0.2561032072537475,0.513429316105666,0.08489911756650717,27 +2020-11-04,0.17574904362360635,0.03521699830889702,0.06994017989685138,0.004958752387513717,0.13549510141213736,0.656789168715477,0.09878545999526978,0.12775485465923944,0.07175746839493513,0.3534865602850914,0.3948871393998464,0.31880892813205713,0.3040543422102928,3 +2020-11-05,0.2893146065374215,0.028905972838401794,0.24227005677918592,0.14924512656095126,0.1304660119737188,0.7050752962629,0.037148680382718645,0.13309886989494166,0.03457685373723507,0.5763773638755083,0.17805999579528967,0.5628751671562592,0.0318237499644359,6 diff --git a/docs/DATA/Wisconsin.csv b/docs/DATA/Wisconsin.csv index 9e23391..cc3dbb2 100644 --- a/docs/DATA/Wisconsin.csv +++ b/docs/DATA/Wisconsin.csv @@ -1,6 +1,3 @@ -day,sentiment,size -2020-07-12,0.030499999999999996,9 -2020-07-13,0.026413924050632913,79 -2020-07-14,-0.023815463917525773,97 -2020-07-15,0.010784444444444449,45 -2020-07-16,-0.12386808510638299,47 +day,positive,trust,joy,love,optimism,negative,pessimism,sadness,surprise,anger,anticipation,disgust,fear,size +2020-11-03,0.35824910054604214,0.052772595857580505,0.328968729202946,0.03301657074674343,0.26106811842570704,0.5739962297181288,0.049683348120500646,0.15134682211404046,0.12492894257108371,0.41841272233674925,0.26444069668650627,0.4404254040370385,0.08336705761030316,6 
+2020-11-04,0.11114771726230781,0.018477432429790497,0.029384758323431015,0.004086107248440385,0.12010504739979903,0.9262698491414388,0.10588141654928525,0.16300296410918239,0.04672203740725916,0.7886518836021423,0.18923604115843773,0.7310778101285299,0.289490374426047,3 diff --git a/docs/index.html b/docs/index.html index 0617418..8c9e519 100644 --- a/docs/index.html +++ b/docs/index.html @@ -31,8 +31,8 @@
diff --git a/dump.py b/dump.py index 6aeb400..f9f016a 100644 --- a/dump.py +++ b/dump.py @@ -50,13 +50,14 @@ def aggregate_n_dump(lan, config, days, country_code): negative_emotions = ['negative', 'pessimism', 'sadness', 'surprise', 'anger', 'anticipation', 'disgust', 'fear'] for sentiment in positive_emotions+negative_emotions: places[sentiment] = tweets.groupby(["day", "state"])[sentiment].apply(lambda x: np.mean([float(s) for s in x])) - places["size"] = tweets.groupby(["day", "state"]).sentiment.apply(lambda x: len(x)) + places["size"] = tweets.groupby(["day", "state"]).positive.apply(lambda x: len(x)) for abbr in state_map: state = state_map[abbr] if state in places.index.get_level_values(1): places.xs(state, level=1).reset_index().to_csv("docs/DATA/"+state+".csv", index=False) + state_map = {"NV": "Nevada", "TX": "Texas", "FL": "Florida", diff --git a/twitter/sentiment.py b/twitter/sentiment.py index 4e271ae..9319181 100644 --- a/twitter/sentiment.py +++ b/twitter/sentiment.py @@ -1,14 +1,29 @@ # https://github.com/cjhutto/vaderSentiment from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer +from transformers import RobertaTokenizer +import torch +from bert import score, process_tweet, model_path, EMOTIONS, BERTClass +from torch import cuda +device = 'cuda' if cuda.is_available() else 'cpu' +tokenizer = RobertaTokenizer.from_pretrained("roberta-large") analyzer = SentimentIntensityAnalyzer() +model = BERTClass(num_of_cols=len(EMOTIONS)).to(device) +model.load_state_dict(torch.load(model_path, map_location=lambda storage, loc: storage)) + def analyse(tweet): return analyzer.polarity_scores(tweet) +def analyse_with_roberta(tweet): + processed_tweet = process_tweet(tweet_text=tweet, tokenizer=tokenizer, max_len=100) + scores = score(model, processed_tweet) + return scores + + def analyse_per_language(tweet, lan): """ returns the sentiment of a tweet. 
@@ -18,7 +33,7 @@ def analyse_per_language(tweet, lan): :return: """ if lan == 'en': - return analyse(tweet) + return analyse_with_roberta(tweet) else: - return {'compound': 0.5} + return {x: 0.5 for x in EMOTIONS} diff --git a/twitter/stream.py b/twitter/stream.py index 60aecbd..38b569b 100644 --- a/twitter/stream.py +++ b/twitter/stream.py @@ -37,6 +37,12 @@ def __init__(self, lan: str, self.country = [] self.country_code = [] self.full_name = [] + + #### emotions + self.anger, self.anticipation, self.disgust, self.fear, self.joy = [], [], [], [], [] + self.love, self.optimism, self.pessimism, self.sadness, self.surprise = [], [], [], [], [] + self.trust, self.positive, self.negative = [], [], [] + self.handler = handler self.update_data_size = update_data_size self.max_size = max_size @@ -59,7 +65,7 @@ def on_status(self, status): lang = detect(txt) if lang == self.lan and txt not in self.texts: self.locations.append(user_location) - self.sentiments.append(analyse_per_language(txt, self.lan)["compound"]) + # self.sentiments.append(analyse_per_language(txt, self.lan)["compound"]) self.created_at.append(created_at) self.texts.append(txt) self.retweet.append(is_retweet) @@ -77,15 +83,30 @@ def on_status(self, status): self.full_name.append(None) if self.get_size_of_data() % self.update_data_size == 0: self.dump_data() - except: - print(f"Could not detect the language for: {txt}") + scores = analyse_per_language(txt, lang) + self.anticipation.append(scores['anticipation']) + self.anger.append(scores['anger']) + self.disgust.append(scores['disgust']) + self.fear.append(scores['fear']) + self.joy.append(scores['joy']) + self.love.append(scores['love']) + self.optimism.append(scores['optimism']) + self.pessimism.append(scores['pessimism']) + self.sadness.append(scores['sadness']) + self.surprise.append(scores['surprise']) + self.trust.append(scores['trust']) + self.positive.append(scores['positive']) + self.negative.append(scores['negative']) + except Exception as ex: 
+ print(ex) #todo: add to logger def get_size_of_data(self): return len(self.texts) def get_last_results(self, num_of_results=10): - return {'sentiment': self.sentiments[-num_of_results:], + return { + # 'sentiment': self.sentiments[-num_of_results:], 'tweet_id': self.tweet_ids[-num_of_results:], 'text': self.texts[-num_of_results:], 'user_location': self.locations[-num_of_results:], @@ -95,7 +116,21 @@ def get_last_results(self, num_of_results=10): 'place_type': self.place_types[-num_of_results:], 'country': self.country[-num_of_results:], 'country_code': self.country_code[-num_of_results:], - 'full_name': self.full_name[-num_of_results:]} + 'full_name': self.full_name[-num_of_results:], + 'anger': self.anger[-num_of_results:], + 'anticipation': self.anticipation[-num_of_results:], + 'disgust': self.disgust[-num_of_results:], + 'fear': self.fear[-num_of_results:], + 'joy': self.joy[-num_of_results:], + 'love': self.love[-num_of_results:], + 'optimism': self.optimism[-num_of_results:], + 'pessimism': self.pessimism[-num_of_results:], + 'sadness': self.sadness[-num_of_results:], + 'surprise': self.surprise[-num_of_results:], + 'trust': self.trust[-num_of_results:], + 'positive': self.positive[-num_of_results:], + 'negative': self.negative[-num_of_results:] + } def dump_data(self): buffered_data = self.get_last_results(num_of_results=self.update_data_size) @@ -120,14 +155,19 @@ def init_lists(self): self.country = [] self.country_code = [] self.full_name = [] + self.anger, self.anticipation, self.disgust, self.fear, self.joy = [], [], [], [], [] + self.love, self.optimism, self.pessimism, self.sadness, self.surprise = [], [], [], [], [] + self.trust, self.positive, self.negative = [], [], [] def empty_lists(self): """ empties the lists, calls the garbage collector and re-initialize the lists :return: """ - del self.texts, self.sentiments, self. 
locations, self.created_at, self.users, \ - self.retweet, self.tweet_ids, self.place_types, self.country, self.country_code, self.full_name + del self.texts, self.sentiments, self. locations, self.created_at, self.users + del self.retweet, self.tweet_ids, self.place_types, self.country, self.country_code, self.full_name + del self.anger, self.anticipation, self.disgust, self.fear, self.joy, self.love, self.optimism + del self.pessimism, self.sadness, self.surprise, self.trust, self.positive, self.negative gc.collect() self.init_lists()