diff --git a/.dvc/.gitignore b/.dvc/.gitignore new file mode 100644 index 0000000..528f30c --- /dev/null +++ b/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/.dvc/config b/.dvc/config new file mode 100644 index 0000000..e69de29 diff --git a/.dvcignore b/.dvcignore new file mode 100644 index 0000000..5197305 --- /dev/null +++ b/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index eefca6a..712ff5d 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -1,29 +1,29 @@ -name: twitter-data-analysis - -on: - push: - branches: [main] - pull_request: - branches: [main] - -permissions: - contents: read - -jobs: - build: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.10 - uses: actions/setup-python@v3 - with: - python-version: "3.10" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Test with pytest - run: | - python -m pytest +name: twitter-data-analysis + +on: + push: + branches: [main] + pull_request: + branches: [main] + +permissions: + contents: read + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.10 + uses: actions/setup-python@v3 + with: + python-version: "3.10" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Test with pytest + run: | + python -m pytest diff --git a/.gitignore b/.gitignore index 54e6782..7081366 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ -__pycache__/ -data/ \ No newline at end of file +__pycache__/ +data/ +.ipynb_checkpoints diff --git a/LICENSE 
b/LICENSE index a13471e..e3b94af 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,21 @@ -MIT License - -Copyright (c) 2022 10 Academy - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +MIT License + +Copyright (c) 2022 10 Academy + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index ba4e845..b64ff6b 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,29 @@ -# Twitter-Data-Analysis - -### So here are the bare minimum requirement for completing this task - -1. Use this template to create a repository called Twitter-Data-Analysis in your github account. See ["Creating a repository from a template."](https://docs.github.com/en/articles/creating-a-repository-from-a-template) for more information. -2. [Download](https://drive.google.com/drive/folders/19G8dmehf9vU0u6VTKGV-yWsQOn3IvPsd) and extract the necessary data and put it in the data directory. The data should not not be added to git tracking. -3. Create a branch called “bugfix” to fix the bugs in the fix_clean_tweets_dataframe.py and fix_extract_dataframe.py -4. In branch “bugfix” use the git mv command to rename fix_clean_tweets_dataframe.py to clean_tweets_dataframe.py and fix_extract_dataframe.py to extract_dataframe.py -5. Fix the bugs on clean_tweets_dataframe.py and extract_dataframe.py -6. Multiple times, push the code you are working on to git, and once the fix is complete, merge the fix_bug branch to main branch -7. Create a new branch called “testing” for updating the unit tests in the test/ folder to be applicable to the code you fixed. - a. Build your unit and integration tests to run on small data (< 1 MB) that you copied from what is provided - avoid pushing large data to github - b. Think about the key elements (units can be functions, classes, or modules; multiple of them working together to accomplish a task requires integration testing) of the code base you are working on. 
Write the following - - Unit tests: for individual key functions and classes - - Integration tests: for the integration of multiple units working together -8. After completing the unit and integration tests, merge the “testing” branch with the main branch -9. In all cases when you merge, make sure you first do Pull Request, review, then accept the merge. -10. Use github actions in your repository such that when you git push new code (or merge a branch) to the main branch, the unit test in tests/*.py runs automatically. All tests should pass. - - -After Completing this Challenge, you would have explore - -- Unittesting -- Modular Coding -- Software Engineering Best Practices -- Python Package Structure -- Bug Fix (Debugging) - -Have Fun and Cheers +# Twitter-Data-Analysis + +### So here are the bare minimum requirement for completing this task + +1. Use this template to create a repository called Twitter-Data-Analysis in your github account. See ["Creating a repository from a template."](https://docs.github.com/en/articles/creating-a-repository-from-a-template) for more information. +2. [Download](https://drive.google.com/drive/folders/19G8dmehf9vU0u6VTKGV-yWsQOn3IvPsd) and extract the necessary data and put it in the data directory. The data should not not be added to git tracking. +3. Create a branch called “bugfix” to fix the bugs in the fix_clean_tweets_dataframe.py and fix_extract_dataframe.py +4. In branch “bugfix” use the git mv command to rename fix_clean_tweets_dataframe.py to clean_tweets_dataframe.py and fix_extract_dataframe.py to extract_dataframe.py +5. Fix the bugs on clean_tweets_dataframe.py and extract_dataframe.py +6. Multiple times, push the code you are working on to git, and once the fix is complete, merge the fix_bug branch to main branch +7. Create a new branch called “testing” for updating the unit tests in the test/ folder to be applicable to the code you fixed. + a. 
Build your unit and integration tests to run on small data (< 1 MB) that you copied from what is provided - avoid pushing large data to github + b. Think about the key elements (units can be functions, classes, or modules; multiple of them working together to accomplish a task requires integration testing) of the code base you are working on. Write the following + - Unit tests: for individual key functions and classes + - Integration tests: for the integration of multiple units working together +8. After completing the unit and integration tests, merge the “testing” branch with the main branch +9. In all cases when you merge, make sure you first do Pull Request, review, then accept the merge. +10. Use github actions in your repository such that when you git push new code (or merge a branch) to the main branch, the unit test in tests/*.py runs automatically. All tests should pass. + + +After Completing this Challenge, you would have explore + +- Unittesting +- Modular Coding +- Software Engineering Best Practices +- Python Package Structure +- Bug Fix (Debugging) + +Have Fun and Cheers diff --git a/clean_tweets_dataframe.py b/clean_tweets_dataframe.py new file mode 100644 index 0000000..49f6602 --- /dev/null +++ b/clean_tweets_dataframe.py @@ -0,0 +1,143 @@ +import re +import pandas as pd +from defaults import * + +class Clean_Tweets: + """ + The PEP8 Standard AMAZING!!! + """ + def __init__(self, df:pd.DataFrame): + self.df = df + print('Automation in Action...!!!') + + def drop_unwanted_column(self, df:pd.DataFrame)->pd.DataFrame: + """ + remove rows that has column names. This error originated from + the data collection stage. 
+ """ + unwanted_rows = self.df[self.df['retweet_count'] == 'retweet_count' ].index + self.df.drop(unwanted_rows , inplace=True) + self.df = self.df[self.df['polarity'] != 'polarity'] + return df + + def drop_duplicate(self, df:pd.DataFrame)->pd.DataFrame: + """ + drop duplicate rows + """ + self.df.drop_duplicates(subset='original_text', inplace=True) + return df + + def convert_to_datetime(self, df:pd.DataFrame)->pd.DataFrame: + """ + convert column to datetime + """ + self.df['created_at'] = pd.to_datetime(self.df['created_at'], errors='coerce') + return df + + def convert_to_numbers(self, df:pd.DataFrame)->pd.DataFrame: + """ + convert columns like polarity, subjectivity, retweet_count + favorite_count etc to numbers + """ + self.df['id'] = pd.to_numeric(self.df['id'], errors='coerce') + self.df['subjectivity'] = pd.to_numeric(self.df['subjectivity'], + errors='coerce') + self.df['listed_count'] = pd.to_numeric(self.df['listed_count'], + errors='coerce') + self.df['retweet_count'] = pd.to_numeric(self.df['retweet_count'], + errors='coerce') + self.df['friends_count'] = pd.to_numeric(self.df['friends_count'], + errors='coerce') + self.df['favorite_count'] = pd.to_numeric(self.df['favorite_count'], + errors='coerce') + self.df['statuses_count'] = pd.to_numeric(self.df['statuses_count'], + errors='coerce') + self.df['followers_count'] = pd.to_numeric(self.df['followers_count'], + errors='coerce') + self.df['polarity'] = pd.to_numeric(self.df['polarity'], + errors='coerce') + return df + + def remove_non_english_tweets(self, df:pd.DataFrame)->pd.DataFrame: + """ + remove non english tweets from lang + """ + self.df.query("lang == 'en'", inplace=True) + return df + + def drop_nulls(self, df: pd.DataFrame) -> pd.DataFrame: + """ + drop nulls + """ + self.df = self.df.dropna(axis=0, how='any', inplace=False) + return df + + def find_hashtags(self, df: pd.DataFrame) -> pd.DataFrame: + """ + Method to find hashtags from tweets + This function will extract hashtags + """ 
+ self.df = re.findall('(#[A-Za-z]+[A-Za-z0-9-_]+)', df) + return df + + def text_category(self, series: pd.Series) -> list: + """ + function that return positive, negative or neutral based on polarity + """ + polarities = [] + for pol in series: + if pol >= 0.00000000001: + polarities.append("positive") + elif pol == 0.00000000000: + polarities.append("neutral") + elif pol <= -0.00000000001: + polarities.append("negative") + else: + polarities.append('UNK') + return polarities + + def fill_missing(self, df: pd.DataFrame, column: str, value): + """ + fill null values of a specific column with the provided value + """ + + df[column] = df[column].fillna(value) + + return df + + def replace_empty_string(self, df:pd.DataFrame, column: str, value: str): + """ + replace empty strings in a specific column with the provided value + """ + + df[column] = df[column].apply(lambda x: value if x == "" else x) + + return df + + def remove_characters(self, df: pd.DataFrame, column: str): + """ + removes non-alphanumeric characters with the exception of underscore hyphen and space + from the specified column + """ + + df[column] = df[column].apply(lambda text: re.sub("[^a-zA-Z0-9\s_-]", "", text)) + + return df + + def extract_device_name(self, source: str): + """ + returns device name from source text + """ + res = re.split('<|>', source)[2].strip() + return + +if __name__ == "__main__": + """ + read the twitter dataset and Pass the data to the Clean_Tweets + class + """ + global_tweet_df = pd.read_json(global_data, lines=True) + global_cleaner = Clean_Tweets(global_tweet_df) + + african_tweet_df = pd.read_json(african_data, lines=True) + african_cleaner = Clean_Tweets(african_tweet_df) diff --git a/defaults.py b/defaults.py new file mode 100644 index 0000000..c36af2f --- /dev/null +++ b/defaults.py @@ -0,0 +1,16 @@ +""" +A script to store all default paths and strings. 
+""" + +# the global data set +global_data = 'data/global_twitter_data.json' + +# the processed global data set +processed_global_data = 'data/processed_global_tweet_data.json' + + +# the african data set +african_data = 'data/africa_twitter_data.json' + +# the processed african data set +processed_african_data = 'data/processed_africa_tweet_data.json' diff --git a/extract_dataframe.py b/extract_dataframe.py new file mode 100644 index 0000000..add5256 --- /dev/null +++ b/extract_dataframe.py @@ -0,0 +1,277 @@ +import json +import pandas as pd +import numpy as np +from textblob import TextBlob +from defaults import * + + +def read_json(json_file: str) -> list: + """ + json file reader to open and read json files into a list + Args: + ----- + json_file: str - path of a json file + + Returns + ------- + length of the json file and a list of json + """ + + tweets_data = [] + for tweets in open(json_file, 'r'): + tweets_data.append(json.loads(tweets)) + return len(tweets_data), tweets_data + +class TweetDfExtractor: + """ + this function will parse tweets json into a pandas dataframe + + Return + ------ + dataframe + """ + def __init__(self, tweets_list): + """ + The initializer for the TweetDf Extractor class + """ + self.tweets_list = tweets_list + + def find_statuses_count(self)->list: + """ + an example function + """ + statuses_count = [x['user']['statuses_count'] + for x in self.tweets_list] + return statuses_count + + def find_full_text(self)->list: + """ + a function to find and return full text of a twit from a dataframe + """ + text = [] + for x in self.tweets_list: + try: + text.append(x['full_text']) + except KeyError: + #text.append(x['text']) + text.append('NA') + return text + + def find_sentiments(self, text)->list: + """ + a function to find and return polarity and subjectivity of a twit + """ + polarity = [TextBlob(x).polarity for x in text] + subjectivity = [TextBlob(x).subjectivity for x in text] + return (polarity, subjectivity) + + def 
find_created_time(self)->list: + """ + a function to find and return the date the twit was created at + """ + created_at = [x['created_at'] for x in self.tweets_list] + return created_at + + def find_source(self)->list: + """ + a function to find and return the source of a tweet + """ + source = [x['source'] for x in self.tweets_list] + return source + + def find_screen_name(self)->list: + """ + a function to find and return the screen name from where the + tweet originated + """ + screen_name = [x['user']['screen_name'] for x in self.tweets_list] + return screen_name + + def find_followers_count(self)->list: + """ + function to find and return the follower count of a twitter + """ + followers_count = [x['user']['followers_count'] for x in + self.tweets_list] + return followers_count + + def find_friends_count(self)->list: + """ + function to find and return the friends count of a twitter + """ + friends_count = [x['user']['friends_count'] for x in self.tweets_list] + return friends_count + + def is_sensitive(self)->list: + """ + try: + is_sensitive = [x['possibly_sensitive'] for x in self.tweets_list] + except KeyError: + is_sensitive = None + return is_sensitive + """ + # function to find and return the possible sensitivity of a tweet + is_sensitive = [] + for tweet in self.tweets_list: + if 'possibly_sensitive' in tweet.keys(): + is_sensitive.append(tweet['possibly_sensitive']) + else: + is_sensitive.append(None) + return is_sensitive + + def find_favorite_count(self)->list: + """ + function to find and return the favorite count of a tweet + """ + favorite_count = [] + for tweet in self.tweets_list: + if 'retweeted_status' in tweet.keys(): + favorite_count.append( + tweet['retweeted_status']['favorite_count']) + else: + favorite_count.append(0) + return favorite_count + + def find_retweet_count(self)->list: + """ + function to find and return the retweet count of a tweet + """ + retweet_count = [] + for tweet in self.tweets_list: + if 'retweeted_status' in 
tweet.keys(): + retweet_count.append( + tweet['retweeted_status']['retweet_count']) + else: + retweet_count.append(0) + return retweet_count + + def find_hashtags(self)->list: + """ + function to find and return the hashtags of a tweet + """ + hashtags = [x['entities']['hashtags'] for x in self.tweets_list] + return hashtags + + def find_mentions(self)->list: + """ + function to find and return the mentions of a tweet + """ + mentions = [x['entities']['user_mentions'] for x in self.tweets_list] + return mentions + + def find_location(self)->list: + """ + function to find and return the location of a tweet + """ + location = [x.get('user', {}).get('location', None) for x in + self.tweets_list] + return location + + def find_lang(self) -> list: + """ + function to find and return the language of a tweet + """ + lang = [x['lang'] for x in self.tweets_list] + return lang + + # TODO : make this method + def find_authors(self) -> list: + """ + function to find and return authors of tweets + """ + authors = [] + for x in range(22000): + authors.append(x) + return authors + + def get_tweet_df(self, save: bool=False, save_as : str = 'processed_tweet_data', as_csv : bool = False) -> pd.DataFrame: + """ + required columns to be generated + """ + # added_column_Names = ['status_count', 'screen_name'] + selected_columns = ['created_at', 'source', 'original_text','polarity', + 'subjectivity', 'lang', 'favorite_count', 'status_count', + 'retweet_count', 'screen_name', 'original_author', + 'followers_count','friends_count','possibly_sensitive', + 'hashtags', 'user_mentions', 'place'] + + created_at = self.find_created_time() + source = self.find_source() + text = self.find_full_text() + polarity, subjectivity = self.find_sentiments(text) + lang = self.find_lang() + fav_count = self.find_favorite_count() + status_count = self.find_statuses_count() + retweet_count = self.find_retweet_count() + screen_name = self.find_screen_name() + author = self.find_screen_name() + followers_count 
= self.find_followers_count() + friends_count = self.find_friends_count() + sensitivity = self.is_sensitive() + hashtags = self.find_hashtags() + mentions = self.find_mentions() + location = self.find_location() + + selected_data = [created_at, source, text, polarity, subjectivity, lang, fav_count, status_count, retweet_count, screen_name, author, followers_count, friends_count, sensitivity, hashtags, mentions, location] + + sel_data = {} + for i in range(0, len(selected_columns), 1): + sel_data[selected_columns[i]] = selected_data[i] + + final_dataframe = pd.DataFrame(data = sel_data) + + """print({len(status_count)}, {len(created_at)}, {len(source)}, + {len(text)}, {len(polarity)}, {len(subjectivity)}, + {len(fav_count)}, {len(retweet_count)}, {len(screen_name)}, + {len(followers_count)}, {len(friends_count)}, + {len(sensitivity)}, {len(hashtags)}, {len(mentions)}, + {len(location)}, {len(lang)}, {len(author)})""" + + """print(status_count, created_at, source, + text, polarity, subjectivity, + fav_count, retweet_count, screen_name, + followers_count, friends_count, + sensitivity, hashtags, mentions, + location, lang, author)""" + + """print({type(status_count)}, {type(created_at)}, {type(source)}, + {type(text)}, {type(polarity)}, {type(subjectivity)}, + {type(fav_count)}, {type(retweet_count)}, {type(screen_name)}, + {type(followers_count)}, {type(friends_count)}, + {type(sensitivity)}, {type(hashtags)}, {type(mentions)}, + {type(location)}, {type(lang)}, {type(author)})""" + + if save: + if as_csv: + data_path = 'data/' + save_as + '.csv' + final_dataframe.to_csv(data_path, index=False) + print(f'File {save_as} successfully saved as {data_path}') + else: + data_path = 'data/' + save_as + '.json' + final_dataframe.to_json(data_path, indent=4) + print(f'File {save_as} successfully saved as {data_path}') + return final_dataframe + + +if __name__ == "__main__": + # required column to be generated you should be creative and add more features + columns = 
['created_at', 'source', 'original_text','clean_text', 'sentiment','polarity','subjectivity', 'lang', 'favorite_count', 'retweet_count', + 'original_author', 'screen_count', 'followers_count','friends_count','possibly_sensitive', 'hashtags', 'user_mentions', 'place', 'place_coord_boundaries'] + + # for the global data set + _, global_tweet_list = read_json(global_data) + # to make sure all the data is passe to he + print(f"Total number of data: {_}") + global_tweet = TweetDfExtractor(global_tweet_list) + global_tweet_df = global_tweet.get_tweet_df(save= True, save_as='processed_global_tweet_data') + print(global_tweet_df) + + """# for the african data set + _, african_tweet_list = read_json(african_data) + # to make sure all the data is passe to he + print(f"Total number of data: {_}") + african_tweet = TweetDfExtractor(african_tweet_list) + african_tweet_df = african_tweet.get_tweet_df(save = True, save_as='processed_african_tweet_data') + print(african_tweet_df)""" + + # TODO : use all defined functions to generate a dataframe with the specified columns above diff --git a/fix_clean_tweets_dataframe.py b/fix_clean_tweets_dataframe.py deleted file mode 100644 index 7b45a35..0000000 --- a/fix_clean_tweets_dataframe.py +++ /dev/null @@ -1,58 +0,0 @@ -class Clean_Tweets: - """ - The PEP8 Standard AMAZING!!! - """ - def __init__(self, df:pd.DataFrame): - self.df = df - print('Automation in Action...!!!') - - def drop_unwanted_column(self, df:pd.DataFrame)->pd.DataFrame: - """ - remove rows that has column names. This error originated from - the data collection stage. 
- """ - unwanted_rows = df[df['retweet_count'] == 'retweet_count' ].index - df.drop(unwanted_rows , inplace=True) - df = df[df['polarity'] != 'polarity'] - - return df - def drop_duplicate(self, df:pd.DataFrame)->pd.DataFrame: - """ - drop duplicate rows - """ - - --- - - return df - def convert_to_datetime(self, df:pd.DataFrame)->pd.DataFrame: - """ - convert column to datetime - """ - ---- - - ---- - - df = df[df['created_at'] >= '2020-12-31' ] - - return df - - def convert_to_numbers(self, df:pd.DataFrame)->pd.DataFrame: - """ - convert columns like polarity, subjectivity, retweet_count - favorite_count etc to numbers - """ - df['polarity'] = pd.---- - - ---- - ---- - - return df - - def remove_non_english_tweets(self, df:pd.DataFrame)->pd.DataFrame: - """ - remove non english tweets from lang - """ - - df = ---- - - return df \ No newline at end of file diff --git a/fix_extract_dataframe.py b/fix_extract_dataframe.py deleted file mode 100644 index 3bd792d..0000000 --- a/fix_extract_dataframe.py +++ /dev/null @@ -1,137 +0,0 @@ -import json -import pandas as pd -from textblob import TextBlob - - -def read_json(json_file: str)->list: - """ - json file reader to open and read json files into a list - Args: - ----- - json_file: str - path of a json file - - Returns - ------- - length of the json file and a list of json - """ - - tweets_data = [] - for tweets in open(json_file,'r'): - tweets_data.append(json.loads(tweets)) - - - return len(tweets_data), tweets_data - -class TweetDfExtractor: - """ - this function will parse tweets json into a pandas dataframe - - Return - ------ - dataframe - """ - def __init__(self, tweets_list): - - self.tweets_list = tweets_list - - # an example function - def find_statuses_count(self)->list: - statuses_count - - def find_full_text(self)->list: - text = - - - def find_sentiments(self, text)->list: - - return polarity, self.subjectivity - - def find_created_time(self)->list: - - return created_at - - def find_source(self)->list: - 
source = - - return source - - def find_screen_name(self)->list: - screen_name = - - def find_followers_count(self)->list: - followers_count = - - def find_friends_count(self)->list: - friends_count = - - def is_sensitive(self)->list: - try: - is_sensitive = [x['possibly_sensitive'] for x in self.tweets_list] - except KeyError: - is_sensitive = None - - return is_sensitive - - def find_favourite_count(self)->list: - - - def find_retweet_count(self)->list: - retweet_count = - - def find_hashtags(self)->list: - hashtags = - - def find_mentions(self)->list: - mentions = - - - def find_location(self)->list: - try: - location = self.tweets_list['user']['location'] - except TypeError: - location = '' - - return location - - - - - def get_tweet_df(self, save=False)->pd.DataFrame: - """required column to be generated you should be creative and add more features""" - - columns = ['created_at', 'source', 'original_text','polarity','subjectivity', 'lang', 'favorite_count', 'retweet_count', - 'original_author', 'followers_count','friends_count','possibly_sensitive', 'hashtags', 'user_mentions', 'place'] - - created_at = self.find_created_time() - source = self.find_source() - text = self.find_full_text() - polarity, subjectivity = self.find_sentiments(text) - lang = self.find_lang() - fav_count = self.find_favourite_count() - retweet_count = self.find_retweet_count() - screen_name = self.find_screen_name() - follower_count = self.find_followers_count() - friends_count = self.find_friends_count() - sensitivity = self.is_sensitive() - hashtags = self.find_hashtags() - mentions = self.find_mentions() - location = self.find_location() - data = zip(created_at, source, text, polarity, subjectivity, lang, fav_count, retweet_count, screen_name, follower_count, friends_count, sensitivity, hashtags, mentions, location) - df = pd.DataFrame(data=data, columns=columns) - - if save: - df.to_csv('processed_tweet_data.csv', index=False) - print('File Successfully Saved.!!!') - - return df - - 
-if __name__ == "__main__": - # required column to be generated you should be creative and add more features - columns = ['created_at', 'source', 'original_text','clean_text', 'sentiment','polarity','subjectivity', 'lang', 'favorite_count', 'retweet_count', - 'original_author', 'screen_count', 'followers_count','friends_count','possibly_sensitive', 'hashtags', 'user_mentions', 'place', 'place_coord_boundaries'] - _, tweet_list = read_json("../covid19.json") - tweet = TweetDfExtractor(tweet_list) - tweet_df = tweet.get_tweet_df() - - # use all defined functions to generate a dataframe with the specified columns above \ No newline at end of file diff --git a/notebooks/EDA.ipynb b/notebooks/EDA.ipynb new file mode 100644 index 0000000..4435bdc --- /dev/null +++ b/notebooks/EDA.ipynb @@ -0,0 +1,1623 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "# imports\n", + "import pandas as pd\n", + "import sys\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# import custom libraries and scripts\n", + "# sys.path.append(os.path.abspath(os.path.join(\"../..\")))\n", + "sys.path.append(\".\")\n", + "sys.path.append(\"..\")\n", + "\n", + "from defaults import *\n", + "from extract_dataframe import read_json\n", + "from extract_dataframe import TweetDfExtractor\n", + "from clean_tweets_dataframe import Clean_Tweets" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# read processed data set\n", + "tweets_df = pd.read_csv('../data/clean_data.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + "
created_atsourceoriginal_textpolaritysubjectivitylangfavorite_countstatus_countretweet_countscreen_nameoriginal_authorfollowers_countfriends_countpossibly_sensitivehashtagsuser_mentionsplace
02022-08-07 22:31:20+00:00Twitter for AndroidRT @i_ameztoy: Extra random image (I):\\n\\nLets...-1.250000e-010.190625en480972i_ameztoyi_ameztoy204972621unknown[{'text': 'City', 'indices': [132, 137]}][{'screen_name': 'i_ameztoy', 'name': 'Iban Am...unknown
12022-08-07 22:31:16+00:00Twitter for AndroidRT @IndoPac_Info: #China's media explains the ...-1.000000e-010.100000en6915831201ZIisqZIisq65272unknown[{'text': 'China', 'indices': [18, 24]}, {'tex...[{'screen_name': 'IndoPac_Info', 'name': 'Indo...unknown
22022-08-07 22:31:07+00:00Twitter for AndroidChina even cut off communication, they don't a...0.000000e+000.000000en016270Fin21FreeFin21Free85392unknown[{'text': 'XiJinping', 'indices': [127, 137]}][{'screen_name': 'ZelenskyyUa', 'name': 'Волод...Netherlands
32022-08-07 22:31:06+00:00Twitter for AndroidPutin to #XiJinping : I told you my friend, Ta...1.000000e-010.350000en016270Fin21FreeFin21Free85392unknown[{'text': 'XiJinping', 'indices': [9, 19]}][]Netherlands
42022-08-07 22:31:04+00:00Twitter for iPhoneRT @ChinaUncensored: I’m sorry, I thought Taiw...-6.938894e-180.556250en152118958381VizziniDoloresVizziniDolores9102608unknown[][{'screen_name': 'ChinaUncensored', 'name': 'C...Ayent, Schweiz
52022-08-07 22:31:02+00:00Twitter for AndroidRT @benedictrogers: We must not let this happe...2.000000e-010.500000en1164848336GraceCh15554845GraceCh15554845207540.0[{'text': 'Taiwan', 'indices': [84, 91]}][{'screen_name': 'benedictrogers', 'name': 'Be...Melbourne, Victoria
62022-08-07 22:30:59+00:00Twitter for AndroidRT @TGTM_Official: What kind of country can co...1.583333e-010.800000en11064173411Philipkuma1Philipkuma112264unknown[{'text': 'Taiwan', 'indices': [101, 108]}, {'...[{'screen_name': 'TGTM_Official', 'name': 'The...unknown
72022-08-07 22:30:59+00:00Twitter for AndroidRT @ChinaInfo777: #PinkFloyd singer Roger Wate...0.000000e+000.000000en10241025nhohn2011nhohn2011870508unknown[{'text': 'PinkFloyd', 'indices': [18, 28]}, {...[{'screen_name': 'ChinaInfo777', 'name': 'Chin...Florida, USA
82022-08-07 22:30:50+00:00Twitter for AndroidRT @AmbQinGang: China's SC&amp;FM Wang Yi elab...0.000000e+000.000000en1221630239ClaudioColomaRIClaudioColomaRI127263unknown[{'text': 'Taiwan', 'indices': [80, 87]}][{'screen_name': 'AmbQinGang', 'name': 'Qin Ga...El mundo periférico
92022-08-07 22:30:45+00:00Twitter Web AppRT @CGMeifangZhang: Chinese ambassador to the ...2.000000e-010.375000en4910718825jmarzola1jmarzola1213877unknown[{'text': 'USA', 'indices': [66, 70]}, {'text'...[{'screen_name': 'CGMeifangZhang', 'name': 'Zh...unknown
\n", + "
" + ], + "text/plain": [ + " created_at source \\\n", + "0 2022-08-07 22:31:20+00:00 Twitter for Android \n", + "1 2022-08-07 22:31:16+00:00 Twitter for Android \n", + "2 2022-08-07 22:31:07+00:00 Twitter for Android \n", + "3 2022-08-07 22:31:06+00:00 Twitter for Android \n", + "4 2022-08-07 22:31:04+00:00 Twitter for iPhone \n", + "5 2022-08-07 22:31:02+00:00 Twitter for Android \n", + "6 2022-08-07 22:30:59+00:00 Twitter for Android \n", + "7 2022-08-07 22:30:59+00:00 Twitter for Android \n", + "8 2022-08-07 22:30:50+00:00 Twitter for Android \n", + "9 2022-08-07 22:30:45+00:00 Twitter Web App \n", + "\n", + " original_text polarity \\\n", + "0 RT @i_ameztoy: Extra random image (I):\\n\\nLets... -1.250000e-01 \n", + "1 RT @IndoPac_Info: #China's media explains the ... -1.000000e-01 \n", + "2 China even cut off communication, they don't a... 0.000000e+00 \n", + "3 Putin to #XiJinping : I told you my friend, Ta... 1.000000e-01 \n", + "4 RT @ChinaUncensored: I’m sorry, I thought Taiw... -6.938894e-18 \n", + "5 RT @benedictrogers: We must not let this happe... 2.000000e-01 \n", + "6 RT @TGTM_Official: What kind of country can co... 1.583333e-01 \n", + "7 RT @ChinaInfo777: #PinkFloyd singer Roger Wate... 0.000000e+00 \n", + "8 RT @AmbQinGang: China's SC&FM Wang Yi elab... 0.000000e+00 \n", + "9 RT @CGMeifangZhang: Chinese ambassador to the ... 
2.000000e-01 \n", + "\n", + " subjectivity lang favorite_count status_count retweet_count \\\n", + "0 0.190625 en 4 8097 2 \n", + "1 0.100000 en 691 5831 201 \n", + "2 0.000000 en 0 1627 0 \n", + "3 0.350000 en 0 1627 0 \n", + "4 0.556250 en 1521 18958 381 \n", + "5 0.500000 en 116 48483 36 \n", + "6 0.800000 en 1106 4173 411 \n", + "7 0.000000 en 10 24102 5 \n", + "8 0.000000 en 1221 630 239 \n", + "9 0.375000 en 49 107188 25 \n", + "\n", + " screen_name original_author followers_count friends_count \\\n", + "0 i_ameztoy i_ameztoy 20497 2621 \n", + "1 ZIisq ZIisq 65 272 \n", + "2 Fin21Free Fin21Free 85 392 \n", + "3 Fin21Free Fin21Free 85 392 \n", + "4 VizziniDolores VizziniDolores 910 2608 \n", + "5 GraceCh15554845 GraceCh15554845 207 54 \n", + "6 Philipkuma1 Philipkuma1 12 264 \n", + "7 nhohn2011 nhohn2011 870 508 \n", + "8 ClaudioColomaRI ClaudioColomaRI 127 263 \n", + "9 jmarzola1 jmarzola1 213 877 \n", + "\n", + " possibly_sensitive hashtags \\\n", + "0 unknown [{'text': 'City', 'indices': [132, 137]}] \n", + "1 unknown [{'text': 'China', 'indices': [18, 24]}, {'tex... \n", + "2 unknown [{'text': 'XiJinping', 'indices': [127, 137]}] \n", + "3 unknown [{'text': 'XiJinping', 'indices': [9, 19]}] \n", + "4 unknown [] \n", + "5 0.0 [{'text': 'Taiwan', 'indices': [84, 91]}] \n", + "6 unknown [{'text': 'Taiwan', 'indices': [101, 108]}, {'... \n", + "7 unknown [{'text': 'PinkFloyd', 'indices': [18, 28]}, {... \n", + "8 unknown [{'text': 'Taiwan', 'indices': [80, 87]}] \n", + "9 unknown [{'text': 'USA', 'indices': [66, 70]}, {'text'... \n", + "\n", + " user_mentions place \n", + "0 [{'screen_name': 'i_ameztoy', 'name': 'Iban Am... unknown \n", + "1 [{'screen_name': 'IndoPac_Info', 'name': 'Indo... unknown \n", + "2 [{'screen_name': 'ZelenskyyUa', 'name': 'Волод... Netherlands \n", + "3 [] Netherlands \n", + "4 [{'screen_name': 'ChinaUncensored', 'name': 'C... Ayent, Schweiz \n", + "5 [{'screen_name': 'benedictrogers', 'name': 'Be... 
Melbourne, Victoria \n", + "6 [{'screen_name': 'TGTM_Official', 'name': 'The... unknown \n", + "7 [{'screen_name': 'ChinaInfo777', 'name': 'Chin... Florida, USA \n", + "8 [{'screen_name': 'AmbQinGang', 'name': 'Qin Ga... El mundo periférico \n", + "9 [{'screen_name': 'CGMeifangZhang', 'name': 'Zh... unknown " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tweets_df.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Automation in Action...!!!\n" + ] + } + ], + "source": [ + "cleaner = Clean_Tweets(tweets_df.copy())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Making explorations" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(7440, 17)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# shape of dataframe\n", + "tweets_df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 7440 entries, 0 to 7439\n", + "Data columns (total 17 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 created_at 7440 non-null object \n", + " 1 source 7440 non-null object \n", + " 2 original_text 7440 non-null object \n", + " 3 polarity 7440 non-null float64\n", + " 4 subjectivity 7440 non-null float64\n", + " 5 lang 7440 non-null object \n", + " 6 favorite_count 7440 non-null int64 \n", + " 7 status_count 7440 non-null int64 \n", + " 8 retweet_count 7440 non-null int64 \n", + " 9 screen_name 7440 non-null object \n", + " 10 original_author 7440 non-null object \n", + " 11 followers_count 7440 non-null int64 \n", + " 12 friends_count 7440 non-null int64 \n", 
+ " 13 possibly_sensitive 7440 non-null object \n", + " 14 hashtags 7440 non-null object \n", + " 15 user_mentions 7440 non-null object \n", + " 16 place 7440 non-null object \n", + "dtypes: float64(2), int64(5), object(10)\n", + "memory usage: 988.2+ KB\n" + ] + } + ], + "source": [ + "tweets_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "created_at 0\n", + "source 0\n", + "original_text 0\n", + "polarity 0\n", + "subjectivity 0\n", + "lang 0\n", + "favorite_count 0\n", + "status_count 0\n", + "retweet_count 0\n", + "screen_name 0\n", + "original_author 0\n", + "followers_count 0\n", + "friends_count 0\n", + "possibly_sensitive 0\n", + "hashtags 0\n", + "user_mentions 0\n", + "place 0\n", + "dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tweets_df.isna().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "No missing values" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
polaritysubjectivityfavorite_countstatus_countretweet_countfollowers_countfriends_count
count7440.0000007440.0000007440.0000007.440000e+037440.0000007.440000e+037440.000000
mean0.0567830.295638203.3512104.900565e+0438.7131724.107761e+041715.558871
std0.2301560.2878051655.6901481.432954e+05326.7570254.910108e+055305.897528
min-1.0000000.0000000.0000001.000000e+000.0000000.000000e+000.000000
25%0.0000000.0000000.0000001.549250e+030.0000007.275000e+01106.000000
50%0.0000000.2500000.0000007.904000e+030.0000003.670000e+02440.000000
75%0.1363640.5000004.0000003.510900e+042.0000001.833000e+031505.000000
max1.0000001.00000065170.0000004.108317e+0617409.0000001.449852e+07208360.000000
\n", + "
" + ], + "text/plain": [ + " polarity subjectivity favorite_count status_count retweet_count \\\n", + "count 7440.000000 7440.000000 7440.000000 7.440000e+03 7440.000000 \n", + "mean 0.056783 0.295638 203.351210 4.900565e+04 38.713172 \n", + "std 0.230156 0.287805 1655.690148 1.432954e+05 326.757025 \n", + "min -1.000000 0.000000 0.000000 1.000000e+00 0.000000 \n", + "25% 0.000000 0.000000 0.000000 1.549250e+03 0.000000 \n", + "50% 0.000000 0.250000 0.000000 7.904000e+03 0.000000 \n", + "75% 0.136364 0.500000 4.000000 3.510900e+04 2.000000 \n", + "max 1.000000 1.000000 65170.000000 4.108317e+06 17409.000000 \n", + "\n", + " followers_count friends_count \n", + "count 7.440000e+03 7440.000000 \n", + "mean 4.107761e+04 1715.558871 \n", + "std 4.910108e+05 5305.897528 \n", + "min 0.000000e+00 0.000000 \n", + "25% 7.275000e+01 106.000000 \n", + "50% 3.670000e+02 440.000000 \n", + "75% 1.833000e+03 1505.000000 \n", + "max 1.449852e+07 208360.000000 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# stats about numerical columns\n", + "tweets_df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Univariate" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 i_ameztoy\n", + "1 ZIisq\n", + "2 Fin21Free\n", + "3 Fin21Free\n", + "4 VizziniDolores\n", + " ... 
\n", + "7435 PelosiLibArmy\n", + "7436 SonnyMullins13\n", + "7437 TECO_Toronto\n", + "7438 samserjio93\n", + "7439 ZeitounRimal\n", + "Name: original_author, Length: 7440, dtype: object" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tweets_df.original_author" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "TrumpThuan 116\n", + "AarianNewsX 57\n", + "CGMeifangZhang 43\n", + "SoizaDavid 42\n", + "doos94619918 36\n", + " ... \n", + "AoxiPRNew 1\n", + "Eloy_Sauvan 1\n", + "carnivorecabbie 1\n", + "FarmSailing 1\n", + "ZeitounRimal 1\n", + "Name: original_author, Length: 4624, dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tweets_df.original_author.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAFhCAYAAACf9rbcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAA4oklEQVR4nO3dd5ikVZn+8e/NjOQMIypIUDFgAHFAEFGCAUUREUFARAzsGlHXhKJgWDMqhlVQQFwBBURBwUCSoAjMwEhGEPEHCIIrWcn3749zqqam6e5pZrpOtVP357r66qq3qvo801Ndz/ue8BzZJiIiAmCxQQcQERFTR5JCRER0JSlERERXkkJERHQlKURERFeSQkREdCUpxL8tSatImlO/bpJ0Q8/9xRfg5z1V0jmS7pX0/hGPbSPpSklXS/rwKK/9Zm33Mkn/6oljx4X5N47Szkcm8+dFjKSsU4hFgaT9gbtsf2khfsajgbWA7YFbOz9L0jTgj8CLgeuB84FdbF82ys9YG/i57WcsaBzzifEu28s+wtdMs/1gP+KJRU+uFGKRImlrSRdKuljSoZKWqMevlfSFevw8SU8a+VrbN9s+H7h/xEMbA1fbvsb2fcAPgVdNIJYTJT2r3r5Q0sfr7U9Kemu9/QFJ50u6SNInel77+hrnHEkHSZom6XPAUvXYEWM9rx6/S9IBkv4AbCrpc/Uq5iJJC5w4Y9GXpBCLkiWB7wE7234mMB14W8/jt9fj3wC++gh+7urAdT33r6/H5ucsYHNJKwAPAJvV45sDZ0p6CbAuJelsADxH0gskPQ3YGdjM9gbAg8Butj8M/Mv2BrZ3G+t5tY1lgHNtrw9cDrwaeLrtZwGffgT/9hgySQqxKJkG/Nn2H+v9w4EX9Dx+VM/3TRvEc1ZtfzPgRGBZSUsD69i+EnhJ/boQuAB4KiVJbA08Bzhf0px6/wmj/Pzxnvcg8ON6+3bgHuAQSTsA/5zUf2UsUqYPOoCIhjzG7fm5AXh8z/016rH5OR+YCVwDnAysCrwVmF0fF/BZ2wf1vkjSu4DDbe8zn5+vcZ53T2ccwfYDkjamJI0dgXcCW00g/hhCuVKIRcmDwNo94wW7A2f0PL5zz/dzHsHPPR9YV9I6dVbT64AT5veiOv5wHfDa2t5ZwPuBM+tTfgW8SdKyAJJWr4PdpwI71ttIWlnSWvU190t6VL093vO66s9fwfZJwHuB9R/Bvz2GTK4UYlFyD7AncIyk6ZQP82/3PL6SpIuAe4FdRr5Y0mOAWcDywEOS3gOsZ/sOSe+kfIhPAw61fekEYzoL2Nr2vySdRbnKOAvA9q/ruMA5kgDuAl5v+zJJ+wK/lrQYZeD7HcBfgIOBiyRdUMcVxnper+WA4yUtSbm6eN8EY48hlCmpMRQkXQvMtP33QccSMZWl+ygiIrpypRAREV25UoiIiK4khYiI6Orb7CNJhwKvAG7u1IGR9EXglcB9wJ+APW3fVh/bB3gzZVrhu23/an5trLrqql577bX7En9ExKJq9uzZf7c9Y7TH+jamIOkFlCl23+9JCi8BTquLaT4PYPtDktajrDLdGHgccArw5PkV8Zo5c6ZnzZrVl/gjIhZVkmbbnjnaY33rPrJ9JvCPEcd+bfuBevf3lDnbUIqL/dD2vbb/DFxNSRAREdHQIMcU3gT8ot5e0IJjERExiQaSFCR9lFI18ogFeO1ekmZJmnXLLbdMfnAREUOseVKQ9EbKAPRunjugMeGCY7YPtj3T9swZM0YdJ4mIiAXUNClI2gb4ILCd7d7yvScAr5O0hKR1KOWDz2sZW0RE9HdK6lHAFsCqkq4H9gP2AZYATq4FwH5v+z9tXyrpaOAySrfSO7J9YEREe//WZS4yJTUi4pEbyJTUiIj495OkEBERXYv+JjtHauFev+u/b/daRMQjlSuFiIjoSlKIiIiuJIWIiOhKUoiIiK4khYiI6EpSiIiIriSFiIjoSlKIiIiuJIWIiOhKUoiIiK4khYi
I6EpSiIiIriSFiIjoSlKIiIiuJIWIiOhKUoiIiK4khYiI6EpSiIiIriSFiIjoSlKIiIiuJIWIiOhKUoiIiK4khYiI6EpSiIiIrr4lBUmHSrpZ0iU9x1aWdLKkq+r3lepxSfqapKslXSRpw37FFRERY+vnlcL3gG1GHPswcKrtdYFT632AlwHr1q+9gG/1Ma6IiBhD35KC7TOBf4w4/Crg8Hr7cGD7nuPfd/F7YEVJj+1XbBERMbrWYwqr2b6x3r4JWK3eXh24rud519djDyNpL0mzJM265ZZb+hdpRMQQGthAs20DXoDXHWx7pu2ZM2bM6ENkERHDq3VS+FunW6h+v7kevwF4fM/z1qjHIiKiodZJ4QRgj3p7D+D4nuNvqLOQNgFu7+lmioiIRqb36wdLOgrYAlhV0vXAfsDngKMlvRn4C7BTffpJwMuBq4F/Anv2K66IiBhb35KC7V3GeGjrUZ5r4B39iiUiIiYmK5ojIqIrSSEiIrqSFCIioitJISIiupIUIiKiK0khIiK6khQiIqIrSSEiIrqSFCIioitJISIiupIUIiKiK0khIiK6khQiIqIrSSEiIrqSFCIioitJISIiupIUIiKiK0khIiK6khQiIqIrSSEiIrqSFCIioitJISIiuuabFCS9VtJy9fa+ko6TtGH/Q4uIiNYmcqXwMdt3Sno+8CLgEOBb/Q0rIiIGYSJJ4cH6fVvgYNsnAov3L6SIiBiUiSSFGyQdBOwMnCRpiQm+LiIi/s1M5MN9J+BXwEtt3wasDHygn0FFRMRgTCQpHGT7ONtXAdi+Edh9YRqV9F5Jl0q6RNJRkpaUtI6kcyVdLelHktJFFRHR2ESSwtN770iaBjxnQRuUtDrwbmCm7WcA04DXAZ8HvmL7ScCtwJsXtI2IiFgwYyYFSftIuhN4lqQ7JN1Z798MHL+Q7U4HlpI0HVgauBHYCji2Pn44sP1CthEREY/QmEnB9mdtLwd80fbytperX6vY3mdBG7R9A/Al4P9RksHtwGzgNtsP1KddD6w+2usl7SVplqRZt9xyy4KGERERo5hI99FHJb1e0scAJD1e0sYL2qCklYBXAesAjwOWAbaZ6OttH2x7pu2ZM2bMWNAwIiJiFBNJCt8ENgV2rffvqscW1IuAP9u+xfb9wHHAZsCKtTsJYA3ghoVoIyIiFsBEksJzbb8DuAfA9q0s3OK1/wdsImlpSQK2Bi4DTgd2rM/Zg4Uft4iIiEdoIknh/jrjyACSZgAPLWiDts+lDChfAFxcYzgY+BDwPklXA6tQymlERERD0+f/FL4G/ARYTdJ/U87m912YRm3vB+w34vA1wAKPVURExMKbb1KwfYSk2ZRuHgHb276875FFRERzE61htCrwT9vfAP4uaZ0+xhQREQMykf0U9qP093fWJjwK+EE/g4qIiMGYyJXCq4HtgLsBbP8VWK6fQUVExGBMJCncZ9vMnX20TH9DioiIQZlIUji67qewoqS3AqcA3+lvWBERMQgTmX30JUkvBu4AngJ83PbJfY8sIiKam29SkPRm4Ezb2VgnImIRN5HFa2sCB0lam1LN9EzgLNtz+hhXREQMwHzHFGzvZ3srymY7Z1G24pzd78AiIqK9iXQf7UupYroscCHwfkpyiIiIRcxEuo92AB4ATgTOAM6xfW9fo4qIiIGYSPfRhpQ9EM4DXgxcLOnsfgcWERHtTaT76BnA5sALgZnAdaT7KCJikTSR7qPPUWYcfQ04v+6WFhERi6CJrGg+xfYXbP+ukxAk7d3nuCIiYgAmkhTeMMqxN05yHBERMQWM2X0kaRdgV2AdSSf0PLQc8I9+BxYREe2NN6bwO+BGygY7B/QcvxO4qJ9BRUTEYIyZFGz/BfgLsGm7cCIiYpAmuh1nREQMgSSFiIjoGjMpSDq1fv98u3AiImKQxhtofqyk5wHbSfohoN4HbV/Q18giIqK58ZLCx4GPAWsAXx7xmIGt+hVUREQMxnizj44FjpX0Mdu
fahhTREQMyET2aP6UpO2AF9RDv7H98/6GFRERgzDf2UeSPgvsDVxWv/aW9JmFaVTSipKOlXSFpMslbSppZUknS7qqfl9pYdqIiIhHbiJTUrcFXmz7UNuHAtsAr1jIdg8Efmn7qcD6wOXAh4FTba8LnFrvR0REQxNdp7Biz+0VFqZBSStQuqIOAbB9n+3bgFcBh9enHQ5svzDtRETEIzeR/RQ+C1wo6XTKtNQXsHBn8esAtwCHSVofmE3pnlrN9o31OTcBq432Ykl7AXsBrLnmmgsRRkREjDSR7TiPAjYBjgN+DGxq+0cL0eZ0YEPgW7afDdzNiCRj25Rpr6PFc7DtmbZnzpgxYyHCiIiIkSZypUA9gz9hvk+cmOuB622fW+8fS0kKf5P0WNs3SnoscPMktRcRERPUvPaR7ZuA6yQ9pR7amjKr6QRgj3psD+D41rFFRAy7CV0p9MG7gCMkLQ5cA+xJSVBHS3ozpWT3TgOKLSJiaI2bFCRNAy6tU0cnje05wMxRHtp6MtuZMo7U/J8zP7uOOsQSETGpxu0+sv0gcKWkTPOJiBgCE+k+Wgm4VNJ5lJlCANjerm9RRUTEQEwkKXys71FERMSUMJGCeGdIWgtY1/YpkpYGpvU/tIiIaG0iBfHeSllLcFA9tDrw0z7GFBERAzKRdQrvADYD7gCwfRXw6H4GFRERgzGRpHCv7fs6dyRNZ4wSFBER8e9tIknhDEkfAZaS9GLgGOBn/Q0rIiIGYSJJ4cOUqqYXA/8BnATs28+gIiJiMCYy++ghSYcD51K6ja6sVUwjImIRM9+kIGlb4NvAnyj7Kawj6T9s/6LfwUVERFsTWbx2ALCl7asBJD0ROBFIUoiIWMRMZEzhzk5CqK4B7uxTPBERMUBjXilI2qHenCXpJOBoypjCa4HzG8QWERGNjdd99Mqe238DXlhv3wIs1beIIiJiYMZMCrb3bBlIREQM3kRmH61D2Slt7d7np3R2RMSiZyKzj34KHEJZxfxQX6OJiIiBmkhSuMf21/oeSUREDNxEksKBkvYDfg3c2zlo+4K+RRUREQMxkaTwTGB3YCvmdh+53o+IiEXIRJLCa4En9JbPjoiIRdNEVjRfAqzY5zgiImIKmMiVworAFZLOZ94xhUxJjYhYxEwkKezX9yiijSO1cK/fNRXTIxZ1E9lP4YwWgURExOBNZEXznczdk3lx4FHA3baXX5iGJU0DZgE32H5FXTn9Q2AVYDawewa3F0ELe7UCuWKJ6KP5DjTbXs728jUJLAW8BvifSWh7b+DynvufB75i+0nArcCbJ6GNiIh4BCYy+6jLxU+Bly5Mo5LWALYFvlvvi7Lu4dj6lMOB7RemjYiIeOQm0n20Q8/dxYCZwD0L2e5XgQ8Cy9X7qwC32X6g3r8eWH0h24iIiEdoIrOPevdVeAC4FnjVgjYo6RXAzbZnS9piAV6/F7AXwJprrrmgYURExCgmMvtosvdV2AzYTtLLgSWB5YEDgRUlTa9XC2sAN4wRz8HAwQAzZ87MiGNExCQabzvOj4/zOtv+1II0aHsfYJ/axhbA+23vJukYYEfKDKQ9gOMX5OdHRMSCG2+g+e5RvqDMCvpQH2L5EPA+SVdTxhgO6UMbERExjvG24zygc1vScpQppHtSzuQPGOt1j4Tt3wC/qbevATaejJ8bERELZtwxBUkrA+8DdqNME93Q9q0tAouIiPbGG1P4IrADZVD3mbbvahZVREQMxHhXCv9FqYq6L/DRsr4MAFEGmheqzEXEwKTURsSYxhtTeESrnSMi4t9fPvgjIqIrSSEiIrqSFCIiomsitY8ioh8y4B1TUK4UIiKiK0khIiK6khQiIqIrSSEiIrqSFCIioitJISIiujIlNWKYZVpsjJArhYiI6MqVQkQMXq5YpoxcKURERFeSQkREdCUpREREV5JCRER0JSlERERXkkJERHRlSmpEBEydabEDjiNXChER0ZWkEBERXUkKERHR1TwpSHq8pNM
lXSbpUkl71+MrSzpZ0lX1+0qtY4uIGHaDuFJ4APgv2+sBmwDvkLQe8GHgVNvrAqfW+xER0VDzpGD7RtsX1Nt3ApcDqwOvAg6vTzsc2L51bBERw26gYwqS1gaeDZwLrGb7xvrQTcBqY7xmL0mzJM265ZZb2gQaETEkBpYUJC0L/Bh4j+07eh+zbWDUiba2D7Y90/bMGTNmNIg0ImJ4DCQpSHoUJSEcYfu4evhvkh5bH38scPMgYouIGGaDmH0k4BDgcttf7nnoBGCPensP4PjWsUVEDLtBlLnYDNgduFjSnHrsI8DngKMlvRn4C7DTAGKLiBhqzZOC7bOBsYp7bN0yloiImFdWNEdERFeSQkREdCUpREREV5JCRER0JSlERERXkkJERHQlKURERFeSQkREdCUpREREV5JCRER0JSlERERXkkJERHQlKURERFeSQkREdCUpREREV5JCRER0JSlERERXkkJERHQlKURERFeSQkREdCUpREREV5JCRER0JSlERERXkkJERHQlKURERFeSQkREdCUpRERE15RLCpK2kXSlpKslfXjQ8UREDJMplRQkTQO+CbwMWA/YRdJ6g40qImJ4TKmkAGwMXG37Gtv3AT8EXjXgmCIihoZsDzqGLkk7AtvYfku9vzvwXNvv7HnOXsBe9e5TgCsXstlVgb8v5M9YWFMhBpgacUyFGGBqxDEVYoCpEcdUiAGmRhyTEcNatmeM9sD0hfzBzdk+GDh4sn6epFm2Z07Wz/t3jWGqxDEVYpgqcUyFGKZKHFMhhqkSR79jmGrdRzcAj++5v0Y9FhERDUy1pHA+sK6kdSQtDrwOOGHAMUVEDI0p1X1k+wFJ7wR+BUwDDrV9aZ+bnbSuqIUwFWKAqRHHVIgBpkYcUyEGmBpxTIUYYGrE0dcYptRAc0REDNZU6z6KiIgBSlKIiIiuJIWIiOgayqQgaXVJz5P0gs7XoGOKmCokrTPKsY0GEcugSdp7IscWJUM30Czp88DOwGXAg/WwbW/XoO2vAx+xfeeI408FvmH7Rf2OYSqR9L7xHrf95QYxrAy8E/grcAjwEWBT4HLgM7Zv7XcMNY5lgQ8Cr6Gsz7kP+BPwbdvfaxFDTywXAK+0fUO9/0LK+/OZjdrfcLzHbV/QIo4aywW2Nxxx7ELbz24VQ2tTakpqI9sDT7F97wDavgmYI+ljto+UtDSwP/BqygdCU5LuBEaeFdwOzAL+y/Y1fQ5hufr9KcBGzF2T8krgvD633fED4GLgOcDr6+3PAy8Gvke72ltHAD8BXgrsBCxDqf21r6Qn2/5IozgA/gP4qaRXAhsCnwVe3rD9A+r3JYGZwB8AAc+ivDc37XcAknYBdgXWkdS7Vmo54B/9bn+UeDYBvg48DVicMmX/btvLT3pjtofqC/gFsOwA238CcCJwJnA18Blg6QHF8inKB8BywPKUmlKdK6nfNIzjTGC5nvvLAWc2antO/S7ghtEeaxTHH0bcP79+Xwy4YgDvjU2BiyjJeUbr9msMxwHP7Ln/DODYRm2vBWwBnAO8sOdrQ2D6AH4Xs4AnARdSEsKewGf70dYwXin8k3K2firQvVqw/e5G7T9Uv0+n/OdebvufjdoeaTvb6/fcP1jSHNsfktTyzHQ1SndJx331WAuLSVqJkoiWlbS27WslrUI5I2vlbknPt322pO2oZ6O2H5KkFgFI+hnzXjkuTblyPEQSbtDFOsJTbF/cuWP7EklPa9Gw7b8Af6FelUhanrk9K8szgKsF21dLmmb7QeAwSRcC+0x2O8OYFE5gQKUzJH0M2AP4qO0fSVodOFDSW4C32b6scUj/lLQTcGy9vyNwT73dcrDp+8B5kn5S729P6bpp4bPAFfX2m4DvSjJlP49PNIoB4D9r2+sCl9ZYkDSDssdIC19q1M5EXSzpu5QuPoDdKFcvzdSqzJ+k/F08RLmiNOWKv6V/1tI/cyR9AbiRPk0UGrqB5kGSdCCwrx8+0Pwy4Mu2m5wF9bT
7BOBAytmQgd8D76UUIXyO7bMbxrIhsHm9e6btCxu2PY3yt/CApOnABpSupBtbxTCV1NlHN9q+p95fCljN9rWN41gSeBvQmR14JvCtTlyNYrgK2NT2QMtlS1oL+Bvl6vW9wArA/9i+etLbGrakUM/EPks5E1yyc9x268w/D0lLeDCD3wMjaXnbd9QZQA9ju/klei9JT7V9xfyf2bf2T7O91QDanQU8z2WjK+oZ6m9tN5uWWhP1Kba3bNXmGHH8EthhgF28zQ1j99FhwH7AV4AtKQM2Tddr1Mu/TwP/An5JmVXxXuZeJreKYwbwVmBtet4Ltt/UKIQjgVcAs5m3u2pQl+gj/RpYs0VDkkZ2iwh4cue47We1iKOa3kkIte37amJoxvaDkh6StILt21u2PcI+wO8knctgxiABkLQZZabiWsz7tzrpfyPDmBSWsn2qJNXBpP0lzQY+3jCGl9j+oKRXA9cCO1AujZsmBeB44CzgFOau2WjG9ivq94ctlmpF0tfGeghYsWEo1wJ3MPdkQZT/m1c2jKHjFknb2T4BQNKrGMxuY3dRxhVOBu7uHGz8gXwQcBplqvJD83luPx1COXGcTZ//VocxKdwraTHgqlqm+wZg2cYxdH7v2wLH2L690QSTkZa2/aFBNNyrzgM/Cjh+AJfpewL/Rc9ZYI9dWgVhe7t6knAw8CXbJ0i6v564tPafwBGSvkFJTtcBbxhAHMfVr0F6lO1xF1k2crvtX7RoaBjHFDairFZdkTJPfwXgC7Z/3zCGz1Fm2PwL2LjG8nPbz20VQ43j08DvbJ/Ust1R4nghZW3EtpSNln5I+X30fUBR0mmUwf/fjfLYn1tfxUhahvK+fCJlsH+Nlu2PiGVZANt3DSqGQZP0GcpV3M+Yt/uo6XhX/cyYRkmSvXFM+uruoUsKU0UdXL299p0uQ1m8dVPjGO6krJy9F7if2pfvfqySnFg804CtKOMc27SIo/4/3DPVBhIlrU+Z9fLtAbW/LfB05p2M8cnGMQx8UoikP49y2K0npkg6fYw4Jn0iwtB1H9Vf7sMyYctZHpLOBs4AzpL02zpF9e75vGzS2V5u/s9qo057fCXlimFD4PAW7Q56htNIkmZS9il/EPjNgGL4NmXh2pbAdynrV1qVHek18Ekhgxzv6tVyFtbQXSlIek7P3SUpBcgesN2s9lCdB755/dqEcqZ+lu33toqhJ5aVgHWZ90zszMYxHE3pRvsl8CPgDNtNBvWmSiG62oV2AHAbpQ7Tb4GVKFdwu9u+rmEsF9l+Vs/3ZYFf2N58vi+e3Dhm236OpItdi/F1jjWMYYdRDt8OXGz75gbtv972DzRG8Uj3oWjk0F0p2J494tBvJTU9C7L9Z0n3UD6A7qOcBTVduAZQV1LvTfkwnENJUOdQunBaOgTYpS7fb22qFKL7KmVW2i31pOHLtjeT9GLK7+cljeKAMtYFZRXt44D/Ax7bsP2OqTAp5M2UxZ2d7pstKDOA1pH0Sdv/2+f2l6nfm13VD+OVQu9CqcUoZ2Vfs/2UhjH8iTLF70jKtMM5rc6MR8RxMaU66e9tb6BSwvsztkc7O+p3LM/g4X3H32/Q7h966z9JOt/2RvXD6DLbT+13DLXdizprEerYyvmuJZslXWr76S3iqO19jFKRc2tKiQ0D37Hdctr2VJkU8ivgDbb/Vu+vRinLsgtl5f0zWsXSytBdKTB3oZSAB4A/U84GWvoa8HzKG+vZwBmSzrT9p8Zx3GP7HkmdFdVXSGqWHDsk7Uc5A1sPOAl4GXA25Y+v3wZeiK6aJekQypz47ajjCSrl1ac1jAPbn6o3fyzp58CSg1hAZvv8evMuynjCIDy+kxCqm+uxf0i6v1UQteTHm3n44P+kLzQduqQwFQaObB9IKYS3LOXNvj+lC6fpHz9wvaQVgZ8CJ0u6lVIZsrUdgfWBC23vWc/GWi3k6xSiezJwCfUEQW0L0UEpYf5WSlfFKcCh9bgpXVvN1A+gt1NOXAy
cLalpzaEax5OBD/DwVbwtuzd/UxPjMfX+a+qxZSjjP638L6Vw40spBfp2o1xFTbqh6z4CkPQ8Hl7aocVZaaf9Ayh/cMsCv6OcFZ/l/m9qM15ML6Rcnv+yt8RBo7bPs71xXVm+JXAnpaR4k66bmFcd+L+TuYl5V2BF269tHMcfgG8zYhXvKOOC/Wh7Cdv31qvFHSh/r1AmAPzYjT84VXd76xn8fxTlM2OTyW5r6K4UJP0vZWHQHHq246RNV0XHOZS+0b/N95l9VvuvV6N0owE8Bvh/jcOYVa9YvkP5ALiL8jtqQqVa7A7MnQr6R+BI23c0jGEb27+st1cAvkwZ77kEeG/j98ozbK/Xc/90Sa3LukOZFfitAbQL5f23IfB927sDPx5QHB2drqrb6vjbTcCj+9HQ0CUFyvZ+67XO9CMcB+wqaR3bn5K0JvAY201nQUl6F2Ue+N+YW9fFlAJ9zdh+e735bZWqlMvbblI3X9K7KesjzqB8CF9ISQ6/l/R2279pEQdlB75f1tsHUOrlv5KSrA6irIBv5QJJm3QGdCU9l7LzV2s/k/R2yuyw1quJF5e0K/C80aal2m5dfuPgOn18X8p+MMvSp3ptQ9d9JOkY4N0eYK18Sd+ifAhvZftp9T/7125YmrjGcTXwXNv/17LdETFMpwwsd7qKLqd0YT3QqP2LgQ3qyvKlgZNsb1ET9fFutEG7ejaIV9n9boOex+a538cYLqacFDyKsm9254pxTcqWoOuN9do+xTOw1cSSnk/pt9+Jh2/K5X4M8E4VQ3OloLlbDS4HXFbXJvSefbTcavC5tjdU2U4P27eqcWni6jrKQpyBUNl57jTKWfGFlBlhrwAOkLSl7b82CmU6pdtoCeo8eNv/r/bbtvLoukBJwPKS1HM122oV7ysatTMhg5wU4rLB1NmSZtk+ZFBxSDqHslPjaaM8dqrtrSe7zaFJCkytrQbvr335Zcf4MtOl2TqFntWR11BmUpzIvAly0ldJjuG/KTtpfXVEfO+m1LzZo0EM3wXOV6mXvznw+RrDDNruw/sd5i5QOhxYlVLC+jGU8a++c63IKumJwPV1oHULSndiyzE3ahyzKQv3jrR9W+v2q/+t78fO7m9nUFa7t5qOuibwDUknAfuMaHfUzakW1tB0H0n6te2Wq0LHJGk35q3xsyOlUucx475w8trfb7zHbTfZm1jSFWPNMJJ0ZasFhZKeTllRfokHuNPaVCFpDmXsbW3KupHjgafbfnnjOJ5EmbK9M2VM4zBKN2uzDy2VPaIfxdxaXLsDD9p+S6P2L6DMfPoa5fNiF9tXdh7rdDlOaptDlBT68gtcUHX18NaU7oJTbfdlzvEYbS/tMaqC1sHv0fpy+xHHhWP12Y/3WCuSlnWjstF1MPdyl+1JlwI+TPkQuIyyyrxZN1/nb0XSB4F/2f76IP8/VFaXvwL4FqWb7zDgwBYDzhqx4n2sY31sv3esaQdKOZTP2P52v/5Phqn7aMXRZhF0tJ5NUM9IB3VWertKnfhP+OHlNX5M+TBqYYUx/k8EDKR89wiX0Wg7Tspitc4HzYHAPyldWVtTPgRblh65X9IulI11Oju/tRxf6ZL0LMrVwssp780jKGfOpwEbNAjhQUlP7FQbqNOXB1GjC9vH1W7O70l6OX2qAzVMSWEFytnGaKULTIMdnlT2L+hcmqnn9nRgcdut/j+uoazV+K2kXUdcGbQs7XAGY2832aRSq8aoPkn5PbQsvrZYz4yrmT1XtWfX7pyW9qSs9P5vl+KN61BW1DZVxxRuo4wrfNh2Z9zrXJU9i1t4P2WdxjWU98RatC25MU8lVts3AC+W9AH6VCQx3UcDpFLm4h2UEgc/sf1fjdrtdA+8njKg+9HOiu6p+HvqJ5VqtV+k1MEa6b22V2wUxzGU6bCHSToM+KbtWSqlHo5oPV15KpD0hAGv8p8GvBv4H8oUXYAre5JTy1hWaTV1fJiSwsD7qDvq6t33UC7PjwS+0nKtwIh
+yrUpZ4E3UJLT6YNIChrQTl+Sfge8y6OUTpB0ne3H9zuG2tYKlG6jzSkVdDekTBm+jrKu5g8NYjja9k496xW6D1Hm5jdd1FhjGugOcKolWFq1N04cV1FmoR1G2duibx/cw5QUnmH7knr7MZRNXUwpUdxkG0xJq1I2id+Z0of89ZYDiD1xzJMg60DexyhTQJey3bR2vsbY6ct236vXqlSF/T/bfx/lsdXcuBSJpOWBdShdite3bF/SY23fKGmt0R7vTFltGM/A3hc9MXyFMp7yI3p2R3Qf9kaeTxwCXgS8ibLy/mjge7b/OOltDUtS6FDZWObjlIEqAS8EPmn70HFfODlt3w3cQsn2d458vNX6AEmftr3vKMc3Afa3vU2LOHranRI7fU0FdX3EGpTBzGtazX4aJY7VKB8+UD6I+77L2CgxDPx9oYZ7I0+UpC0pxQqXAf5AGW+ZtFphwzTQ3PEB4Nmd7hpJq1AqlfY9KVD6rjtZeGD7I4+WEOrx3wNNE0I1sJ2+VHb0+qHtv9d58YdSFmtdCbzF9sWN4liPMhd9bcqMpwspq5zPAPZuPCV1J8p79TeUE6evS/qA7WNbxVANfAc4N9wbeTz1c+r1lHUSfwPeRSm/sQGlrPfkrf62PVRflASweM/9xYHfDTquAf0uLgYuGvF1FmWj9FUaxvExyu5ar6FUf7wR+FSjti/tuX0i8Op6ewvgtw1/B78HnlJvbwwcXm+/FTi28fviD8Cje+7PAP7QMoZBvy96YliNMvvpF/X+esCbB/C7+GP9fawxymMfmsy2hrH76PvAMymrNA28irkfiLhBF07tIngrD9/ToWmRLUlfoHRTHFkPvY7Sh3sT8HzbY00X7WdMS9Bwp6/eldOqW3H2PNbdIrNBHCO3Be2dDHC57WZ7eEu62PYze+4vRkkKzxznZf2Oqen7oqfdX1C6ez9qe32VAo4Xtv5dSKUWVh1zsu2HdT9PlmHsPvpT/eo4vn5v2Z1zPOWM/BQGtBCmepHnnWl08Yjpqk2o7Fn9Rdvfdpnud6+kn9tuUaDtWEnfo+xm9RNJ76GUat6KtvtK/Ellb+TTKAvV5gCoFOVrVRCv45cqexMfVe/vDPyiVePjLTKVhNsuNF3V9tGS9gGw/YCkQfzNPqdOVV6OkiNuA97kPmw4NHRJwY3q+szH0rY/NOgggGmSNnbdx0Flo/TOlqBNSldX9wNbqpR6+A+Xnd9Wb9Gw7Y9KeiPlA/CJlEqpe1G2KN2tRQzVm4CPAPtQum/2rseXpk1hwC7bH6gfzJ3dxg62/ZOGIRxLSYpz6v3eBZVNFpr2uLv253eKV27CYCoLHwq83fZZNY7nU65gJv1Kdhi7j2YCH+Xh+742m4Mt6dOUcYyTWrU5RhwbUd5sy1L+8O4A3gJcCmxr++hGcfTW2nkN8Frgpx6iRXRTTZ2Wuq7tU1T2mZjWzy6LEW1vT+nKfBLlqvoo21e3aHuUWDYEvg48g7IL3gxgRzfaBKonjgs9Yp1VvxaaDmNSuJIyA+liespVu+EcbJVyF8tQylXfz9zFQQOp91MXTtG6v7an/e4bXtKLgG8AK9vuy3aDE4jn+7bf0LjN4yi1fX5q++75Pb/PsbyVcrW0su0nSlqXUi560mv3zyeOZShjfjsDq1D69c9oGUONYzplRbMoK5pblc3uJCUoC12XolzRmvI7ucf2WGVaFtjQdR8Bt9geuZNSU7YHNh21Vx28ew11wLusj2m7YrTqbitYz0xfSqMuE0kj3wuidGWtWONptfnScyknKV+XdArlj//E2pXW2jsoM6DOBbB9laRBJOh7KF01d1Cu7Jcc/+l9szFzJ4VsWMc1Wu0vccCI+71l7/tyRj+MSWE/lRrppzLvxjJNq6SqbMG5LvMu329SBK7H8ZQ/utn0/C5akfRUl2qxN/ScEXX8vFEYa1CqoX6X8kcmyl4CI/8Y++1m2zvW2SWvosxOO1jSzyndJ79uGMu9tu/rnCTUM+WWexh
sRek+2pgyGeNA24PYIxpJ/0sZa5rD3EkhptGmQx7AOolh7D76AWU/4Evp2ay+5XTQuqp6b8oH0hxgE+AcN14lKekS289o2eaI9g+2vdcgV43W6ZZ7U0ozf8D2HEnXuME+wCPieFj/cB3gfC2wU8v3Rp2qfBuly+JdwNuBy2x/tFH7D1GmiJ9N+QCe50PK9rtbxFFjuRxYz1Pgg1KN6kANY1JotqPXODFcTCkh8HvbG6hsuPMZ2y1r5iPpYEr9pSardqcySWtQFu39DdjOdqt9FDrtn2n7BfN/Zv/VRPlmSmlmAb8Cvtvqg1HSuF2Htg8f7/FJjuUYSkHCG1u1OUYczepADWNSOIwyJ/6yAcZwvu2NVOrkP9dlL9xLbT+9cRyXUWZ4/JnSfTSQaph1Lv7bmLsP7m+Ag1oO6PXEsi2wme2PtG57KpK0MmUVbdPZNqPEsRiwrO07Grd7OqWUxHnM293caqypE0ezOlDDOKawCTBH0iA/CK+vA5k/BU6WdCvQtAJl9bIBtDmab1EqUf5Pvb97PdZkH1yYpxDddcBnWrU7IoaNKe/F81VqIW0DXNF66rKk3wDbUT4fZgM3S/qd7fc2juNIymY/DwLnA8tLOtD2FxuGsX/DtsbTrA7UMCaFQRR8m4ftV9eb+9czkRWAXw4gjr8A1Jklg5rZAbCR593z9jRJfd8/AMYsRDdD0pk0LEQnaT9Kkp4u6WTKbKTTgQ9Lerbt/24RR7WCy17RbwG+b3s/SYO4UlivxrEbZUX1hylJqmVSeBJwpu2rGrY5mp/XE8kvAhdQxlm+24+GhjEpDKy/TNLy9U2+cs/hTn/+skDfNyIfEc92lFk2j6Ns+7cWcDllMKulQe6Deyiwh+0r65n6O2w/t87VP4TSd9vCjpRuiiUotafWqO+VL1GmhrZMCtMlPRbYibLQc1AeVbsWtwe+Yft+Sa3/ftcEDlLZjGo2ZZvYs2zPaRmE7U/Vmz+uM9L6VgdqGJPCicydergkpeTslbT5IDySsk/07J4Yer83nfECfIrSnXaK7Wer1GlvVvOoxweYuw8ulLP2VvvgLmX7SgDb59UBPWx/R2Pv39wPD9h+kNI98KdO37ntf9XZOC19kjK4fHbtynoCMIgz5YOAayllP86sq6ybjinY3g9A0lKUacIfAL7K3HIwzUh6Hj1FNPu1XmLoBppHqvPj3267Sf+1yuTvx9tuWWxtrFhm2Z5Zu2qebfshjajW2ef2NwKus31TXUj3H5SzwqspG4f0/cqpriS+kLmF6Fay/aZ6hnpJq5lqks4FtrT9T0mL2X6oHl+BAW2ROhVJmm67WV0uSfsCm1Gu5C+kTJM9q/VspLHWS/Rjeu7QJwV4eKngRa29ceI4hfIh/FlgVUoX0ka2n9eo/QsolVr/IekFwA8p8+I3AJ5mu+9dN7Wf9iOUOvl/AD5n+876Yfw0l42H+k7SEh5lQ3iVLVwf23LacF2n8GnK4OYvKUXX3mv7B61iqHGsQlnB+3zKlfTZlF0Sm+5nTikOeSJwBmU90SAWejZbLzF0SWFEl8BilA3SV7H90oYxHE7pIz2/VZsj2l+iToNdhlJKQJSKoCsAR7T6o+u9KpH0TUoJkv3r/Tm2N2gRxyhxDWT6Y0/7z6cUozuszopa1vafG7Y/p66feTWlu/N9lMHWJleQPXGcTOnD7ySj3YAtbL+ocRzLU64Wnk9ZTHiz7eeP/6pJj6HZeolhHFPorTvUOQP4ceMYngvsJukvlM3AW0+LPYeSDL9te/d6rNmCoB7TeroDtqYUYeto+t6cItMfO7OQZlIKsB1Gmar7A8qHUiud3/22wDG2b5c03vP75bE9A6wAn5a0c8sAJD0D2Jyyl/tMypTlsxq2/zPKVdJywGWS+r5eYqiSgqRpwHK23z/gUJpdlYxhcUm7As/TKBuauF0dqKOAMyT9ndJV0akV/yTa16yfCtMfAV4NPJsy7RDbf5XUuoDizyVdQfk/eVu9WrmncQw
Av5b0OqBTwn1HygB4SwdSpgZ/k7Lj2l2N2z+BsiXoyES0OWV70kk3NN1HnTNSSefY3nTQ8cDD1we0Gnyu3RO7UaYcjqwSaretA7UJZRHOr11LRkt6MqXL5IKGcVxKGcs4ktK1d0bLQfeeOM6zvbHm7jGxDKUfu/Uq85WB220/qLKfwvK2b2rU9p3MnZW3DHNrlC0G3OUGJeZVigB+hrL5Uefv8vHM3ZqzyWr7Ov10n5FjSpKeSSmNM+lb5g7TlcJ5lC6TOSrlko+hdN0AbaukDnp9gO2zgbPr7KNDWrQ5TiwPG8i1/ccBhDLw6Y/V0ZIOAlasayXeBHynRcOStrJ9Wu/V44huoyZ/I54apeW/SOmyWcd1c6E6tvCl+rX3OK+dTKuNNsnA9sV17cSkG6Yrhc6Z12E9h7trBBqfHf+BsgfwPOsD3IfiVmO0/7A//l4tE+RU1nr6Y0+7L6anGJ3tkxu1+4m6evmwUR5u+jfSE1NnW1BTpoL+tFG7VwFPHjnbp3ZBX2F73VZxjNWWpKttP2my2xymK4VH15lHlzA3GXS0zoz32/4/SYvVOemnS/pqw/ZfSJmXP9qlZ+s9cKeEOgV1P+YW5TuDsoir+W50NQk0SQQj2t2vfm+1cHBckv6HUmbiqHroPyW92PY7GjTv0aZ/1u60lp8XsyS91fY8V4sqJUhm96PBYUoK05i7F/FIrZPCbSpVDs8CjpB0M9BsAGuq/fFPEYdSThh2qvd3p/QfNyln3tOP/rCHaLRVq+azgtv2l/sdwwhbUdaKGLpTuS9t1PZlkt7gESuGJb0euKJRDADvAX5SJ0B0ksBMYHHKpIRJN3TdR4OOA6AOHv6LMnD2ekp9ldUanQH1xrEaZTDtcbZfplIcbtNBjzMMwmjrIga5VmIQ6nRYKNNhN2LuJIRXUmr3Ny2BUgdZ3+G5hRvXokwCmPTB1VHaXp1yxfwv5v0wXgp4te0b+h3DiHi2BDobYl1q+7R+tTVMVwoDmWg9Gtt3S3o2sCtlMcy1wLEDCOV71NkU9f4fgR9RCsENm39Jen4dhEfSZswtV9yUSumV7ipe2xe2aNf2J2r7ZwIb9gyw7k9Zz9PEiLn5l9e5+aas7zmvRQz1Q/+5KluDdiaAnGT71BbtjxLP6ZSpsX03TElh60EHUKda7lK//k75AJbtLQYU0qq2j5a0D0CdstuqOulU8zbg8Dq2IErF2nF3AOsHSR+nnCh0xnW+J+kY259uGMZqwH099++rx1r5UsO2xlXPyPt2Vj4VDU1ScIPiahNwBWUc4RW2rwaQ1HTjkhHuVqkv0+mz3YQBDKxOBS6lkNev0w4ZVIkLyvqR9W3fAyDpc5QiaC2TwveB8yT9pN5/NQ1XvNs+o3O7dhmta/sUlUqlQ/OZNSiLDTqAIbMDZRXi6ZK+I2lrBtut9T5Kv/ETJf2W8mHwrgHGMzCSVpD0ZcpZ4WmSDqhXDa39lXk3PFoCaNp/7bKhz56Ukh8PAG+03Xw3urpO41jKGhIoO+P9tHUcw2ZoBpqnkjrQ/CpKN9JWlA/jn9j+9QBimU4ZWBRwZauVmlONpB9TZh91zoh3p5yxN5l91BPHTymDvCdTruBeTOlHvx7AfSiVPEoM76bsHXAc5X2xPfAd21/vd9sj4pgDbAyca/vZ9diUqDC8KEtSGDBJK1H6kHe23WTcY6xFax3DuHhtqsw+kjTuOIbtvnfjqGy9uWlP2ZFBldo412UXvAvrIs/pwAWt4xg26Z8bMNu3AgfXr1aOpfRTz6n3Ry7kG7qkwBSZfdTiQ38CxLzboT7IYLo5z5D0EWCpusr77cDPBhDHUMmVwhCStD3wOspq0eOBozoD38NK0vqUbrzOOMKtlL2bm25YL2ldyqZH6zFvscRmW7XWRWx7AJ2B5u2B79n+aqsYahwC3kJPyQ/gu6OtNI7Jk6QwxHrGNnYGVqFUfzxj/FctWkas4u1U5YRSLNGtV/FKOptSbuMrlEVjewKL2f544zg
6ayWg1Bxqslaip/1plEVaT23ZbqT7aNjdQ5mCegelUuuS4z99kdSpyNlZxXs8JTm8nkYLpUZYyvapklRX8u4vaTbQNCm4lC1vVrp8lPYflHSlpDU9BfYzHyZJCkOortJ8HWVmxynAgbZnDTaqwZgqq3h73KuyHehVkt5JmY667ADimApWAi6tK5p7y9xP+m5jMVe6j4aQpIeAiygboZsRhdhaTHucaiRdCTzLdVN2SUsAF9l+SuM4NqLsrbEi8CnKGMcXPMq+E4sqlZ33VuPhJ62bAzcOY22ulnKlMJzeRPvKsFPdyFW821NqQzVl+/x68y7KeMIw+iqj7zb2D0oBxySFPsqVQkRVB1c3r3fPbDm4qrIb4JiGqctE0vm2NxrjsSxe67NcKQwhlT2an9CpFS/pWGDl+vCn+1mWdyob8ODqpsB1lA1lzmUKVfUdgBXHeWypVkEMq9Q+Gk6fAHoHlp8CfADYH/jgIAIKHgN8hFIz/0BKeYu/2z5j2KYJU3cbG3mwn7uNxVzpPhpCIy/PJR3XqfEj6be2NxtcdFEHuXehbB7/CdvfGHBITdXNn35CKdn9sN3GbN80qNiGQZLCEBrEZuAxfzUZbEtJCGtTKtge2nqXr6mi5W5jMVfGFIbTFZK2tT3PPHxJrwCuHFBMQ03S9ykfgCdRrg4uGXBIA9dyt7GYK1cKQ6jOAz8R+B1zB1afAzyPsgHQHwcV27Cqa0c6C7R6/yhFKbexfPuoYhglKQyhmhQeAzyZufvPXkrZo/lG238aVGwRMVhJCkNI0s8ZfXHQM4HP2H7lYCKLiEHLlNThtNrIhABQj63dPpyImCqSFIbTiuM8lsVBEUMsSWE4ZXFQRIwqYwpDKIuDImIsSQpDLIuDImKkJIWIiOjKmEJERHQlKURERFeSQsQoJK0iaU79uknSDT33Fx/jNf8p6Q319hslPa7nsfdIWrpV/BELKmMKEfMhaX/gLttfegSv+Q3wftuz6v1rgZm2//4IfsY02w8+smgjFk6uFCImZjFJswEkrS/Jktas9/8kaWlJ+0t6v6QdKVN8j6hXFnsDjwNOl3R6fc1LJJ0j6QJJx0hath6/VtLnJV0AvHYg/9IYakkKERPzELCkpOUp+zjPAjaXtBZws+1/dp5o+9j6+G62N7B9IPBXYEvbW0paFdgXeJHtDetz39fT1v/Z3tD2D9v80yLmyn4KERP3O2Az4AXAZ4BtKKWtz3qEP2cTYD3gt5KgLBo8p+fxHy10pBELKEkhYuLOpFwlrAUcD3yIsvfBieO9aBQCTra9yxiP3z3G8Yi+S/dRxMSdBbweuMr2Q8A/gJcDZ4/y3DuB5ca4/3tgs7qvBZKWkfTkvkUd8QgkKURMkO1rKWf5Z9ZDZwO32b51lKd/D/h2HWheCjgY+KWk023fArwROErSRZSuo6f2OfyICcmU1IiI6MqVQkREdCUpREREV5JCRER0JSlERERXkkJERHQlKURERFeSQkREdP1/sGpHRnGCXDEAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots()\n", + "ax.tick_params(axis='x', labelsize=10)\n", + "ax.tick_params(axis='y', labelsize=10)\n", + "ax.set_xlabel('Twitter', fontsize=10)\n", + "ax.set_ylabel('Number of tweets' , fontsize=10)\n", + "ax.set_title('Top 10 Tweeters', fontsize=10)\n", + "tweets_df.original_author.value_counts()[:10].plot(ax=ax, kind='bar', color='orange')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### locations" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "unknown 2805\n", + "Việt Nam 116\n", + "India 107\n", + "United States 72\n", + "Turn on 🔔 57\n", + " ... \n", + "New York, New York 1\n", + "Fontaines-Saint-Martin, France 1\n", + "🇺🇲🇺🇲🇺🇲 1\n", + "Lisbon 1\n", + "🇺🇲🇷🇺🇺🇦🇫🇷🇦🇪🇮🇱🏳️‍🌈 1\n", + "Name: place, Length: 1809, dtype: int64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tweets_df.place.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Most locations are unknown" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYsAAAFOCAYAAAB3xTGMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAhaklEQVR4nO3deZhkZX328e/NACLrgIwEhmVA8c1rRJCMohKJSGRVUQkKkojEVzBCRM0CqBGCVxQVjGIMiDA6RIEgCA4IEiAs4sYMwzKsMrKEnUEIDCDEgfv94zwNxdDdp7q7qk5V9/25rrrqnOecOufXxdC/ftYj20RERIxmhaYDiIiI/pdkERERtZIsIiKiVpJFRETUSrKIiIhaSRYREVErySImLUkvk3RNed0v6Z6W/ZUncN3XS1om6c9HOP74+KMe9b7TJX2sZX8DSWd0414Ry1PmWcRUIOkI4HHbR0/wOtOAC4GngDm2X/TLWtLjtlefyH1GuPcs4Fzbr+n0tSPqpGYRU4qkHSRdLWmRpDmSXlLK75D05VJ+paRXjnCJvwHOBB4c4323kvRLSddJOkvS2qX8lZIuknStpIWSXiFpdUkXl/1FknYvlzkKeEWpGX1F0ixJ15frrCLpO+X8qyVtX8o/JOmHkn4i6VZJXy7l0yR9V9L15TOfHPOXGVNKkkVMJasA3wXeb3sLYEXgr1uOP1rK/xX42vIfljQTeA9w3DjufTJwiO3XAouAw0v594Fv2t4SeDNwH1Wt5T22twa2B46RJOBQ4De2t7L998td/0DAJf69gbmSVinHtgLeD2wBvF/SRqVspu3XlM98Zxw/U0whSRYxlUwDbrf967I/F9iu5fipLe9vGubzX6P6hf/sWG4qaS1guu3LWu8raQ2qX9hnAdh+yvaTgIAvSLoOuAiYCaxXc5s/Ab5XrnMzcCfwqnLsYtuP2n4KuBHYBLgN2EzSNyTtDDw2lp8ppp4Vmw4goo94hO0hs4HTqj/yWRfYVdIy22d3OI59gBnAH9v+vaQ7qGpF4/V0y/YzwIq2H5G0JbAT8FHgfcBfTeAeMcmlZhFTyTPArJb+iL8ELms5/v6W918s/2Hbm9qeZXsWcAbwsXYShe1HgUckvaX1vraXAndLejeApJdIWhVYC3iwJIrtqWoCAEuBNUa4zU+pkgySXgVsDNwyUkyS1gVWsH0m8Flg67qfI6a21CxiKnkK2A/4gaQVgfnA8S3H1y5NP09TtfuP16qS7m7Z/yqwL3B8SQa3lTigShzfknQk8HtgT6p+jHMkLQIWADcD2P6tpJ+VTu3zgW+23OPfgOPKZ5YBH7L9dKkFDWcm8B1JQ38wHjaBnzemgAydjaAaDQXMtv1Q07FE9KM0Q0VERK3ULCIiolZqFhERUSvJIiIiaiVZRERErUk5dHbdddf1rFmzmg4jImKgXHXVVQ/ZnjHcsUmZLGbNmsWCBQuaDiMiYqBIunOkY2mGioiIWkkWERFRK8kiIiJqJVlEREStJIuIiKiVZBEREbWSLCIiolaSRURE1JqUk/K64pQRHyLTXz6QVYQjovNSs4iIiFpJFhERUSvJIiIiaiVZRERErSSLiIiolWQRERG1kiwiIqJWkkVERNRKsoiIiFpJFhERUSvJIiIiaiVZRERErSSLiIiolWQRERG1kiwiIqJWkkVERNRKsoiIiFpJFhERUatryULSRpIukXSjpBskHVzKj5B0j6RrymvXls8cJmmxpFsk7dRSvnMpWyzp0G7FHBERw+vmM7iXAX9re6GkNYCrJF1Yjv2L7aNbT5b0amAv4I+ADYCLJL2qHP4m8HbgbmC+pHm2b+xi7BER0aJrycL2fcB9ZXuppJuAmaN8ZHfgNNtPA7dLWgy8oRxbbPs2AEmnlXOTLCIieqQnfRaSZgGvA35Vig6SdJ2kOZLWLmUzgbtaPnZ3KRupPCIieqTryULS6sCZwCdsPwYcB7wC2Iqq5nFMh+6zv6QFkhYsWbKkE5e
MiIiiq8lC0kpUieL7tn8IYPsB28/Yfhb4Ns83Nd0DbNTy8Q1L2UjlL2D7BNuzbc+eMWNG53+YiIgprJujoQScBNxk+6st5eu3nPYe4PqyPQ/YS9JLJG0KbA5cCcwHNpe0qaSVqTrB53Ur7oiIeLFujobaFvhLYJGka0rZp4G9JW0FGLgDOADA9g2STqfquF4GHGj7GQBJBwEXANOAObZv6GLcERGxnG6OhroC0DCHzhvlM/8M/PMw5eeN9rmIiOiuzOCOiIhaSRYREVErySIiImolWURERK0ki4iIqJVkERERtZIsIiKiVpJFRETUSrKIiIhaSRYREVErySIiImolWURERK0ki4iIqJVkERERtZIsIiKiVpJFRETUSrKIiIhaSRYREVErySIiImolWURERK0ki4iIqDWmZCFpbUmv7VYwERHRn2qThaRLJa0paR1gIfBtSV/tfmgREdEv2qlZrGX7MeC9wMm2twH+rLthRUREP2knWawoaX3gfcC5XY4nIiL6UDvJ4kjgAmCx7fmSNgNu7W5YERHRT1asO8H2D4AftOzfBuzRzaAiIqK/1CYLSTOAjwCzWs+3/VfdCysiIvpJO81QPwLWAi4CftzyGpWkjSRdIulGSTdIOriUryPpQkm3lve1S7kkHStpsaTrJG3dcq19y/m3Stp3PD9oRESMX23NAljV9iHjuPYy4G9tL5S0BnCVpAuBDwEX2z5K0qHAocAhwC7A5uW1DXAcsE0Zsns4MBtwuc4824+MI6aIiBiHdmoW50radawXtn2f7YVleylwEzAT2B2YW06bC7y7bO9ONTTXtn8JTC+jsHYCLrT9cEkQFwI7jzWeiIgYv3aSxcFUCeMpSUvL67Gx3ETSLOB1wK+A9WzfVw7dD6xXtmcCd7V87O5SNlJ5RET0SDujodaYyA0krQ6cCXzC9mOSWq9tSZ7I9Vvusz+wP8DGG2/ciUtGRETR1tpQkt4l6ejyeke7F5e0ElWi+L7tH5biB0rzEuX9wVJ+D7BRy8c3LGUjlb+A7RNsz7Y9e8aMGe2GGBERbWhnbaijqJqibiyvgyV9sY3PCTgJuMl261pS84ChEU37Uo22Gir/YBkV9Ubg0dJcdQGwY1nEcG1gx1IWERE90s5oqF2BrWw/CyBpLnA1cFjN57YF/hJYJOmaUvZp4CjgdEkfBu6kWkYE4Lxyr8XAk8B+ALYflvR5YH4570jbD7cRd0REdEg7yQJgOjD0C3qtdj5g+wpAIxzeYZjzDRw4wrXmAHPauW9ERHReO8nii8DVki6h+uW/HdXciIiImCLaGQ11qqRLgdeXokNs39/VqCIioq+M2MEt6Q/L+9bA+lTzG+4GNmhdiiMiIia/0WoWn6Kat3DMMMcMvK0rEUVERN8ZMVnY3r9s7mL7qdZjklbpalQREdFX2pmU9/M2yyIiYpIasWYh6Q+o1mB6qaTX8fww2DWBVXsQW0RE9InR+ix2olpOfEOgdQb2UqrJdRERMUWM1mcxF5graQ/bZ/YwpoiI6DPtzLM4U9JuwB8Bq7SUH9nNwCIion+0s5Dg8cD7gb+h6rfYE9iky3FFREQfaWc01JttfxB4xPY/AW8CXtXdsCIiop+0kyx+V96flLQB8HuqGd0RETFFtLOQ4LmSpgNfARZSzd4+sZtBRUREf2mng/vzZfNMSecCq9h+tLthRUREP2mng/vAUrPA9tPACpI+1u3AIiKif7TTZ/ER2/8ztGP7EeAjXYsoIiL6TjvJYlp5njYAkqYBK3cvpIiI6DftdHD/BPgPSd8q+weUsoiImCLaSRaHUCWIvy77F5LRUBERU0o7o6GelXQScAXVsNlbbD/T9cgiIqJv1CYLSW8F5gJ3UC33sZGkfW1f3tXIIiKib7TTDHUMsKPtWwAkvQo4FfjjbgYWERH9o53RUCsNJQoA278GVupeSBER0W/aqVkskHQi8L2yvw+woHshRUREv2knWfw1cCDw8bL/U+DfuhZRRET
0nXZGQz1N9VjVr9adGxERk9OIyULSIqqhssOy/dquRBQREX1ntJrFOyZyYUlzyjUetP2aUnYE1bpSS8ppn7Z9Xjl2GPBh4Bng47YvKOU7A18HpgEn2j5qInFFRMTYjZgsbN85wWt/F/hX4OTlyv/F9tGtBZJeDexF9ZzvDYCLyhBdgG8CbwfuBuZLmmf7xgnGFhERY9BOB/e42L5c0qw2T98dOK30j9wuaTHwhnJsse3bACSdVs5NsoiI6KF25ll02kGSrpM0R9LapWwmcFfLOXeXspHKX0TS/pIWSFqwZMmS4U6JiIhxGjFZSLq4vH+pg/c7DngFsBVwH9Xs8I6wfYLt2bZnz5gxo1OXjYgIRm+GWl/Sm4F3leYftR60vXCsN7P9wNC2pG8D55bde4CNWk7dsJQxSnlERPTIaMnic8A/Uv2CXn6OhYG3jfVmkta3fV/ZfQ9wfdmeB5wi6atUHdybA1dSJajNJW1KlST2Aj4w1vtGRMTEjDYa6gzgDEn/aPvzY72wpFOBtwLrSrobOBx4q6StqJLNHVTPycD2DZJOp+q4XgYcOLQMuqSDgAuohs7OsX3DWGOJiIiJkT3ivLvnT5LeBWxXdi+1fe5o5zdt9uzZXrCgw8tXnaL6c/rBB+r/e0ZEDEfSVbZnD3esdjSUpC8CB1P91X8jcLCkL3Q2xIiI6GftzLPYDdjK9rMAkuYCVwOf7mZgERHRP9qdZzG9ZXutLsQRERF9rJ2axReBqyVdQjU6aTvg0K5GFRERfaWdJcpPlXQp8PpSdIjt+7saVURE9JW21oYqcyPmdTmWiIjoU02sDRUREQMmySIiImqNmiwkTZN0c6+CiYiI/jRqsihLbtwiaeMexRMREX2onQ7utYEbJF0JPDFUaPtdXYsqIiL6SjvJ4h+7HkVERPS1duZZXCZpE2Bz2xdJWpVqBdiIiJgi2llI8CPAGcC3StFM4OwuxhQREX2mnaGzBwLbAo8B2L4VeHk3g4qIiP7STrJ42vb/Du1IWpHq4UURETFFtJMsLpP0aeClkt4O/AA4p7thRUREP2knWRwKLAEWUT0G9Tzgs90MKiIi+ks7o6GeLQ88+hVV89MtbudZrBERMWnUJgtJuwHHA7+hep7FppIOsH1+t4OLiIj+0M6kvGOA7W0vBpD0CuDHQJJFRMQU0U6fxdKhRFHcBiztUjwREdGHRqxZSHpv2Vwg6TzgdKo+iz2B+T2ILSIi+sRozVDvbNl+APjTsr0EeGnXIoqIiL4zYrKwvV8vA4mIiP7VzmioTYG/AWa1np8lyiMipo52RkOdDZxENWv72a5GExERfamdZPGU7WO7HklERPStdobOfl3S4ZLeJGnroVfdhyTNkfSgpOtbytaRdKGkW8v72qVcko6VtFjSda3Xl7RvOf9WSfuO66eMiIgJaSdZbAF8BDiKaoLeMcDRbXzuu8DOy5UdClxse3Pg4rIPsAuweXntDxwHVXIBDge2Ad4AHD6UYCIionfaaYbaE9isdZnydti+XNKs5Yp3B95atucClwKHlPKTy5pTv5Q0XdL65dwLbT8MIOlCqgR06lhiiYiIiWmnZnE9ML1D91vP9n1l+35gvbI9E7ir5by7S9lI5S8iaX9JCyQtWLJkSYfCjYgIaK9mMR24WdJ84OmhwokOnbVtSR1bvdb2CcAJALNnz86quBERHdROsji8g/d7QNL6tu8rzUwPlvJ7gI1aztuwlN3D881WQ+WXdjCeiIhoQzvPs7isg/ebB+xL1Vm+L/CjlvKDJJ1G1Zn9aEkoFwBfaOnU3hE4rIPxREREG9qZwb2U55+5vTKwEvCE7TVrPncqVa1gXUl3U9VQjgJOl/Rh4E7gfeX084BdgcXAk8B+ALYflvR5nl+48Mihzu6IiOiddmoWawxtSxLVyKU3tvG5vUc4tMMw5xo4cITrzAHm1N0vIiK6p53RUM9x5Wxgp+6EExER/aidZqj3tuyuAMwGnupaRBER0XfaGQ3
V+lyLZcAdVE1RERExRbTTZ5HnWkRETHGjPVb1c6N8zrY/34V4IiKiD41Ws3himLLVgA8DLwOSLCIipojRHqt6zNC2pDWAg6nmP5xGtfJsRERMEaP2WZQlwj8F7EO1SuzWth/pRWAREdE/Ruuz+ArwXqrF+baw/XjPooqIiL4y2qS8vwU2AD4L3CvpsfJaKumx3oQXERH9YLQ+izHN7o6IiMkrCSEiImolWURERK0ki4iIqJVkERERtZIsIiKiVpJFRETUSrKIiIhaSRYREVErySIiImolWURERK0ki4iIqJVkERERtZIsIiKiVpJFRETUSrKIiIhaSRYREVErySIiImo1kiwk3SFpkaRrJC0oZetIulDSreV97VIuScdKWizpOklbNxFzRMRU1mTNYnvbW9meXfYPBS62vTlwcdkH2AXYvLz2B47reaQREVNcPzVD7Q7MLdtzgXe3lJ/syi+B6ZLWbyC+iIgpq6lkYeA/JV0laf9Stp7t+8r2/cB6ZXsmcFfLZ+8uZS8gaX9JCyQtWLJkSbfijoiYklZs6L5/YvseSS8HLpR0c+tB25bksVzQ9gnACQCzZ88e02cjImJ0jdQsbN9T3h8EzgLeADww1LxU3h8sp98DbNTy8Q1LWURE9EjPk4Wk1SStMbQN7AhcD8wD9i2n7Qv8qGzPAz5YRkW9EXi0pbkqIiJ6oIlmqPWAsyQN3f8U2z+RNB84XdKHgTuB95XzzwN2BRYDTwL79T7kiIiprefJwvZtwJbDlP8W2GGYcgMH9iC0iIgYQT8NnY2IiD6VZBEREbWSLCIiolaSRURE1EqyiIiIWkkWERFRK8kiIiJqJVlEREStJIuIiKiVZBEREbWSLCIiolaSRURE1EqyiIiIWkkWERFRK8kiIiJqJVlEREStJIuIiKiVZBEREbWSLCIiolaSRURE1EqyiIiIWkkWERFRK8kiIiJqrdh0ADFFnaKmI2jPB9x0BO3J9xldlppFRETUSrKIiIhaaYaKiGiVJr1hpWYRERG1BiZZSNpZ0i2SFks6tOl4IiKmkoFIFpKmAd8EdgFeDewt6dXNRhURMXUMRLIA3gAstn2b7f8FTgN2bzimiIgpY1A6uGcCd7Xs3w1s03qCpP2B/cvu45Ju6VFsE7Eu8FBHr7jPgHTOdUe+z87K99k5g/JdbjLSgUFJFrVsnwCc0HQcYyFpge3ZTccxWeT77Kx8n50zGb7LQWmGugfYqGV/w1IWERE9MCjJYj6wuaRNJa0M7AXMazimiIgpYyCaoWwvk3QQcAEwDZhj+4aGw+qEgWo2GwD5Pjsr32fnDPx3KTsLe0VExOgGpRkqIiIalGQRERG1kiwiIrpI0gqS1mw6jolKsoiBJ2lNSesMvZqOJ0LSKeXf5WrA9cCNkv6+6bgmIh3cPSbpJcAewCxaRqPZPrKpmAaVpAOAfwKeAob+Idv2Zs1FNdgkzQAOoVqDbZWhcttvayyoASTpGttbSdoH2Bo4FLjK9msbDm3cBmLo7CTzI+BR4Crg6YZjGXR/B7zGdmeXUZjavg/8B7Ab8FFgX2BJoxENppUkrQS8G/hX27+XNNB/mSdZ9N6GtnduOohJ4jfAk00HMcm8zPZJkg62fRlwmaT5TQc1gL4F3AFcC1wuaRPgsUYjmqAki977uaQtbC9qOpBJ4DCq7/NXtNTSbH+8uZAG3u/L+32SdgPuBdIPNEa2jwWObSm6U9L2TcXTCemz6DFJNwKvBG6n+gUnqnb2gW3LbIqkK4ErgEXAs0Pltuc2FtSAk/QO4KdUa7F9A1gT+CfbWV5nDCStB3wB2MD2LuX5O2+yfVLDoY1bkkWPleroi9i+s9exDDpJV9t+XdNxRCxP0vnAd4DP2N5S0orA1ba3aDi0cUszVO/9P+By4Oe2n2g6mAF3fnmOyTm8sBnq4eZCGkyS/sH2lyV9g+dHlj0nTXtjtq7t0yUdBs+tb/dM00FNRJJF790G7A0cK2kpVZX/cts/aja
sgbR3eT+spcxAhs6O3U3lfUGjUUweT0h6GSXxSnoj1SjIgZVmqIZI+gPgfVTDP9e2vUbDIUVEh0jamqrP5zVUk/JmAHvavrbRwCYgyaLHJJ1INeHpAapaxRXAQtvLGg1sQEl6DS+eQHZycxENJknnMEzz0xDb7+phOAOvTL59Bvg/VINYbgFWsD2wc6vSDNV7L6N6Jsf/AA8DDyVRjI+kw4G3UiWL84BdqJJvksXYHV3e3wv8AfC9sr831R82MTa/sL018NxzdyQtpJrNPZCSLHrM9nsAJP1fYCfgEknTbG/YbGQD6c+BLalGmexXhit+r+YzMYwyAQ9Jxyz3rOhzJKUfo02leXkm8FJJr6OqVUA1BHnVxgLrgCSLHivj2N8CbAdMB/6Lqjkqxu53tp+VtKys6vkgL3xWe4zdapI2s30bgKRNgdUajmmQ7AR8CNgQ+GpL+VLg000E1ClJFr23M1Vy+Lrte5sOZsAtkDQd+DbVWluPA79oNKLB90ngUkm3Uf1VvAlwQLMhDY4yIXSupD1sn9l0PJ2UDu4GlOaS15fdK20/2GQ8k4GkWcCatq9rOpZBVzpn/7Ds3jzInbJNKsul/BEvHHwxsKtLJ1n0mKQ9qToTL6X6y+0twN/bPqPJuAaJpI1HO277v3sVy2Qk6c28eAn9DBoYA0nHU/VRbA+cSNW/dqXtDzca2AQkWfSYpGuBtw/VJsrzAy6yvWWzkQ0OSYuohnmqpdhUY9lfbntaI4FNApL+HXgFcA3V0E+o1i7LDO4xkHSd7de2vK8OnG/7LU3HNl7ps+i9FZZrdvoteWLhmCy/vk5pgjoE+DOqxdti/GYDr3b+ipyo35X3JyVtQPX/+foNxjNhSRa99xNJFwCnlv33U80RiDGStDnwGWAb4Bjg47Z/P/qnosb1VPMs7ms6kAF3bhl88RVgIVXN98RGI5qgNEM1QNIewLZl96e2z2oynkFTZm1/hqrz8MvAqbYHepG2fiHpEmAr4EpeuDhjZnCPgaSXDA0MKAMGVgGeGuTBAkkWMXDK6p13AT/m+Xb156R9ffwk/elw5UOT9qI9khaWGdyjlg2SNEP1mKT3Al8CXk7VQTv08KM1Gw1ssPxV0wFMVkkKEzOZZ3CnZtFjkhYD77R9U+3JET1Slssf7pdB/pgZA0n7Us3gng3M5/lksRT4ru0fNhTahCVZ9Jikn9netv7MiBhUmcEdEybp61SjTc7mhR2IA/sXR0RUJL0TuG7oMcmSPgfsAdwJHGz79ibjm4iM7++9NYEngR2Bd5bXOxqNaEBJelENbbiyiB76Z2AJPLdo6F9Q9bHNA45vMK4JS82ixySts/wzoiVtOsh/cTRlMo44icEm6dqh1RgkzQFusf2lsj/Q/zYzGqr3zpG0i+3H4LnnWvyA6vGL0QZJbwLeDMyQ9KmWQ2tSPVgqoikqS3s8CewA/FvLsVWG/8hgSLLovS9QJYzdqB65eDKwT7MhDZyVgdWp/v22Prv8MaoF2yKa8jWqdbUeA26yvQCgDKMd6FnxaYZqgKR3A/9A9YtuD9u/bjaiwSRpE9t3SlrV9pNNxxMBIGkm1Tyqa20/W8rWB1Ya5BWRkyx6RNI3eOE49h2A3wB3QGYdj4Uk2XZpjjoJWN32xpK2BA6w/bGGQ4yYdNIM1TvLP8f4qkaiGHBlSffPAx+lqvLvRDXSBNvXStqunLfxIP8VF9Fvkix6pDxuMSZuP+DcoR3bd0mtj7VgWXm/gwwNj+iYJIseK/MAjqB6tvGKPL+cwmZNxjVAjgE+R5Uw7ipPdbOklYCDgZsBbCdRRM9JWme048sPmx8k6bPoMUk3A5+kaoZ6bsVU279tLKgBJWld4OtUDz0S8J9Us2TzXUYjJN3O809x3Bh4pGxPB/7b9qbNRTcxSRY9JulXtrdpOo6I6B5J3wbOsn1e2d8FeLftA5qNbPySLHpM0lFUE8d+yAvXhlrYWFADRtI/2P7yMCPMgIwsi+ZJWjTM439
fVDZI0mfRe0O1ij8u76L6hfe2ZsIZSDeW9+VHmEX0i3slfRb4XtnfB7i3wXgmLMmi9y4dpizVu7HZRdIjGWEWfWxv4HDgLKr/vy8vZQMryaL3Hm/ZXoVqxdk8CGlsfg0cXWbFnk71DO6rG44p4jll1NPBklaz/UTT8XRC+iwaVh7mfoHttzYdy6CRtAmwV3m9FDiVKnFk+ZRoVBnSfSKTaHWBJIuGSVobmG/7lU3HMsjKQm1zgNfazsqz0ShJv6Ja1HKe7deVsuttD+zq0mmG6jFJi3i+j2IaMAM4srmIBpekFYFdqGoWO1D1Bx3RYEgRzxlmdYFnRjp3ECRZ9F7rU/GWAQ/YXjbSyfFikt5O1Vm4K3AlcBqw/2RpG45JYbjVBQa6bzLNUDFwJP0XcApwpu1Hmo4nYnkjrC7w8Sz3ERERz5G0re2f1ZUNkiSLiIgOm4zPh0+fRUREh0zm58MnWUREdM6kfT58mqEiIjps6PnwTcfRSUkWEREdIulrtj8h6RyGXxH5XQ2E1RFphoqI6Jx/L+9HNxpFF6RmERERtVKziIjoMEnbUi09swnV71kBtr1Zk3FNRGoWEREdJulm4JPAVbSsCTXIz4dPzSIiovMetX1+00F0UmoWEREdJukoqkl4PwSeHiq3vbCxoCYoySIiosMkXTJMsW2/refBdEiSRURE1EqfRUREhyy3HhRUE/MeAq6wfXsDIXXMCk0HEBExiayx3GtNYDZwvqS9mgxsotIMFRHRZZLWAS4a5CXKU7OIiOiy8oQ81Z7Yx5IsIiK6TNL2wEA/Ajgd3BERHSJpES9ebXYd4F7gg72PqHPSZxER0SGSNlmuyMBvbT/RRDydlGQRERG10mcRERG1kiwiIqJWkkVEDUkvk3RNed0v6Z6W/ZVH+MxHJX2wbH9I0gYtxz4hadVexR/RCemziBgDSUcAj9tu+7GZki4F/s72grJ/BzDb9kNjuMY028/UnxnRHalZRIzdCpKuApC0pSRL2rjs/0bSqpKOkPR3kv6carmH75eayMHABsAlQyuTStpR0i8kLZT0A0mrl/I7JH1J0kJgT0kfl3SjpOskndbITx5TVuZZRIzds8AqktYE3gIsAN4i6QrgQdtPStVkXdtnSDqIF9YsPglsb/shSesCnwX+zPYTkg4BPgUcWe7126ElIiTdC2xq+2lJ03v200aQZBExXj8HtgW2A74A7Ey1nMNPx3idNwKvBn5WEszKwC9ajv9Hy/Z1VDWUs4GzxxN0xHglWUSMz+VUtYpNgB8Bh1BNwPrxGK8j4ELbe49wvHUy125UyemdwGckbWF72RjvFzEu6bOIGJ+fAn8B3Gr7WeBhYFfgimHOXUq1XPVw+78EtpX0SgBJq0l61fIXkLQCsJHtS6gS01rA6h36WSJqpWYRMQ6271DVbnR5KboC2ND2cIvFfRc4XtLvgDcBJwA/kXSv7e0lfQg4VdJLyvmfBX693DWmAd+TtBZVbeRY2//TyZ8pYjQZOhsREbXSDBUREbWSLCIiolaSRURE1EqyiIiIWkkWERFRK8kiIiJqJVlEREStJIuIiKj1/wG1tv0FCCzQQwAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# top 4 locations of users\n", + "fig, ax = plt.subplots()\n", + "ax.tick_params(axis='x', labelsize=10)\n", + "ax.tick_params(axis='y', labelsize=10)\n", + "ax.set_xlabel('Twitters', fontsize=10)\n", + "ax.set_ylabel('Number of locations' , fontsize=10)\n", + "ax.set_title('Top 4 Locations', fontsize=10)\n", + "tweets_df.place.value_counts()[:4].plot(ax=ax, kind='bar', color='orange')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### source" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Twitter Web App 2717\n", + "Twitter for Android 2360\n", + "Twitter for iPhone 1531\n", + "Twitter for iPad 191\n", + "TweetDeck 127\n", + "Name: source, dtype: int64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tweets_df[\"source\"].value_counts()[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The top five sources of tweets" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYsAAAFmCAYAAACcOrbXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAngklEQVR4nO3deZgkVZ3u8e9LI7JvgsjeiO2CC4stgrigKCAouKACLsBwRR0YUcYZ0UFB3PCKzMjIoCggXh0QlE1hREBWZWsWWUSkRRBalmYRWhCYhvf+Eaeo7O6qimzIzMiueD/PU09FnIjM/FU93fWLOOcX58g2ERERE1ms6QAiImL4JVlEREStJIuIiKiVZBEREbWSLCIiolaSRURE1Fq86QAiBkXSc4Bzy+7zgCeA2WV/U9uPL+T7bQmcBvypNJ1s++BnHmnE8EmyiNawfR+wEYCkg4C/2T70Gb7tRbbf9gzfY6FIWtz23EF+ZkS6oaLVJG0l6WpJ10k6RtKzS/utkv5vab9c0gue5vtPkfR9SdeX9/pkad9I0qWSrpV0iqSVSvv5kqaX7VUk3Vq2d5d0uqRfAedKWlbSseU9r5X07nLe1pIukXSVpJMkLVvaD5H0u3LuM02Q0UJJFtFmSwLfB95n++VUd9of6zj+YGn/FvAf47zH5pJ+K+l/JL10jOMbAWvafll5r2NL+w+AT9t+BXAdcGAX8W4C7GT7DcDnRuIr7/ErSasABwBvtr0JMAPYr3S/vRN4aTn3S118VsQ8kiyizaYAf7L9h7J/HPD6juPHd3zffIzXXwWsa3tD4D+BU8c45xbg+ZL+U9K2wEOSVgBWtH3BOJ87nrNt31+23wwcMXLA9gPAZsAGwK8lXQPsBqwLPAg8Chwt6V3AI118VsQ8kiwixudxtqsG+yHbfyvbZwLPKlf3nec8AGwInA98FPhezWfOZfT/5ZLzHXu45rWiSigbla8NbO9Zxjc2BX4CvA34Rc37RCwgySLa7Algasd4xAeBCzqOv6/j+yXzv1jS8ySpbG9K9f/pvvnOWQVYzPZPqbqINrH9IPCApNeN8bm3Aq8s2ztNEPvZwN4dn7MScCmwxcjPI2kZSS8s4xYrlIT2SarkFbFQUg0VbfYosAdwkqTFgSuAb3ccX0nStcBjwC5jvH4n4GOS5gJ/B3b2gtM4rwkcK2nkwuwz5ftuwLclLU3VVbVHaT8UOFHSXsAZE8T+JeAISddTJb0v2D5Z0u7A8SMD9VQJag5wmqQlqe4+9pvgfSPGpExRHrGgUoU03fa9TccSMQzSDRUREbVyZxEREbVyZxEREbWSLCIiotakrIZaZZVVPHXq1KbDiIhYpFx55ZX32l51rGOTMllMnTqVGTNmNB1GRMQiRdJt4x1LN1RERNRKsoiIiFpJFhERUSvJIiIiaiVZRERErSSLiIiolWQRERG1kiwiIqJWkkVERNSalE9w98LU/Sdad2Zwbj1k+6ZDiIjInUVERNRLsoiIiFpJFhERUSvJIiIiaiVZRERErSSLiIiolWQRERG1kiwiIqJWkkVERNRKsoiIiFpJFhERUSvJIiIiavUtWUhaW9J5kn4n6QZJ+5b2gyTNknRN+dqu4zWfkTRT0k2Stulo37a0zZS0f79ijoiIsfVz1tm5wD/bvkrScsCVks4ux/7d9qGdJ0vaANgZeCmwBnCOpBeWw0cAbwHuAK6QdLrt3/Ux9oiI6NC3ZGH7TuDOsj1H0o3AmhO8ZEfgBNuPAX+SNBPYtBybafsWAEknlHOTLCIiBmQgYxaSpgIbA5eVpn0kXSvpGEkrlbY1gds7XnZHaRuvff7P2EvSDEkzZs+e3esfISKi1fqeLCQtC/wU+ITth4AjgfWBjajuPL7Ri8+xfZTt6banr7rqqr14y4iIKPq6Up6kZ1Elih/ZPhnA9t0dx78L/LzszgLW7nj5WqWNCdojImIA+lkNJeBo4Ebbh3W0r95x2juB68v26cDOkp4taT1gGnA
5cAUwTdJ6kpagGgQ/vV9xR0TEgvp5Z7EF8EHgOknXlLbPArtI2ggwcCvwEQDbN0g6kWrgei6wt+0nACTtA5wFTAGOsX1DH+OOiIj59LMa6mJAYxw6c4LXfBn48hjtZ070uoiI6K88wR0REbWSLCIiolaSRURE1EqyiIiIWkkWERFRq68P5cXkMHX/M5oOAYBbD9m+6RAiWit3FhERUSvJIiIiaiVZRERErSSLiIiolWQRERG1kiwiIqJWkkVERNRKsoiIiFpJFhERUSvJIiIiaiVZRERErSSLiIiolWQRERG1kiwiIqJWkkVERNRKsoiIiFpJFhERUSvJIiIiaiVZRERErSSLiIiolWQRERG1kiwiIqJWkkVERNTqW7KQtLak8yT9TtINkvYt7StLOlvSzeX7SqVdkg6XNFPStZI26Xiv3cr5N0varV8xR0TE2Pp5ZzEX+GfbGwCbAXtL2gDYHzjX9jTg3LIP8FZgWvnaCzgSquQCHAi8GtgUOHAkwURExGDUJgtJ75G0XNk+QNLJnVf947F9p+2ryvYc4EZgTWBH4Lhy2nHAO8r2jsAPXLkUWFHS6sA2wNm277f9AHA2sO3C/JAREfHMdHNn8TnbcyS9FngzcDTlqr9bkqYCGwOXAavZvrMcugtYrWyvCdze8bI7Stt47fN/xl6SZkiaMXv27IUJLyIianSTLJ4o37cHjrJ9BrBEtx8gaVngp8AnbD/Uecy2AXf7XhOxfZTt6banr7rqqr14y4iIKLpJFrMkfQd4H3CmpGd3+TokPYsqUfzI9sml+e7SvUT5fs/I5wBrd7x8rdI2XntERAxIN3/03wucBWxj+6/AysC/1L1Ikqi6rG60fVjHodOBkYqm3YDTOto/VKqiNgMeLN1VZwFbS1qpDGxvXdoiImJAFq87wfYjku4BXgvcTFXldHMX770F8EHgOknXlLbPAocAJ0raE7iNKhkBnAlsB8wEHgH2KJ9/v6QvAleU8w62fX8Xnx8RET1SmywkHQhMB14EHAs8C/ghVTIYl+2LAY1zeKsxzjew9zjvdQxwTF2sERHRH910Q70T2AF4GMD2X4Dl+hlUREQMl26SxeOdVUuSlulvSBERMWy6SRYnlmqoFSV9GDgH+G5/w4qIiGHSzQD3oZLeAjxENW7xedtn9z2yiIgYGt0McK8HXDSSICQtJWmq7Vv7HVxERAyHbrqhTgKe7Nh/orRFRERLdJMsFrf9+MhO2e56uo+IiFj0dZMsZkvaYWRH0o7Avf0LKSIihk3tmAXwUeBHkr5V9u+gejI7IiJaYsJkIWkK8DHbm5XZY7H9t4FEFhERQ2PCZGH7ibKORZJERESLddMNdbWk06kqoB4eaeyYcjwiIia5bpLFksB9wJs62gwkWUREtEQ3T3DvMYhAIiJieHXzBPexjLH0qe1/6EtEERExdLrphvp5x/aSVFOW/6U/4URExDDqphvqp537ko4HLu5bRBERMXS6eYJ7ftOA5/Y6kIiIGF7djFnMoRqzUPl+F/DpPscVERFDpJtuqCyhGhHRct0McFMmEnx92T3f9s8nOj8iIiaX2jELSYcA+wK/K1/7SvpKvwOLiIjh0c2dxXbARrafBJB0HHA18Nl+BhYREcOj22qoFTu2V+hDHBERMcS6ubP4KtVkgudRVUS9Hti/r1FFRMRQ6aYa6nhJ5wOvKk2ftn1XX6OKiIih0s0A9xbAQ7ZPB5YH/lXSun2PLCIihkY3YxZHAo9I2hDYD/gj8IO+RhUREUOlm2Qx17aBHYEjbB8B5EG9iIgW6SZZzJH0GeADwBmSFgOeVfciScdIukfS9R1tB0maJema8rVdx7HPSJop6SZJ23S0b1vaZkrKwHpERAO6qYZ6H7ArsKftuyStA3y9i9d9H/gWC3ZZ/bvtQzsbJG0A7Ay8FFgDOEfSC8vhI4C3AHcAV0g63fbvuvj8iJ6buv8ZTYcAwK2HbN90CNEy3VRD3QUc1rH/Z7oYs7B9oaS
pXcaxI3CC7ceAP0maCWxajs20fQuApBPKuUkWERED9HSmKH+m9pF0bemmWqm0rQnc3nHOHaVtvPYFSNpL0gxJM2bPnt2PuCMiWmvQyeJIYH1gI+BO4Bu9emPbR9mebnv6qquu2qu3jYgIJkgWks4t37/Wqw+zfbftJ8o8U99ltKtpFrB2x6lrlbbx2iMiYoAmGrNYXdJrgB3KWIE6D9q+amE/TNLqtu8su+8ERiqlTgf+W9JhVAPc04DLy2dOk7QeVZLYmWqwPSIiBmiiZPF54HNUV/OHzXfMwJsmeuOyVveWwCqS7gAOBLaUtFF5/a3ARwBs3yDpRKqB67nA3rafKO+zD3AWMAU4xvYN3f94ERHRC+MmC9s/AX4i6XO2v7iwb2x7lzGaj57g/C8DXx6j/UzgzIX9/IiI6J1uSme/mJXyIiLarZuJBL9KVsqLiGi1bp7g3p6slBcR0WpZKS8iImplpbyIiKiVlfIiIqJWN3cWlAfpTu9zLBERMaSamEgwIiIWMUkWERFRa8JkIWmKpN8PKpiIiBhOEyaLMj/TTWV1vIiIaKluBrhXAm6QdDnw8Eij7R36FlVERAyVbpLF5/oeRUREDLVunrO4QNK6wDTb50hammq68IiIaIluJhL8MPAT4DulaU3g1D7GFBERQ6ab0tm9gS2AhwBs3ww8t59BRUTEcOkmWTxm+/GRHUmLU610FxERLdFNsrhA0meBpSS9BTgJ+Fl/w4qIiGHSTbLYH5gNXEe1ZvaZwAH9DCoiIoZLN9VQT5YFjy6j6n66yXa6oSIiWqQ2WUjaHvg28Eeq9SzWk/QR2//T7+AiImI4dPNQ3jeAN9qeCSBpfeAMIMkiIqIluhmzmDOSKIpbgDl9iiciIobQuHcWkt5VNmdIOhM4kWrM4j3AFQOILSIihsRE3VBv79i+G3hD2Z4NLNW3iCIiYuiMmyxs7zHIQCIiYnh1Uw21HvBPwNTO8zNFeUREe3RTDXUqcDTVU9tP9jWaiIgYSt0ki0dtH973SCIiYmh1Uzr7TUkHStpc0iYjX3UvknSMpHskXd/RtrKksyXdXL6vVNol6XBJMyVd2/n+knYr598saben9VNGRMQz0k2yeDnwYeAQqgf0vgEc2sXrvg9sO1/b/sC5tqcB55Z9gLcC08rXXsCRUCUX4EDg1cCmwIEjCSYiIganm26o9wDP75ymvBu2L5Q0db7mHYEty/ZxwPnAp0v7D8qcU5dKWlHS6uXcs23fDyDpbKoEdPzCxBIREc9MN3cW1wMr9ujzVrN9Z9m+C1itbK8J3N5x3h2lbbz2BUjaS9IMSTNmz57do3AjIgK6u7NYEfi9pCuAx0Yan2nprG1L6tnstbaPAo4CmD59embFjYjooW6SxYE9/Ly7Ja1u+87SzXRPaZ8FrN1x3lqlbRaj3VYj7ef3MJ6IiOhCN+tZXNDDzzsd2I1qsHw34LSO9n0knUA1mP1gSShnAV/pGNTeGvhMD+OJiIgudPME9xxG19xeAngW8LDt5WtedzzVXcEqku6gukM5BDhR0p7AbcB7y+lnAtsBM4FHgD0AbN8v6YuMTlx48Mhgd0REDE43dxbLjWxLElXl0mZdvG6XcQ5tNca5BvYe532OAY6p+7yIiOifbqqhnuLKqcA2/QknIiKGUTfdUO/q2F0MmA482reIIiJi6HRTDdW5rsVc4FaqrqiIiGiJbsYssq5FRETLTbSs6ucneJ1tf7EP8URExBCa6M7i4THalgH2BJ4DJFlERLTERMuqfmNkW9JywL5Uzz+cQDXzbEREtMSEYxZlivD9gPdTzRK7ie0HBhFYREQMj4nGLL4OvItqcr6X2/7bwKKKiIihMtFDef8MrAEcAPxF0kPla46khwYTXkREDIOJxiwW6unuiIiYvJIQIiKiVpJFRETUSrKIiIhaSRYREVErySIiImolWURERK0ki4iIqJVkERERtZIsIiKiVpJFRETUSrKIiIhaSRYREVErySIiImo
lWURERK0ki4iIqJVkERERtZIsIiKiVpJFRETUSrKIiIhajSQLSbdKuk7SNZJmlLaVJZ0t6ebyfaXSLkmHS5op6VpJmzQRc0REmzV5Z/FG2xvZnl729wfOtT0NOLfsA7wVmFa+9gKOHHikEREtN0zdUDsCx5Xt44B3dLT/wJVLgRUlrd5AfBERrdVUsjDwS0lXStqrtK1m+86yfRewWtleE7i947V3lLZ5SNpL0gxJM2bPnt2vuCMiWmnxhj73tbZnSXoucLak33cetG1JXpg3tH0UcBTA9OnTF+q1ERExsUbuLGzPKt/vAU4BNgXuHuleKt/vKafPAtbuePlapS0iIgZk4MlC0jKSlhvZBrYGrgdOB3Yrp+0GnFa2Twc+VKqiNgMe7OiuioiIAWiiG2o14BRJI5//37Z/IekK4ERJewK3Ae8t558JbAfMBB4B9hh8yBER7TbwZGH7FmDDMdrvA7Yao93A3gMILSIixjFMpbMRETGkkiwiIqJWkkVERNRKsoiIiFpJFhERUSvJIiIiaiVZRERErSSLiIiolWQRERG1kiwiIqJWkkVERNRKsoiIiFpJFhERUSvJIiIiaiVZRERErSSLiIiolWQRERG1mlhWNSImgan7n9F0CADcesj2TYfQCrmziIiIWkkWERFRK8kiIiJqJVlEREStDHBHRDxDbRjsz51FRETUSrKIiIhaSRYREVErySIiImolWURERK0ki4iIqLXIJAtJ20q6SdJMSfs3HU9ERJssEslC0hTgCOCtwAbALpI2aDaqiIj2WCSSBbApMNP2LbYfB04Admw4poiI1pDtpmOoJWknYFvb/6fsfxB4te19Os7ZC9ir7L4IuGnggS5oFeDepoMYEvldjMrvYlR+F6OG4Xexru1Vxzowaab7sH0UcFTTcXSSNMP29KbjGAb5XYzK72JUfhejhv13sah0Q80C1u7YX6u0RUTEACwqyeIKYJqk9SQtAewMnN5wTBERrbFIdEPZnitpH+AsYApwjO0bGg6rG0PVLdaw/C5G5XcxKr+LUUP9u1gkBrgjIqJZi0o3VERENCjJIiIiaiVZ9IGk5SUt13QcETG8JL1yjLa3NRFLNzJm0UOSXgUcAywHCPgr8A+2r2wyrkGStMlEx21fNahYhoUkAe8Hnm/7YEnrAM+zfXnDoQ2MpHdNdNz2yYOKZVhIugr4kO3ry/4uwCdsv7rZyMaWZNFDkq4F9rZ9Udl/LfBftl/RbGSDI+m8srkkMB34LVXifAUww/bmTcXWFElHAk8Cb7L9EkkrAb+0/aqGQxsYSceWzecCrwF+VfbfCPzG9tBeUfeLpOcDPwF2BV4HfAh4m+0HGw1sHItE6ewi5ImRRAFg+2JJc5sMaNBsvxFA0snAJravK/svAw5qMLQmvdr2JpKuBrD9QHleqDVs7wEg6ZfABrbvLPurA99vMLTG2L5F0s7AqcCfga1t/73ZqMaXZNFbF0j6DnA8YOB9wPkjXTMt64J50UiiALB9vaSXNBlQg/63zJxsAEmrUt1ptNHaI4miuBtYp6lgmiDpOsq/hWJlqufHLpPEsPZEJFn01obl+4HztW9M9Y/jTYMNp1HXSvoe8MOy/37g2gbjadLhwCnAcyV9GdgJOKDZkBpzrqSzqC6ooLqgOqfBeJqwSHa5Zcwi+kLSksDHgNeXpguBI20/2lxUzZH0YmArqvGbc23f2HBIjSmD3a8ruxfaPqXJeJoiaTPgBttzyv7ywEtsX9ZsZGNLsughSc+huqt4LdWdxMXAwbbvazSwaFzphlqNjrt5239uLqJoWhnD2sTlj7CkxaiKQCasKGxKuqF66wSqK+h3l/33Az8G3txYRAMm6UTb7x2jXxZgaPtj+0nSP1FdRNwNPEF1d2GqCrFWKVfT/wm8BFiCqq/+YdvLNxpYM+SOq3XbT0oa2r/JubPoIUnX237ZfG3X2X55UzENmqTVbd8pad2xjtu+bdAxNU3STKqKqNbfYUqaQTV
r9ElUpdUfAl5o+zONBtaAUjF4PnBkafpH4I2239FUTBPJE9y99UtJO0tarHy9l2qm3NYYqXQpSeFR4OXl6+9tTBTF7cBQ1s43wfZMYIrtJ2wfC2zbdEwN+SjVMyezgDuAVzO62ufQyZ1FD0maAyzDaFfDYsDD5bDbdKtdEuXXqa6cRDWg+S+2f9JkXE2QdDTVUr9nAI+NtNs+rLGgGiLpQqpu2e8BdwF3Arvb3nDCF0bjkiyiLyT9FniL7XvK/qrAOW38oyBp/lJqAGx/YdCxNK1MdXIP1XjFJ4EVqGY5mNloYA2Q9EKqLqjVbL9M0iuAHWx/qeHQxpRk0SeS1gd2AXax/dKm4xm0+cdqSqXHb9s0fjM/ScsC2P5b07EMmqRpwKHA+sB1wKdst3ppZEkXAP8CfMf2xqVtgXHPYZExix6StIak/SRdAdxAVemxc8NhNeUXks6StLuk3am6YM5sOKZGSHpZKZO8AbhB0pWS2nYBcQzwc6pKwauoKqLabukxJpMc2umBhrZMa1EiaS+qu4g1gROBPYHT2tjNAE/Nsno48CqqZ04Ajmrrw1dUy2XuZ/s8AElbAt+lGtxsi+Vsf7dsf73MuNp295YeiJHnLHaiGsMZSkkWvfEt4BJgV9szACS1tn/PtiWdWbqcWjf19BiWGUkUALbPl7RMkwE1YElJG1MVOwAs1bnfsnnTRuxNdSHxYkmzgD9RPZs1lDJm0QPlye33UN1dPI/q7mJ322s3GliDJB0HfMv2FU3H0jRJp1B1vfy/0vQB4JW239lcVIMl6XzGeEizsO02zZs2j3LhsNjItB/DKsmixyStRTU52i5UZbSn2P5ss1ENnqTfAy8AbqMqHxbVH4U2PrW8EvAFRrvkLgIOsv1Ac1FFkyS9iOqZiheXphupumr/0FxUE0uy6KNSGrez7YObjmXQ8gR3dJL0Jtu/Gm/FvDatlCdpc6ru2e8AV1NdSG0MfBh4l+1LGwxvXEkW0VOSVp7ouO37BxXLsCgXDZ8CpjLvRIKt6XqR9AXbB3asmNfJtv9h4EE1RNL/AF+zff587W8A9rf91kYCq5FkET0l6U9UfdOiWtTmgbK9IvBn2+s1F10zygOK3waupHq6H4A2rc0eoyT9wfYLxzl2k+0XDTqmbqQaKnpqJBlI+i7VeM2ZZf+twDsaDK1Jc20fWX9atMREA9kPT3CsUbmz6LHSJ/vUehZtfbZgrNl2WzgD70iX3Mepprg4hXnnhmpdl1yApHuoljNY4BDwXturDTikriRZ9JCk/6KqAOpcMvKPtvduLqpmlKUzL2LeZVVfb3ub5qIarNIlNx7bfv7AghkCZcqXzWz/pulYmiRpt4mO2z5uULEsjCSLHirloi+Zb+WrG2y/pNnIBq9cVR/IvMuqfiFX0+0m6eqReZDaTtJ7bJ9U1zYsMjdUb82kGtQdsXZpax3b99ve1/bG5WvftiUKSdMknSrpeknHS1qz6ZiGwLmS3l2mhGm7sRZ8GtpFoHJn0QOSfkY1RrEC1XxIl5f9VwOX296yueiakXJRkHQR8AOqu6odgM1tj/mcQVvMt+bL3xl9WLNNa728FdgOeC/Vsssjlgc2sL1pI4HVSDVUbxzadABD6CSqctHv0VEu2jKZPG8+tpdrOoYh8BdgBtUFRGf59ByqNT6GUu4seqw8uTzN9jmSlgIWH/Y5X/pB0pW2X9l0HE0qY1i7MDp53o+AXUf2Wzp5HpJ2YHQs63zbP28ynqZIehbVBfs6tm9qOp46SRY9JOnDVPO9rGx7/bLgy7dtb9VwaAMn6SBaXi6ayfMWJOkQqq7aH5WmXYAZtoe2r75fJL2dqldiCdvrSdoIONj2Ds1GNrYkix6SdA2wKXBZx8pXrXq2YMQ4ZaOtKxeNeUm6FtjI9pNlfwpwdUsnmLwSeBPV3dXQ/73ImEVvPWb78ZFCD0mLM/6V5aQ21rQekpZoIpamZPK8ca0IjNxhrtBgHE37X9sPzlcYNrR/L5I
seusCSZ+lWtjlLcA/Aj9rOKZGlRLJN1H11b8NGMqnU/vkDcCvgLePccy0c2GorwJXSzqPauzm9cD+zYbUmBsk7QpMKV3WHweG9oHFdEP1UHkIb09ga6r/CGcB33MLf8mSNqNKEO8AVqZaFez0rOHQTpK2sP1rSc+m+vfwqnLoctt3NRhaYyQtDfwb1d8LqP5efMn2o81FNb4kix6QdB9wGfBrqiuDy2w/0mxUzZD0FapVA/9MNe3JKVQDmK2bbTZGjVTHSbrK9iZNxzNMJC29KPy9SLLoAUnLA5sBrylfr6RaT/fXwK9tn9hgeANVJkn7A/AfwM9sPybplgxst5ukS4Frqe40F5hEz/bHBx1T0yS9huo5pGVtryNpQ+Ajtv+x4dDGlGTRB2VN3T2ATwDr2Z7SbESDU6pb3kJVErkVcB7wZmBt23ObjK0JmTyvImkVqn8HXwM+P//xYZ08r58kXQbsRNU9O1INdb3tlzUb2dgywN0DktZg9K5ipC/2SuAA4JKm4mqC7SeAXwC/KP3TbwOWAmZJOtf2ro0GOGC2n5R0BNWyma1l+17gBEk32v5t0/EMC9u3z1cNNbSzHSRZ9MYdwFXAv1Mti/h4w/EMBduPAT8Fflq66t7RbESNOVfSu4GT21js0CmJYh63l64ol6e59wVubDimcaUbqgfKAuybU91ZrAfcSnVHcQnV4O5j4786JrtMnhdjKV1z36TqnhPwS2Bf2/c1Gtg4kiz6QNJUqtr6fYG1bC/ZbEQRzSvjNzu1qeBjIpKWHNYy2bEkWfSIpBczOm6xBdVTqpdSVUO1albaDOouKJPnVSTNsD296TiGgaSZwN1UK0peRLUM84PNRjW+JIsekHQv1bTDl1CetbDdykWPRmRFtFGZPG9U+V3cS7WOw8Mj7W2aYLKTpHWA11FdYG4H/NX2Ro0GNY4kix6QtMIwXxE0QdKhVMmz9YO6mTxvVCaYHCVpLapE8QZgQ6r5si62/dVGAxtHkkX0RQZ1R5VkseXI1XNZn/z8NiaLGCXpSeAK4Cu2T2s6njpJFhF9JmkX4BCqBxSfmjzP9o8nfOEkVOZD2o9qwZ+9ygR6L2rTGI6kxW3PLU9sv5bq38M6wM3ABbaPbjTAcSRZ9EgqPRbU9kHdTJ63IEk/pnpg9UO2X1aSx2+GtZ++Hzrnx5K0LFXCeB3wAQDb6zYY3riSLHoolR6jMqibyfPGMvJ/pLMAQtJvbW/YdGyDMvLvQdIM4NlUk49eSDVecVuz0Y0vyaKHUukxKoO6mTxvLJJ+QzVn2K/LH8z1geNtb9pwaAMj6Q7gMGAK8GRpfuoPse3DmoirTqb76K33le97d7QZaF2lR7Ei7V4R7W1UT+duQ9X1EnAQ1dxha0v6EVXJ6B6NRjR4U4BlqcavFhm5s4i+yKDuKEkbZk6kUZKeQzWlv4BLyySDrbGodksmWfRQKj0yqBsTKzMPb1XXNpktqg+sphuqt46l6m54TdmfBZwEtCZZAIdTLf50Sbl6Or3heGIISFoSWBpYRdJKjHbBLA+s2VhgzVgkE2OSRW+tb/t9pQsG249ovsnqW+B/JR0FrCXp8PkPtm1QNyXVT/kI1WJga1BdUI38v3gI+FZDMTViUS14SbLorcclLUWpbCiVHm2bnjyDuh3K4kf/CrQ6Wdj+JvBNSR+3Pc9FROmyjCGXMYsekrQ18G/ABlRz028B7GH7vEYDa0AGdUelpHrUWIO7i+qAb9skWfRY2ys9YkGZPA8kPY9qbOKHwK7MO2bxbdsvbiq26E6SRQ+l0iNibJJ2A3YHpgMzOg7NAb5v++Qm4oruJVn0QEelx3nAlsx71fSLtl01ZVB3XimpHiXp3bZ/2nQcsfCSLHpA0r6MVnrMYt5Kj+/ablW1B2SerE6ZPA8kfcD2DyX9Mx1TW4wY1ikuYlSqoXoglR5jOkfSp8igLqSkGqq1TaCa5iIWQbmz6KFUeozKoO6oTJ43StKSth9tOo5YeLmz6IG
OSo+lJG3MvGMWSzcWWINsr9d0DEPkIDJ53ojrJd0NXFS+Ls6SxIuG3Fn0QCo9FpRB3XmlpHqUpHWoFvvZAtgO+Gubxm8WVUkWPZRKj1EZ1B2VkupRktaiShRvADakmsL+YttfbTSwqJVuqB4YqfQApkrab/7jLa30aP2gbibPG9OfgSuAr9j+aNPBRPeSLHojlR4LyjxZmTzvKZIWtz0X2JhqzeldJe0P3AxcYPvoRgOMWumG6qFUeozKPFmjxiuptt2a5NlZFShpWaqE8TrgAwC2120wvOhCkkUPSZoJpNKjyKBuJSXVoz+vpBnAs4HfABdS/R+5rdnoohtJFj2WSo9KBnUzeV4nSXcAh1GtP/1kaX7qj09Lx/UWKRmz6KFS6bEFVbLYELgBuLjRoAYsg7rz2IaqpHotqj+UI+YAn20ioAZNoRrTa1WRw2SSO4sekvQko5UepzUdTxMyT9aCUlLdvm63ySjJogdGKj0kbUg1cPd6YB1aXOmRQd1MntdJ0tW2N246jnj6Fms6gEnicoCyMtxxwLHAr6gePPp8g3E1afcx2i4ZdBAN6yypXm6MrzZpzVjVZJUxix4ao9Lj9W2r9Mg8WaNsf6dsfq3tJdUtnW14Ukk3VA+k0mNU5slaUEqqYzJIsugBSXcCRzJOpYftLww2ouZlUHdeKamORV26oXrjTtsHNx3EMMg8WQtKSXVMBkkWvZHa8VGZJ2tBmTwvFnnphuoBSStnAG9emScrJdUxuSRZRF9kUDeT58Xkkm6o6AvbL+gY1N0eOEJSKwd1U1Idk0HuLKIvsiJaSqpjcsmdRfRLBnUzeV5MIrmziJ7KoO6oTJ4Xk0mSRfRUBnVHZfK8mEzSDRV9kUFdIJPnxSSSO4voqQzqRkxOubOIXsugbsQklDuL6KkM6kZMTln8KHotdxQRk1DuLKKnMk9WxOSUZBEREbXSDRUREbWSLCIiolaSRUQNSc+RdE35ukvSrI79JcZ5zUclfahs7y5pjY5jn5C09KDij+iFjFlELARJBwF/s33oQrzmfOBTtmeU/VuB6bbvXYj3mGL7iYWLNqJ3cmcRsfAWk3QlgKQNJbms3YGkP0paWtJBkj4laSdgOvCjcieyL7AGcJ6k88prtpZ0iaSrJJ1U5tRC0q2SvibpKuA9kj4u6XeSrpV0QiM/ebRWnuCOWHhPAktKWp5qksQZwOskXQzcY/sRqXrcxPZPJO3DvHcWnwTeaPteSasABwBvtv2wpE8D+wEHl8+6r2Nixr8A69l+TNKKA/tpI0iyiHi6fgNsQTUF+1eAbakeSLxoId9nM2AD4NclwSwBXNJx/Mcd29dS3aGcCpz6dIKOeLqSLCKengup7irWBU4DPk01YeIZC/k+As62vcs4xx/u2N6eKjm9Hfg3SS+3PXchPy/iacmYRcTTcxHVGh03236SatnY7YCLxzh3DrDcOPuXAltIegGApGUkvXD+N5C0GLC27fOoEtMKVBM2RgxE7iwingbbt6rqN7qwNF0MrGX7gTFO/z7wbUl/BzYHjgJ+Iekvtt8oaXfgeEnPLucfAPxhvveYAvxQ0gpUdyOH2/5rL3+miImkdDYiImqlGyoiImolWURERK0ki4iIqJVkERERtZIsIiKiVpJFRETUSrKIiIha/x/BeLoI+FpeGQAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# top 5 sources of users\n", + "fig, ax = plt.subplots()\n", + "ax.tick_params(axis='x', labelsize=10)\n", + "ax.tick_params(axis='y', labelsize=10)\n", + "ax.set_xlabel('Twitters', fontsize=10)\n", + "ax.set_ylabel('Number of sources' , fontsize=10)\n", + "ax.set_title('Top 5 sources', fontsize=10)\n", + "tweets_df.source.value_counts()[:5].plot(ax=ax, kind='bar')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Possibly sensitive" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0 3866\n", + "unknown 3463\n", + "1.0 111\n", + "Name: possibly_sensitive, dtype: int64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tweets_df[\"possibly_sensitive\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAPwAAADnCAYAAAA6ujs/AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAaD0lEQVR4nO3deZhcVZ3G8e+v6nZC1srSkIQEUiEJCQRZREDAFQHRQoiKKK4s4uOoiA8jWCojd2DGp1VERFAZBoy4gSJ7jQqyK6sBlE0lQpFOZyPpcLOntzN/nNvQaXqp213V5966v8/z9FPV1dVdb5S37lLnniPGGJRS6ZBxHUApNXK08EqliBZeqRTRwiuVIlp4pVJEC69UimjhlUoRLbxSKaKFVypFtPBKpYgWXqkU0cIrlSJaeKVSRAuvVIpo4euEiBwnIv8QkWUiUuzj56NF5Prw54+ISN5BTOWYFr4OiEgWuAJ4D7AvcIqI7NvraWcAG4wx84DvAd8a2ZQqDrTw9eFQYJkx5gVjTBtwHXBir+ecCPw0vH8D8C4RkRHMqGJAC18fZgLNPb5fET7W53OMMR1AAEwdkXQqNrTwSqWIFr4+tAB79Ph+VvhYn88REQ/IAetHJJ2KDS18fXgMmC8ic0RkFPAR4NZez7kV+FR4/yTgbqMzmKaO5zqAGj5jTIeIfAH4A5AFrjHGPCMiFwJ/McbcClwN/ExElgGt2DcFlTKib/JKpYfu0iuVIlp4pVJEC69UimjhlUoRPUufEvliaQJ2nP0CYDrQGH5NBSYB44Fx4e1oYDuwFdjS67b7fivwL+B5YBnQUm4q6BngmNOz9HUmXyxNxBZ7UXjbfX+PgX6vCray8xvA88DjwJPlpkJXjV9bVUgLn3D5YikHHAUcE37Nc5vodQLgT8C94dcT5aZCp8tAaaaFT5h8sZQFDgOOxRb8UJJ1aNb9BnAfcHe5qbDUcZ5U0cInQL5YGgWcAJwCvAs7Dr5elIFfA9eVmwpPOM5S97TwMZYvlvbDTlzxcewJtnr3T+BnwJJyU2GF6zD1SAsfM+FJt1OwRT/EcRxXuoA/Aj8Bbiw3Fdoc56kbWviYyBdLBwNnAx8ExjqOEycrgO8AV5WbCttch0k6Lbxj+WLpCOA/gONcZ4m5tdi5+K4oNxU2uQ6TVFp4R/LF0pHAhdiP1FTlXgF+AFxabiq0Os6SOFr4EZYvlg4Avgm813WWhNsM/BhoKjcVdOaeCmnhR0i+WJqFnRr6FEBni62edcBXgJ/o0N7BaeFrLF8sCfBvQBMwwXGcevZn4LPlpsLTroPEmRa+hvLF0kLgf4EjXWdJiQ7gUsAvNxW2OM4SS1r4GsgXSw3Y3czzsVeeqZHVDJxdbirc5DpI3GjhqyxfLB2K3aq/wXUWxS3A6Xo2/zVa+CoJL2r5L+A8dGKROHkJ+FC5qfCY6yBxoIWvgnyxNBW7ntvRrrOoPrUB55SbCle4DuKaFn6Yws/VbwbybpOoClwHnFluKmx2HcQV3fUchnyx9FHgQbTsSfER4LF8sdR7Ke3U0C38EITH698GznGdRQ3JFuAz5abCL10HGWla+IjC4/XrsRNRqGT7armp0OQ6xEjSwkeQL5amA3cD+7jOoqrmUuwJvVQUQQtfoXyxNBNb9r1dZ1FV90vgU+WmQofrILWmha9AvljaA7gHmOs6i6qZW4CT6312HT1LP4h8sTQbO8Oqlr2+nQjclC+WdnEdpJZ0Cz+AfLG0F3bLvqfrLGrE3AmcWK/TaekWvh/5Ymk+dsuuZU+XY4Cf54uluuxGXf6jhqvHCbpZrrMoJz6AnTiz7mjhe8kXS+OB29Cyp905+WLp865DVJsew/cQjqC7BSi4zqJioRNYXG4q3O46SLXoFn5nl6BlV6/JAtfli6U3ug5SLbqFD+WLpVOxK50o1dsq4LByU6HZdZDh0sLz6iw196PTUan+PY0t/VbXQYYj9bv0+WJpGnAjWnY1sP2A77oOMVypLzy
wBJjpOoRKhM/mi6VEn+NJdeHzxdIZ6JpuKpqr88XSrq5DDFVqCx9eEHOJ6xwqcaZhZyVOpNQWHrgamOg6hEqkE/LF0pmuQwxFKs/S54ulzwBXus6hEm0LcGC5qbDMdZAoUlf48HLXp9B13tTwPQIcWW4qdLoOUqlU7dKHCztejZZdVcdhwBmuQ0SRqsIDp6GTT6rqujBfLCVmA5KawoczmVzoOoeqO9OwC4cmQmoKD5yFDrBRtXFO+DFv7FVceBEZKyL/ISJXhd/PF5HjaxetevLFUg4ous6h6tYY4JuuQ1Qiyhb+J8AO4PDw+xbsaqlJcB4wxXUIVdc+li+W3uQ6xGCiFH6uMebbQDuAMWYrIDVJVUXh4hFfcp1D1T0hARfXRCl8m4iMAQyAiMzFbvHj7hvAWNchVCq8LV8sneA6xECiFN4Hfg/sISK/AO7C7irHVr5Ymgt82nUOlSqxPmNfceGNMXdgZ/M8FfgV8CZjzL21iVU1RaDBdQiVKkfki6XDB3+aG1HO0t8GHAvca4y53Rizrnaxhi88M3+K6xwqlb7sOkB/ouzSXwy8FXhWRG4QkZNEJM7L8nwCGOc6hEqlxeHhZOxE2aW/zxjzOWAv7JVmJwNraxWsCj7rOoBKrQwx/e8v0ki78Cz9B7H/mEOAn9Yi1HDli6W3Aotc51CpdlocF6aMcgz/a+A54Cjgcuzn8mfVKtgwxfLdVaXKVOxecKx4EZ57NXCKMSbW1/6G842d5DqHUtgNz7WuQ/Q06BZeRI4K744DThSRD/T8qm28ITkNGOU6hFLA4fliKVarD1eyhX87diXV9/XxM4Od0z1OEjUhgap7i4HLXIfoVvEUVyIyxxjz4mCPuZQvlhZhVwhRKi7uKTcVjhr8aSMjyln63/bx2A3VClIli10HUKqXt+WLpamuQ3QbdJdeRBZiP+LK9TpmnwjE7WOHxa4DKNVLFns4vMRxDqCyY/gFwPHAJHY+jt8ExGZu7nyxtDtwsOscSvXh/cSk8FGO4Q83xjxU4zxDli+WTsd+dKhU3GwHGstNhS2ug1SyS39eOPHFR0XkdRejGGO+WJNk0R3rOoBS/dgFu4ZhX+fBRlQlu/TPhbd/qWWQ4cgXSxngaNc5lBrAsSSh8MaY28LbV8fNi0gGGG+M2VjDbFG8ETuUUam4OsR1AIg2lv6XIjJRRMZhP+t+VkTOrV20SI5wHUCpQbwhDhfTRPkcft9wi74Y+B0wB3vNeRwc4DqAUoPwgANdh4hS+AYRacAW/lZjTDvhhJYxoIVXSeB8tz5K4a8EytiLaO4XkdmA82P4fLHkode+q2RwXviKL481xlzGzhcBvCQi76x+pMgWEL8Rf0r1JTmFF5HR2Nlu8r1+z/UCjbo7r5JiQb5YmlhuKjjbM46yS38LcCLQAWzp8eWaFl4lheB4+HeUGW9mGWOOq1mSoTvQdQClIpgL3OPqxaNs4R8UkTfULMnQxTGTUv3Z3eWLR9nCvwU4VURexK4pJ4Axxuxfk2QVyBdLWWCaq9dXaggSU/j31CzF0E0h4lTbSjnmtPBRFqJ4CdgDOCq8vzXK79fIro5fX6moZrp88Shj6S/Aroz51fChBuDntQgVgRZeJU0ytvDYWTtOIPwozhizEphQi1ARaOFV0uwWjg51Ikrh24ydHscAhFfNudboOoBSEWWA6S5fvFK/FpErgUkicibwR+Cq2sSqmG7hVRI5K3yUsfQXi8gx2AtmFgDfMMbcWbNkldHCqyQa4+qFo4ylHwfcbYy5U0QWAAtEpCG8TNaVyQ5fW6mhSsQx/P3AaBGZCfweO/nFklqEiqDL8esrNRQNrl44SuHFGLMV+ADwI2PMh3B/HXqb49dXaiicbeGjvLCIyOHAx3htwcZs9SNF4vJwIvGmsLF1rqxcszDTHCyUl3bMz7TITFm3yxQ2TRbax6/xvKDZa9i0vGHUtuUNXnuL55nVXjbbms2O3pSR8dsz5Low47HDrFWlukYZKAz
6NBG5BrsIzFpjzH59/FyA7wPvxQ6EO9UY8/hAfzNK4c/GDrq5yRjzjIjshcOrfkK6hR9AI6+sm5dpWbtAmoN9pLltXqZFZsrLYyazefJo2qeLMAU7PLlPszvbZ8zubIcdW/t9jR3C9jVZb12L573S3OBtafa8tpYGr2tVNptZ72VHb8xkxm0XmdQJjdg5FVS2rdI3yCXA5fS/xvx7gPnh12HAj8LbfkU5S38/9ji++/sXgFcXoRCRHxhjzqr071VJqrfwu9H68vzMyrULZfnGBbJ8x7zMyuxMWTdmElsmj6J9hgiN1HiswmjDLnt2dMzas6Nj1uHbB35ukMkEq7zs+hbP29jseVub7V4Da72s15rJjtmckQltIpMNTMVuvepVRf/dGmPuF5H8AE85Ebg2HB/zsIhMEpEZxphV/f1CNY8ljqzi36pUHRfemBm0rp2XaXl5H1m+cUGmuW2urMzuLuvGTmLLlAY6ZoiwKwn6aDLX1ZXLtXXlFrYN/H9bB3Ssy2bXr/KyG1Y0eJubvYbtzQ1ex0ovm3k5mx0VZDJjt2UyuXb7xjB+hOJXU/+7TNHMBJp7fL8ifGxECu9CYnfpha6u3Vm/Zl6mZd0+snzTwszy9r1kVWaGtI7LsXlqA53TRZhGCi//9cCb3tk5bXpn57SDdgz8f/FWkS1r7F5DsMLztjQ3eG3huYbMumx29KZMZsJ2kcld9s3B2dnxXja7euGkFz62W/gMXZ0zZd3q+bJi/T6yfNOCTHPHXrIqM11ax+fYMtWjc4YIM4AZrrMm2Vhjxs1p7xg3p71jwOcZMBsymdZVntfa4mU3NTc0bGv2vPaVXpa1XrZhQzY7ZovIhDaRKQYm1/iQolpz2rVgr2DtNit8rF/VLLyLYy5nc+pl6ezYQ9auni8t6xbK8i0LM80dc2SVN102jJvA1kbPbqFn4vhySGUJyJSurilT2tqmLGoD2Nbvc9ug7WUvu36l520I9xq2r/C8rlWeJ+uymVFBJjtuW0ZyHfZE5NghxNkw1H9HL7cCXxCR67An64KBjt8h2ki7NxhjnhrgKd+v9G9V0YD/uOHw6GjfU9aumi8r1u+TWb5lgTR3zpHV3jTZMH4CWxuzdE0XYRb2XVXVkVEwamZH54yZHZ0zDmHHgM/dLLJ5jZddt8LzNtpPKRraWxqyZk3Wy7ZmM7tsymTG77CHFI2IZIH2pz711CuV5BCRXwHvABpFZAVwAeGgHWPMj4H/w34ktwx7XuC0wf5mlC38D8OpqpcAvzDGBD1/aIxZEuFvVcuKof5iAx1ts2X1qgWyYv3CzPIte0tz1xxZ3TBNXpkwjm2NWbqmibAnsGcV86o6M96Y8ePbO8bPbe8YaKeBLuhqzWbWrfS8f1T6t40xr1uevdfPDfD5isMS7WO5t4rIfOB0YKmIPAr8xPEFNP0er4ymbXteVq/aW1a07pNZvnVvae7My5pRu8krE8exrTGDmSbCbGD2COZVKZWBTGNnV2NjZ9u/XOaIdAxvjHleRM7HrhV/GXBQONrna8aYG2sRcBArT8g8+JdFmfK2+bKic7asGb2bvDJxLNt3y2AaRZiDXfRSqbgY8l5pNUQ5ht8fe4xQAO4E3meMeVxEdgceAka88OWmQhv+R2egJ8ZUcjQP/pTaiXLxzA+AJ4ADjDGf7x6zG051dX4twlVomcPXViqqF1y+eJRj+LcP8LOfVSfOkCwD+s2mVMw86fLFBy28iDxF3+vAO1+IIvS849dXqlIG+KvLAJVs4Y+veYrhecJ1AKUq9CJ+4GzlWKig8OGiEwCIyHTgUOw71WPGmNU1zFapR7B56vnqKlUfnnQdIMpCFJ8GHsXOeHMS9nK802sVrGJ+EAB/dx1DqQo43xuN8jn8ucBBxpj1ACIyFXgQuKYWwSJ6GNjHdQilBvGk6wBRPpZbD2zq8f2m8LE4eNh1AKUq8KTrAJWcpT8nvLsMeEREbsEeM58I/K2G2aLQwqu4ewk/cDrKDir
bpe9eP+5f4Ve3W6ofZ8iexk4qkMTZT1Q6/N51ALBTT7vOUB1+7g7gGNcxlOrHYvzA+Uaykl36S40xXxKR2+hjAI4x5oSaJIvuNrTwKp7agLtch4DKdum7h81eXMsgVXAz9go+peLmT/iBs3nsehr0LL0xZml4e1/3F/Zk3Ybwfjz4QTP2sl2l4uZ3rgN0izLw5l4RmSgiU4DHgatE5JLaRRuSm1wHUKoPySs8kDPGbMSOtLvWGHMYcHRtYg2ZFl7FzXL84BnXIbpFKbwnIjOAk4Hba5RnePzgOaDiOcOUGgEuLx1/nSiFvxD4A7DMGPNYuLZcHC9N1a28igtDPIaev6p+Pofv5ucWYQfiKOXavfjBO12H6CnKSbtvhyftGkTkLhF5WUQ+XstwQ2KPl+Lz6YFKs6tdB+gtyi79seFJu+OBMjAPewVdHF3uOoBKvQD4resQvUU6aRfeFoDf9F6IImZuZpA1tpSqsV/iBwMsTeFGlMLfLiJ/Bw4G7hKRXYFBVgR3xA86gCtdx1CpFrvdeYh40i4cdBMYYzrFLqI3MSbTXL2en5sGLAdGuY6iUufP+MFbXIfoS5STdg3Ax4HrReQG4AziMwHG6/nBGmJ4DKVS4ULXAfoTZZf+R9jd+R+GX28MH4uz77kOoFLnYfzgDtch+lPxLr2I/NUYc8Bgj8WOn7sJWOw6hkqN9+IHsRk731uULXyniMzt/iYcaddZ/UhV93WSkVMl32NxLjtEK/y5wD3hVXP3AncD/16TVNXkB88Ss/HMqm7F9ti9W5TC/xn7UVcX0Bref6gWoWrgAmCH6xCqri3FD+J5UVkPUQp/LXat9YuwK8nuRVK2nH6wHHuiUalaKboOUIkoJ+2eNcbsO9hjseXnGrGz7k50HUXVnevwg1Nch6hElC384yLy5u5vROQwkjSllB+sA77pOoaqOxuBcwZ9VkxEKfzBwIMiUhaRMvb4/RAReUpE4rIgxWC+SwzW91J15Xz8YJXrEJWKsks/e6Cf91xlNtb83AHAY0CD6ygq8R4HDsUPEvOxb/1NgFEJP3cRcL7rGCrRuoA34wePuQ4SRZRd+npyEfCs6xAq0a5MWtkhrYX3gzbgNHQEnhqafwDnuQ4xFOksPIAfPApc6jqGSpztwMlxWUkmqvQW3vo6SfpoUcXB2fhBUj6Vep10nrTryc/tASwFdnUdRcVeYgbY9CftW/juNek+jB7Pq4E9D3zGdYjh0sID+ME9JPQkjBoRO7DH7ZtcBxkuLXw3P7gE+JXrGCqWPosfPOk6RDVo4Xf2aexS2Ep1+zp+sMR1iGrRk3a9+bk88AAwy3ES5d7l+MFZrkNUk27he/ODMnYZ7LWOkyi3fgOc7TpEtekWvj9+bn/gHmCK6yhqxN0LHIcf1N0sSVr4gfi5Q4C7gAmuo6gR8zfgbfhBnJdSGzLdpR+IvTiiAGx1HUWNiGeAd9dr2UELPzg/eAA7r33d7d6pnTwOvAM/iOfSaVWiha+EH9yJ3dInfuCF6tODwFHhNGh1TQtfKT+4C3gbUNdbgBQqAcfU8258T1r4KOxoqyOw10Or5LsGWIwfpOYcjZ6lHwo/Nxm4ATjKdRQ1JAa4CD+4wHWQkaaFHyo/14Bd3OLTrqOoSDYAn0zCKjG1oIUfLj/3eeBiYBfXUdSglgIfwg9edB3EFT2GHy4/uAI4FPsZroqv/wGOTHPZQbfw1ePndsEudPE511HUTrZiL29NxjqINaaFrzY/dwL27O9U11EUTwKfwA+edh0kLnSXvtr84FZgf+wYfOXGVuBc4E1a9p3pFr5W/JwAnwC+BUx3nCZNfgd8LrzMWfWiha81PzcR+AbwRXQ9u1pag51C+nrXQeJMCz9S/NxC4DLgGNdR6kwncDVQxA82uA4Td1r4kebn3g9cAuQdJ0m6LuA64D/xg3+6DpMUWngX/Nwo7PH9V4D5jtMkjcEOa/bxA10QNCItvEt+LgN8EPgqcJDjNElwM3BBkpd6ck0
LHxd+7t3A17CX4KrXbAWuB36AHzzhOkzSaeHjxs8dDpwJnES659L7G3Y47M/Tcq36SNDCx5WfG4OdWuuT2DP7Wad5Rkb31vxK/OAR12HqkRY+CfzcdOCj2PIf4DhNtW0C7gBuA27WrXltaeGTxs/Nw27xjwbeCUx2G2hIXgBux5b8fvygzXGe1NDCJ5k9y38wr70BHAGMdpqpby9hr0V/CCjhB885zpNaWvh6Yo/79+vxtSi8nTmCKZZjy70U+AuwNA2zwSaFFj4N/NwkbPn3BWYAjeHX1B73G4Gx/fyFLmA7sA07a28LsKKP22b8oLVW/ww1fFp49Ro/Nxrwej3ahh+0u4ijqk8Lr1SK6AQYSqWIFl6pFNHCK5UiWnhVdSJyqohc7jqHej0tvFIpooVXgxKRvIg83eP7L4uILyL3isi3RORREfmniLy1j98tiMhDItIoIktE5DIReVBEXhCRk8LniIh8R0SeFpGnROTD4eNXiMgJ4f2bROSa8P7pIvLfYa7nROQqEXlGRO4QkTEj879KMmnh1XB5xphDgS8BOy3OKCLvB4rAe40x3aPtZgBvAY4HmsLHPgAciL0w6GjgOyIyA3gA6H4TmYkdOET42P3h/fnAFcaYRcAr2AlFVD+08Gq4bgxvl7LzPH1HYafwKhhjek4uebMxpssY8ywwLXzsLcCvjDGdxpg1wH3AIYSFF5F9gWeBNeEbweHAg+HvvmiMebKfDKoXLbyqRAc7/7fSc+HMHeFtJzuP0vsXdgKPvXv9rR097stAL2qMaQEmAcdht+gPACcDm40xm/r4e70zqF608KoSa4DdRGSqiIzG7o4P5iXs7vW1IrJokOc+AHxYRLIisit2mq9Hw589jD1c6C78l8NbNQRaeDUoY0w7cCG2hHcCf6/w9/4OfAz4jYjMHeCpN2GntPorcDdwnjFmdfizB7DnCZYBjwNT0MIPmY6lVypFdAuvVIpo4ZVKES28UimihVcqRbTwSqWIFl6pFNHCK5UiWnilUkQLr1SKaOGVShEtvFIpooVXKkW08EqliBZeqRTRwiuVIv8PA7aRwIUDwgEAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "tweets_df[\"possibly_sensitive\"].value_counts().plot(kind=\"pie\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Polarity and subjectivity" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + " 0.000000 2894\n", + "-0.100000 269\n", + " 0.500000 225\n", + "-0.050000 188\n", + " 0.200000 178\n", + " ... \n", + " 0.151667 1\n", + "-0.190000 1\n", + "-0.140136 1\n", + " 0.013624 1\n", + " 0.207143 1\n", + "Name: polarity, Length: 760, dtype: int64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tweets_df[\"polarity\"].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Sentiments" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYsAAAE2CAYAAACQtL4gAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAhlElEQVR4nO3deZxcVZn/8c+XALJvksEQAoEYxKgYYk8AVxCH1WFRJCxCQJ2MP0GRxTG4DKg4g/gTXIkGiQEFYhAYwiIYQhABWTp7QlhiCJIYIRAIAUY04Zk/7ulwaar7Vne66lZ1f9+vV73q3nO3p2469dS559xzFRGYmZl1ZoOyAzAzs8bnZGFmZoWcLMzMrJCThZmZFXKyMDOzQk4WZmZWyMnCkPRVSQskzZU0W9Le3dzPcEmH5uYPlzS25yKteMz9JL23lsdIx7lTUks3tvu5pGFVrjtR0tFdj67r0nm7qR7HKojjPElnd3GbdX9Xko6s9vza+tmw7ACsXJL2BT4KjIiIVyRtD2zczd0NB1qAWwAiYgowpSfi7MR+wIvAvTU+TrdExGfKjqE3kbRhu7+rI4GbgIdKC6qPcM3CBgDPRMQrABHxTET8BUDSeyT9XtIMSbdJGpDK75T0HUkPSHpU0gckbQx8ExiVaiejJJ0s6cdpm4mSxkm6T9Li9Mt2gqSFkia2BSPpQEl/lDRT0jWStkjlSyR9I5XPk7SHpMHAZ4Ez0jE/kP9gkkamfc2SdK+kt6XykyVdJ+lWSY9JujC3zThJramm9Y32J0vSpyR9Pzf/b5IulrS5pJslzZE0X9Ko3LlqkdQvnYP5Kf4zOvj3+Eg6/qOSPpr2MVjSH9Jnn9lWk5I0QNJd6bPPb/v8nZzDgyU9LGkm8LFKB5e0iaRfpBhnSdq/6Jy1236JpAvT9g9IemvuM9yhrPY6TdLOFbb9N0kPpnN4raTNUvlEST+VdD9wYdvfVToPhwPfTedgSPpsbfsbmp+39RQRfvXhF7AFMBt4FLgE+FAq34js13r/ND8KmJCm7wS+l6YPBW5P0ycDP87te908MBGYBAg4AngBeBfZD5YZZLWS7YG7gM3TNl8G/jNNLwE+n6Y/B/w8TZ8HnN3BZ9sK2DBNfwS4NhfXYmBrYBPgCWBQWrZdeu+XPueeuc/cks7Xn4CNUvm96XN8HLg0d+yt2233HmBqbvk2FeKdCNyazslQYGmKbzNgk7TOUKA1TZ8FfDUX75YdncO0nyfT9gImAzdViOGs3L/zHsCf07YdnrN22y/JxXRS2zGAG4HRafpTwP+0//cD3pzbz/m5f++JZLWHfh38XR2d2246MDxN/1fbPvxa/5cvQ/VxEfGipPcAHwD2B36t7HpwK/BOYKokyL6Mluc2vS69zwAGV3m4GyMiJM0DnoqIeQCSFqR97AQMA+5Jx9wY+GMHx6z4y7idrYHLJQ0FgiwBtpkWEavS8R8CdiH7Mj1G0hiyS7QDUjxz2zZK5+sO4KOSFpIljXmSXgG+J+k7ZF+Qf2gXy2JgN0k/Am4GftdBzJMj4lXgMUmLyb6wHwd+LGk4sBbYPa37IDBB0kZkX76zJX2IyudwD+DxiHgsfeZfAWMqHP/9wI/SZ31Y0hO543V0ztq7Ovd+cZrel9f+zX4JVKqZvFPS+cA2ZEn5ttyyayJibYVt2vs5cIqkM8l+4IysYhurgpOFkf4T3gncmb7IR5N9IS+IiH072OyV9L6W6v+O2rZ5NTfdNr9h2tfUiDiuh475LWB6RByVLlndWWFf6/YnaVfgbOCfI+I5ZZfHNqmw358DXwEeBn4BEBGPShpBVtM6X9K0iPhm2wZpf+8GDiK7dHYM2S/s9toP1hbAGcBTwLvJah1/S/u8S9IHgcOAiZIuAp6jwjlMiWZ9veGcdbBedDBdZCJwZETMkXQyWXtUm5eq3Me1wLnAHcCMiHi2C8e3TrjNoo+T9Lb0y7vNcLJLDI8A/ZU1gCNpI0nvKNjdarJLId11H/C
+3HXuzSXtXrBNZ8fcGliWpk+u4vhbkX0prZK0A3BIpZUi4n5gEHA86Ve0pB2BlyPiV8B3gRH5bZR1HNggIq4FvtZ+ec4nJG0gaQiwG9m/w9bA8lTjOJGsloekXchqaJeSJbARdHwOHwYGp/0CdJSQ/wCckLbdHdg5xdAVo3LvbTXDe4Fj0/QJ6TjtbQksTzWlE6o81uv+/SPib2Q1knGkRG49w8nCtiC7VPOQpLlklzDOi4i/A0cD35E0h6xdo6iL6nRgWGpsHFWw7htExAqyL/WrUyxtl086cyNwlCo0cJNd6vhvSbOooiYSEXOAWWRfrFcB93Sy+mTgnoh4Ls2/C3hA0myyX7bnt1t/IFnNbTbwK+CcDvb7Z+AB4LfAZ9OX3yXA6PTvsAev/creD5iTPt8o4AcdncO0nzHAzanR9+kOjn8JsEGqYf4aODlS54cu2DYd+3SyWhHA58kuD80lS3inV9ju68D9ZOf94SqPNQn4UmqMb0uEV5LVVju61GfdoNQQZGZdoOwehYsjYlrZsTQSSUuAloh4psQYzibrYPD1smLojdxmYdYFkrYh++U/x4mi8Ui6HhgCfLjsWHob1yzMzKyQ2yzMzKyQk4WZmRWqWbJIwwY8kG7dXzd0gqRdJd0vaZGkXysbJgJJb0rzi9Lywbl9nZPKH5F0UK1iNjOzymrWZqHs9tHN0x2vGwF3k3WXOxO4LiImSfopWUPhOEmfIxta4bOSjgWOiohRykaUvJrsTswdgduB3Tu7m3P77bePwYMH1+RzmZn1VjNmzHgmIvpXWlaz3lCRZaEX0+xG6RVkvRSOT+WXk40NM45svKDzUvlvyIY3aBtHaFLq6/24pEVkiSM/DMTrDB48mNbW1p78OGZmvV4a3qWimrZZKBtpczbZDUBTyQZgez4i1qRVlpLdrER6fxIgLV8FvDlfXmGb/LHGKButs3XFihU1+DRmZn1XTZNFRKyNiOFkA8SNpPhu3PU51viIaImIlv79K9aizMysm+rSGyoinicbCmJfYBtJbZe/duK1sXuWkY23Q1q+NfBsvrzCNmZmVge17A3VP93tiqRNgX8BFpIljbZHR44GbkjTU9I8afkdqd1jCnBs6i21K9l4/A/UKm4zM3ujWg73MYBsgLp+ZElpckTclMbBn5TGrZ8FXJbWvwz4ZWrAXkkaoTIiFkiaTPbYxDXAqVWOa29mZj2kVw730dLSEu4NZWbWNZJmRERLpWW+g9vMzAo5WZiZWSEPUV6lwWNvLjuEqiy54LCyQzCzXsg1CzMzK+RkYWZmhZwszMyskJOFmZkVcrIwM7NCThZmZlbIycLMzAo5WZiZWSHflGel8E2OZs3FNQszMyvkZGFmZoWcLMzMrJCThZmZFXKyMDOzQk4WZmZWyMnCzMwKOVmYmVkhJwszMyvkZGFmZoWcLMzMrJCThZmZFXKyMDOzQk4WZmZWyMnCzMwK1SxZSBokabqkhyQtkHR6Kj9P0jJJs9Pr0Nw250haJOkRSQflyg9OZYskja1VzGZmVlktH360BjgrImZK2hKYIWlqWnZxRPz//MqShgHHAu8AdgRul7R7WvwT4F+ApcCDkqZExEM1jN3MzHJqliwiYjmwPE2vlrQQGNjJJkcAkyLiFeBxSYuAkWnZoohYDCBpUlrXycLMrE7q0mYhaTCwF3B/KjpN0lxJEyRtm8oGAk/mNluayjoqb3+MMZJaJbWuWLGipz+CmVmfVvNkIWkL4FrgixHxAjAOGAIMJ6t5fK8njhMR4yOiJSJa+vfv3xO7NDOzpJZtFkjaiCxRXBkR1wFExFO55ZcCN6XZZcCg3OY7pTI6KTczszqoZW8oAZcBCyPiolz5gNxqRwHz0/QU4FhJb5K0KzAUeAB4EBgqaVdJG5M1gk+pVdxmZvZGtaxZvA84EZgnaXYq+wpwnKThQABLgH8HiIgFkiaTNVyvAU6NiLUAkk4DbgP6ARMiYkEN4zYzs3Zq2RvqbkAVFt3
SyTbfBr5dofyWzrYzM7Pa8h3cZmZWyMnCzMwKOVmYmVkhJwszMyvkZGFmZoWcLMzMrJCThZmZFXKyMDOzQk4WZmZWyMnCzMwKOVmYmVkhJwszMyvkZGFmZoWcLMzMrJCThZmZFXKyMDOzQk4WZmZWyMnCzMwKOVmYmVkhJwszMyvkZGFmZoWcLMzMrJCThZmZFXKyMDOzQk4WZmZWyMnCzMwKOVmYmVmhmiULSYMkTZf0kKQFkk5P5dtJmirpsfS+bSqXpB9KWiRprqQRuX2NTus/Jml0rWI2M7PKalmzWAOcFRHDgH2AUyUNA8YC0yJiKDAtzQMcAgxNrzHAOMiSC3AusDcwEji3LcGYmVl91CxZRMTyiJiZplcDC4GBwBHA5Wm1y4Ej0/QRwBWRuQ/YRtIA4CBgakSsjIjngKnAwbWK28zM3qgubRaSBgN7AfcDO0TE8rTor8AOaXog8GRus6WprKPy9scYI6lVUuuKFSt69gOYmfVxNU8WkrYArgW+GBEv5JdFRADRE8eJiPER0RIRLf379++JXZqZWVLTZCFpI7JEcWVEXJeKn0qXl0jvT6fyZcCg3OY7pbKOys3MrE5q2RtKwGXAwoi4KLdoCtDWo2k0cEOu/KTUK2ofYFW6XHUbcKCkbVPD9oGpzMzM6mTDGu77fcCJwDxJs1PZV4ALgMmSPg08ARyTlt0CHAosAl4GTgGIiJWSvgU8mNb7ZkSsrGHcZmbWTs2SRUTcDaiDxQdUWD+AUzvY1wRgQs9FZ2ZmXeE7uM3MrJCThZmZFepyskgNzXvWIhgzM2tMVSULSXdK2ioNvTETuFTSRUXbmZlZ71BtzWLrdEPdx8iG5Ngb+EjtwjIzs0ZSbbLYMN1AdwxwUw3jMTOzBlRtsvgG2Y1wiyLiQUm7AY/VLiwzM2sk1d5nsTwi1jVqR8Rit1mYmfUd1dYsflRlmZmZ9UKd1iwk7Qu8F+gv6czcoq2AfrUMzMzMGkfRZaiNgS3Selvmyl8Ajq5VUGbWNYPH3lx2CFVZcsFhZYdg3dRpsoiI3wO/lzQxIp6oU0xmZtZgqm3gfpOk8cDg/DYR8eFaBGVmZo2l2mRxDfBT4OfA2tqFY2ZmjajaZLEmIsbVNBIzM2tY1XadvVHS5yQNkLRd26umkZmZWcOotmbR9hjUL+XKAtitZ8MxM7NGVFWyiIhdax2ImZk1rmqHKN9M0tdSjygkDZX00dqGZmZmjaLaNotfAH8nu5sbYBlwfk0iMjOzhlNtshgSERcC/wCIiJcB1SwqMzNrKNUmi79L2pSsURtJQ4BXahaVmZk1lGp7Q50L3AoMknQl8D7g5FoFZWZmjaXa3lBTJc0E9iG7/HR6RDxT08jMzKxhVHsZCmAg2bDkGwMflPSx2oRkZmaNpqqahaQJwJ7AAuDVVBzAdTWKy8zMGki1NYt9IqIlIkZHxCnp9anONpA0QdLTkubnys6TtEzS7PQ6NLfsHEmLJD0i6aBc+cGpbJGksV3+hGZmtt6qTRZ/lDSsi/ueCBxcofziiBieXrcApH0fC7wjbXOJpH6S+gE/AQ4BhgHHdSMOMzNbT9X2hrqCLGH8lazLrICIiD072iAi7pI0uMr9HwFMiohXgMclLQJGpmWLImIxgKRJad2HqtyvmZn1gGqTxWXAicA8Xmuz6K7TJJ0EtAJnRcRzZI3n9+XWWZrKAJ5sV773eh7fzMy6qNrLUCsiYkpEPB4RT7S9unG8ccAQYDiwHPheN/ZRkaQxklolta5YsaKndmtmZlRfs5gl6SrgRnJ3bkdEl3pDRcRTbdOSLgVuSrPLgEG5VXdKZXRS3n7f44HxAC0tLdGVuMzMrHPVJotNyZLEgbmyLnedlTQgIpan2aOAtp5SU4CrJF0E7AgMBR4gaxsZKmlXsiRxLHB8V45pZmbrr9o7uE/p6o4lXQ3sB2wvaSnZkCH7SRpOlmiWAP+e9r9A0mSyhus1wKkRsTbt5zTgNrIbAidExIKuxmJ
mZuun02Qh6T8i4kJJPyINIpgXEV/oaNuIOK5C8WWdrP9t4NsVym8BbuksTjMzq62imsXC9N5a60DMzKxxdZosIuLGNPlyRFyTXybpEzWLyszMGkq1XWfPqbLMzMx6oaI2i0OAQ4GBkn6YW7QVWUO0mZn1AUVtFn8ha684HJiRK18NnFGroMzMrLEUtVnMAeZIuioi/lGnmMzMrMFUe1PeSEnnAbukbdoGEtytVoGZmVnj6MpAgmeQXYpaW7twzMysEVWbLFZFxG9rGomZmTWsapPFdEnfJRsLKj+Q4MyaRGVmZg2l2mTR9gyJllxZAB/u2XDMzKwRVTuQ4P61DsTMzBpXVXdwS9pB0mWSfpvmh0n6dG1DMzOzRlHtcB8TyYYJ3zHNPwp8sQbxmJlZA6o2WWwfEZNJz9+OiDW4C62ZWZ9RbbJ4SdKbSc+0kLQPsKpmUZmZWUOptjfUmWSPPh0i6R6gP3B0zaIyM7OG0mnNQtI/S3pLup/iQ8BXyO6z+B2wtA7xmZlZAyi6DPUz4O9p+r3AV4GfAM8B42sYl5mZNZCiy1D9ImJlmh4FjI+Ia4FrJc2uaWRmZtYwimoW/SS1JZQDgDtyy6pt7zAzsyZX9IV/NfB7Sc8A/wv8AUDSW3FvKDOzPqPo4UffljQNGAD8LiIiLdoA+HytgzMzs8ZQeCkpIu6rUPZobcIxM7NGVO1NeWZm1oc5WZiZWSEnCzMzK1SzZCFpgqSnJc3PlW0naaqkx9L7tqlckn4oaZGkuZJG5LYZndZ/TNLoWsVrZmYdq2XNYiJwcLuyscC0iBgKTEvzAIcAQ9NrDDAOsuQCnEv2pL6RwLltCcbMzOqnZskiIu4CVrYrPgK4PE1fDhyZK78iMvcB20gaABwETI2IlRHxHDCVNyYgMzOrsXq3WewQEcvT9F+BHdL0QODJ3HpLU1lH5W8gaYykVkmtK1as6Nmozcz6uNIauNMNflG4YvX7Gx8RLRHR0r9//57arZmZUf9k8VS6vER6fzqVLwMG5dbbKZV1VG5mZnVU72QxBWjr0TQauCFXflLqFbUPsCpdrroNOFDStqlh+8BUZmZmdVSzkWMlXQ3sB2wvaSlZr6YLgMmSPg08ARyTVr8FOBRYBLwMnAIQESslfQt4MK33zdyQ6WZmVic1SxYRcVwHiw6osG4Ap3awnwnAhB4MzczMush3cJuZWSEnCzMzK+RkYWZmhZwszMyskJOFmZkVcrIwM7NCThZmZlbIycLMzAo5WZiZWSEnCzMzK+RkYWZmhZwszMyskJOFmZkVcrIwM7NCThZmZlbIycLMzAo5WZiZWSEnCzMzK+RkYWZmhZwszMyskJOFmZkVcrIwM7NCThZmZlbIycLMzAo5WZiZWSEnCzMzK+RkYWZmhUpJFpKWSJonabak1lS2naSpkh5L79umckn6oaRFkuZKGlFGzGZmfVmZNYv9I2J4RLSk+bHAtIgYCkxL8wCHAEPTawwwru6Rmpn1cY10GeoI4PI0fTlwZK78isjcB2wjaUAJ8ZmZ9VllJYsAfidphqQxqWyHiFiepv8K7JCmBwJP5rZdmspeR9IYSa2SWlesWFGruM3M+qQNSzru+yNimaR/AqZKeji/MCJCUnRlhxExHhgP0NLS0qVtzcysc6XULCJiWXp/GrgeGAk81XZ5Kb0/nVZfBgzKbb5TKjMzszqpe7KQtLmkLdumgQOB+cAUYHRabTRwQ5qeApyUekXtA6zKXa4yM7M6KOMy1A7A9ZLajn9VRNwq6UFgsqRPA08Ax6T1bwEOBRYBLwOn1D9kM7O+re7JIiIWA++uUP4scECF8gBOrUNoZmbWgUbqOmtmZg3KycLMzAo5WZiZWSEnCzMzK+RkYWZmhZwszMyskJOFmZkVcrIwM7NCThZmZlbIycLMzAo5WZiZWSEnCzMzK+RkYWZmhZwszMyskJOFmZkVcrIwM7NCThZmZlbIycLMzAo5WZiZWSEnCzMzK+RkYWZmhZw
szMyskJOFmZkVcrIwM7NCThZmZlbIycLMzAo5WZiZWaGmSRaSDpb0iKRFksaWHY+ZWV/SFMlCUj/gJ8AhwDDgOEnDyo3KzKzv2LDsAKo0ElgUEYsBJE0CjgAeKjUqM+t1Bo+9uewQqrLkgsPqejxFRF0P2B2SjgYOjojPpPkTgb0j4rTcOmOAMWn2bcAjdQ+067YHnik7iF7E57Nn+Xz2nGY5l7tERP9KC5qlZlEoIsYD48uOoysktUZES9lx9BY+nz3L57Pn9IZz2RRtFsAyYFBufqdUZmZmddAsyeJBYKikXSVtDBwLTCk5JjOzPqMpLkNFxBpJpwG3Af2ACRGxoOSwekJTXTZrAj6fPcvns+c0/blsigZuMzMrV7NchjIzsxI5WZiZWSEnCzMzK+RkYWavI2lTSW8rO45mJWnrTpY17b0WThbW1JT5pKT/TPM7SxpZdlzNStK/ArOBW9P8cEnupt41t0vatn2hpAOB60uIp0c4WdSJpNWSXqjwWi3phbLja2KXAPsCx6X51WSDTlr3nEc2FtvzABExG9i1vHCa0nhguqR1w2ZIOh74GVDfAZ16UFPcZ9EbRMSWZcfQS+0dESMkzQKIiOfSjZvWPf+IiFWS8mXuX98FEXGppL8Bd6TaxCjgs8D+EbGk1ODWg5NFSST9E7BJ23xE/LnEcJrZP9IQ9gGQfs29Wm5ITW1B+hXcT9JQ4AvAvSXH1HQi4pcpYcwC/gy8PyKaYSDBDvmmvDqTdDjwPWBH4GlgF2BhRLyj1MCalKQTyH65jQAuB44GvhYR15QaWJOStBnwVeDAVHQbcH5E/K28qJqLpHlkP15E9v97BfBSmo+I2LPE8LrNyaLOJM0BPgzcHhF7Sdof+GREfLrk0JqWpD2AA8j+M06LiIUlh9S0JI2IiJllx9HMJO3S2fKIeKJesfQkJ4s6axuqOCWNvSLiVUlzIuLdZcfWjCT9EJgUEb5U0gMkTQfeAvwG+HVEzC85JGsQbrOov+clbQHcBVwp6WmyKqp1zwzga+m+gOvJEkdryTE1rYjYX9JbgGOAn0naiixpnF9yaE1D0uO8vlOAcvMREUPqH9X6c82iziRtDvwvWbflE4CtgSsj4tlSA2tykrYDPk42fP3OETG05JCanqR3Af8BjIoI9zCrkqQ3tyvagCz5ng3MjIiP1z+q9eeaRR2lXjs3RcT+ZD12Li85pN7krcAepA4DJcfStCS9nazDwMeBZ4FfA2eVGlSTafvhJ2kD4ETgS2Q3Oh4WEQ+VGNp6cbKoo4hYK+lVSVtHxKqy4+kNJF0IHAX8ieyL7VsR8XypQTW3CWTn8aCI+EvZwTQjSRsBnwLOAO4GjoyIReVGtf6cLOrvRWCepKnk2ioi4gvlhdTU/gTs2+x92BtFROxbdgy9wOPAGuD7ZPdY7ClpXXfZiLiupLjWi9ss6kzS6ArFERFX1D2YJiZpj4h4WNKISsvd/bNrJE2OiGNy9wisW0QT3xtQBkm/SJPtzyNk5/JTdQ6pR7hmUX/bRMQP8gWSTi8rmCZ2JjCG7AbH9oLsXharXtvf4EdLjaJ3mM9rN+WRplcAd0fE46VFtZ5cs6gzSTMjYkS7slkRsVdZMTUzSZu0v7u4UplVR9J3IuLLRWXWMUnnVijeDjgIOC8iJtU5pB7hZFEnko4DjgfeD/wht2hL4NWIOKCUwJpcB8n3DWVWnQ7O51xfhlp/qXv37c36t+nLUPVzL7Ac2J7XXzpZDcwtJaImlm4cGwhsKmkvXqvybwVsVlpgTUrS/wM+B+wmKf/3uCVwTzlR9S4RsVLthvNtJq5ZWFNKHQVOBlqA/B3bq4GJzdrjpCzp6W7bAv8NjM0tWh0RK8uJqndJ48B9PSKasj3NyaLOJK3mtV4SGwMbAS9FxFblRdW8JH08Iq4tO47exkPod1+FHmWQtVn8BTgpIh6uf1Trz5eh6iz/EKRUJT0C2Ke8iJqTpE9GxK+AwZLObL88Ii4qIaymlx6
rehHthtAHPIR+9dr3KAvg2Yho6jHg/FjVEkXmf8h6SVjXbJ7etyC7rt7+Zd1zPtmPl0cjYleyod/vKzek5hIRT7R7/bnZEwX4MlTdSfpYbnYDsmvuH/Kds9YIPIS+dcSXoervX3PTa4AlZJeirBvS2FDnk43keyuwJ3BGukRlXech9K0i1yysqUmaHRHDJR1Fdq34TOAu/xLunjSE/t/IuiJ7CH1bxzWLOpO0OzAO2CEi3pkGGDvcD5fptra/4cOAayJiVRN3ZS9du2vrHkLf1nEDd/1dCpwD/AMgIuaSPbDHuucmSQ8D7wGmSepP9svYukHSakkvtHs9Kel6SbuVHZ+VxzWL+tssIh5o9+t3TVnBNLuIGJvaLVal54W8hNuA1sf3gaXAVWSXoo4FhgAzyZ51sV9ZgVm5nCzq7xlJQ0g37Ug6mmwYEOuG9KCZTwIfTAn498BPSw2quR3err1nfGoX+rKkr5QWlZXOyaL+TgXGA3tIWkb2oJQTyg2pqY0juwv+kjR/Yir7TGkRNbeXJR0D/CbNH81rl/XcG6YPc2+oOpP0JrL/gIPJhgB4gez+vG+WGVezqnQPgO8L6L7ULvEDYF+y5HAf2eNBlwHviYi7SwzPSuSaRf3dADxPdg3Yzzhef2slDYmIP8G6L7u1JcfUtCJiMa+/FyjPiaIPc7Kov50i4uCyg+hFvgRMl7Q4zQ8GTikvnObmrt3WEXedrb97Jb2r7CB6kXuAnwGvAivT9B9Ljai5uWu3VeSaRf29HzhZ0uPAK2TdE8NPIuu2K8jafb6V5o8Hfgl8orSImpu7dltFThb1d0jZAfQy74yIYbn56ZIeKi2a5ueu3VaRk0WdRcQTZcfQy8yUtE9E3AcgaW9e/+Q86xp37baK3HXWmpqkhcDbgLYnue0MPEJ26cSX97rIXbutI65ZWLNzz7Ke5a7dVpFrFma2jqT5EfHOsuOwxuOus2aW567dVpFrFma2TupJ9layhm137bZ1nCzMbB1Ju1Qqdy8+c7IwM7NCbrMwM7NCThZmZlbIycKsCyRNl3RQu7IvShrXwfp3SmqpT3RmteNkYdY1V/PGUViPTeVmvZaThVnX/AY4TNLGAJIGAzsCx0lqlbRA0jcqbSjpxdz00ZImpun+kq6V9GB6vS+Vf0jS7PSaJWnLGn82sw55uA+zLoiIlZIeIBs9+AayWsVk4L/Ssn7ANEl7pmdBVOMHwMURcbeknYHbgLcDZwOnRsQ9krbgtWdhm9WdaxZmXZe/FNV2CeoYSTOBWcA7gGEdbFvJR4AfS5oNTAG2SsnhHuAiSV8AtokIP1fCSuNkYdZ1NwAHSBoBbEb2hL6zgQPSnc43A5tU2C5/U1N++QbAPhExPL0GRsSLEXEB8BlgU+AeSXvU4sOYVcPJwqyLIuJFYDowgaxWsRXwErBK0g50/ICrpyS9XdIGwFG58t8Bn2+bkTQ8vQ+JiHkR8R3gQcDJwkrjZGHWPVcD7waujog5ZJefHgauIrt8VMlY4CbgXl7/9LkvAC2S5qaxmT6byr8oab6kuWTPxP5tz38Ms+p4uA8zMyvkmoWZmRVysjAzs0JOFmZmVsjJwszMCjlZmJlZIScLMzMr5GRhZmaFnCzMzKzQ/wHz3USypeKHFQAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "pols = cleaner.text_category(series= tweets_df.polarity)\n", + "pols = pd.Series(pols)\n", + "\n", + "# top 5 sources of users\n", + "fig, ax = plt.subplots()\n", + "ax.tick_params(axis='x', labelsize=10)\n", + "ax.tick_params(axis='y', labelsize=10)\n", + "ax.set_xlabel('Values', fontsize=10)\n", + "ax.set_ylabel('Sentiments' , fontsize=10)\n", + "ax.set_title('Sentiment analysis based on polarity', fontsize=10)\n", + "pols.value_counts().plot(ax=ax, kind='bar')" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.000000 2314\n", + "0.500000 435\n", + "0.100000 292\n", + "1.000000 255\n", + "0.400000 231\n", + " ... \n", + "0.301667 1\n", + "0.500168 1\n", + "0.417857 1\n", + "0.343750 1\n", + "0.421429 1\n", + "Name: subjectivity, Length: 710, dtype: int64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tweets_df.subjectivity.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYsAAAExCAYAAACNsY6YAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAbFElEQVR4nO3de7ildV338fcHUPHAUUbEAR0UEvGRkyNgVioYBw+hSYRiDoZOPQ+p5WOFZpmahvYkSaVJSo6mIkoEj1o4IWhKCMNpOBojQjKSDAwiqJHgtz/u34bFZu+518Csvfbs/X5d17rWff/uw/quPXPtz75/v/uQqkKSpHXZZNwFSJJmP8NCktTLsJAk9TIsJEm9DAtJUi/DQpLUy7DQRivJHyS5MsnKJJcm2a9n/T9O8uYp2p+Q5HMPsoajkzxhYP4jSXbv2ea89r4oySsfzOeuR33PS/L5UX6G5ofNxl2A9GAkeTbwYmCfqroryXbAwx/Mvqrqu8DhD7KUo4ErgO+2fb12iM/72Ta5CHgl8KkH+dnSjPHIQhurHYBbquougKq6pf3SJ8n1LTxIsjjJuQPb7Znk35Jcm+R1bZ1FSa5o05sm+bMkF7Yjlt+Y2DDJ7ye5PMllSY5PcjiwGPhkO7J5ZJJz22f+ZpI/G9j26CR/1abvbM3HAz/ftv2dJF9NstfANl9Lsufgl05yfpKnD8xPfN6+7XtdkuS8JE+d/AObfGSV5Ioki9r0q5Jc0Gr5cPs5bJrkY229y5P8ztD/OppzDAttrL4E7JTk35N8MMlzh9xuD+AA4NnAHw12ITXHALdX1bOAZwGvS7JzkkOBw4D9qmpP4H1V9TlgBXBUVe1VVT8e2M9pwMsG5n8VOGXSZx0H/Gvb9gTgo3RHKiT5GWDzqrps0jafAY5o6+wA7FBVK4BrgJ+vqr2BPwLeM+TPgyRPa/U9p6r2Au4BjgL2AhZW1f+qqmcAfzfsPjX3GBbaKFXVncAzgaXAGuAzSY4eYtMzqurHVXULcA6w76TlBwGvTnIp8A3gscCuwAuAv6uqH7XPX9tT3xrguiT7J3kssBvw9Z7aPgu8OMnDgF8HPjbFOqdyX5fZEcDEWMtWwGfbEdIJwNOn2HY6B9L9LC9s3/tA4MnAdcCTk/xlkkOAH6zHPjXHOGahjVZV3QOcC5yb5HJgCd0v2Lu57w+hzSdv1jMf4PVVddb9GpODH0SJp9D9Qr8GOL16bsRWVT9KspzuCOYIul/gk9dZneTWJHvQHQ38Zlv0LuCcqnpZ61o6d4qPGPy5wH0/mwDLquotkzdo3WAHt885gi7ENA95ZKGNUpKnJtl1oGkv4IY2fT33/aJ9+aRND0uyeftr/3nAhZOWnwX87/bXPUl+JsmjgeXAa5I8qrVv29a/A9himjJPp/vF/woe2AU13bYfAU4ELqyq26bZ72eA3wO2qqqVrW0rYHWbPnqa7a4H9mn17wPs3NrPBg5P8ri2bNskT2rjPptU1WnA2ya21fxkWGhj9RhgWZKrkqwEdgf+uC17B/CBJCvo+t8HraTrfjofeNfEoDj3HWF8BLgKuLh16XwY2Kyq/hk4E1jRumomBoo/BvzNxAD34Ae1X/ZXA0+qqgum+A4rgXvagPnvtG0uouvuWdf4wOeAI+m6pCa8D/jTJJcwfY/BacC2Sa4Efgv49/aZV9GFwZfaz3I53QkEC+mO2i4F/h54wJGH5o94i3LNd0meCby/qoYdJB9lLU+g60Larap+OuZypHt5ZKF5Lcli4NPAB2ZBLa+mG1T/A4NCs41HFpKkXh5ZSJJ6GRaSpF5z8jqL7bbbrhYtWjTuMiRpo3LRRRfdUlULplo2J8Ni0aJFrFixYtxlSNJGJckN0y2zG0qS1MuwkCT1MiwkSb0MC0lSL8NCktTLsJAk9TIsJEm9DAtJUi/DQpLUa05ewb2xWHTcF8Zdwpxy/fEvGncJ0pzlkYUkqddIwyLJ9Ukub4+cXNHatk2yPMm17X2
b1p4kJyZZlWRle0bwxH6WtPWvTbJklDVLkh5oJo4snl9Ve1XV4jZ/HHB2Ve1K96D441r7ocCu7bUU+BB04QK8HdgP2Bd4+0TASJJmxji6oQ4DlrXpZcBLB9o/Xp3zga2T7AAcDCyvqrVVdRvdw+QPmeGaJWleG3VYFPClJBclWdratq+qm9r0fwLbt+mFwHcGtr2xtU3Xfj9JliZZkWTFmjVrNuR3kKR5b9RnQ/1cVa1O8jhgeZJrBhdWVSXZIA8Br6qTgJMAFi9e7IPFJWkDGumRRVWtbu83A6fTjTl8r3Uv0d5vbquvBnYa2HzH1jZduyRphowsLJI8OskWE9PAQcAVwJnAxBlNS4Az2vSZwKvbWVH7A7e37qqzgIOSbNMGtg9qbZKkGTLKbqjtgdOTTHzOp6rqn5NcCJya5BjgBuCItv4XgRcCq4AfAa8BqKq1Sd4FXNjWe2dVrR1h3ZKkSUYWFlV1HbDnFO23AgdO0V7AsdPs62Tg5A1doyRpOF7BLUnqZVhIknoZFpKkXoaFJKmXYSFJ6mVYSJJ6GRaSpF6GhSSpl2EhSeplWEiSehkWkqRehoUkqZdhIUnqZVhIknoZFpKkXoaFJKmXYSFJ6mVYSJJ6GRaSpF6GhSSpl2EhSeplWEiSehkWkqRehoUkqZdhIUnqZVhIknoZFpKkXoaFJKmXYSFJ6mVYSJJ6GRaSpF4jD4skmya5JMnn2/zOSb6RZFWSzyR5eGt/RJtf1ZYvGtjHW1r7N5McPOqaJUn3NxNHFm8Erh6Yfy9wQlXtAtwGHNPajwFua+0ntPVIsjtwJPB04BDgg0k2nYG6JUnNSMMiyY7Ai4CPtPkABwCfa6ssA17apg9r87TlB7b1DwNOqaq7qurbwCpg31HWLUm6v1EfWfwF8HvAT9v8Y4HvV9Xdbf5GYGGbXgh8B6Atv72tf2/7FNvcK8nSJCuSrFizZs0G/hqSNL+NLCySvBi4uaouGtVnDKqqk6pqcVUtXrBgwUx8pCTNG5uNcN/PAX4pyQuBzYEtgQ8AWyfZrB097AisbuuvBnYCbkyyGbAVcOtA+4TBbSRJM2BkRxZV9Zaq2rGqFtENUH+5qo4CzgEOb6stAc5o02e2edryL1dVtfYj29lSOwO7AheMqm5J0gON8shiOr8PnJLkT4BLgI+29o8Cn0iyClhLFzBU1ZVJTgWuAu4Gjq2qe2a+bEmav2YkLKrqXODcNn0dU5zNVFX/BfzKNNu/G3j36CqUJK2LV3BLknoZFpKkXoaFJKmXYSFJ6mVYSJJ6GRaSpF6GhSSpl2EhSeplWEiSehkWkqRehoUkqZdhIUnqZVhIknoZFpKkXoaFJKmXYSFJ6mVYSJJ6GRaSpF6GhSSpl2EhSeo1VFgkecaoC5EkzV7DHll8MMkFSf5Pkq1GWpEkadYZKiyq6ueBo4CdgIuSfCrJL460MknSrDH0mEVVXQu8Dfh94LnAiUmuSfLLoypOkjQ7DDtmsUeSE4CrgQOAl1TV09r0CSOsT5I0C2w25Hp/CXwEeGtV/Xiisaq+m+RtI6lMkjRrDNsNdXpVfWIwKJK8EaCqPjGSyiRJs8awYfHqKdqO3oB1SJJmsXV2QyV5BfBKYOckZw4s2gJYO8rCJEmzR9+YxXnATcB2wJ8PtN8BrBxVUZKk2WWdYVFVNwA3AM+emXIkSbPROscsknytvd+R5AcDrzuS/KBn283bVd+XJbkyyTta+85JvpFkVZLPJHl4a39Em1/Vli8a2NdbWvs3kxz8kL+1JGm9rDMsqurn2vsWVbXlwGuLqtqyZ993AQdU1Z7AXsAhSfYH3gucUFW7ALcBx7T1jwFua+0ntPVIsjtwJPB04BC6W49s+iC+qyTpQRr2orwTk6xXV1R17myzD2uvoruQ73OtfRnw0jZ9WJunLT8wSVr7KVV1V1V9G1gF7Ls+tUiSHpphT529CPjDJN9K8v+SLB5moySbJrkUuBlYDnwL+H5V3d1WuRFY2KY
XAt8BaMtvBx472D7FNoOftTTJiiQr1qxZM+TXkiQNY9gbCS6rqhcCzwK+Cbw3ybVDbHdPVe0F7Eh3NLDbQ6i177NOqqrFVbV4wYIFo/oYSZqX1vfhR7vQ/cJ/EnDNsBtV1feBc+jOqto6ycRZWDsCq9v0arq72tKWbwXcOtg+xTaSpBkw7JjF+9qRxDuBK4DFVfWSnm0WJNm6TT8S+EW6GxGeAxzeVlsCnNGmz2zztOVfrqpq7Ue2s6V2BnYFLhju60mSNoRhbyT4LeDZVXXLeux7B2BZO3NpE+DUqvp8kquAU5L8CXAJ8NG2/keBTyRZRXd1+JEAVXVlklOBq4C7gWOr6p71qEOS9BD13e5jt6q6BrgQeGKSJw4ur6qLp9u2qlYCe0/Rfh1TnM1UVf8F/Mo0+3o38O511SpJGp2+I4s3AUu5/60+JkycBitJmuP6bvextE0e2v7yv1eSzUdWlSRpVhn2bKjzhmyTJM1BfWMWj6e7AO6RSfYG0hZtCTxqxLVJkmaJvjGLg+kecrQj3bjFRFj8AHjr6MqSJM0mfWMWy+hOf315VZ02QzVJkmaZYccsnjlxgR1Akm3adRKSpHlg2LA4tN2yA4Cqug144UgqkiTNOsOGxaZJHjEx027f8Yh1rC9JmkOGvd3HJ4Gzk/xdm38N9z17QpI0xw0VFlX13iSXAS9oTe+qqrNGV5YkaTYZ9sgCujvG3l1V/5LkUUm2qKo7RlWYJGn2GPYW5a+je9Tph1vTQuAfR1STJGmWGXaA+1jgOXQX41FV1wKPG1VRkqTZZdiwuKuq/ntipj3JrkZTkiRpthk2LL6S5K1094j6ReCzwP8fXVmSpNlk2LA4DlgDXA78BvBF4G2jKkqSNLsMe+rsT4G/bS9J0jzTd4vyU6vqiCSX88AxiqJ7VvZfVNUZoypQkjR+fUcWb2zvL55m+XZ0V3cbFpI0h61zzKKqbmrvNwB3AXsCe9CdHXVDVV0EHDXyKiVJYzXsRXmvBS4Afhk4HDg/ya8DtMCQJM1hw97u43eBvavqVoAkj6V7BvfJoypM0ngtOu4L4y5hzrj++BeNu4SHbNhTZ28FBu8DdUdrkyTNA31nQ72pTa4CvpHkDLqzoA4DVo64NknSLNHXDbVFe/9We03w7CdJmkfWGRZV9Y6ZKkSSNHsNNcCd5BymuHFgVR2wwSuSJM06w54N9eaB6c2BlwN3b/hyJEmz0bD3hpp8LcXXk1wwgnokSbPQsN1Q2w7MbgIsBrYaSUWSpFln2G6oi7hvzOJu4HrgmFEUJEmafdZ5UV6SZyV5fFXtXFVPBt4BXNNeV/Vsu1OSc5JcleTKJG9s7dsmWZ7k2va+TWtPkhOTrEqyMsk+A/ta0ta/NsmSh/qlJUnrp+8K7g8D/w2Q5BeAPwWWAbcDJ/Vsezfwf6tqd2B/4Ngku9M9SOnsqtoVOLvNAxwK7NpeS4EPtc/dFng7sB+wL/D2iYCRJM2MvrDYtKrWtulfBU6qqtOq6g+BXda1YVXdVFUXt+k7gKuBhXRXfy9rqy0DXtqmDwM+Xp3zga2T7AAcDCyvqrVVdRuwHDhkfb6kJOmh6Q2LJBPjGgcCXx5YNux4B0kWAXsD3wC2n7j1OfCfwPZteiHwnYHNbmxt07VP/oylSVYkWbFmzZphS5MkDaEvLD4NfKXdE+rHwL8CJNmFriuqV5LHAKcBv11VPxhcVlXFFBf7PRhVdVJVLa6qxQsWLNgQu5QkNX23+3h3krOBHYAvtV/u0IXM6/t2nuRhdEHxyar6h9b8vSQ7VNVNrZvp5ta+GthpYPMdW9tq4HmT2s/t+2xJ0obTe4vyqjq/qk6vqh8OtP37xHjEdJIE+ChwdVW9f2DRmcDEGU1LuO+mhGcCr25nRe0P3N66q84CDkqyTRvYPqi1SZJmyNDjDg/Cc4BfAy5PcmlreytwPHBqkmOAG4Aj2rIvAi+kux36j4DXAFTV2iTvAi5s671zYNBdkjQDRhYWVfU
1INMsPnCK9Qs4dpp9nYxP5ZOksRn2SXmSpHnMsJAk9TIsJEm9DAtJUi/DQpLUy7CQJPUyLCRJvQwLSVIvw0KS1MuwkCT1MiwkSb0MC0lSL8NCktTLsJAk9TIsJEm9DAtJUi/DQpLUy7CQJPUyLCRJvQwLSVIvw0KS1MuwkCT1MiwkSb0MC0lSL8NCktTLsJAk9TIsJEm9DAtJUi/DQpLUy7CQJPUyLCRJvUYWFklOTnJzkisG2rZNsjzJte19m9aeJCcmWZVkZZJ9BrZZ0ta/NsmSUdUrSZreKI8sPgYcMqntOODsqtoVOLvNAxwK7NpeS4EPQRcuwNuB/YB9gbdPBIwkaeaMLCyq6qvA2knNhwHL2vQy4KUD7R+vzvnA1kl2AA4GllfV2qq6DVjOAwNIkjRiMz1msX1V3dSm/xPYvk0vBL4zsN6NrW269gdIsjTJiiQr1qxZs2GrlqR5bmwD3FVVQG3A/Z1UVYuravGCBQs21G4lScx8WHyvdS/R3m9u7auBnQbW27G1TdcuSZpBMx0WZwITZzQtAc4YaH91Oytqf+D21l11FnBQkm3awPZBrU2SNIM2G9WOk3waeB6wXZIb6c5qOh44NckxwA3AEW31LwIvBFYBPwJeA1BVa5O8C7iwrffOqpo8aC5JGrGRhUVVvWKaRQdOsW4Bx06zn5OBkzdgaZKk9eQV3JKkXoaFJKmXYSFJ6mVYSJJ6GRaSpF6GhSSpl2EhSeplWEiSehkWkqRehoUkqZdhIUnqZVhIknoZFpKkXoaFJKmXYSFJ6mVYSJJ6GRaSpF6GhSSpl2EhSeplWEiSehkWkqRehoUkqZdhIUnqZVhIknoZFpKkXoaFJKmXYSFJ6mVYSJJ6GRaSpF6GhSSpl2EhSeplWEiSem00YZHkkCTfTLIqyXHjrkeS5pONIiySbAr8NXAosDvwiiS7j7cqSZo/NoqwAPYFVlXVdVX138ApwGFjrkmS5o3Nxl3AkBYC3xmYvxHYb3CFJEuBpW32ziTfnKHa5oPtgFvGXUSfvHfcFWgM/L+5YT1pugUbS1j0qqqTgJPGXcdclGRFVS0edx3SZP7fnDkbSzfUamCngfkdW5skaQZsLGFxIbBrkp2TPBw4EjhzzDVJ0ryxUXRDVdXdSX4LOAvYFDi5qq4cc1nzid17mq38vzlDUlXjrkGSNMttLN1QkqQxMiwkSb0MC0lSL8NCktTLsNCU0nlVkj9q809Msu+465I0Hp4NpSkl+RDwU+CAqnpakm2AL1XVs8ZcmuapJHcAU/3CClBVteUMlzSvbBTXWWgs9quqfZJcAlBVt7ULIqWxqKotxl3DfGZYaDo/abeGL4AkC+iONKRZIcnjgM0n5qvqP8ZYzpznmIWmcyJwOvC4JO8Gvga8Z7wlSZDkl5JcC3wb+ApwPfBPYy1qHnDMQtNKshtwIF2f8NlVdfWYS5JIchlwAPAvVbV3kucDr6qqY8Zc2pxmN5SmlORE4JSq+utx1yJN8pOqujXJJkk2qapzkvzFuIua6wwLTeci4G1JnkrXHXVKVa0Yc00SwPeTPAb4KvDJJDcDPxxzTXOe3VBapyTbAi+nuy38E6tq1zGXpHkuyaOBH9ONuR4FbAV8sqpuHWthc5xHFuqzC7Ab3eMWHbPQWLUz9D5fVc+nOztv2ZhLmjc8G0pTSvK+dsbJO4ErgMVV9ZIxl6V5rqruAX6aZKtx1zLfeGSh6XwLeHZV3TLuQqRJ7gQuT7KcgbGKqnrD+Eqa+xyz0P0k2a2qrkmyz1TLq+rima5JGpRkyRTNVVUfn/Fi5hGPLDTZm4ClwJ9Psazozm+XxmnrqvrAYEOSN46rmPnCIwtNKcnmVfVffW3STEtycVXtM6ntkqrae1w1zQceWWg65wGTu6KmapNmRJJXAK8Edk5y5sCiLYC146lq/jAsdD9JHg8sBB6ZZG+6W30AbAk8amyFSd0fKzcB23H/btI7gJVjqWgesRtK99MGD48GFgODV2z
fAXysqv5hHHVJGi/DQlNK8vKqOm3cdUiTTXoI0sOBhwE/9OFHo2U3lO4nyauq6u+BRUneNHl5Vb1/DGVJ9xp8CFKSAIcB+4+vovnBK7g12aPb+2PoBg4nv6RZozr/CBw87lrmOruhJG1UkvzywOwmdONrz62qZ4+ppHnBIwtNqd0basskD0tydpI1SV417rok4CUDr4PpTr44bKwVzQMeWWhKSS6tqr2SvAx4Md2V3V+tqj3HXJqkMfDIQtOZOPnhRcBnq+r2cRYjTUjyM+1o94o2v0eSt427rrnOsNB0Pp/kGuCZwNlJFgDe6kOzwd8CbwF+AlBVK+kezqURMiw0pao6DvhZuudY/ITuVtD2C2s2eFRVXTCp7e6xVDKPeJ2FppTkYcCrgF/oTmXnK8DfjLUoqXNLkqfQLsxLcjjdbUA0Qg5wa0pJPkJ3ZezEYyt/Dbinql47vqokSPJk4CS6I9/bgG8DR1XVDWMtbI4zLDSlJJdNPvNpqjZppiV5BHA4sAjYFvgB3fV57xxnXXOd3VCazj1JnlJV34J7/5q7Z8w1SQBnAN8HLga+O95S5g/DQtP5XeCcJNe1+UXAa8ZXjnSvHavqkHEXMd94NpSm83Xgw8BP6R4s82Hg38ZakdQ5L8kzxl3EfOOYhaaU5FS6vuBPtqZX0j37+FfGV5UESa4CdqEb2L6L7gFdVVV7jLWwOc6w0JSSXFVVu/e1STMtyZOmavdsqNFyzELTuTjJ/lV1PkCS/bj/k/OksTAUxsMjC00pydXAU4H/aE1PBL5Jd6Wsh/zSPGNYaErTHepP8K87aX4xLCRJvTx1VpLUy7CQJPUyLKT1kOScJAdPavvtJB+aZv1zkyyemeqk0TEspPXzaR74oJ0jW7s0ZxkW0vr5HPCiJA8HSLIIeALwiiQrklyZ5B1TbZjkzoHpw5N8rE0vSHJakgvb6zmt/blJLm2vS5JsMeLvJk3Li/Kk9VBVa5NcABxKd/fTI4FTgfe0ZZvSPYZ2j/a4z2F8ADihqr6W5InAWcDTgDcDx1bV15M8Bh9rqzHyyEJaf4NdURNdUEckuRi4BHg6sD63RXkB8FdJLgXOBLZs4fB14P1J3kB3Xy4fHaqxMSyk9XcGcGCSfYBH0d2V983Age3K9i8Am0+x3eBFTYPLNwH2r6q92mthVd1ZVccDrwUeCXw9yW6j+DLSMAwLaT1V1Z3AOcDJdEcVWwI/BG5Psj1dF9VUvpfkaUk2AV420P4l4PUTM0n2au9PqarLq+q9wIWAYaGxMSykB+fTwJ7Ap6vqMrrup2uAT9F1H03lOODzwHnATQPtbwAWJ1nZbr/9m639t5NckWQl8BPgnzb815CG4+0+JEm9PLKQJPUyLCRJvQwLSVIvw0KS1MuwkCT1MiwkSb0MC0lSr/8BUAzAbLw3aFcAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "subs = cleaner.text_category(series= tweets_df.subjectivity)\n", + "subs = pd.Series(subs)\n", + "\n", + "# top 5 sources of users\n", + "fig, ax = plt.subplots()\n", + "ax.tick_params(axis='x', labelsize=10)\n", + "ax.tick_params(axis='y', labelsize=10)\n", + "ax.set_xlabel('Values', fontsize=10)\n", + "ax.set_ylabel('Subjectivity' , fontsize=10)\n", + "ax.set_title('Subjectivity values', fontsize=10)\n", + "subs.value_counts().plot(ax=ax, kind='bar')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Hashtags" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 [{'text': 'City', 'indices': [132, 137]}]\n", + "1 [{'text': 'China', 'indices': [18, 24]}, {'tex...\n", + "2 [{'text': 'XiJinping', 'indices': [127, 137]}]\n", + "3 [{'text': 'XiJinping', 'indices': [9, 19]}]\n", + "4 []\n", + " ... 
\n", + "7435 [{'text': 'China', 'indices': [29, 35]}, {'tex...\n", + "7436 [{'text': 'exactly', 'indices': [29, 37]}, {'t...\n", + "7437 [{'text': 'Taiwan', 'indices': [168, 175]}, {'...\n", + "7438 [{'text': 'China', 'indices': [17, 23]}, {'tex...\n", + "7439 [{'text': 'Pelosi', 'indices': [16, 23]}]\n", + "Name: hashtags, Length: 7440, dtype: object" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tweets_df.hashtags" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[] 527\n", + "[{'text': 'Taiwan', 'indices': [0, 7]}] 62\n", + "[{'text': 'ThankYou', 'indices': [0, 9]}, {'text': 'JoeBiden', 'indices': [20, 29]}, {'text': 'Nides', 'indices': [42, 48]}, {'text': 'Pelosi', 'indices': [63, 70]}, {'text': 'IsraelHasTheRightToDefendItself', 'indices': [72, 104]}, {'text': 'IAmAGoodJew', 'indices': [107, 119]}] 20\n", + "[{'text': 'Taiwan', 'indices': [36, 43]}] 20\n", + "[{'text': 'Taiwan', 'indices': [44, 51]}] 18\n", + " ... 
\n", + "[{'text': 'China', 'indices': [25, 31]}, {'text': 'Taiwan', 'indices': [32, 39]}, {'text': 'US', 'indices': [40, 43]}, {'text': 'TechStocks', 'indices': [111, 122]}] 1\n", + "[{'text': 'Taiwan', 'indices': [42, 49]}, {'text': 'Chinese', 'indices': [92, 100]}, {'text': 'France', 'indices': [115, 122]}] 1\n", + "[{'text': 'Baerbock', 'indices': [0, 9]}, {'text': 'BaerbockRuecktritt', 'indices': [10, 29]}, {'text': 'pelositaiwan', 'indices': [30, 43]}, {'text': 'pelosivisittotaiwan', 'indices': [44, 64]}, {'text': 'CNN', 'indices': [186, 190]}] 1\n", + "[{'text': 'BREAKING', 'indices': [14, 23]}, {'text': 'Taiwan', 'indices': [25, 32]}, {'text': 'Chinese', 'indices': [80, 88]}] 1\n", + "[{'text': 'China', 'indices': [17, 23]}, {'text': 'Taiwan', 'indices': [45, 52]}] 1\n", + "Name: hashtags, Length: 5697, dtype: int64" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tweets_df.hashtags.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we can see, 527 tweets have no hashtags: their value is an empty list (`[]`)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### removing empty hashtags" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 [{'text': 'City', 'indices': [132, 137]}]\n", + "1 [{'text': 'China', 'indices': [18, 24]}, {'tex...\n", + "2 [{'text': 'XiJinping', 'indices': [127, 137]}]\n", + "3 [{'text': 'XiJinping', 'indices': [9, 19]}]\n", + "4 []\n", + " ... 
\n", + "7435 [{'text': 'China', 'indices': [29, 35]}, {'tex...\n", + "7436 [{'text': 'exactly', 'indices': [29, 37]}, {'t...\n", + "7437 [{'text': 'Taiwan', 'indices': [168, 175]}, {'...\n", + "7438 [{'text': 'China', 'indices': [17, 23]}, {'tex...\n", + "7439 [{'text': 'Pelosi', 'indices': [16, 23]}]\n", + "Name: hashtags, Length: 7440, dtype: object" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# take the rows that have values in the hashtags column\n", + "hashtags_list_df = tweets_df.loc[tweets_df[\"hashtags\"] != \" \"]\n", + "hashtags_list_df = hashtags_list_df['hashtags']\n", + "hashtags_list_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### flatten the hashtags" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
hashtag
0[{'text':
1'City',
2'indices':
3[132,
4137]}]
\n", + "
" + ], + "text/plain": [ + " hashtag\n", + "0 [{'text':\n", + "1 'City',\n", + "2 'indices':\n", + "3 [132,\n", + "4 137]}]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#create dataframe where each hashtag gets its own row\n", + "flattened_hashtags = []\n", + "for hashtags_list in hashtags_list_df:\n", + " hashtags_list = hashtags_list.split(\" \")\n", + " for hashtag in hashtags_list:\n", + " flattened_hashtags.append(hashtag)\n", + "flattened_hashtags_df = pd.DataFrame(flattened_hashtags, columns=['hashtag'])\n", + "flattened_hashtags_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "hashtag \n", + "'indices': 27668\n", + "{'text': 20755\n", + "[{'text': 6913\n", + "'Taiwan', 5063\n", + "'China', 2439\n", + " ... \n", + "'himalayas', 1\n", + "'OPERATIVES', 1\n", + "'historical', 1\n", + "'ONEPIECE1056', 1\n", + "'antiwhitism', 1\n", + "Length: 5709, dtype: int64" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "flattened_hashtags_df.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAASsAAADnCAYAAABG+XDPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAA2xklEQVR4nO2dd3ic1ZX/P2eqimW5V4FlXMAYYwO2AYMLZgMkIrAQQkggcTAkIQFCIOwvStdmyaIUYJcQQk1IIZuQLGHZiEBYEhdMsXHBchGhWO69SFabkWbO74/7CsayyjTpnRndz/PoYfTOfd97JEZf33vuKaKqWCwWS6bjcdsAi8ViiQcrVhaLJSuwYmWxWLICK1YWiyUrsGJlsViyAitWFoslK7BiZbFYsgIrVhaLJSuwYmWxWLICK1YWiyUrsGJlsViyAitWFoslK7BiZbFYsgIrVhaLJSuwYmWxWLICK1YWiyUrsGJlsViyAitWFoslK7BiZckoRCRfRJaKyAQRWeJcmyki9yf4nAoRudN5/T0R+ac02lgbx5gfi8jCdM1pAZ/bBlgsHVgMPA1E2i+o6hvAG8k+UFW/kwa7EuUnwKPA31yYOyexKytLpnEt8D8YsToEICILROTPzusKEfm5iCwRkfdE5MvtN4rIN0XkHyLyMnByzPUnROQq5/UsEXlFRN4UkZUiUiQiXhH5kYisEpH1IvIFZ+xoEVkmIutEZIOIzHUeub+nH0JVtwJDRWRUen4tFruysmQMIhIATlLVWufSlV0MPQW4ACgC3hKRnwGnA9cAMzCf6zXA6k6e/3vgE6q6SkQGAs3ADUCdqs4SkSCwQkT+6sz/gqp+X0S8QAGAqs6KeeZzwI2quqsTO9cA5wH/ndAvwtIpVqwsmcQw4Egc46pUNQSERGQfMBKYC/xJVZsAROTZTu47GditqqsAVLXeGXsRcHr76gsoBiYBq4Cfi4gfeEZV13V8oKp+pBs79wFj4vh5LHFgxcqSSTQDeXGMC8W8jpD651iAW1X1hePeEJkHlAFPiMi9qvqrBJ6bh/mZLGnA+qwsGYOqHga8IhKPYHVkGfDPzmliEfDRTsa8BYwWkVkAjr/KB7wAfNFZQSEik0WkUETGAXtV9VHgMeDM7gwQkbtF5IqYS5OBDUn8LJZOsGJlyTT+Cpyf6E2qugbjj3oT+AtmC9dxTBj4BPATEXkTeBGz+nkM2ASsEZENwMOY1doC4E0RWevc958dnykiz4lI+1ZvGrDHue4HJpLCKablWMS2j7dkEiJyJnC7qn7abVsSRUReUNWLnddXAGeq6rddNitnsD6rfkppedUYzKnaOGBoF1+FgALRTr5aMA7kvZ187QTeqa0si5AgqrpGRP4uIl5VTfh+N2kXKgcfcI9btuQidmWV45SWV5UAM4EpGHFq/xrYy1OHgBqgGuO32QBU11aWbevleS05ihWrHKO0vGoSMC/mq9RVg47nMPAysBRYAqytrSyLumqRJSuwYpXllJZXFQOXYY7X5wGj3bUoYQ5hUlJeAJ6vrSzb4bI9lgzFilUW4gjU5cDHgYuAgLsWpQ0FVgC/BZ6qrSw76LI9lgzCilWWUFpeVQBcBVwNfIjcEaiuaMWEFvwWeKa2sqzRZXssLmPFKsMpLa86FbgJ+AwmDaQ/0gQ8BdxXW1m23m1jLO5gxSoDKS2vEuAjwO3AhS6bk2m8BNwL/KW2ssx+ePsRVqwyiNLyKh/wWeBOYkqcWDplE/AfwK9rK8taXLbF0gdYscoAnJXU1cC/YbL9LfGzD/N7e7i2sqzVbWMsvYcVK5cpLa/6MPB94Ay3bcly3ga+UVtZ9ke3DbH0DlasXKK0vGoOcDcmNsqSPl4F7qytLHvFbUMs6cWKVR9TWl41GPgxpta4pfd4GrijtrJsq9uGWNKDFas+pLS8qr3MyEi3beknNAD/D3jInhxmP1as+oDS8qoTgAeBS922pZ/yd+DG2sqy99w2xJI8Vqx6mdLyqps
xvqkit23p5zQC3wB+YldZ2YkVq16itLxqEPAEJofPkjksB66trSzb7rYhlsSwYtULlJZXnQX8ARjvti2WTjkAXFNbWfaS24ZY4sfWYE8zpeVVX8RUDrBClbkMA14oLa/6mtuGWOLHrqzSRGl51QDgEeCTbttiSYingc/WVpYdddsQS/dYsUoDTj3zv2C6AluyjxrgytrKss1uG2LpGitWKVJaXnUK8Dym8YIlezkClNnI98zF+qxSoLS86hxMPXErVNnPIODF0vKqS9w2xNI5VqySpLS8qgxTW2mo27ZY0kYB8GxpedU1bhtiOR4rVklQWl51PfAM5sNtyS38wJOl5VU3uW2I5VisWCVIaXnVIuBxbIPYXMYD/Ky0vOobbhti+QDrYE+A0vKqjwG/B7xu22LpM26vrSz7D7eNsFixipvS8qqLgWfJ/a4ylmNR4NO1lWVPum1If8eKVRyUllfNxYQnWB9V/6QVuLy2suwvbhvSn7Fi1QNOnt/fgIFu22JxlSbgwtrKstfcNqS/YsWqG0rLq8YCq7HF8iyGQ8Dc2sqyTW4b0h+xp4FdUFpeFcTkjVmhsrQzBHiutLxqmNuG9EesWHXNg8Bst42wZBzjgN+VllfZE+E+xopVJ5SWV30J29DB0jUXAj9w24j+hvVZdcA5+XsJE8lssXTHlbWVZX9y24j+ghWrGErLq0YB67B+Kkt8HAHOrK0s2+K2If0Buw08lkewQmWJn0HA70vLq+wqvA+wYuXg5Px91G07LFnHLODrbhvRH7DbQN6Pp9qA+ZfSYkmUMHCGjb/qXezKyvAYVqgsyRMAHi8tr7J/T71Iv//llpZX3QjY6pCWVDkHuM1tI3KZfr0NdLZ/m7B5f5b00AScZk8He4f+vrL6AVaoLOmjAHjUbSNylX67siotrzoX04xU3LbFknNcXltZ9qzbRuQa/XNlVVEsp0rt17FCZekd/t3mDqaf/ilW8PGqwDc+/FTgX5cOoe6g28ZYco6pwGfcNiLX6H/bwIpiH7ARmAygSt3vIwvWfqft+nPD+IPuGmfJIbYDk2sry1rcNiRX6I8rq8U4QgUgQvE1viULNgYX71vs/YvtxmtJFycAt7htRC7Rv1ZWFcX5wNvA2K6G1GtB9Zdab+Pl6LRpfWeYJUc5BJxUW1lW57YhuUB/W1l9gW6ECmCgNE37TeDuaX8L3PFqqeze3kd2WXKTIcCtbhuRK/SflVVFsQA1xGwBe0KV0PLotNduab11Rj0DinvPOEsOsxsYV1tZ1uq2IdlOf1pZLSQBoQIQITjPWz1/XfALke/6frnUR5v9wFkSZTRwtdtG5AL9SaxuSvZGj+iQ630vzN8UXLzjGu/fXk+nUZZ+gc0ZTAP9YxtYUTwK2EaaShUf1gHrPh++I7hKT5mSjudZ+gXn1VaW2dPmFOgvK6sbSGNN9cHSMOOpwPdOeSHw/1aMZf/udD3XktN8xW0Dsp3cX1lVFHuALcCJvfF4VZpfjJ71+u2tX5rZSP6A3pjDkhNEgNLayrIdbhuSrfSHldVH6CWhAhAh/yLv6gXrgzc2/4vvd8s9RCO9NZclq/EC17htRDbTH8Qqacd6InhFh9/se3bupuD1713uWfFGX8xpyTo+4bYB2UxubwMrik/EbAH7XJT3afHqG8N3DlyvEyb19dyWjGZibWXZu24bkY3k+srqM7j0M46QurP+J/Dtk54NfHP5SA7tc8MGS0ZiV1dJkutidbGbk4vgPd2zZe5rwVsK7vf/ZEk+oSY37bFkBFaskiR3t4EVxUWYRFKf26a0E1HP7vvaPvbeA5F/ngNiC//1X06trSzb7LYR2UYur6zmk0FCBeCV6Og7/X84b2Nwcc2HPG+sc9sei2t8zG0DspFcFqsPuW1AVxRKaMqjgXtnrAjeuvJk2WY7ofQ/FrptQDaSy9vATUDGp8Oo0rZKT15xU/grpx2ieKjb9lj6hBZgUG1lWchtQ7KJ3FxZVRSPJQuECkAE32z
PW/NXB7/oq/Q9siRAq/0A5z55mKaolgTITbHK4C1gV8SUV95vyyv3Cy5w24Bsw4pVhuGXSMl3/L+esz54w4bzPdXVbttj6TUWuG1AtpF7PitTEXQPMMJtU9LBlujIVxe3/svYLTqm1/IbLa4QwvitbPebOMnFldVUckSoAMZ79p77t8Cdo37tv3vpQBps44HcIQjMdNuIbCIXxSrnutKIEJhryyvnIlPdNiCbyEWxmui2Ab1FbHnlT3pfsuWVsx8rVglgxSoLCUjb+Lv9j5+9Nvj5dbOkxqZtZC+num1ANmHFKouJLa9cIvt3uW2PJWHsyioBrFhlOSLIyZ4d5y0P3Db4Uf89SwtpbnDbJkvcjCotrxrithHZQm6Jlam0kDMngYkgQv6HvKvn2/LKWYfdCsZJbolVP1tVdUZMeeUttrxyVpBQ493+TEpiJSLni8j1zuvhIjI+PWYlTb8Xq3bypHXifwZ+OnNl8IurT5d333bbHkuX9MudQDIkLVYi8l3ga8DXnUt+4DfpMCoFrFh1wJZXzniGu21AtpDKyuoK4DKgEUBVdwFF6TAqBaxYdYItr5zR2JVVnKQiVmE1iYUKICKF6TEpJUrcNiCTEWHAZd5XF2wI3lB3i/dPL0OuJYZmJXZlFSepiNVTIvIwMEhEPgf8H/BoesxKGtsROQ6c8srnbwoufusiz6q1btvTz7FiFSdJ1yhX1R+LyIeAeuBk4Duq+mLaLEuOfJfnzyoKJHTKI4H72KlDVy4O/8vwt/REtw9I+iNWrOIkpYYKjji5LVCxFLhtQDYyVg7Ofj5Q3vaGTl72hfDtU2155T7F/q7jJJXTwCtF5G0RqRORehE5KiL16TQuCezKKklE8M3y/GOeLa/c5wTcNiBbSMVn9UPgMlUtVtWBqlqkqgPTZViS5Lk8f9YTW175Bu9zr7ptTz/Am8hgEckXkaUiMkFEljjXPisiD3QYt0REZjqvU0rBEpHnRGRQkvdWiMhnexhzqYh8r6dnpSJWe1U10zL+cy0i3zX8Ein5tv8351YHb9g417PellfuPaS0vCqRz+1i4GkgbelUItKtO0hVP6KqR9I1XydUAR8VkW7dOKn4rN4Qkd8Dz2BKtAKgqk+n8MxUsUfxaaZImqf+IlDZ9tA7F/3BM2IObYNCEYo8nnCBz1sXDGlDoNUT8ke8bX4PbT6PJ+LzeCJejz8kQQmRRwt5EiJPQgQ8YYLSSsDbKn5fGz5fGz5fBK8/ijcQFQkoniBmW9S/ulVLQt25rwU+hRGrQwlOMwz4X+AuTHzkvwGHgVOAySLyDHACZofyn6r6iHNfLaaq6QDgL8DLwBxgJ3C5qjaLyATgp5gDgybgc6paAzQAzd3ZparqrBIvBZ7qalwqYjXQMeqi2Hkxqu8WURfnzln2+bz7n5m5+dx7H30nsHHqDZsOFA0dVdD21o7xfk9kVN7I4MDgSH/Y7ymo87S0HfIcbTosjTT56jyat9srgYagL9BEMNgYCgab2gLBpmgg0Nzm97eIz9ca8Hja8jyeSDHKYBF8USTaSqAlRKAlRF4oRDAUIi/cQl5rC/mtzeS3tZDf1kx+pIW8aDP50RbytcUIIyHyJEzQEybgCRPwtuL3OsLof18Y8QQVCSqSB+Qj4vaKPK7PrYgEgJNUtda5dGW8E4jISOBZ4Fuq+qKILADOBE5T1fZGu4tV9ZCI5AOrROS/VfVgh0dNAj6pqp8Tkacw3aV/AzwC3KSqb4vI2cCDwEJV/XGcJr4BzKU3xEpVr0/23l7Erqx6ge0+36G9g2XqilNDSxes/+mCo4Vj3103/dYhe3xFZ+4K1e2K1K9+OxKuociXP2xswcTdJ+VPbB0cPHWorykwSUT8IVrrjkjj7oOehiOH5Ghoj6dRj0qLN0TrgAjRMYg5GPH5QvV+f8uRYLDpaCDY1BQMNoaDwca2QYFmDQQOe/3+loDX25rv9bYNEYkWA4NFUt/6t6kvHCLQHCYYaiGvXRxjBTL
iCGS0hTxHHNsFMijh91eOAU8rAV+bEUh/2wfi+L5AYsQx1qke2XPBjHg/tyXAQGcV87iqLnD8QdcBNe2DnFVKEaAishhzSr4V2MWxJ+YrgT+JSI2qXgN8WUSuwKyuAhhh6ihWW1R1nYjcglkBl4rIAMxK6w8xi8RgnD9TO/uAMd0NSFqsRKQE+AlwnnNpOXCbqu5I9plpwIpVL1Dr9x8FePjDnjlzNkfeKWrcOXHuK+XsGDvv1X9MvGqcp+CC+f6CCwhHmw69E1q/q+bon/OI1o/3iDcyPO+Et0oKJh8cmT8uMNw35kSPeMbGPltRbSK877CnYe+h1ob6g6GjrUcam7z10jIgRNsoRUcjXX1ONerzhQ4HAs1HgsGmhkCwqdkIXFMkGGjCH2jx+n2hoNfXmu/xRIpEooOAgSLHbjN9tAV8tAUK6ZsspKhKNGzEsaWFvEMQ9znGlZgtVUd/VQswuMO1gZi/72869/wRIwjrY8YIxsE/V0QuAf4JOBezKppB5wdW7S6fn2PygjdifMVHVHVGvD9IJ+TRw3YxlW3gL4DfAh93vr/OueZmz76jwGgX589J3vP7wwARr/jvucLTWP6HqApIyc5l547e/VrTxlOvX3Jg6LQ54ikY4ss/5zxf/jmotjZFwpve3NeyLrK3+a/TgUEA+d4Be8cUTKwdUzCxeWhw9OCAJ39ioQRHFEaDI0oYetyfYRSNNEjzjsPSeOCgHG045Gloq5Mmf6OEilppG9HWmjeyrS1vcFNTx7/Vroi2+QMtRwKB5rpgoKkhaASuNWi2p/gDLT6fLxT0elsLjMDpIJH05rx6UE8eocI8QoUDqU+k+ceVGLHwcay/ai9woYiMUtU9mFWVH2jD/E2MxDjm/wBcDfzAuW8M8Bime/mHgMOq2iRmW9xtR3Nn3CFgrKrWi8gWEfm4qv5BzPLqdFV9M/YeZzWGqj7grOBmq2p7IYTJwIbu5kxFrIar6i9ivn9CRL6SwvPSwU5sfaC0s83ve9+nsnaiZ/rWEdGXS/dxPoA3Gi44fcPDCxoLRtWunX7rgXBwkHNc7i/wBaef4wtORzXaFm19Z01by5qG5sjuye8eXXf2u0fXASBIZEhw9D/GFkzaOyp/vAwMDB3jwTve+cDjQbwDtaBkoBaUjGP4cWLWRqSlXpp3H5KGgwc9R5sOS0O0XpqDTRIe1EZkFNJxxeHxtYYLhrWGC4Y1xhmPKRIJ+/0thwPB5vpgoKkxGGxsDgabWoPBRg0EmsUIXDjP620r8HgiA0EHi8Qd8xdXe7V2fxXm5KxUVWP9Vc3AbcBzjtCMB74ErMMI2ekYUfo1cJOIHAU2YcTqdxgH+22AR0Q2A28R33JvB9Dez/Ja4Gci8i2MUP4OeLPD+FOAFc7rCZjsl3Yu4IMKLp2SilgdFJHrgP9yvv8kx+9v+xo3t6A5y26fzx/7/V3XeKc8en/ksMRsPQqb9pSe/+o3S3eOPm/lPyZ9Yox6vO8nlYt4fN7A5DO9gcmoqkbbdmyMhN7YH23dNk6JjD8Y2jX5YGjX5PWHlwLg9wTrRuef9M7YgklHh+WVDMj3DpggIp0unXx484bogPFDdMD4idFRx70fpq3+iDTuOeRpOHxQGloOexo4Ks35LbQOdvxlPSbgq3oD4XDhyHC4cGS8AUseT1uzP9B8OBhornf8by3BYFNbMNCkgUCz+Pwtfp8vnCei2+J85DDgCObE7XZMLi44rg9V/R/gf+B9n1WNqkac7d0s4ELgR8BvVPVBJwbrTVXdJiI7Mdu601X1uBNGVS11Xh4ATot562mMAOE46S/p4WcoBe5wXs9wfo5253++qnYbIpOKWC3G+Kzuw/zCXgHcdrrvdHn+nOSg13PMH3R9oQx9eo4s/9grOrfj2LG7V8wetXdl86ZTFi3dP3zG2Ygc4/cQEfH6T5jq9Z8AQLRt33ttoTXbouG3R0DrFEBao6HibY2bz9rW+EEY30D/sNqxBRN3jimYECkOjBj
hE//EnuKDAAL4Bo7Q4oEjIsWdvt9M+MBhT8PeQ9JQd9DTED4ijd4GaSkI0Tosio5B8Hd6Yw9Eo778UEtRfqilqFunMbDzkovjemQzJkPjPmCliNRhVikPAieIyDpgoqoOAIZghAWMyH1PVS8RkRcxrpoKzOLiFCcsAYyP62MkVoygRz9TLKp6aczr62LeOhH4ak/3p3IauBVTzyqTsCurXqDR4zluVfP7eZ7zL1kdqS4MHd9U1httzZ+26bH5Tfkjtq2dfuueUN6Q2V092+MbcVLAd8lJFF6CRup3t4XWvB0Jbx6ANp9OzOezvvVAaX3dgdLNda+ZOcTXPCLvxE1jCyYdHpE/LljoKx7nEU/C/sp8AsPyo0OGjWHIcVtMRaMN0rLrsDTuPyhHjx7yNETqpMnbKC1FYdqGK4wi9dPIvfEMUtXDIjIQE36wGpiiqpc6q5IHgHOAd5wVUxDYLiJjgFHAbhE5D+OH2upsFa8Gpjl16BCRC4Bv00GsRORuYKWq/inW5+S8PZkPtnVJo6qr4hmXsFiJyHe6n1f/LdFnphErVr1Aa2cF4kTkrk968/79iUibdPE5Kmjed+J5r337xN0jZ6+qOfnaEerxjetuHvEOHO0vWDDaX7AAjTYfjoTWb4yEq/0arZ9GhyT1iLbl725+7/Tdze99MJ934J4xBRNqxxRMDA0Jjh4c8ORNcmKGkkIQT5HmjynS/DEnMuw4MYsQDddL8+7D0nDgoOdo0yFpiNRLc6BJQsWtREYiDItjmkR2A2FgT+wFVd0rIrcBz2FWXv+BiYOKiogf+DFGVD4KrAVuwsQz7WwXKodlwKkiMlpVd8dcn4YRSDjW5wQmEqAiAftTQjTB+msi0tlyrRC4ARjqLEPdoaJ4JhCXSlvio87jOXL+uJJBXb3/zd9Flk7fovN7ek5UfKHNp1z36t4RM88mQQExJ4ubqyOhda0aOTCV44/pO0WQtqHBse+MLZy0b1T+eG+Rf/BYr/hKE5k7FVppazgiTbuNv+xo82Fp5KinOa+Z8OAI0dGYU8bPVVRUPNbTsxwH+y5MBPnjwJ2x2ypnTENnf38iMhZ4XlWPWwXHMe8Lqnqx8/rPwJWqGhaRM4A7VPXTiT4zWRIWq2NuFinCnCLcgIk8vUdV3avzXVE8Ctjd4zhL3GwMBN6+ZuyoSV29Hwxr4xP3Ruq82n1AXzvNecN2rJ1+646W/GHnJGOPOVl8t7ottKZe23ZNBk1o6xfw5B8anT/+vbGFkxqHBUsG5HkLJ4pI5w6tXqaF8OFGCV0x/e6PLO1prLOl+xumgEAtRijiFSs/sEdV01aOxqll93ZMNH2vk5TPSkSGYLz61wK/BM5U1cPpNCxJ9gKtkJxT1HI8tX5ft2V/QgEpfPxiz4bPPx+NS6zyWw6UzHn9uyV7h5+1etOUTw9Rjz+hgn/mZHHSGd7AJFRVtW3HprbQ6n3R1q0nQGRCT/eHo81DtjZuGrK1cVP7JR0UGPHemIKJu8bkT9DiwPCRXvFNEJGEqiEkQx6BwXka2Brn8GYgT1V/7qTKJDZVAo7weHCj0GYy28AfYYLTHgF+qqqZ1QG4ongrH8R+WFLkgUHFyx8eXHzcqV9HHnygbeWwo3TpSO+MqHjDb03+5Ku7R51zFiZlIyWikf1bIi2rt0XC7wyD8KkkmRDtFX/jyPxxb48tmFQ3Iu/EvELfwPEint5o7NACFJZUzu0xN9DxvR0G/hkTjd6kqiOc9xYB38LELl2PCWv4lape6Lx/FvB9Vb3E+f7HwHOq+re0/0S9SDJiFcVE0bZxbHqLYBzs7ta0qiheDiZg0ZI6/zJ86JLnBxQu6GncyEO64/6HI0MkiWqtLcHBu9dOv3VLc8HIOUkZ2Qkard/d1rLWOVlsmkaKq+1C36CdYwombBtbMDE8ODBqqN8TnCQiiea/dWRdSeXcM+IZKCI3Y7JE/ghcBRx0TgOHYJKAZwLbMCk1NwDlMb6mO4GQqv7E+X4c8Ki
qXnT8TJlLwttAVXU7Q70n1mDFKm3s8Pvi2g7tHSIlS6fJkgXVuiDROfJCh0efu/J7o/cNm75205TrB0a9/h63cz0hnoGj/QXzR/sL5qPRlrpIeP2GSKjaq9G600lCUBvbjox9u3712LfrVwPgwRMemjd2U0nB5AMj80t9Rf7BJR7xJrqi7za9pAPXYkq73Nbh+sXAi061BDBlxj+DCR5t5zLg8vZvVHWriAyNSc/JClKqwZ6hvAx82W0jcoX9Xm/c1Vcf/rDnvDmbIu8GIiQlNiMOvHnGsJfvaPvHxKuX7hpz/pmYA5yUEU9esS9v9nm+vNmotjZHwjUrI6F1YY3sPxUTQJkwUaKB/S3bT93fsv39a0FPwYHRBSe9N7ZgUvOwvLEDg56CCU5sVFes7+a9D+z/oDRMlYjMw/xjfJqIPIdxujc6QaF7MeE7zar6rHPvcODeTnzKazChB/8d1w+cAeSqWFnSRJ3HE/dJWcQr/nuv8DSU/zH5smIejfpOeft388dv/cu+ddNvWd9YOOa8nu+KHxF/vi84bbYvOA3VaCTa+t66ttCaOm3bORF0bM9P6JpQtGlYbcOGYbUN7y+YooMDI98ZWzBpz+iCk3Sgf9gox3nfvjt5I85Ht6fagAldmKqq55mfR+4EDrVXPBCRb8feqKr7MQUyO9JjSZZMI/fEqqJuNxXF72GSPi0pEhJJ6Lh7zSTP9NoR0RWl+0hJZILhuhFnr/r+iANDpr65YeqN+VFvIO0J6iIerzcwcYY3YBp5R1t3bDYni7VjIZKO7t6ew+G9Ew+H907ccMT8G+qTwNGR+ePeLSk8+XDpgKnxilUzXfcX2AksiPm+BFgSxzPTfkLY22S6/ylZlrttQC4QhpAmKFYAd13jPUXNyVXKDDu0cfr85XdMKNnx96WoxlWhIFk8/pIpgQGXz88bfNvEwMBFtd7A1CVIYANprJPWpuGinU1vz3h9/5+HlVTOPRrPPc4Wzisd8iwdXgAuEpHBTrL3Rc41RORXIjLbeX23U5alnR5LsmQauSpWdiuYBnb7fEkF+NYXytA/zZG0/SEI6p38zh/nn/fK18MDjm5/mVQimePE4x1a6i+8eEHeoFtOCxZ/bp83OHMZUrAaE8eXDhLNqfsrnRwcOVUS/g2TubEKk7TcXjnhdEzUO5i0mT3wfpDoROLfhmYEVqwsXbLV70t6dfS7eZ7zG4OktStOsPXo8NmrK8+fsf6BDd5IqM86K4mnaKS/YN68vEE3nRUs/lKTL3/uCvEUv4ppupAsf49rbqf1Fqb8y2+cy0PF9Otc59SfOkFVJzpfv3DuG4iJMN8hIhXAJFVtr1F1KfBHVW3rYe7LRKTceZ22llrJkptiVVFXwwclMixJUuv3Jx/wKyJ3XeMNqonHSytDDtdMm7f8qyefuO3FZfRx5oQ5WZx1XrD4hnODg77s9RVcvFK8I5aTWC03xZzixUN76631GCd7K6a4Xshxqs8ErhORM4+ZQLVeVT8ecym2moIPuKdHI1WfVdXKOO2EOFtqJUtuipUh5dIV/Z0tfl9KW553x8jk9aXSK/8fBPVMfO+Zeee/8jUtqq9djmqfdzYS8eX5glNnBwdeNzc46CuD/IWXr/P4SpaB9FT9Y+1Xf//neP8xvRazqooAm1R1BXANptkDqtqIKRkzUUzj0+dFZLWILBeRU463WWZgakctE5E/tRc1FJEvi8gmEVkvIr9zrsU2T42rpRbGuX9pd+OSJZfFym4FU2S7P/UUy3s+5jkrIuzqeWRyBFobh8xa86O5Z6z7jxpvW/PG3pqnJ8zJ4oQZgaKr5+UNvr0kUHRNjcc/cQn43u5k+PPxPfOD1luqur1DKeP2MUMxtaw2YlLgblXVs4A7MYX5OvIr4GuqejpQDXzXuV4OnOFcv6njTar6Y1X9fRxmt7fUSju5LFZ/cduAbGe3zxvoeVT3tARkwOMXeeIt3Zs0g+veOXXey3eeWlr73HI06nZ5bTy+Mac
EBly2IG/wlycFBi7a6g1MW4oEqzE9Ap/t6X6H2PiqjswVkbUYx3slptVWezusdcDDdGie4lSXGKSq7VUefgnMc16vB550SpWnsnXvtfit3BWrirqNmChdS5Ic9njTUpvs/870nHOgyGxbehMBOam2au7cFV/zFte9uwzVtLVYTwWPd+g4f+GH5ucNunlasPjGDRD376K7+KrlqnqGqp6lqg8R0w4r5qvbDjUdKMOk6JyJaXCabAxmr8Vv5a5YGX7ltgHZTJOn8yYNyfCvn/KOUfqmMZ+/rWnQWWvvnXfW2nve8bU2pfVEMlXEM3DJV3//57hCL3qIr+o4th7YIiIfB1PrXkSmdxhTBxwWkfZt2qeBpU5E/Qmq+nfga0AxplV85z+DyC3tJY5F5Aqn9HE7vRa/leti9VvSFxfTr1DQiOk3lxb2DpGSZadJn1ZxLa7fcvLcFf9y2knv/e8KNLq/L+fuhnj8PrF0Gl/VBdcCN4jImxgf1uWdjFkE/EhE1mM6zHwP0+j0NyJSjSl9fL+qHulmnlP44PSzs5ZaVXHamxApVQrNCiqKn8XUn7YkwH6v58DCE0viqSEeN96Itv7ynsi2ZBOdU6HVm19Xfdrn3zwyaNIckt/ipMp2YNzNDy2M+4/OCUm4vS/LB/dEh/LGv8HYt99pXvHb9jpa6SbXV1ZgnIiWBNnu86V9JdKe6Jzu58aDP9JcfOab/zlv5pofbvG1NnZsvtlX/DERoQJQ1TXA3/uicmm8qOqlqhp2Xl/nJEtDnC21kqU/iNX/cmyrbUscbPX748pbS5Q1kzzTt45wL6xk4NFtk+at+H/TJ7z79CtotK9rOf2i5yHHo6o/1ww5LOgOVV2lqut66/m5L1YVdWES9xP0e7b4/S299ex0Jjony7jtL82Z9/KdhUMObVqKal/4NZfe/NDCjHL2Zxu5L1YGuxVMkFq/r9ecmXWFMiydic7J4ouEimas/+n8WW/cvc0fPtrbYS4/6eXn5zz9Q6wq6l4H3nLbjGxily++csbJ0huJzslS1LhzwtxXys+c9PZTr6GR3oi2307nBfAsCdA/xMrwkNsGZBMHvN6kOxnHRS8mOifLCTuXnjN/+Z3FQw9UL8FxIKeJn9380MKM9zllOv1JrB7BpAJY4uCoxzOot+fozUTnZPFGw4XTNzy04OxVd+0KhI6ko97TUeBn8QxsLwfjJCQvibk+W0SWichbIrJWRB4TkQKnbMudXTzrlWQNFpEneupN6ASGLk52jmToP2JVUddEHGUxLIawMLwv5untROdkKWzaU3r+q9+cefJbv10p0UhPVRS646GbH1p4JM6x7eVg3l+FObFLf8AkH5+sqmdgEqG7baahqmlra9YFPwdu7eU5jqH/iJXhp9g6Vz3SKNJA911Z0kZfJTony9jdK2bPe/mrQ4fvX7sE1URPSEPAfQmMjy0H0x5uczPwy5jCeajqH1V1r/PtqSKyRETeE5H3uzqJSIPz3wXO+38UkRoReVKcnl0i8h0RWSUiG0TkkfbrQB3Q7TZYVZuA2vayyX1B/xKrirpGEvvw9Et29kJAaHf0VaJzsnijrfnTNj624JyV39sXbDmUiJ1P3PzQwt3xDOymHMxpmHpVXXEKpnfgbOC7TsnijpwBfAU4FdNIpb2ZxwOqOktVTwPycepQqeptqhrPNrLXysF0Rv8SK8P9mP5qli7Y5vcd6es5+zLROVkKmvedeN5r3549ZfOvVkm0bWsPw0OY0i3x0l05mO6oUtWQqh7A+GQ7y+dcqao71BQoXAeUOtcvEJHXnZzAhcDUBOfu03ZecYlVjOPP63xfJCIrnBrQ7deWiEip8/obyRokIqUi8qmY7xeIyBM93BNwHJA953xV1DVgkjctXbDF7+9z0XAj0TlZRu99fdb85V8dNXLvqiWodlUO5b6bH1pYm8BjuyoHsxE4q5v7QjGvI3TeXu+4MU4lhweBq1R1GqbscdwNbR36tJ1XvCurxcDTMSH
/C4GdTs2czo5kkxYrjOp/qqdBsTh5Si8Bn4jzlkeAzio4WoBav8+VcIKHPuKZE/byrhtzJ4pH24JTNz+x4NzXKw7mNR94rcPbe4B/T+R53ZSDeQBYJCJnt18QkSsdx3sqtM9zQEQGAFd1NkhiWnjFloZx6NN2XvGKVbvjr51BHB8GcAiIiEglkO+sup4EEJHrRGSlc+1hEfGKyCyn3nOeiBSKyEYROQ2zdJ7rjL0d4+iLp1/cM46dPVNR10ZqgprTbPf7pOdR6cfNROdkyW85UDLn9e+eM3Xjz1dLtG2Lc/lbNz+0MJncyuPKwTiO9GuAHzuhC5sxPqqUcjedEjCPYsTmBUwbr854v4UXx5aGAeP7ejEVOxKhxxIxjuNvm6qOirm2GJihql/u4p4GVR3gvJ4C/BBTUqJVRB4EXlPVX4nIXRiFzwd2qOrdTnzHnap6XNF5EZkJ3KSqN3bynhfYo6rxH7lXFL8CnBv3+H7CxSVjXt/l953d88je4UePt708bl/cNZwyhqh4w29Nvuap3aPnLLr5oYUJN7DI0HIwL6jqxc7r2NIwZwB39KWt8aysOnP8zQDijT25ELPnXiWmNvSFfNDa/XvAhzDthH7Y04NU9Y3OhMp5LwKERaTb+JMOfI5j9/MWoM7rSeR3mHYyIdE5GTwa8U1568n7kxEqyNhyMBfHvH6/NAxGF77dl7bEI1bHOP5E5GXMsvS3cc4hmDiR9rrQJ6tqhfPeUEz51CISd+51RhCIPxbG1Gn/VhrmzSmak2gZn04yJdE5CR6cUrM5pUOCLCoH86Kq1vblnD2KVUfHn6qeDzwOfL6b21pj4j1eAq4SkREAIjJERMY57z2MUecngR84147SQ3RuOyJSE/N6KHBAEy/3cS+wLMF7cpY2aIvSN9Hr3ZFJic5xsgv4pttG5DLxOtg7Ov7eAoZ0M/4RYL2IPKmqmzCrl786dZ9fBEaLyGeAVlX9LcapPktEFmJaAkVE5E3Hwf4+IjJTRB5zXg/DrNraSa72c0VdFPgspoljv2efz7sf00DAXTIw0bkHbptSs7m+52GWZImrBntHx5+IXI2Jz7i6l+3rzqZLMRG/9zvfPw2Uq+o/knpgRfHnMCLbr3k9L7jxxtEjEw0O7DW+9V+RpafX6ny37eiBJ6fUbL7ObSNynbj+Be3E8fd/wKDYoNC+RlX/HCNUAeCZpIUKoKLuUeC5NJmXtWzppXLGyfJjk+gcV8qKS2yge5eIJU3EvdyPdfyp6iFVvaiboNA+RVXDqpqOHoE30s/rtW/x+zPqdLQlIAN+nrmJzvXAx6bUbM7oNKFcwX3fRCZRUbcb+KLbZrjJtl4sZ5wsL57pOftgUZdBi25y/ZSazcmv5i0JYcWqIxV1TwG/cdsMt9jt83WWte86FZ/yjtY+zEOLg3um1Gx+2m0j+hNWrDrnc+Beuyg3Oej1dNk23E2cROfX3bbDYRlQ7rYR/Q0rVp1RUdeCab1d09PQXKOhD8oZJ8tDH/GclwGJznuAT0yp2ZwtIRU5gxWrrqioOwRcAhl9EpV22mCE2zZ0RcQr/vuu8Lh5WtmGEaq+bo5qwYpV91TUbQXKSDHDPVuo83iOINK7XW1SZPUkz4ytw3GrycRXptRsttkOLmHFqicq6tZiav30RddeV9nex+WMk+WuT3pPdiHR+VtTajb/tI/ntMRgxSoeKur+inG65zRb/b546oa5Tl2hDHvm3D5NdP7BlJrN3+/D+SydYMUqXirqfkkfl8Toa7b4/ZkUGtAt/zW/zxKdH5xSs9me/GUAVqwSoaLuLkzd6pxkq9/nejZC3PRNovOjwC09jrL0CVasEqWi7mZytOHEDr8vY4q+xcO7Y2Ryde91dP4p8IUpNZszLqK/v2LFKhkq6r6LySPMqVibfV5vOgog9im9lOh835SazbdYocosrFglS0Xd48BHyaE6WPUeT590YU4nTqJzTz38EqFySs3mO+IdHNOmboLTju5ipxrJOhF
pcJo8rBORThPtReQxETk1feYf8+x42tgNF5Hne2P+dGPFKhUq6p4H5vNB94+sJmQKGmYdL57pOScNic4tmMTkryd432LgaUw/PlT1hfYS3piOxdc633+ms5tV9UanQKUrqOp+YLeInNfjYJexYpUqFXVrMB1ysjo1Jwwhdbn2eir866e8o1JIdN4OzJ1Ss/mJJO5tb1MXoZvyQiLyMxF5w2k5968x15c4FXA/LiL3OtduE5H3nNcniRi/nIh8R0RWicgGEXlERCTmGT9w2t39Q0TaW7qnv42di1ixSgcVdbWYHmpuRVanzC6fr2MfyKxizxA5YdlpsjKJW5cAZ02p2fxGojc6RR9PUtVaVd2uqld2M/ybqjoTOB2YLyKnd3h/OdAuMnOBgyIy1nndHjX/gKrOUtXTMO3rYtvV+VR1NvAV4LsAqvqKqt4Wx4/yRszcGYsVq3Rhcgn/CfiD26Ykwza/L+uLDibR0fk+4ENTajYnG7nfWZu6rrhaRNYAa4GpwDF+KlXdAwxwWsmdgOkeNQ8jIsudYReIyOsiUo3pih5bfrq9XM1qTFfzRNgHjEnwnj7HilU6qahroaLuakwBv6wJsASo9fsb3bYhVRJIdG4Grp1Ss/mOFKsnHNOmritEZDxwJ3Chqp6OaWzS2X2vANdjGrK0r7TOBVY43aUexPQ+mIaJAYt9RnuF1wjgS/DnyCMLPq9WrHqDirqHMI1d17lsSdxs8ftyIvfRSXTurhZZLTBnSs3mePtedknHNnXdMBBoBOpEZCTw4S7GLceI2jLMCuwCIKSqdXwgTAdEZAAmXzVuRGR2+4mkiIwVkZdi3p6MqSWf0Vix6i0q6jYDZ2P6EmZ8vM42f0YWCE2Kuz7pPUU73549BcycUrN5XRqn69im7jhU9U2M+NRgtndd+TaXY7aAy5zeBttxikCq6hHMamoD8AIkfPp5Ih+snkZzbIxgcm3s+pi4WnFZUqSieB7wGDDJbVO6oqxk9Kvb/P5z3bYjXXxySWT5Fa9qu9N4J/DFKTWb/zfd83RsU5epiMiPgF+r6noRuQXYpqrPOu8tAy53VooZixWrvqKiOB+TpnM7kHFpLXNOLKk+6vVMc9uOtKGqv7gvsqEwxMtAeW82IBWRxcAvM6HTU6KIyHDgPFV9xm1besKKVV9TUTwLeBzIKGGYUXrCjohIidt2pJF1g4/qF5fdsuE1tw2xpAfrs+prKupWATOAz4Dr9cQBUNAIjHTbjjRxBFMpYaYVqtzCrqzcpKLYhzmq/jbGseoK+72e/QtPLBnu1vxpogG4H7inelF11seMWY7HilUmUFEcxLQg/wYwqq+nXxMMbF40ZtSUvp43TTQCDwA/ql5UfdBtYyy9hxWrTMI44W8GvoaJju4T/jSgcOV3hg+d3VfzpYlmTJDkD6oXVWdF7XhLalixykQqigdgcrxupQ9aY90zeNDyJwYNzPjcMIcdmHijh6sXVe912xhL32HFKpMxPq1LMM74y4Bgb0zz5RHDlvy9sGBBbzw7TSgm+PJnwJ+rF1VnXYiAJXWsWGULFcWDgKuBRcCcdD76qjGjXn4rGOg2Ctsl9gO/wKyi3nPbGIu7WLHKRiqKJ2BWW58Gxqf6uAUnjF190Oc9K2W70sM2TH2lPwHL7SrK0o4Vq2ymolgwmfmXYfLTzgQSTvI7a9wJ74Y9MiHN1iXCBhyBql5UvcZFOywZjBWrXMKcJp6NEa7zMeVFeqyrPq30hHpE+rL+ensJlGXAsupF1emsoW7JUaxY5TIVxR5MWk+7eJ2Dyb5/P3OhUaThnNITBvSSBc2YKP23MVUHVgIrqxdVZ3TCrCUzsWLV36go9mMEazwwfovfN/KykjETMcGoozDxXXlAIOZLOjylAVPbu/2rHpPmshUjTO84XzurF1XbD5glLVixsvTItF9O82FEywc0Wqe3xQ2sWFkslqzAVl2wWCxZgRUri+vEdDW+UETqROS5Du8XicgKp7Ox17m2RER
KndffSGHuUhH5VMz38XQxDojIMhFJtDGDJQWsWFkygdiuxstV9SMd3l8I7HQ6G3fmL0tarDBtqz7V06BYVDUMvAR8IoV5LQlixcqSCbR3Ne6KQZjedrEcAiIiUgnkO6uuJwFE5DqnO/E6EXlYRLwiMktE1otInogUOp2RTwMqgbnO2NvJsS7GuYR1sFtcxelqvE1VR4nIAuBOVb20w5jFwAxV/XIXz2hQ1QHO6ynAD4ErVbVVRB4EXlPVX4nIXZiwjHxgh6re3dWczrNmAjep6o2dvOcF9qhqthctzBrsntviNvF0NZ6BKQ0TDxdiejauEhEwwtS+KvsepoVVC9Cp8MWiqm8AxwmV815ERMIiUqSq8TRWtaSIFSuL23Tb1VhEXsY04TwzzucJptPM1zt5bygwAJM/mYepMpoKQYzwWfoA67OyuEpPXY1V9XxMN6DPd/OYVhFpT+B+CbhKREYAiMgQERnnvPcwpt79k8APnGtHgaJ4bBWRmpjXQ4EDqpoTnayzAStWlkygp67GbwFDunn/EWC9iDypqpuAbwF/FZH1wIvAaBH5DNCqqr/FONVnichCYD3GUf+m42B/HxGZKSKPOa+HcWzaUVZ0Mc4lrIPd4jrtXY0xK6jOHOxXA1ep6tVu2OfYcClwkqre73z/NFCuqv9wy6b+hl1ZWVxHVdcAfwdagdM6BoUC/wcMig0K7WtU9c8xQhUAnrFC1bfYlZXFYskK7MrKYrFkBVasLBZLVmDFymKxZAVWrCwWS1ZgxcpisWQFVqwsFktWYMXKYrFkBVasLBZLVmDFymKxZAVWrCwWS1ZgxcpisWQFVqwsFktWYMXKYrFkBVasLBZLVmDFymKxZAX/H6nU/Gs+WyjPAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "flattened_hashtags_df.value_counts().head(10).plot(kind=\"pie\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### User mentions" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 [{'screen_name': 'i_ameztoy', 'name': 'Iban Am...\n", + "1 [{'screen_name': 'IndoPac_Info', 'name': 'Indo...\n", + "2 [{'screen_name': 'ZelenskyyUa', 'name': 'Волод...\n", + "3 []\n", + "4 [{'screen_name': 'ChinaUncensored', 'name': 'C...\n", + " ... \n", + "7435 [{'screen_name': 'metesohtaoglu', 'name': 'Met...\n", + "7436 [{'screen_name': 'NEVERBOW', 'name': 'P K', 'i...\n", + "7437 [{'screen_name': 'BBCNews', 'name': 'BBC News ...\n", + "7438 []\n", + "7439 [{'screen_name': 'Reuters', 'name': 'Reuters',...\n", + "Name: user_mentions, Length: 7440, dtype: object" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tweets_df[\"user_mentions\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 [{'screen_name': 'i_ameztoy', 'name': 'Iban Am...\n", + "1 [{'screen_name': 'IndoPac_Info', 'name': 'Indo...\n", + "2 [{'screen_name': 'ZelenskyyUa', 'name': 'Волод...\n", + "3 []\n", + "4 [{'screen_name': 'ChinaUncensored', 'name': 'C...\n", + " ... 
\n", + "7435 [{'screen_name': 'metesohtaoglu', 'name': 'Met...\n", + "7436 [{'screen_name': 'NEVERBOW', 'name': 'P K', 'i...\n", + "7437 [{'screen_name': 'BBCNews', 'name': 'BBC News ...\n", + "7438 []\n", + "7439 [{'screen_name': 'Reuters', 'name': 'Reuters',...\n", + "Name: user_mentions, Length: 7440, dtype: object" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# take the rows from that have values in the user_mentions columns\n", + "user_mentions_list_df = tweets_df.loc[tweets_df[\"user_mentions\"] != \" \"]\n", + "user_mentions_list_df = user_mentions_list_df['user_mentions']\n", + "user_mentions_list_df" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_mentions
0[{'screen_name':
1'i_ameztoy',
2'name':
3'Iban
4Ameztoy',
\n", + "
" + ], + "text/plain": [ + " user_mentions\n", + "0 [{'screen_name':\n", + "1 'i_ameztoy',\n", + "2 'name':\n", + "3 'Iban\n", + "4 Ameztoy'," + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#create dataframe where each user_mention gets its own row\n", + "flattened_user_mentions = []\n", + "for user_mentions_list in user_mentions_list_df:\n", + " user_mentions_list = user_mentions_list.split(\" \")\n", + " for user_mentions in user_mentions_list:\n", + " flattened_user_mentions.append(user_mentions)\n", + "flattened_user_mentions_df = pd.DataFrame(flattened_user_mentions, columns=['user_mentions'])\n", + "flattened_user_mentions_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "user_mentions \n", + "'id': 6521\n", + "'name': 6521\n", + "'id_str': 6521\n", + "'indices': 6521\n", + "[{'screen_name': 4150\n", + " ... \n", + "'Scientists 1\n", + "'ScottLucas_EA', 1\n", + "'ScottishSun', 1\n", + "'ScottsPassage', 1\n", + "🪙', 1\n", + "Length: 15428, dtype: int64" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "flattened_user_mentions_df.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAVIAAADnCAYAAABMpd6dAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAouklEQVR4nO3deXxV1bn/8c+TBMKYwzwkIEEEEjEICKKIoHGovdTellurP60Xa1s7xNpaO6S3raZiK2q1Wm2NVVRabbWt4zW9BbVlVgGZDhomMU4gqEAYhADnPL8/9qbGmOGcnGGd4Xm/Xudl3Geftb8J4WHtvddeS1QVY4wx7ZfjOoAxxqQ7K6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBOjrCmkItJZRBaIyDARme8wR10E+zwiIsOTEMcYEwdZU0iBy4HHgZDrIBG4G/ih6xDGmMjkuQ6QRJcAF+MV0p0AInIZ8FmgCzAMeEJVf+i/dzcwAegM/E1Vr/O31wF/Bj4NHAGuAG4EjgNuUdVqf78fAF8E8v12r/NzvBdB1kXAgyKSp6pHYvquY1EV6AoMAYr91xCgEOiG9zNr+uoENAD7gX3+q/HX24HXgS3+f9+kqj4d/mEzplVZUUhFpCNwrKrW+ZumN3p7DDAWrwBsEJE7VfUt4CequlNEcoHnRWS0qq71P/Omqo4RkV8DDwKn4RWRdUC1iJwLDAdOBgR4WkSmqOpCVZ3QKNdqVR3TNK+qhkVkM3Ai8HJ8fgqtqAp08I81wc88ChgK9EnwkY9QFXgLr7BuxPteVwCvUFXv7h8QY6KUFYUUryDsbuG951W1HkBEXsXrdb0FfFFErsD7GQ0EjgeOFtKn/f8GgW6quhfYKyINItIDONd/rfL364ZXWBc2PnBzRbSRHXi9v/gX0qrAYGAqXtE8Ge8fk/y4H6dteXgFeyhw1tGNhzV3y/DKmq3AYv+1pG7WtN0O8hkTkWwppAfweozNaWj0dQjIE5GhwPeBCaq6S0QebPL5o58JN/l8GO9nKsCNqnpPDJk7+bljVxUQvIJ5vv8aHZd2E+Rt7bsdmOy/AELFlTVLgCeBp+pmTdviKpsxzcmKQuoXw1wR6aSqByP4SAHetb16EemPdz10fhSHnAvMFJGHVXWfiBQBh1V1R0sfEJE/AHep6jJ/0wi8SwXtUxXogtcrPh+YBvRvd1tJtiQ86lCTTbnAFP91W3FlTRB4Cq+orkh2PmOayopC6puH18N5rq0dVXWNiKwC1uOd5i+J5kCqOk9ESoEXRAS8Gy1fwjtd/7cm10hHA1v97f2BA6r6bjTHBaAqMBbvBtjFeP8gpJ254Qm929ilzH/9tLiy5k3gAeDeulnT3kl4OGOaIarqOkNSiMg44GpVvdR1lqZEpACYraoX+P9/NbBHVWdH1EBVoDte4fwacFKiciaDKodKGh4MN9CxpUsxLQkBzwDVwNy6WdOy4xfbpISsKaQAInI5MEdVU3rIjYh8Gfhjm0OfqgJlwHeAi4CuSYiWcPu006snNNx/fIzNbAHuBWbXzZoWyXAzY2KSVYU0Y1QFJgI/AT6Dd2MrY7wULll44aFrp8SpuQN4DzfcVDdrWovXp42JlRXSdOIV0JnAOa6jJMovDl+y9N7QtElxbnY/8Fvg5rpZ0z6Ic9vGWCFNC1WBE4Eb8HqgGe2MhlvfrtOBgxLU/F7gTuBXdbOm7UrQMUwWskKayqoCPfEeP/0aWTAvQkjlvWEND/dNwqHqgeuBO+pmTUvp6+UmPWT8X860VBUQqgKXARuAr5Mlf07b6P16kg4VAG4FVhRX1kxM0jFNBsuKv6BpxbsTvxBvbGQyemc
p46VwaXye5IrcGGBpcWXN3cWVNT2SfGyTQayQpoqqQBeqArcCK/no0cisMjc0IeDgsDnAN4D1xZU1Fzs4vskAdo00FVQFRgOPAKWuo7iiSuiEhtkH9tO5m+Mo/wtcVjdr2k7HOUwasR6pa1WBCuAlsriIAhyk42spUETBm5tgdXFlzWmug5j0YT1SV7w78rOBz7uOkgpWh4ct+tyhmae7ztHIEeBaYJY9bmraYj1SF6oCk4HVWBH9t3+GxqTaE1p5wC+B/yuurOnnOoxJbVZIk60q8E28KfmOcZwkpTwbHl/oOkMLPoV3qn+q6yAmddmpfbJ4kyvfBPzAdZRUo0r90IaHC0BSrVfa2AHgkrpZ055wHcSkHuuRJkNVIB/vrrwV0Wa8R2BzihdR8BdBLK6sudJ1EJN6rJAmWlWgN/A83oqiphkrwiP3uc4QoRzgzuLKmluKK2tSvfCbJLJCmkhVgWHAUrxVRk0L5oYmpNtcqt8H/lRcWdMx1oZEpLOILBCRYSIy3982XkR+08L+dSLS5+jXEbT/iIgMjzWnaZ0V0kTxiuhCvLWXTAtU0fnhE49znaMdLsK7o985xnYuBx7Hm+EfAFVdoapXxdjuUXcDP4xTW6YFVkgToSowBPgn3nLKphWHyKurp1sP1znaqRx4PMae6SV4C/mFgJ0AInKGiDzjf91bROaJyCsich8fn8g7ktn/FwFni0g2rc+WdFZI460qMAiviNrwpgi8poVbXWeI0XnAI8WVNVEXKhHpCByrqnWq+paqTm9mt+uAxao6CniCRr9XqjqhUVurmzuGqoaBzcCJ0eYzkbNCGk9VgQF4N5aOdR0lXSwInxh2nSEOPg/MKa6sifbvUx9gdxv7TAEeAlDVGqDZCakbrUbbnB3Y2VFCWSGNl6pAX7wiatdEozAvNL6/6wxxcjFwT5R38w8A0a6W2h6d/GOZBLFCGg9Vgc7A/wGxrn6ZVVTZv0aHDXOdI46+CtwW6c6qugvIFZHWiulCvCKNiHwa6NlWuyLyBxE5udGmEcC6SHOZ6FkhjY8HSPP15F3YRbfNYXJyXeeIs+8WV9ZURLH/PFqff/bnwBQReQWYDrzZ3E5NrpGOBrb62/sDB1T13SgymShZIY1VVeB/gAtdx0hHq8PH7XadIUFuL66sOSvCfX8LzGi8QVXnq+pn/K8/UNVzVXWUqn5NVYeo6vtNGzl6jVRECoBNqvq2/9bFwD3t/UZMZKyQxqIq8Fm81T1NOzwbPinWMZipKg/4a3FlTZuXLVR1JfAvEYlLz1xV96jqBY027QbmxKNt0zKbtKS9qgKjgBeA7q6jpKuJB+/asZ1emTxF3Vrg1LpZ0z50HcQklvVI26Mq0AtvELUV0XY6ojnvZHgRBe9a5e9dhzCJZ4W0fe4BMuluc9K9of2bvWmSgS4prqz5husQJrGskEarKnAJ8AXXMdLd4vAJh11nSKJfRXK91KQvK6TRqAoUAXe5jpEJ5oYn9HadIYm6Ag+048knkybsDzY69wM9XIdId6o0LA+XZNvUbqcD33EdwiSGFdJIVQW+BZzrOkYm2EvnTYfJi3kuzzT0i+LKGnuEOANZIY1EVeA44BbXMTLFuvDQD1xncKQz8GBxZU2mPc2V9ayQRuYuoIvrEJniufBJ2dgbPepU4LuuQ5j4skLalqrAf+AtyWvi5LnwuGyfq/Xa4sqavq5DmPixQtqaqkAecKvrGJkkpLL9Te1f5DqHYwVAlesQJn6skLbuG0CJ6xCZZKv2qXOdIUVcUVxZY79bGcIKaUuqAj2xXkPcvRA+/qDrDCkiD7jZdQgTH1ZIW3YtkE2DxpNibnh8D9cZUsj5xZU1Z7gOYWJnsz81pyowFNgAdHAdJZOoEjq+4YGGA+TbCIiPrATG182aZn8R05j1SJv3I6yIxt0B8jdbEf2EccB/ug5hYmOFtKmqwEDgMtcxMtF6HbzDdYYU9X3XAUxsrJA28WrHDlcA+a5zZKJ/hsba71v
zTiuurJnoOoRpP/vFbqRsTln3C4sGfu/0Y4pWP9O1ywrXeTLNvPB4W1u9Zde4DmDazwrpx30VKNidmzvmx/36jJ8wZNDGewMFSw5DNs2dmRBhZddGHTzUdY4UNr24ssZ+PmnKCqmvbE5ZLnBV420Hc3JG/KZXj9MmFA9+b2bvngv2i+xzFC/t7aDna64zpLhc7Bn8tGWF9COfBoqbeyMkUviXgu5TTxkyKHRl/z4L3svNeS+50dLfsnDJftcZ0sDlxZU1AdchTPSskH7k0jb3EAks6NJlavngooKLCvsv2tyhw+tJyJUR5oXG20KBbesGXNDmXiblWCEFyuaUFQDnR/wBkfxX8vNP/3zRgCGfGlT40oud8tclLl36U0UXhEfbmkWRucR1ABM9K6Se/8KbdDc6IjlbO+RN/NrA/iecdkzR2se7dV2mYE+oNHGIDlv20tVOWSMzpbiyJttnx0o7Vkg9X4q1gT25uaOv69v75PFDBm/5XY/A4kNwKB7BMsFmLdzmOkMayQEuch3CRCfrC2nZnLIi4Ix4tXcoR4bd3TMweULx4J3X9um1YK/Inni1na7mh8dYLz06F7sOYKKT9YUU75c27j+HsMiAJ7p3mzppyCC+3r/vgm25uVnbK5sXGj/AdYY0M87mKk0vVkhhekJbFylY2qXz1HMHF/b+QuGAxbUdO2TVeEpV9q3VoXajKXp2ep9GsrqQls0pCwATknIwkY4b8jtO/mLhgGPPHly4fEHnTmuTclzHdtJ9s5KT1b9n7XSe6wAmctn+C34m3hMlySMi2/PyJlw5oN/oU4cMWvdo924vhiGc1AxJtCo8vN51hjQ1vriypsB1CBOZbC+k57g8+L6cnBNu6NPrlPHFg9+8vWdgUYOQcctwzA2Pj35YmQHvH/iprkOYyGR7IT3bdQCAwyLFs3sETp8wZPDeyr69F9Tn5Ox2nSle/hkae6zrDGnsLNcBTGSytpCWzSkbDIxwnaMxFelb063r1MnHFOVdPqDfwrfzct9xnSkWhzX3zQ8I9HGdI42Vuw5gIpO1hRTHp/WtEum2vHOnKZ8eVNj/c0UDlgY7dtzoOlJ71OmAt11nSHMnFFfW9HMdwrQtmwvpZNcB2iSS91rHjpMuLhow4szBRS8/26XzKteRorEoXHbEdYY0J3g3RE2Ky+ZCeqLrANF4Py/3pO/17zt24pBBtX8s6L40BCHXmdoyNzTeTutjN851ANO2rCyk/iTOx7vO0R4f5uSU3ty756TxxYO33tyrx8IDIh+6ztQcVQ6+rCOGu86RAU5wHcC0LSsLKTAc6OQ6RCyOiAz+Y6BgysQhgw5c06/Pgp05OR+4ztTYHrpsOkKeLWkdOyukaSBbC+lo1wHiRUV6z+vaZerUY4q6XDqw/8K6vLw3XWcCWBs+dqfrDBnimOLKGpsUO8VZIc0UIp1Xd8qfcv6ggUWfGTTwhZfz82tdxnkufJItaR0/1itNcVZIM41I7hsdOpx6WWH/0inHFK2q6drlZRcxng+PG+LiuBlqlOsApnXZWkhLXQdIhl25uWMr+/U5acKQQRtnBwqWHoGkDEcKac62t7XvwGQcK0tYjzTFZWshLXQdIJkO5uSMuL1Xj0njiwdv/0USlpV+W/u8kcj2s5Ctd5/iEl5IRWSyiHzZ/7qviDj9pSibU9Yd6OIygyshkaJHCrpPPXXIoCPf7pe4ZaVfCI9qSES7WcyebkpxCS2kInId8CPgx/6mDsBDiTxmBLJ+tnYV6TG/a5ep5YOLuv+/BCwr/Y/whJ7xbM9YIU11ie6Rfh74LLAfQFW3Aq6HcvR3fPzUIdJpnb+s9HmDCl96qVP+K7E2qcrhF8OlNhA/vqyQprhEF9JDqqr4SxSLSNcEHy8SWd8j/QSRnHc65E386sD+oyYfU7T2yRiWld5Pp80Hybc5SOOrW3Fljf1MU1iiC+lfROQeoIeIfA14Drg3wcdsixXSVtTn5o7+mb+s9N09CqJeVrpWhyT
kuquxXmkqS2ghVdVfAX8DHgNGAteq6p2JPGYE7BcyAodyZNjvevaYPKF48AfX9ek1P9JlpZ8Pjc1LdLYs1dd1ANOyhP/Sq+qzwLOJPk4U0voZ+2QLiwx8vHu3gY9367rntAMHF1S9v3PkgFCoxV79s+GTBiUzXxbp5TqAaVmi79pPF5FNIlIvIntEZK9E2LNJoGwdOxsbkYIlXTpPPWdwYa8LCgcsXt/MstJhlQ9e06JjXMTLAh1dBzAtS3RRuRn4rKoGVLVAVburquuVEa2QxkKk4/r8jpMvKBxw7DmDC5cvbLSs9HZ6bnEZLcMld7VbE5VEn9pvV1Wnk2c0wwppPIjIu3l5EyoG9KNbKBy8etfu/Tm7Sw+4jpXB7NpzCkv0H84KEXkUeBL499Muqvp4go/bGiukcbYvN6fs9i49d/6spueq74/o9B7We4q7Q2mwIkI2S3QhLQA+BM5ttE0BK6SZRFVvmR3a0mvfyrMOdx664K3B5bYee5zlt3Ncr0mOhBZSVf1yIttvJyukcfatmvCCXvs4A2D4a49N3dt98ILdPYZbMY2vw64DmJYl+q79IBF5QkR2+K/HRMT18JiDjo+fUcZtCq+ZGtTTG28bu/qO0/MP7lzmKlOGskKawhLdO3sAeBpv2rpC4H/9bS6l1NpG6SywT9/7wWPh/tLkmqigOacsmzkqJ9Sw3lW2DGSFNIUlupD2VdUHVPWI/3oQ909ovO/4+BlBVMO33B96M1ebf+Q2N3yo6ynLZgbQ8LZkZ8tQ9uhtCkt0If1ARL4kIrn+60u47xG6Pn5GuOqp8KIe+zmptX06NewaeNKq2/agmtCJpLPEO64DmJYlupBeDnwReBfYBnwBcH0DynqkMTp5Q3jVpNqPXxdtSWDP6yNL1/+xFlUbvtN++yqqy3e7DmFalui79m/gzUeaSqxHGoOee3XH954ID5Io/hEeuP2lCXu7D17w9qAz7U5++1hvNMUlpJCKyLWtvK2qOjMRx42Q9UjbKSesoVtmh7bmKGOi/eyIzX+buq/bIBsW1T5WSFNcok7t9zfzAvgK3tIjLr2PPSXSLt97Iryo4ED0RfSosavvmJx/cJcNi4re264DmNYlpJCq6q1HX8Dvgc5410YfAY5NxDEjFZwRPAzY5BpRmvRq+OUJG3VKLG0Imjtx+czjc0ING+KVK0tYIU1xCbvZJCK9ROQGYC3eJYRxqvojVd2RqGNG4VXXAdJJ73rddtVT4SHRXBdtSV6oodspy2YW2LCoqNipfYpLSCEVkVuA5cBeoExVq1R1VyKO1U5WSCOUE9Yjt9wfei8H+sSrzU4NuwaOW3VbvQ2Lipj1SFNconqk1+A9yfRTYKs/qXOqTOwMkGpT+6WsH/4tvLjbQUbHu90ee14vKd3w0Ks2LCoidikkxSXqGmmOqnY+OpFzo1cqTOwM1iONyNRgePnY1zRhd9kHvvviyYPeWbA4Ue1niJ3ARtchTOuydSakWmxaslb1263vfOuZ8HECksjjjNj816k9dm9akMhjpLllFdXl9rua4rKykAZnBD8E6lznSFW5IT180/2hXQI9k3E8f1jU8mQcKw296DqAaVtWFlKf/YK24H8eDS/t2sAJyTqePyyq1IZFNesF1wFM27K5kNq1uWactSr8Utkbibsu2pJGw6LeTfaxU5gCL7kOYdoWUSEVkc4iskBEhonIfH/beBH5TQv714lIVMNlROS7ItKllffni0hxG21cKSKXR3jIRdHkywYDdupbV/wjXOLq+P6wqN2o7m9776ywvqK6vN51CNO2SHukl+Ots/TvoSqqukJVr4pjlu8CzRZSEYl0MbX7gW9HuO86bAKTf8sL6aGbHgjtFQi4zNFjz+slJRseegXVsMscKcIuP6WJSAvpJcBTeIV0J4CInCEiz/hf9xaReSLyiojcRyt3ekWkq4jUiMgaEVknIheKyFV4407/JSL/8vfbJyK3isga4FT/uK2OOVTVD4E6ETm5rW8
oOCOowL8i+N6zws/+FHqh8yGOd50DoPDdF08uemeBnTFYIU0bbRZSEekIHKuqdar6lqpOb2a364DFqjoKeAI4ppUmzwO2quqJqnoC8A9V/Q2wFThTVc/09+sKvOTvt1hVp6vqW36m+0RkfAvtrwAimisTeC7C/TLaeSvCL5S+TUrNyjRy81+nBnZvzvZhUfNdBzCRiaRH2gfY3cY+U4CHAFS1BmjtcdAgcI6I3CQip6tqS9eAQsBjzb2hql9V1RUtfG4HXu82Es9GuF/GKnpf3/jys+FRrnM0Z9zq2yfnN+zO1mFRayuqy20gfpqIpJAeADrF64CquhEYh1dQb2hl7tKD2r7HBzvhZW5TcEZwi58jK3U4ogdvfDB0UCAVnjb7BEFzJy67PluHRf3FdQATuTYLqT/ZSK6ItFZMFwIXA4jIp2llILeIFAIfqupDwC14RRW8CU66R5i7cXtXisiVjTaNwLuRFKlHoj1mpqh6OLSs02FGus7RmrxQQ7eJy27IxmFRVkjTSKQ3m+YBk1t5/+fAFBF5BZgOvNnKvmXAMhFZjXdt9QZ/+++Bfxy92dSaJtdIS/j43ffTiO6UPSsL6fkvhpcM30pM84smS+eGnQPHrfr1riwaFrW6orp8k+sQJnKi2vZjvCIyDrhaVS9NfKTo+CMHpqvqIREZC3wv2pxlc8qWARMSEjAFDd6hr/9qdqivQDfXWaKxdcCpy9aPvGQ8Ipn+IMlPKqrLf+k6hIlcRL+QqroSb2hSpOM5k0ZVP6Oqh/z/7QP8rB3NZE2vtONhPfDLOaEj6VZEAQrffeHkoq0Ls2FYlJ3Wp5mIeqTtalikN/B8M2+dpaopNRC+bE5ZEfAWCZ7pKBXcdP+RxUO3t3qZJuW9PPZ7C+sDw9LiskQ7rKqoLh/X9m4mlSTsFElVP1DVMc28UqqIAgRnBN8hCx4Znb4knPZFFGDcql+flsHDoqw3moYy/VpTNKpdB0ikoe/q5gsXhjOip+MPiyrJCR3KtHGWR4CHXYcw0bNC+pG/kqFr4+Qf0v0z/xASaWEug3SUF2roPnHZzO4ZNizq8Yrq8rdchzDRs0LqC84IHgHucp0jEX45J7S6Y4hhrnPEWwYOi/q16wCmfayQftzvgUz5SwnAhQtCiwa/z2mucyRKjz1bSks2PJwJs0W9VFFdbpOUpCkrpI0EZwR3AXNc54iX497RDdOXasaPj/WGRS1K95uFt0WyU6O5gc8SkXoR+XuT97uLyBIRWX10uGIkc/lmGxG5TESq2tinr4j8I5L2rJB+0u1kwMJ4nRt0788fCuVLHOdJSGUjNz06NVD/2kLXOdppI/C3CPdtPDfwIlX9jybvlwPv+CNk4rLUtYjkxaOddKOq7wHbRKTNMzorpE0EZwQ3AU+7zhGrGx8MBTuEKXadI5nGrfr1aR0bdrc0K1gqu7GiujzSSxNH5wZuSQ+8GdAa2wmERCRXRB705wEOisjVACJynIg8588RvNJfCeMMEVkkIk8Dr/qfvUVElovIWhH5+tHGReQHjbb/3N9WLCK1InKvP0/xPBHp3FJov9d8k4gsE5GNInJ6o3YW+blWisgkf/sZfs/8KRHZIiKzROQS//NBERnm79dXRB7z8y1vVBQPAPsi+Hk/6f/MW2WFtHk/oY1JpFPZl54PLSzcySTXOZJN0NxTll0/Ms2GRb2BPwVlWxrPDdzKbrnAx4pyo7l8xwBFqnqCqpYBD/i7PAz8VlVPBCYB2/zt44DvqOoI4CtAvapOwHuc+msiMlREzgWGAyf77Z8kIkcflhjutzsKbyrO/2rjW8xT1ZPxVsu4zt+2AzhHVccBFwKNlzc6EfgGUApcCozwP38fH62UcQfwaz/3f/nvoaqPquqvAETksyJyfQuZIprf2AppM4Izgq+QptdKR76ltecv04muc7iSF2roPnH5zG5oeLvrLBGaVVFdfiTCfSOZG3gMLQ/j2wIcKyJ3ish5wB4R6Y5XXJ8AUNWD/ko
TAMtU9XX/63OB//YnG3oJ6I1XKM/1X6uAlXiTCA33P/O6qq72v34Z2jxDeryZfTsA94pIEG+IYuNVHJar6jZVbQBew5tcCbypMY9+/mzgLj/300CBiHzs8WhVfVpVW5rOM6L5ja2QtuxnwIdt7pVCuhzU+uv+FOoqkO86i0udD+4sHLf69p18VBBS1Vrg3ij2b3VuYBFZDFwE/Km59/0pMU/Em3n/G/i9s1Y0HsEiwLcbPaE4VFXn+dtvbLT9OFWd7X+modHnQ0Bb11qP7t9436uB7X7u8UDHZvYHrxfe0Ojro5/PAU5plK9IVSM5pT8qovmNrZC2IDgjuJU0G9d30wOh2rxwq8u8ZI0e9a+Vlmz8UzDFh0VdWVFdHvElpLbmBlbVycBs4Irm3hdvZd8cVX0M+CkwTlX3Am+LyOf8ffKl+dV85wLfFJEO/n4jRKSrv/3yo708ESkSkX6Rfk8RCADb1PtzvBTv0kU05tFoQUwRGdPazn7+xnOERDS/sRXS1t0EvOc6RCS+PC+0oP9uTnGdI5UUbls6sXDr4lQdFvVQRXV5e7K1NTfwBqBXC+8VAfP909yHgB/72y8FrhKRtcBSYEAzn70PeBVYKSLrgHvwrmnOw+sBv+Cffv+NdkzQ3orfATPEWwSzhOjHeV8FjPdvhL2K1xP/mCbXSAfiPap71JlATVsHSdjsT5mibE5ZBSn+xNOoN8KvXPun8AjxrieZJlaMvWbhnsCxqTRb1B5gZEV1edSPtx6dGxiv5/l9Vf1Mk/e/CHxBVb8Yl6RZxl9t401Vfdr//4XAf/pnAy2yHmnb7sG7iJ6Suh3Q3T99JBywItqyk1bdlmrDoq5rTxGFj+YGBg4DJzQdkI+3Mm6PxgPyTeRU9a5GRbQvcFtbRRSsRxqRsjllo/GGQaRWsVLV3/0utLzPHk52HSXVHcnN37t40qxt4dyOIxxHWQeMjeJOfUYRkd/CJx5ZvkNVH2hu/3RhhTRCZXPKrgOqXOdo7Iq/hxacvUZTaj36VHagU6+tL0z8eS6S099hjKkV1eXp+gSWaYGd2kful8Aa1yGOGr0lHDxrjWbsZCSJkALDoh6yIpqZrJBGKDgjeBi4jI/f0XOiYL9+8OO/hPtI2+PyTBM96l8rHbnxzy6GRb0GXNnmXiYtWSGNQnBGcDVwo9MQqnrz/aHXc5WBTnOksaJtS5I9LKoBuKCiurw+icc0SWSFNHozgRdcHbzimfDCXvsY7+r4maJk0yNTC+pfT9Zp9tUV1eWrknQs44AV0ij5p/hfAJK+xMVJm8Krp6zTtF+8LlWMW33bpI4N9YkeFvVIRXX53Qk+hnHMCmk7+I+PXoA3li8pAvv0vR88Fh4o0T8iZ1qQo+G8U5ZdPyIndGhTgg6xkRYe1zSZxQppOwVnBBcD1yTjWKIavmV26K0cxeWwnYyUFzpYMHH5DV3QcNM5PGN1AO+66N44t2tSkBXSGARnBO8E/pjo43znyfCiHh+SEUspp6LOBz8oGrv6jvfjPCzqqorq8rVxbM+kMCuksfs63lyMCTFxfXjlqeu1zYllTWx61m8+Po7Dou6sqC5va4o6k0GskMYoOCN4AJiGN04wrnru1R1XPxkeLPbnlBRF25ZMLNy2JNZhUXOA78Qjj0kf9hc0DoIzgtvwZuJ+J15t5oQ1dMvs0NYcpW+82jRtK9n456kFe9o9LOoJ4CsV1eX23HWWsUIaJ8EZwTrgHOD9eLR3zePhRQUHGBOPtkx0xq1q17CoZ4GLopmo2WQOK6RxFJwRrAXOw5tvst1OeyW8Yvwmm4zElXYMi1oKfK6iuvxQInOZ1GWFNM6CM4IvA58hgnVemtOnXrd9++nwUPHWwjGORDEsajUwraK6PNXXhzIJZIU0AYIzgouAzxLZutn/lhPWIzfPDr2X463QaBzzhkX9prVhURuAT1VUl+9OYiyTgqyQJkhwRvA54Ay85Vwj8qO/hhd3a2B0wkKZqPWs33T8iE2Prm1
mWNQq4IyK6vJ4D+Q3acgKaQL5p/mTiGBo1Blrw8vGbLHroqlo0NZFpxRuW9p4WNSzeBM0J32+BZOabIb8JCibU9YP+DtwUnPv99ut79x5d6iLQM/kJjPRWD7uB4v2FhTX4Q1xSto8Cyb1WSFNkrI5Zd2Ax4BzG2/PDenh2beHNnY5xCg3yUyENCy5VaNq113f9q4m29ipfZIEZwT34d3N/9iUaj95NLzUimjKOwBcZEXUtMR6pA6UzSm7FLjn7JXhNVfMDZ/iOo9pVR1wQen62lRaztmkGOuROhCcEfwjcMpXng2n1vLOpqk/AydaETVtsR6pQ7Ulpd2A3wL/7TqL+Zi9wJWl62v/4DqISQ9WSFNAbUnpl4C7gIDrLIZlwMWl62vjPpuXyVx2ap8CStfXPgSUAn91nSWLhfBWiD3NiqiJlvVIU0xtSek04HfAMa6zZJHFeKfya1wHMenJCmkKqi0p7QpcjzdBsC12lzjvAj8sXV+b8OViTGazQprCaktKx+FdOz3VdZYMcwTv53pd6framKY8NAaskKYF/3R/JjDWdZYMMA+4pnR97TrXQUzmsEKaJmpLSgWYjnfKf7zjOOnoGWBm6fraZa6DmMxjhTTN1JaU5gAXA9cBxzmOk+oUbx2lG0rX1yZspVdjrJCmqdqS0lzgfOBbeAvv2Yz6HzmCN5TsF6Xra19xHcZkPiukGaC2pHQ48E3gMrJ7Kr4NwGzgD6Xra7e7DmOyhxXSDFJbUtoZuAj4OjDRcZxk2Q/8BZhdur52ieswJjtZIc1QtSWlxwCfw7tBNZnMGo96APgn8CTwaOn62r1u45hsZ4U0C9SWlPbBW4xvOt711Hy3idqlDqjxX/8qXV970G0cYz5ihTTL1JaUdgFOxltLahLeYP9eTkM1721gBd6a8TWl62tfdZzHmBZZIc1y/vjUEj4qqqV4w6r6JTHGdryiudz/7wq7WWTSiRVS0yx/rtTjGr2GAQOAgmZeHZtpIox3I2g/3vye7wJvNnq9dfTr0vW19Yn8XoxJNCukJma1JaX5QBe8qeiOAEdK19cecpvKmOSxQmqMMTGyiZ2NMSZGVkiNMSZGVkiNMSZGVkhNQolIZxFZICLDRGS+v228iPwmynaqROT7/tfXi8jZccxYF8E+vxKR8ngd02SWPNcBTMa7HHgc744+AKq6Am+8aLuo6rVxyBWtO4F78R5NNeZjrEdqEu0S4Cm8QroTQETOEJFn/K+rROR+EZkvIltE5KqjHxSRn4jIRhFZDIxstP1BEfmC//UEEVkqImtEZJmIdBeRXBG5RUSWi8haEfm6v+9AEVkoIqtFZJ2InO43+V5b34SqvgH0FpEB8fmxmExiPVKTMCLSEThWVev8TdNb2LUEOBPoDmwQkbuB0XgzWY3B+z1dCbzcTPuPAheq6nIRKcCb0OQrQL2qThCRfGCJiMzzjz9XVX8hIrl4Y19R1QmN2vw78FVV3dpMzpXAacBjUf0gTMazQmoSqQ+wO4L9alS1AWgQkR1Af+B04AlV/RBARJ5u5nMjgW2quhxAVff4+54LjD7aawUCwHC8R1DvF5EOwJOqurppg6r6H63k3AEURvD9mCxjhdQk0gGgUwT7NTT6OkTsv5cCfFtV537iDZEpwDTgQRG5TVX/EEW7nfC+J2M+xq6RmoRR1V1ArohEUkybWgh8zr/r3x1vWZWmNgADRWQCgH99NA+YC3zT73kiIiNEpKuIDAG2q+q9wH3AuNYCiMiNIvL5RptGALb6qPkEK6Qm0ebhTSwdFVVdiXf9cw3wf3in5U33OQRcCNwpImuAZ/F6jfcBrwIrRWQdcA9eL/cMYI2IrPI/d0fTNkXk7yJy9PS9DG+yFfyifBwxjDYwmcuetTcJJSLjgKtV9VLXWaIlInNV9VP+158HxqnqzxzHMinIeqQmofye5b/8u+Rp5WgR9eUBt7rKYlKb9UiNMSZG1iM1xpgYWSE1xpgYWSE1xpgYWSE1xpgYWSE
1xpgYWSE1xpgYWSE1xpgYWSE1xpgYWSE1xpgYWSE1xpgYWSE1xpgYWSE1xpgYWSE1xpgY/X94EM/IlO9NngAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "flattened_user_mentions_df.value_counts().head(5).plot(kind=\"pie\");" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10.5 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "a265634967a27dd555e8346f2355ee703e655fd7f0a0d20c168527cd0a3d5707" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/pre_process.ipynb b/notebooks/pre_process.ipynb new file mode 100644 index 0000000..1d54880 --- /dev/null +++ b/notebooks/pre_process.ipynb @@ -0,0 +1,1198 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "import pandas as pd\n", + "import sys" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# import custom libraries and scripts\n", + "# sys.path.append(os.path.abspath(os.path.join(\"../..\")))\n", + "sys.path.append(\".\")\n", + "sys.path.append(\"..\")\n", + "\n", + "from defaults import *\n", + "from extract_dataframe import read_json\n", + "from extract_dataframe import TweetDfExtractor\n", + "from clean_tweets_dataframe import Clean_Tweets" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + "
created_atsourceoriginal_textpolaritysubjectivitylangfavorite_countstatus_countretweet_countscreen_nameoriginal_authorfollowers_countfriends_countpossibly_sensitivehashtagsuser_mentionsplace
02022-08-07 22:31:20+00:00<a href=\"http://twitter.com/download/android\" ...RT @i_ameztoy: Extra random image (I):\\n\\nLets...-1.250000e-010.190625en480972i_ameztoyi_ameztoy204972621NaN[{'text': 'City', 'indices': [132, 137]}][{'screen_name': 'i_ameztoy', 'name': 'Iban Am...
12022-08-07 22:31:16+00:00<a href=\"http://twitter.com/download/android\" ...RT @IndoPac_Info: #China's media explains the ...-1.000000e-010.100000en6915831201ZIisqZIisq65272NaN[{'text': 'China', 'indices': [18, 24]}, {'tex...[{'screen_name': 'IndoPac_Info', 'name': 'Indo...
22022-08-07 22:31:07+00:00<a href=\"http://twitter.com/download/android\" ...China even cut off communication, they don't a...0.000000e+000.000000en016270Fin21FreeFin21Free85392NaN[{'text': 'XiJinping', 'indices': [127, 137]}][{'screen_name': 'ZelenskyyUa', 'name': 'Волод...Netherlands
32022-08-07 22:31:06+00:00<a href=\"http://twitter.com/download/android\" ...Putin to #XiJinping : I told you my friend, Ta...1.000000e-010.350000en016270Fin21FreeFin21Free85392NaN[{'text': 'XiJinping', 'indices': [9, 19]}][]Netherlands
42022-08-07 22:31:04+00:00<a href=\"http://twitter.com/download/iphone\" r...RT @ChinaUncensored: I’m sorry, I thought Taiw...-6.938894e-180.556250en152118958381VizziniDoloresVizziniDolores9102608NaN[][{'screen_name': 'ChinaUncensored', 'name': 'C...Ayent, Schweiz
52022-08-07 22:31:02+00:00<a href=\"http://twitter.com/download/android\" ...RT @benedictrogers: We must not let this happe...2.000000e-010.500000en1164848336GraceCh15554845GraceCh15554845207540.0[{'text': 'Taiwan', 'indices': [84, 91]}][{'screen_name': 'benedictrogers', 'name': 'Be...Melbourne, Victoria
62022-08-07 22:30:59+00:00<a href=\"http://twitter.com/download/android\" ...RT @TGTM_Official: What kind of country can co...1.583333e-010.800000en11064173411Philipkuma1Philipkuma112264NaN[{'text': 'Taiwan', 'indices': [101, 108]}, {'...[{'screen_name': 'TGTM_Official', 'name': 'The...
72022-08-07 22:30:59+00:00<a href=\"http://twitter.com/download/android\" ...RT @ChinaInfo777: #PinkFloyd singer Roger Wate...0.000000e+000.000000en10241025nhohn2011nhohn2011870508NaN[{'text': 'PinkFloyd', 'indices': [18, 28]}, {...[{'screen_name': 'ChinaInfo777', 'name': 'Chin...Florida, USA
82022-08-07 22:30:50+00:00<a href=\"http://twitter.com/download/android\" ...RT @AmbQinGang: China's SC&amp;FM Wang Yi elab...0.000000e+000.000000en1221630239ClaudioColomaRIClaudioColomaRI127263NaN[{'text': 'Taiwan', 'indices': [80, 87]}][{'screen_name': 'AmbQinGang', 'name': 'Qin Ga...El mundo periférico
92022-08-07 22:30:45+00:00<a href=\"https://mobile.twitter.com\" rel=\"nofo...RT @CGMeifangZhang: Chinese ambassador to the ...2.000000e-010.375000en4910718825jmarzola1jmarzola1213877NaN[{'text': 'USA', 'indices': [66, 70]}, {'text'...[{'screen_name': 'CGMeifangZhang', 'name': 'Zh...
\n", + "
" + ], + "text/plain": [ + " created_at \\\n", + "0 2022-08-07 22:31:20+00:00 \n", + "1 2022-08-07 22:31:16+00:00 \n", + "2 2022-08-07 22:31:07+00:00 \n", + "3 2022-08-07 22:31:06+00:00 \n", + "4 2022-08-07 22:31:04+00:00 \n", + "5 2022-08-07 22:31:02+00:00 \n", + "6 2022-08-07 22:30:59+00:00 \n", + "7 2022-08-07 22:30:59+00:00 \n", + "8 2022-08-07 22:30:50+00:00 \n", + "9 2022-08-07 22:30:45+00:00 \n", + "\n", + " source \\\n", + "0 \n", + "Int64Index: 22000 entries, 0 to 21999\n", + "Data columns (total 17 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 created_at 22000 non-null datetime64[ns, UTC]\n", + " 1 source 22000 non-null object \n", + " 2 original_text 22000 non-null object \n", + " 3 polarity 22000 non-null float64 \n", + " 4 subjectivity 22000 non-null float64 \n", + " 5 lang 22000 non-null object \n", + " 6 favorite_count 22000 non-null int64 \n", + " 7 status_count 22000 non-null int64 \n", + " 8 retweet_count 22000 non-null int64 \n", + " 9 screen_name 22000 non-null object \n", + " 10 original_author 22000 non-null object \n", + " 11 followers_count 22000 non-null int64 \n", + " 12 friends_count 22000 non-null int64 \n", + " 13 possibly_sensitive 6191 non-null float64 \n", + " 14 hashtags 22000 non-null object \n", + " 15 user_mentions 22000 non-null object \n", + " 16 place 22000 non-null object \n", + "dtypes: datetime64[ns, UTC](1), float64(3), int64(5), object(8)\n", + "memory usage: 3.0+ MB\n" + ] + } + ], + "source": [ + "global_data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
polaritysubjectivityfavorite_countstatus_countretweet_countfollowers_countfriends_countpossibly_sensitive
count22000.00000022000.00000022000.0000002.200000e+0422000.0000002.200000e+0422000.0000006191.000000
mean0.0613250.283839852.1373185.446036e+04176.7501821.796764e+041563.1144550.037151
std0.2237010.2909633106.0776451.454120e+05498.4357653.030478e+054358.6512640.189146
min-1.0000000.0000000.0000001.000000e+000.0000000.000000e+000.0000000.000000
25%0.0000000.0000002.0000002.105750e+032.0000005.700000e+01137.0000000.000000
50%0.0000000.200000115.0000001.038750e+0438.0000002.840000e+02487.0000000.000000
75%0.1333330.468824655.0000004.526150e+04187.0000001.324500e+031599.0000000.000000
max1.0000001.00000065170.0000004.108317e+0617409.0000001.449852e+07208360.0000001.000000
\n", + "
" + ], + "text/plain": [ + " polarity subjectivity favorite_count status_count \\\n", + "count 22000.000000 22000.000000 22000.000000 2.200000e+04 \n", + "mean 0.061325 0.283839 852.137318 5.446036e+04 \n", + "std 0.223701 0.290963 3106.077645 1.454120e+05 \n", + "min -1.000000 0.000000 0.000000 1.000000e+00 \n", + "25% 0.000000 0.000000 2.000000 2.105750e+03 \n", + "50% 0.000000 0.200000 115.000000 1.038750e+04 \n", + "75% 0.133333 0.468824 655.000000 4.526150e+04 \n", + "max 1.000000 1.000000 65170.000000 4.108317e+06 \n", + "\n", + " retweet_count followers_count friends_count possibly_sensitive \n", + "count 22000.000000 2.200000e+04 22000.000000 6191.000000 \n", + "mean 176.750182 1.796764e+04 1563.114455 0.037151 \n", + "std 498.435765 3.030478e+05 4358.651264 0.189146 \n", + "min 0.000000 0.000000e+00 0.000000 0.000000 \n", + "25% 2.000000 5.700000e+01 137.000000 0.000000 \n", + "50% 38.000000 2.840000e+02 487.000000 0.000000 \n", + "75% 187.000000 1.324500e+03 1599.000000 0.000000 \n", + "max 17409.000000 1.449852e+07 208360.000000 1.000000 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "global_data.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## EDA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Remove duplicated rows" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Automation in Action...!!!\n" + ] + }, + { + "data": { + "text/plain": [ + "(22000, 17)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clean_tweets = Clean_Tweets(global_data)\n", + "clean_tweets.df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(22000, 17)" + ] + }, + "execution_count": 7, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clean_tweets.df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(7440, 17)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clean_tweets = clean_tweets.drop_duplicate(global_data)\n", + "clean_tweets.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we can see there were too many duplicates" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Remove tweets that are not english" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "en 7440\n", + "Name: lang, dtype: int64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clean_tweets.lang.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All tweets are in English" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Missing values" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 7440 entries, 0 to 21997\n", + "Data columns (total 17 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 created_at 7440 non-null datetime64[ns, UTC]\n", + " 1 source 7440 non-null object \n", + " 2 original_text 7440 non-null object \n", + " 3 polarity 7440 non-null float64 \n", + " 4 subjectivity 7440 non-null float64 \n", + " 5 lang 7440 non-null object \n", + " 6 favorite_count 7440 non-null int64 \n", + " 7 status_count 7440 non-null int64 \n", + " 8 retweet_count 7440 non-null int64 \n", + " 9 screen_name 7440 non-null object \n", + " 10 original_author 7440 non-null object 
\n", + " 11 followers_count 7440 non-null int64 \n", + " 12 friends_count 7440 non-null int64 \n", + " 13 possibly_sensitive 3977 non-null float64 \n", + " 14 hashtags 7440 non-null object \n", + " 15 user_mentions 7440 non-null object \n", + " 16 place 7440 non-null object \n", + "dtypes: datetime64[ns, UTC](1), float64(3), int64(5), object(8)\n", + "memory usage: 1.0+ MB\n" + ] + } + ], + "source": [ + "clean_tweets.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Only the `possibly_sensitive` feature has missing values" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0 3866\n", + "1.0 111\n", + "Name: possibly_sensitive, dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clean_tweets.possibly_sensitive.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we can see there are 3,866 not sensitive and 111 sensitive tweets\n", + "And only 3,977 out of 7,440 tweets are recorded for sensitivity" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Handling missing values" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "created_at 0\n", + "source 0\n", + "original_text 0\n", + "polarity 0\n", + "subjectivity 0\n", + "lang 0\n", + "favorite_count 0\n", + "status_count 0\n", + "retweet_count 0\n", + "screen_name 0\n", + "original_author 0\n", + "followers_count 0\n", + "friends_count 0\n", + "possibly_sensitive 0\n", + "hashtags 0\n", + "user_mentions 0\n", + "place 0\n", + "dtype: int64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clean_tweets = Clean_Tweets.fill_missing(clean_tweets, df = clean_tweets, column=\"possibly_sensitive\", value = \"unknown\")\n", + 
"clean_tweets.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['created_at', 'source', 'original_text', 'polarity', 'subjectivity',\n", + " 'lang', 'favorite_count', 'status_count', 'retweet_count',\n", + " 'screen_name', 'original_author', 'followers_count', 'friends_count',\n", + " 'possibly_sensitive', 'hashtags', 'user_mentions', 'place'],\n", + " dtype='object')" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clean_tweets.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
retweet_countsourceoriginal_texthashtagsplace
02<a href=\"http://twitter.com/download/android\" ...RT @i_ameztoy: Extra random image (I):\\n\\nLets...[{'text': 'City', 'indices': [132, 137]}]
1201<a href=\"http://twitter.com/download/android\" ...RT @IndoPac_Info: #China's media explains the ...[{'text': 'China', 'indices': [18, 24]}, {'tex...
20<a href=\"http://twitter.com/download/android\" ...China even cut off communication, they don't a...[{'text': 'XiJinping', 'indices': [127, 137]}]Netherlands
30<a href=\"http://twitter.com/download/android\" ...Putin to #XiJinping : I told you my friend, Ta...[{'text': 'XiJinping', 'indices': [9, 19]}]Netherlands
4381<a href=\"http://twitter.com/download/iphone\" r...RT @ChinaUncensored: I’m sorry, I thought Taiw...[]Ayent, Schweiz
..................
219743<a href=\"https://mobile.twitter.com\" rel=\"nofo...RT @metesohtaoglu: 📌📸 Map of #China's possible...[{'text': 'China', 'indices': [29, 35]}, {'tex...Seattle, WA
219871<a href=\"http://twitter.com/download/iphone\" r...RT @NEVERBOW: China is doing #exactly what #Ru...[{'text': 'exactly', 'indices': [29, 37]}, {'t...
219890<a href=\"http://twitter.com/download/iphone\" r...Minister Wu is crystal clear in his @BBCNews i...[{'text': 'Taiwan', 'indices': [168, 175]}, {'...Toronto, Canada
219910<a href=\"http://twitter.com/download/android\" ...Reports say that #China is planning to seize #...[{'text': 'China', 'indices': [17, 23]}, {'tex...
219970<a href=\"http://twitter.com/download/android\" ...@Reuters Thanks #Pelosi smart move.[{'text': 'Pelosi', 'indices': [16, 23]}]🇺🇲🇷🇺🇺🇦🇫🇷🇦🇪🇮🇱🏳️‍🌈
\n", + "

7440 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " retweet_count source \\\n", + "0 2
\n", + "Int64Index: 7440 entries, 0 to 21997\n", + "Data columns (total 17 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 created_at 7440 non-null datetime64[ns, UTC]\n", + " 1 source 7440 non-null object \n", + " 2 original_text 7440 non-null object \n", + " 3 polarity 7440 non-null float64 \n", + " 4 subjectivity 7440 non-null float64 \n", + " 5 lang 7440 non-null object \n", + " 6 favorite_count 7440 non-null int64 \n", + " 7 status_count 7440 non-null int64 \n", + " 8 retweet_count 7440 non-null int64 \n", + " 9 screen_name 7440 non-null object \n", + " 10 original_author 7440 non-null object \n", + " 11 followers_count 7440 non-null int64 \n", + " 12 friends_count 7440 non-null int64 \n", + " 13 possibly_sensitive 7440 non-null object \n", + " 14 hashtags 7440 non-null object \n", + " 15 user_mentions 7440 non-null object \n", + " 16 place 7440 non-null object \n", + "dtypes: datetime64[ns, UTC](1), float64(2), int64(5), object(9)\n", + "memory usage: 1.0+ MB\n" + ] + } + ], + "source": [ + "clean_tweets['created_at'] = pd.to_datetime(clean_tweets['created_at'])\n", + "clean_tweets.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### extract source of tweets" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "#clean_tweets[\"source\"] = clean_tweets[\"source\"].apply(Clean_Tweets.extract_device_name(self = clean_tweets, source='source'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### save current dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "clean data saved successfully\n" + ] + } + ], + "source": [ + "clean_tweets.to_csv('../data/clean_data.csv', index = False)\n", + "print('clean data saved successfully')" + ] + } + ], + "metadata": { + "kernelspec": { + 
"display_name": "Python 3.10.5 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "a265634967a27dd555e8346f2355ee703e655fd7f0a0d20c168527cd0a3d5707" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/requirements.txt b/requirements.txt index d017ed3..15b377b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -pandas>=1.1.0 +pandas>=1.1.0 textblob>=0.15.3 \ No newline at end of file diff --git a/tests/test_extract_dataframe.py b/tests/test_extract_dataframe.py index 8d5f30d..522c2e7 100644 --- a/tests/test_extract_dataframe.py +++ b/tests/test_extract_dataframe.py @@ -1,100 +1,251 @@ -import unittest -import pandas as pd -import sys, os - -sys.path.append(os.path.abspath(os.path.join("../.."))) - -from extract_dataframe import read_json -from extract_dataframe import TweetDfExtractor - -# For unit testing the data reading and processing codes, -# we will need about 5 tweet samples. -# Create a sample not more than 10 tweets and place it in a json file. -# Provide the path to the samples tweets file you created below -sampletweetsjsonfile = "" #put here the path to where you placed the file e.g. ./sampletweets.json. 
-_, tweet_list = read_json(sampletweetsjsonfile) - -columns = [ - "created_at", - "source", - "original_text", - "clean_text", - "sentiment", - "polarity", - "subjectivity", - "lang", - "favorite_count", - "retweet_count", - "original_author", - "screen_count", - "followers_count", - "friends_count", - "possibly_sensitive", - "hashtags", - "user_mentions", - "place", - "place_coord_boundaries", -] - - -class TestTweetDfExtractor(unittest.TestCase): - """ - A class for unit-testing function in the fix_clean_tweets_dataframe.py file - - Args: - ----- - unittest.TestCase this allows the new class to inherit - from the unittest module - """ - - def setUp(self) -> pd.DataFrame: - self.df = TweetDfExtractor(tweet_list[:5]) - # tweet_df = self.df.get_tweet_df() - - def test_find_statuses_count(self): - self.assertEqual( - self.df.find_statuses_count(), - ) - - def test_find_full_text(self): - text = - - self.assertEqual(self.df.find_full_text(), text) - - def test_find_sentiments(self): - self.assertEqual( - self.df.find_sentiments(self.df.find_full_text()), - ( - , - , - ), - ) - - - def test_find_screen_name(self): - name = - self.assertEqual(self.df.find_screen_name(), name) - - def test_find_followers_count(self): - f_count = - self.assertEqual(self.df.find_followers_count(), f_count) - - def test_find_friends_count(self): - friends_count = - self.assertEqual(self.df.find_friends_count(), friends_count) - - def test_find_is_sensitive(self): - self.assertEqual(self.df.is_sensitive(), ) - - - # def test_find_hashtags(self): - # self.assertEqual(self.df.find_hashtags(), ) - - # def test_find_mentions(self): - # self.assertEqual(self.df.find_mentions(), ) - - - -if __name__ == "__main__": - unittest.main() - +import os +import sys +import unittest +import pandas as pd + +# sys.path.append(os.path.abspath(os.path.join("../.."))) +# sys.path.append(".") +sys.path.append(".") +from defaults import * + +from extract_dataframe import read_json +from extract_dataframe import 
TweetDfExtractor

# For unit testing the data reading and processing code we need about
# five sample tweets.
# Create a sample of no more than 10 tweets and place it in a JSON file.
# Provide the path to the sample tweets file you created below.

_, tweet_list = read_json(processed_global_data)

columns = [
    "created_at",
    "source",
    "original_text",
    "clean_text",
    "sentiment",
    "polarity",
    "subjectivity",
    "lang",
    "favorite_count",
    "retweet_count",
    "original_author",
    "screen_count",
    "followers_count",
    "friends_count",
    "possibly_sensitive",
    "hashtags",
    "user_mentions",
    "place",
    "place_coord_boundaries",
]


class TestTweetDfExtractor(unittest.TestCase):
    """
    Unit tests for the accessor methods of TweetDfExtractor.

    Every test runs against an extractor built from the first
    ``SAMPLE_SIZE`` entries of the module-level ``tweet_list``. The
    expected values are hard-coded for the current sample data and must
    be updated whenever the sample file changes.

    Args:
    -----
    unittest.TestCase this allows the new class to inherit
    from the unittest module
    """

    # Number of sample tweets handed to the extractor in setUp.
    SAMPLE_SIZE = 5

    def setUp(self) -> None:
        """
        Build the extractor under test from the first sample tweets
        """
        self.df = TweetDfExtractor(tweet_list[:self.SAMPLE_SIZE])

    def test_find_status_count(self):
        """
        Test case for the find status count method
        """
        self.assertEqual(self.df.find_status_count(),
                         [40] * self.SAMPLE_SIZE)

    def test_find_full_text(self):
        """
        Test case for the find full text method
        """
        text = [
            'RT @nikitheblogger: Irre: Annalena Baerbock sagt, es bricht ihr das Herz, dass man nicht bedingungslos schwere Waffen liefert.\nMir bricht e\u2026',
            'RT @sagt_mit: Merkel schaffte es in 1 Jahr 1 Million \"Fl\u00fcchtlinge\" durchzuf\u00fcttern, jedoch nicht nach 16 Jahren 1 Million Rentner aus der Ar\u2026',
            'RT @Kryptonoun: @WRi007 Pharma in Lebensmitteln, Trinkwasser, in der Luft oder in der Zahnpasta irgendwo muss ein Beruhigungsmittel bzw. Be\u2026',
            'RT @WRi007: Die #Deutschen sind ein braves Volk!. Mit #Spritpreisen von 2 Euro abgefunden. Mit #inflation abgefunden. Mit h\u00f6heren #Abgaben\u2026',
            'RT @RolandTichy: Baerbock verk\u00fcndet mal so nebenhin in Riga das Ende der Energieimporte aus Russland. Habeck rudert schon zur\u00fcck, Scholz sc\u2026',
        ]
        self.assertEqual(self.df.find_full_text(), text)

    def test_find_sentiments(self):
        """
        Test case for the find sentiments method

        The method is fed the output of find_full_text and is expected
        to return a pair of lists (presumably polarity and subjectivity
        -- confirm against the extractor), all zeros for this sample.
        """
        sentiment_values = ([0.0] * self.SAMPLE_SIZE,
                            [0.0] * self.SAMPLE_SIZE)
        self.assertEqual(self.df.find_sentiments(self.df.find_full_text()),
                         sentiment_values)

    def test_find_created_time(self):
        """
        Test case for the find created time method
        """
        really_created_at = ['Fri Apr 22 22:20:18 +0000 2022',
                             'Fri Apr 22 22:19:16 +0000 2022',
                             'Fri Apr 22 22:17:28 +0000 2022',
                             'Fri Apr 22 22:17:20 +0000 2022',
                             'Fri Apr 22 22:13:15 +0000 2022']
        self.assertEqual(self.df.find_created_time(), really_created_at)

    def test_find_source(self):
        """
        Test case for the find source method
        """
        source = ['Twitter for Android'] * self.SAMPLE_SIZE
        self.assertEqual(self.df.find_source(), source)

    def test_find_screen_name(self):
        """
        Test case for the find screen name method
        """
        name = ['McMc74078966'] * self.SAMPLE_SIZE
        self.assertEqual(self.df.find_screen_name(), name)

    def test_find_followers_count(self):
        """
        Test case for the find followers count method
        """
        f_count = [3] * self.SAMPLE_SIZE
        self.assertEqual(self.df.find_followers_count(), f_count)

    def test_find_friends_count(self):
        """
        Test case for the find friends count method
        """
        friends_count = [12] * self.SAMPLE_SIZE
        self.assertEqual(self.df.find_friends_count(), friends_count)

    def test_find_is_sensitive(self):
        """
        Test case for the is sensitive method
        """
        self.assertEqual(self.df.is_sensitive(),
                         [None] * self.SAMPLE_SIZE)

    def test_find_hashtags(self):
        """
        Test case for the find hashtags method
        """
        hashtags = [[], [], [], [{'indices': [16, 26], 'text': 'Deutschen'},
                                 {'indices': [54, 67], 'text': 'Spritpreisen'},
                                 {'indices': [95, 105], 'text': 'inflation'},
                                 {'indices': [130, 138], 'text': 'Abgaben'}],
                    []]
        self.assertEqual(self.df.find_hashtags(), hashtags)

    def test_find_mentions(self):
        """
        Test case for the find mentions method
        """
        mentions = [[{"screen_name": "nikitheblogger",
                      "name": "Neverforgetniki", "id": 809188392089092097,
                      "id_str": "809188392089092097", "indices": [3, 18]}],
                    [{"screen_name": "sagt_mit",
                      "name": "Sie sagt es mit Bildern",
                      "id": 1511959918777184256,
                      "id_str": "1511959918777184256",
                      "indices": [3, 12]}],
                    [{"screen_name": "Kryptonoun",
                      "name": "Kryptoguru", "id": 951051508321345536,
                      "id_str": "951051508321345536", "indices": [3, 14]},
                     {"screen_name": "WRi007", "name": "Wolfgang Berger",
                      "id": 1214543251283357696,
                      "id_str": "1214543251283357696", "indices": [16, 23]}],
                    [{"screen_name": "WRi007",
                      "name": "Wolfgang Berger", "id": 1214543251283357696,
                      "id_str": "1214543251283357696", "indices": [3, 10]}],
                    [{"screen_name": "RolandTichy", "name": "Roland Tichy",
                      "id": 19962363, "id_str": "19962363",
                      "indices": [3, 15]}]]
        self.assertEqual(self.df.find_mentions(), mentions)

    def test_find_location(self):
        """
        Test case for the find location method
        """
        locations = [''] * self.SAMPLE_SIZE
        self.assertEqual(self.df.find_location(), locations)

    def test_find_lang(self):
        """
        Test case for the find lang method
        """
        langs = ['de'] * self.SAMPLE_SIZE
        self.assertEqual(self.df.find_lang(), langs)

    def test_find_retweet_count(self):
        """
        Test case for the find retweet count method
        """
        retweets = [355, 505, 4, 332, 386]
        self.assertEqual(self.df.find_retweet_count(), retweets)

    def test_find_favorite_count(self):
        """
        Test case for the find favorite count method
        """
        self.assertEqual(self.df.find_favorite_count(),
                         [2356, 1985, 16, 1242, 1329])


if __name__ == "__main__":
    unittest.main()