diff --git a/.dvc/.gitignore b/.dvc/.gitignore
new file mode 100644
index 0000000..528f30c
--- /dev/null
+++ b/.dvc/.gitignore
@@ -0,0 +1,3 @@
+/config.local
+/tmp
+/cache
diff --git a/.dvc/config b/.dvc/config
new file mode 100644
index 0000000..e69de29
diff --git a/.dvcignore b/.dvcignore
new file mode 100644
index 0000000..5197305
--- /dev/null
+++ b/.dvcignore
@@ -0,0 +1,3 @@
+# Add patterns of files dvc should ignore, which could improve
+# the performance. Learn more at
+# https://dvc.org/doc/user-guide/dvcignore
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index eefca6a..712ff5d 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -1,29 +1,29 @@
-name: twitter-data-analysis
-
-on:
- push:
- branches: [main]
- pull_request:
- branches: [main]
-
-permissions:
- contents: read
-
-jobs:
- build:
- runs-on: ubuntu-latest
-
- steps:
- - uses: actions/checkout@v3
- - name: Set up Python 3.10
- uses: actions/setup-python@v3
- with:
- python-version: "3.10"
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install pytest
- if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- - name: Test with pytest
- run: |
- python -m pytest
+name: twitter-data-analysis
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4  # v3 runs on the deprecated Node 16 runtime
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v5  # v3 runs on the deprecated Node 16 runtime
+        with:
+          python-version: "3.10"  # quoted so YAML does not read it as 3.1
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install pytest
+          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+      - name: Test with pytest
+        run: |
+          python -m pytest
diff --git a/.gitignore b/.gitignore
index 54e6782..7081366 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
-__pycache__/
-data/
\ No newline at end of file
+__pycache__/
+data/
+.ipynb_checkpoints
diff --git a/LICENSE b/LICENSE
index a13471e..e3b94af 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,21 +1,21 @@
-MIT License
-
-Copyright (c) 2022 10 Academy
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+MIT License
+
+Copyright (c) 2022 10 Academy
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
index ba4e845..b64ff6b 100644
--- a/README.md
+++ b/README.md
@@ -1,29 +1,29 @@
-# Twitter-Data-Analysis
-
-### So here are the bare minimum requirement for completing this task
-
-1. Use this template to create a repository called Twitter-Data-Analysis in your github account. See ["Creating a repository from a template."](https://docs.github.com/en/articles/creating-a-repository-from-a-template) for more information.
-2. [Download](https://drive.google.com/drive/folders/19G8dmehf9vU0u6VTKGV-yWsQOn3IvPsd) and extract the necessary data and put it in the data directory. The data should not not be added to git tracking.
-3. Create a branch called “bugfix” to fix the bugs in the fix_clean_tweets_dataframe.py and fix_extract_dataframe.py
-4. In branch “bugfix” use the git mv command to rename fix_clean_tweets_dataframe.py to clean_tweets_dataframe.py and fix_extract_dataframe.py to extract_dataframe.py
-5. Fix the bugs on clean_tweets_dataframe.py and extract_dataframe.py
-6. Multiple times, push the code you are working on to git, and once the fix is complete, merge the fix_bug branch to main branch
-7. Create a new branch called “testing” for updating the unit tests in the test/ folder to be applicable to the code you fixed.
- a. Build your unit and integration tests to run on small data (< 1 MB) that you copied from what is provided - avoid pushing large data to github
- b. Think about the key elements (units can be functions, classes, or modules; multiple of them working together to accomplish a task requires integration testing) of the code base you are working on. Write the following
- - Unit tests: for individual key functions and classes
- - Integration tests: for the integration of multiple units working together
-8. After completing the unit and integration tests, merge the “testing” branch with the main branch
-9. In all cases when you merge, make sure you first do Pull Request, review, then accept the merge.
-10. Use github actions in your repository such that when you git push new code (or merge a branch) to the main branch, the unit test in tests/*.py runs automatically. All tests should pass.
-
-
-After Completing this Challenge, you would have explore
-
-- Unittesting
-- Modular Coding
-- Software Engineering Best Practices
-- Python Package Structure
-- Bug Fix (Debugging)
-
-Have Fun and Cheers
+# Twitter-Data-Analysis
+
+### Here are the bare minimum requirements for completing this task
+
+1. Use this template to create a repository called Twitter-Data-Analysis in your github account. See ["Creating a repository from a template."](https://docs.github.com/en/articles/creating-a-repository-from-a-template) for more information.
+2. [Download](https://drive.google.com/drive/folders/19G8dmehf9vU0u6VTKGV-yWsQOn3IvPsd) and extract the necessary data and put it in the data directory. The data should not be added to git tracking.
+3. Create a branch called “bugfix” to fix the bugs in the fix_clean_tweets_dataframe.py and fix_extract_dataframe.py
+4. In branch “bugfix” use the git mv command to rename fix_clean_tweets_dataframe.py to clean_tweets_dataframe.py and fix_extract_dataframe.py to extract_dataframe.py
+5. Fix the bugs on clean_tweets_dataframe.py and extract_dataframe.py
+6. Push the code you are working on to git multiple times, and once the fix is complete, merge the “bugfix” branch into the main branch
+7. Create a new branch called “testing” for updating the unit tests in the test/ folder to be applicable to the code you fixed.
+ a. Build your unit and integration tests to run on small data (< 1 MB) that you copied from what is provided - avoid pushing large data to github
+ b. Think about the key elements (units can be functions, classes, or modules; multiple of them working together to accomplish a task requires integration testing) of the code base you are working on. Write the following
+ - Unit tests: for individual key functions and classes
+ - Integration tests: for the integration of multiple units working together
+8. After completing the unit and integration tests, merge the “testing” branch with the main branch
+9. In all cases when you merge, make sure you first do Pull Request, review, then accept the merge.
+10. Use github actions in your repository such that when you git push new code (or merge a branch) to the main branch, the unit test in tests/*.py runs automatically. All tests should pass.
+
+
+After completing this challenge, you will have explored:
+
+- Unit testing
+- Modular Coding
+- Software Engineering Best Practices
+- Python Package Structure
+- Bug Fix (Debugging)
+
+Have Fun and Cheers
diff --git a/clean_tweets_dataframe.py b/clean_tweets_dataframe.py
new file mode 100644
index 0000000..49f6602
--- /dev/null
+++ b/clean_tweets_dataframe.py
@@ -0,0 +1,143 @@
+import re
+import pandas as pd
+from defaults import *
+
+class Clean_Tweets:
+ """
+ The PEP8 Standard AMAZING!!!
+ """
+ def __init__(self, df:pd.DataFrame):
+     print('Automation in Action...!!!')  # banner printed on every construction
+     self.df = df  # frame that the cleaning methods read and update
+
+ def drop_unwanted_column(self, df:pd.DataFrame)->pd.DataFrame:
+     """
+     Remove rows that repeat the column names (a data-collection artefact)
+     from self.df and return the cleaned frame; `df` itself is not read.
+     """
+     header_rows = self.df[self.df['retweet_count'] == 'retweet_count'].index
+     self.df.drop(header_rows, inplace=True)
+     self.df = self.df[self.df['polarity'] != 'polarity']
+     return self.df
+
+ def drop_duplicate(self, df:pd.DataFrame)->pd.DataFrame:
+     """
+     Drop rows of self.df that repeat an 'original_text' and return self.df.
+     """
+     self.df.drop_duplicates(subset='original_text', inplace=True)
+     return self.df  # the frame that was deduplicated; `df` itself is not read
+
+ def convert_to_datetime(self, df:pd.DataFrame)->pd.DataFrame:
+     """
+     Parse self.df['created_at'] as datetimes (unparseable -> NaT); return self.df.
+     """
+     self.df['created_at'] = pd.to_datetime(self.df['created_at'], errors='coerce')
+     return self.df  # `df` is accepted for compatibility but not read
+
+ def convert_to_numbers(self, df:pd.DataFrame)->pd.DataFrame:
+     """
+     Convert the id, count and sentiment columns of self.df to numbers.
+
+     Values that cannot be parsed become NaN (errors='coerce'). The `df`
+     argument is not read; the converted self.df is returned.
+     """
+     numeric_columns = (
+         'id',
+         'subjectivity',
+         'listed_count',
+         'retweet_count',
+         'friends_count',
+         'favorite_count',
+         'statuses_count',
+         'followers_count',
+         'polarity',
+     )
+     for column in numeric_columns:
+         # NOTE(review): get_tweet_df emits 'status_count' - confirm schema
+         self.df[column] = pd.to_numeric(self.df[column],
+                                         errors='coerce')
+     return self.df
+
+ def remove_non_english_tweets(self, df:pd.DataFrame)->pd.DataFrame:
+     """
+     Keep only the rows of self.df whose 'lang' is 'en' and return self.df.
+     """
+     self.df.query("lang == 'en'", inplace=True)
+     return self.df  # `df` is accepted for compatibility but not read
+
+ def drop_nulls(self, df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Drop every row of self.df that holds a null and return the result.
+     """
+     self.df = self.df.dropna(axis=0, how='any', inplace=False)
+     return self.df  # the new, null-free frame (not the `df` argument)
+
+ def find_hashtags(self, df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Return the hashtags found in `df`; a str yields a list, anything else is
+     treated as a frame whose 'original_text' column is scanned per row.
+     """
+     find = re.compile('(#[A-Za-z]+[A-Za-z0-9-_]+)').findall
+     return find(df) if isinstance(df, str) else df['original_text'].astype(str).apply(find)
+
+ def text_category(self, series: pd.Series) -> list:
+     """
+     Label every polarity in `series` as "positive", "neutral" or
+     "negative"; values matching none of the three tests (e.g. NaN or a
+     magnitude below 1e-11 that is not exactly zero) are labelled 'UNK'.
+     """
+     def label(score):
+         if score >= 0.00000000001:
+             return "positive"
+         if score == 0.00000000000:
+             return "neutral"
+         if score <= -0.00000000001:
+             return "negative"
+         return 'UNK'
+     return [label(score) for score in series]
+
+ def fill_missing(self, df: pd.DataFrame, column: str, value):
+     """
+     Replace the nulls of df[column] with `value`, writing the result back
+     into `df`, and return `df`.
+     """
+     filled = df[column].fillna(value)
+     df[column] = filled
+     return df
+
+ def replace_empty_string(self, df:pd.DataFrame, column: str, value: str):
+     """
+     Swap every "" in df[column] for `value` (in `df` itself); return `df`.
+     """
+     def swap(cell):
+         return value if cell == "" else cell
+     df[column] = df[column].apply(swap)
+     return df
+
+ def remove_characters(self, df: pd.DataFrame, column: str):
+     """
+     Strip every character that is not alphanumeric, whitespace, underscore
+     or hyphen from the strings in `column`; non-string cells are kept as is.
+     """
+     # raw string: "\s" in a plain literal is an invalid escape sequence
+     unwanted = re.compile(r"[^a-zA-Z0-9\s_-]")
+     df[column] = df[column].apply(lambda text: unwanted.sub("", text) if isinstance(text, str) else text)
+     return df
+
+ def extract_device_name(self, source: str):
+     """
+     Return the device name: the third piece of `source` split on '<' / '>'.
+     """
+     res = re.split('<|>', source)[2].strip()
+     return res
+
+if __name__ == "__main__":
+    # Script entry point: load each raw twitter data set named in
+    # defaults.py (one JSON object per line) and wrap the resulting
+    # frame in its own Clean_Tweets instance.
+    global_tweet_df = pd.read_json(global_data, lines=True)
+    global_cleaner = Clean_Tweets(global_tweet_df)
+
+    # the african data set goes through the same two steps
+    african_tweet_df = pd.read_json(african_data, lines=True)
+    african_cleaner = Clean_Tweets(african_tweet_df)
diff --git a/defaults.py b/defaults.py
new file mode 100644
index 0000000..c36af2f
--- /dev/null
+++ b/defaults.py
@@ -0,0 +1,16 @@
+"""
+Default data-file paths shared by the extraction and cleaning scripts (relative paths).
+"""
+
+# raw global tweets; read line by line as JSON (read_json / pd.read_json)
+global_data = 'data/global_twitter_data.json'
+
+# processed global tweets; get_tweet_df writes this file for the global run
+processed_global_data = 'data/processed_global_tweet_data.json'
+
+# NOTE(review): the disabled African run saves 'processed_african_tweet_data'
+# raw african tweets, same one-JSON-object-per-line layout
+african_data = 'data/africa_twitter_data.json'
+
+# processed african tweets ('africa', not 'african' - see note above)
+processed_african_data = 'data/processed_africa_tweet_data.json'
diff --git a/extract_dataframe.py b/extract_dataframe.py
new file mode 100644
index 0000000..add5256
--- /dev/null
+++ b/extract_dataframe.py
@@ -0,0 +1,277 @@
+import json
+import pandas as pd
+import numpy as np
+from textblob import TextBlob
+from defaults import *
+
+
+def read_json(json_file: str) -> tuple:
+    """
+    Read a file holding one JSON document per line into a list.
+    Args:
+    -----
+    json_file: str - path of a json file
+
+    Returns
+    -------
+    (number of records read, list of the decoded records)
+    """
+    # the context manager closes the handle even if a line fails to parse
+    with open(json_file, 'r') as handle:
+        # blank lines carry no record and would make json.loads raise
+        tweets_data = [json.loads(line) for line in handle if line.strip()]
+    return len(tweets_data), tweets_data
+
+class TweetDfExtractor:
+ """
+ this function will parse tweets json into a pandas dataframe
+
+ Return
+ ------
+ dataframe
+ """
+ def __init__(self, tweets_list):
+     """
+     Keep the tweets on the instance for the extraction methods to read.
+     """
+     self.tweets_list = tweets_list
+
+ def find_statuses_count(self)->list:
+     """
+     Collect the statuses_count of every tweet's author, in input order.
+     """
+     # a tweet lacking 'user' or 'statuses_count' raises KeyError
+     authors = (tweet['user'] for tweet in self.tweets_list)
+     return [author['statuses_count'] for author in authors]
+
+ def find_full_text(self)->list:
+     """
+     Return each tweet's 'full_text', with 'NA' standing in for any
+     tweet that has no such key.
+     """
+     texts = []
+     for tweet in self.tweets_list:
+         if 'full_text' in tweet:
+             texts.append(tweet['full_text'])
+         else:
+             texts.append('NA')
+     return texts
+
+ def find_sentiments(self, text)->list:
+     """
+     Return (polarities, subjectivities) for the given texts, two parallel lists.
+     """
+     # one TextBlob analysis per text instead of one per attribute
+     scores = [TextBlob(x).sentiment for x in text]
+     return ([s.polarity for s in scores], [s.subjectivity for s in scores])
+
+ def find_created_time(self)->list:
+     """List the 'created_at' value of every tweet, in input order."""
+     timestamps = []
+     for tweet in self.tweets_list:
+         timestamps.append(tweet['created_at'])
+     return timestamps
+
+ def find_source(self)->list:
+     """List the 'source' field of every tweet, in input order."""
+     sources = []
+     for tweet in self.tweets_list:
+         sources.append(tweet['source'])
+     return sources
+
+ def find_screen_name(self)->list:
+     """
+     List the screen name of the account behind every tweet.
+     """
+     # tweet -> 'user' -> 'screen_name'; a missing key raises KeyError
+     users = (tweet['user'] for tweet in self.tweets_list)
+     return [user['screen_name'] for user in users]
+
+ def find_followers_count(self)->list:
+     """List the followers_count of the account behind every tweet."""
+     followers = []
+     for tweet in self.tweets_list:
+         account = tweet['user']
+         followers.append(account['followers_count'])
+     return followers
+
+ def find_friends_count(self)->list:
+     """List the friends_count of the account behind every tweet."""
+     friends = []
+     for tweet in self.tweets_list:
+         friends.append(tweet['user']['friends_count'])
+     return friends
+
+ def is_sensitive(self)->list:
+     """
+     Report the 'possibly_sensitive' flag of every tweet.
+
+     Returns
+     -------
+     list with one entry per tweet, in input order: the tweet's
+     'possibly_sensitive' value when the key is present, otherwise None.
+     """
+     flags = []
+     for tweet in self.tweets_list:
+         # dict.get yields None for a missing key, matching an
+         # explicit membership test followed by a lookup
+         flag = tweet.get('possibly_sensitive')
+         flags.append(flag)
+     return flags
+
+ def find_favorite_count(self)->list:
+     """
+     Return, per tweet, the favorite_count of the status it retweets;
+     tweets without a 'retweeted_status' are given 0.
+     """
+     # NOTE(review): the tweet's own top-level favorite_count is never
+     # read here - confirm that is intended.
+     return [
+         tweet['retweeted_status']['favorite_count']
+         if 'retweeted_status' in tweet else 0
+         for tweet in self.tweets_list
+     ]
+
+ def find_retweet_count(self)->list:
+     """
+     Return, per tweet, the retweet_count of the status it retweets;
+     tweets without a 'retweeted_status' are given 0.
+     """
+     def count_for(tweet):
+         # guard clause: only retweets carry the nested status
+         if 'retweeted_status' not in tweet:
+             return 0
+         return tweet['retweeted_status']['retweet_count']
+
+     return [count_for(tweet) for tweet in self.tweets_list]
+
+ def find_hashtags(self)->list:
+     """List the entities['hashtags'] value of every tweet, in input order."""
+     tags = []
+     for tweet in self.tweets_list:
+         tags.append(tweet['entities']['hashtags'])
+     return tags
+
+ def find_mentions(self)->list:
+     """List the entities['user_mentions'] value of every tweet, in input order."""
+     mentioned = []
+     for tweet in self.tweets_list:
+         mentioned.append(tweet['entities']['user_mentions'])
+     return mentioned
+
+ def find_location(self)->list:
+     """List each tweet's user location; None where user or location is absent."""
+     locations = []
+     for tweet in self.tweets_list:
+         user = tweet.get('user', {})
+         locations.append(user.get('location', None))
+     return locations
+
+ def find_lang(self) -> list:
+     """List the 'lang' code of every tweet, in input order."""
+     languages = []
+     for tweet in self.tweets_list:
+         languages.append(tweet['lang'])
+     return languages
+
+ # authors are identified the way get_tweet_df fills 'original_author'
+ def find_authors(self) -> list:
+     """
+     Return the screen name of the account that posted each tweet.
+     """
+     # replaces a placeholder that returned the integers 0..21999
+     # regardless of the tweets held by the extractor
+     authors = [tweet['user']['screen_name'] for tweet in self.tweets_list]
+     return authors
+
+ def get_tweet_df(self, save: bool=False, save_as : str = 'processed_tweet_data', as_csv : bool = False) -> pd.DataFrame:
+     """
+     Build a DataFrame with one row per tweet from the extractor methods.
+
+     Parameters
+     ----------
+     save : bool - also write the frame to disk when True
+     save_as : str - file name, without extension, used under 'data/'
+     as_csv : bool - save as CSV when True, as indented JSON otherwise
+
+     Returns
+     -------
+     the assembled DataFrame
+     """
+     # extraction runs in the same order as the columns listed below
+     created = self.find_created_time()
+     origin = self.find_source()
+     full_text = self.find_full_text()
+     polarity, subjectivity = self.find_sentiments(full_text)
+     language = self.find_lang()
+     favorites = self.find_favorite_count()
+     statuses = self.find_statuses_count()
+     retweets = self.find_retweet_count()
+     screen_names = self.find_screen_name()
+     authors = self.find_screen_name()
+     followers = self.find_followers_count()
+     friends = self.find_friends_count()
+     sensitivity = self.is_sensitive()
+     hashtags = self.find_hashtags()
+     mentions = self.find_mentions()
+     locations = self.find_location()
+
+     # key order here fixes the column order of the resulting frame
+     sel_data = {
+         'created_at': created,
+         'source': origin,
+         'original_text': full_text,
+         'polarity': polarity,
+         'subjectivity': subjectivity,
+         'lang': language,
+         'favorite_count': favorites,
+         'status_count': statuses,
+         'retweet_count': retweets,
+         'screen_name': screen_names,
+         'original_author': authors,
+         'followers_count': followers,
+         'friends_count': friends,
+         'possibly_sensitive': sensitivity,
+         'hashtags': hashtags,
+         'user_mentions': mentions,
+         'place': locations,
+     }
+     final_dataframe = pd.DataFrame(data=sel_data)
+
+     if not save:
+         return final_dataframe
+
+     # both output formats land in the data/ directory
+     if as_csv:
+         data_path = 'data/' + save_as + '.csv'
+         final_dataframe.to_csv(data_path, index=False)
+     else:
+         data_path = 'data/' + save_as + '.json'
+         final_dataframe.to_json(data_path, indent=4)
+     # the confirmation message is identical for either format
+     print(f'File {save_as} successfully saved as {data_path}')
+     return final_dataframe
+
+
+if __name__ == "__main__":
+    # full wish-list of columns; kept for reference, not used below
+    columns = ['created_at', 'source', 'original_text','clean_text', 'sentiment','polarity','subjectivity', 'lang', 'favorite_count', 'retweet_count',
+               'original_author', 'screen_count', 'followers_count','friends_count','possibly_sensitive', 'hashtags', 'user_mentions', 'place', 'place_coord_boundaries']
+
+    # global data set: load the raw tweets and report how many were read
+    _, global_tweet_list = read_json(global_data)
+    print(f"Total number of data: {_}")
+
+    # extract the frame and save it as data/processed_global_tweet_data.json
+    global_tweet = TweetDfExtractor(global_tweet_list)
+    global_tweet_df = global_tweet.get_tweet_df(save= True, save_as='processed_global_tweet_data')
+    print(global_tweet_df)
+
+    # the same steps for the african data set, currently disabled:
+    # _, african_tweet_list = read_json(african_data)
+    # print(f"Total number of data: {_}")
+    # african_tweet = TweetDfExtractor(african_tweet_list)
+    # african_tweet_df = african_tweet.get_tweet_df(save = True, save_as='processed_african_tweet_data')
+    # print(african_tweet_df)
+
+    # TODO : use all defined functions to generate a dataframe with the specified columns above
diff --git a/fix_clean_tweets_dataframe.py b/fix_clean_tweets_dataframe.py
deleted file mode 100644
index 7b45a35..0000000
--- a/fix_clean_tweets_dataframe.py
+++ /dev/null
@@ -1,58 +0,0 @@
-class Clean_Tweets:
- """
- The PEP8 Standard AMAZING!!!
- """
- def __init__(self, df:pd.DataFrame):
- self.df = df
- print('Automation in Action...!!!')
-
- def drop_unwanted_column(self, df:pd.DataFrame)->pd.DataFrame:
- """
- remove rows that has column names. This error originated from
- the data collection stage.
- """
- unwanted_rows = df[df['retweet_count'] == 'retweet_count' ].index
- df.drop(unwanted_rows , inplace=True)
- df = df[df['polarity'] != 'polarity']
-
- return df
- def drop_duplicate(self, df:pd.DataFrame)->pd.DataFrame:
- """
- drop duplicate rows
- """
-
- ---
-
- return df
- def convert_to_datetime(self, df:pd.DataFrame)->pd.DataFrame:
- """
- convert column to datetime
- """
- ----
-
- ----
-
- df = df[df['created_at'] >= '2020-12-31' ]
-
- return df
-
- def convert_to_numbers(self, df:pd.DataFrame)->pd.DataFrame:
- """
- convert columns like polarity, subjectivity, retweet_count
- favorite_count etc to numbers
- """
- df['polarity'] = pd.----
-
- ----
- ----
-
- return df
-
- def remove_non_english_tweets(self, df:pd.DataFrame)->pd.DataFrame:
- """
- remove non english tweets from lang
- """
-
- df = ----
-
- return df
\ No newline at end of file
diff --git a/fix_extract_dataframe.py b/fix_extract_dataframe.py
deleted file mode 100644
index 3bd792d..0000000
--- a/fix_extract_dataframe.py
+++ /dev/null
@@ -1,137 +0,0 @@
-import json
-import pandas as pd
-from textblob import TextBlob
-
-
-def read_json(json_file: str)->list:
- """
- json file reader to open and read json files into a list
- Args:
- -----
- json_file: str - path of a json file
-
- Returns
- -------
- length of the json file and a list of json
- """
-
- tweets_data = []
- for tweets in open(json_file,'r'):
- tweets_data.append(json.loads(tweets))
-
-
- return len(tweets_data), tweets_data
-
-class TweetDfExtractor:
- """
- this function will parse tweets json into a pandas dataframe
-
- Return
- ------
- dataframe
- """
- def __init__(self, tweets_list):
-
- self.tweets_list = tweets_list
-
- # an example function
- def find_statuses_count(self)->list:
- statuses_count
-
- def find_full_text(self)->list:
- text =
-
-
- def find_sentiments(self, text)->list:
-
- return polarity, self.subjectivity
-
- def find_created_time(self)->list:
-
- return created_at
-
- def find_source(self)->list:
- source =
-
- return source
-
- def find_screen_name(self)->list:
- screen_name =
-
- def find_followers_count(self)->list:
- followers_count =
-
- def find_friends_count(self)->list:
- friends_count =
-
- def is_sensitive(self)->list:
- try:
- is_sensitive = [x['possibly_sensitive'] for x in self.tweets_list]
- except KeyError:
- is_sensitive = None
-
- return is_sensitive
-
- def find_favourite_count(self)->list:
-
-
- def find_retweet_count(self)->list:
- retweet_count =
-
- def find_hashtags(self)->list:
- hashtags =
-
- def find_mentions(self)->list:
- mentions =
-
-
- def find_location(self)->list:
- try:
- location = self.tweets_list['user']['location']
- except TypeError:
- location = ''
-
- return location
-
-
-
-
- def get_tweet_df(self, save=False)->pd.DataFrame:
- """required column to be generated you should be creative and add more features"""
-
- columns = ['created_at', 'source', 'original_text','polarity','subjectivity', 'lang', 'favorite_count', 'retweet_count',
- 'original_author', 'followers_count','friends_count','possibly_sensitive', 'hashtags', 'user_mentions', 'place']
-
- created_at = self.find_created_time()
- source = self.find_source()
- text = self.find_full_text()
- polarity, subjectivity = self.find_sentiments(text)
- lang = self.find_lang()
- fav_count = self.find_favourite_count()
- retweet_count = self.find_retweet_count()
- screen_name = self.find_screen_name()
- follower_count = self.find_followers_count()
- friends_count = self.find_friends_count()
- sensitivity = self.is_sensitive()
- hashtags = self.find_hashtags()
- mentions = self.find_mentions()
- location = self.find_location()
- data = zip(created_at, source, text, polarity, subjectivity, lang, fav_count, retweet_count, screen_name, follower_count, friends_count, sensitivity, hashtags, mentions, location)
- df = pd.DataFrame(data=data, columns=columns)
-
- if save:
- df.to_csv('processed_tweet_data.csv', index=False)
- print('File Successfully Saved.!!!')
-
- return df
-
-
-if __name__ == "__main__":
- # required column to be generated you should be creative and add more features
- columns = ['created_at', 'source', 'original_text','clean_text', 'sentiment','polarity','subjectivity', 'lang', 'favorite_count', 'retweet_count',
- 'original_author', 'screen_count', 'followers_count','friends_count','possibly_sensitive', 'hashtags', 'user_mentions', 'place', 'place_coord_boundaries']
- _, tweet_list = read_json("../covid19.json")
- tweet = TweetDfExtractor(tweet_list)
- tweet_df = tweet.get_tweet_df()
-
- # use all defined functions to generate a dataframe with the specified columns above
\ No newline at end of file
diff --git a/notebooks/EDA.ipynb b/notebooks/EDA.ipynb
new file mode 100644
index 0000000..4435bdc
--- /dev/null
+++ b/notebooks/EDA.ipynb
@@ -0,0 +1,1623 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "# imports\n",
+ "import pandas as pd\n",
+ "import sys\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# import custom libraries and scripts\n",
+ "# sys.path.append(os.path.abspath(os.path.join(\"../..\")))\n",
+ "sys.path.append(\".\")\n",
+ "sys.path.append(\"..\")\n",
+ "\n",
+ "from defaults import *\n",
+ "from extract_dataframe import read_json\n",
+ "from extract_dataframe import TweetDfExtractor\n",
+ "from clean_tweets_dataframe import Clean_Tweets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# read processed data set\n",
+ "tweets_df = pd.read_csv('../data/clean_data.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " created_at \n",
+ " source \n",
+ " original_text \n",
+ " polarity \n",
+ " subjectivity \n",
+ " lang \n",
+ " favorite_count \n",
+ " status_count \n",
+ " retweet_count \n",
+ " screen_name \n",
+ " original_author \n",
+ " followers_count \n",
+ " friends_count \n",
+ " possibly_sensitive \n",
+ " hashtags \n",
+ " user_mentions \n",
+ " place \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2022-08-07 22:31:20+00:00 \n",
+ " Twitter for Android \n",
+ " RT @i_ameztoy: Extra random image (I):\\n\\nLets... \n",
+ " -1.250000e-01 \n",
+ " 0.190625 \n",
+ " en \n",
+ " 4 \n",
+ " 8097 \n",
+ " 2 \n",
+ " i_ameztoy \n",
+ " i_ameztoy \n",
+ " 20497 \n",
+ " 2621 \n",
+ " unknown \n",
+ " [{'text': 'City', 'indices': [132, 137]}] \n",
+ " [{'screen_name': 'i_ameztoy', 'name': 'Iban Am... \n",
+ " unknown \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2022-08-07 22:31:16+00:00 \n",
+ " Twitter for Android \n",
+ " RT @IndoPac_Info: #China's media explains the ... \n",
+ " -1.000000e-01 \n",
+ " 0.100000 \n",
+ " en \n",
+ " 691 \n",
+ " 5831 \n",
+ " 201 \n",
+ " ZIisq \n",
+ " ZIisq \n",
+ " 65 \n",
+ " 272 \n",
+ " unknown \n",
+ " [{'text': 'China', 'indices': [18, 24]}, {'tex... \n",
+ " [{'screen_name': 'IndoPac_Info', 'name': 'Indo... \n",
+ " unknown \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2022-08-07 22:31:07+00:00 \n",
+ " Twitter for Android \n",
+ " China even cut off communication, they don't a... \n",
+ " 0.000000e+00 \n",
+ " 0.000000 \n",
+ " en \n",
+ " 0 \n",
+ " 1627 \n",
+ " 0 \n",
+ " Fin21Free \n",
+ " Fin21Free \n",
+ " 85 \n",
+ " 392 \n",
+ " unknown \n",
+ " [{'text': 'XiJinping', 'indices': [127, 137]}] \n",
+ " [{'screen_name': 'ZelenskyyUa', 'name': 'Волод... \n",
+ " Netherlands \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2022-08-07 22:31:06+00:00 \n",
+ " Twitter for Android \n",
+ " Putin to #XiJinping : I told you my friend, Ta... \n",
+ " 1.000000e-01 \n",
+ " 0.350000 \n",
+ " en \n",
+ " 0 \n",
+ " 1627 \n",
+ " 0 \n",
+ " Fin21Free \n",
+ " Fin21Free \n",
+ " 85 \n",
+ " 392 \n",
+ " unknown \n",
+ " [{'text': 'XiJinping', 'indices': [9, 19]}] \n",
+ " [] \n",
+ " Netherlands \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 2022-08-07 22:31:04+00:00 \n",
+ " Twitter for iPhone \n",
+ " RT @ChinaUncensored: I’m sorry, I thought Taiw... \n",
+ " -6.938894e-18 \n",
+ " 0.556250 \n",
+ " en \n",
+ " 1521 \n",
+ " 18958 \n",
+ " 381 \n",
+ " VizziniDolores \n",
+ " VizziniDolores \n",
+ " 910 \n",
+ " 2608 \n",
+ " unknown \n",
+ " [] \n",
+ " [{'screen_name': 'ChinaUncensored', 'name': 'C... \n",
+ " Ayent, Schweiz \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 2022-08-07 22:31:02+00:00 \n",
+ " Twitter for Android \n",
+ " RT @benedictrogers: We must not let this happe... \n",
+ " 2.000000e-01 \n",
+ " 0.500000 \n",
+ " en \n",
+ " 116 \n",
+ " 48483 \n",
+ " 36 \n",
+ " GraceCh15554845 \n",
+ " GraceCh15554845 \n",
+ " 207 \n",
+ " 54 \n",
+ " 0.0 \n",
+ " [{'text': 'Taiwan', 'indices': [84, 91]}] \n",
+ " [{'screen_name': 'benedictrogers', 'name': 'Be... \n",
+ " Melbourne, Victoria \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 2022-08-07 22:30:59+00:00 \n",
+ " Twitter for Android \n",
+ " RT @TGTM_Official: What kind of country can co... \n",
+ " 1.583333e-01 \n",
+ " 0.800000 \n",
+ " en \n",
+ " 1106 \n",
+ " 4173 \n",
+ " 411 \n",
+ " Philipkuma1 \n",
+ " Philipkuma1 \n",
+ " 12 \n",
+ " 264 \n",
+ " unknown \n",
+ " [{'text': 'Taiwan', 'indices': [101, 108]}, {'... \n",
+ " [{'screen_name': 'TGTM_Official', 'name': 'The... \n",
+ " unknown \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 2022-08-07 22:30:59+00:00 \n",
+ " Twitter for Android \n",
+ " RT @ChinaInfo777: #PinkFloyd singer Roger Wate... \n",
+ " 0.000000e+00 \n",
+ " 0.000000 \n",
+ " en \n",
+ " 10 \n",
+ " 24102 \n",
+ " 5 \n",
+ " nhohn2011 \n",
+ " nhohn2011 \n",
+ " 870 \n",
+ " 508 \n",
+ " unknown \n",
+ " [{'text': 'PinkFloyd', 'indices': [18, 28]}, {... \n",
+ " [{'screen_name': 'ChinaInfo777', 'name': 'Chin... \n",
+ " Florida, USA \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 2022-08-07 22:30:50+00:00 \n",
+ " Twitter for Android \n",
+ " RT @AmbQinGang: China's SC&FM Wang Yi elab... \n",
+ " 0.000000e+00 \n",
+ " 0.000000 \n",
+ " en \n",
+ " 1221 \n",
+ " 630 \n",
+ " 239 \n",
+ " ClaudioColomaRI \n",
+ " ClaudioColomaRI \n",
+ " 127 \n",
+ " 263 \n",
+ " unknown \n",
+ " [{'text': 'Taiwan', 'indices': [80, 87]}] \n",
+ " [{'screen_name': 'AmbQinGang', 'name': 'Qin Ga... \n",
+ " El mundo periférico \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 2022-08-07 22:30:45+00:00 \n",
+ " Twitter Web App \n",
+ " RT @CGMeifangZhang: Chinese ambassador to the ... \n",
+ " 2.000000e-01 \n",
+ " 0.375000 \n",
+ " en \n",
+ " 49 \n",
+ " 107188 \n",
+ " 25 \n",
+ " jmarzola1 \n",
+ " jmarzola1 \n",
+ " 213 \n",
+ " 877 \n",
+ " unknown \n",
+ " [{'text': 'USA', 'indices': [66, 70]}, {'text'... \n",
+ " [{'screen_name': 'CGMeifangZhang', 'name': 'Zh... \n",
+ " unknown \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " created_at source \\\n",
+ "0 2022-08-07 22:31:20+00:00 Twitter for Android \n",
+ "1 2022-08-07 22:31:16+00:00 Twitter for Android \n",
+ "2 2022-08-07 22:31:07+00:00 Twitter for Android \n",
+ "3 2022-08-07 22:31:06+00:00 Twitter for Android \n",
+ "4 2022-08-07 22:31:04+00:00 Twitter for iPhone \n",
+ "5 2022-08-07 22:31:02+00:00 Twitter for Android \n",
+ "6 2022-08-07 22:30:59+00:00 Twitter for Android \n",
+ "7 2022-08-07 22:30:59+00:00 Twitter for Android \n",
+ "8 2022-08-07 22:30:50+00:00 Twitter for Android \n",
+ "9 2022-08-07 22:30:45+00:00 Twitter Web App \n",
+ "\n",
+ " original_text polarity \\\n",
+ "0 RT @i_ameztoy: Extra random image (I):\\n\\nLets... -1.250000e-01 \n",
+ "1 RT @IndoPac_Info: #China's media explains the ... -1.000000e-01 \n",
+ "2 China even cut off communication, they don't a... 0.000000e+00 \n",
+ "3 Putin to #XiJinping : I told you my friend, Ta... 1.000000e-01 \n",
+ "4 RT @ChinaUncensored: I’m sorry, I thought Taiw... -6.938894e-18 \n",
+ "5 RT @benedictrogers: We must not let this happe... 2.000000e-01 \n",
+ "6 RT @TGTM_Official: What kind of country can co... 1.583333e-01 \n",
+ "7 RT @ChinaInfo777: #PinkFloyd singer Roger Wate... 0.000000e+00 \n",
+ "8 RT @AmbQinGang: China's SC&FM Wang Yi elab... 0.000000e+00 \n",
+ "9 RT @CGMeifangZhang: Chinese ambassador to the ... 2.000000e-01 \n",
+ "\n",
+ " subjectivity lang favorite_count status_count retweet_count \\\n",
+ "0 0.190625 en 4 8097 2 \n",
+ "1 0.100000 en 691 5831 201 \n",
+ "2 0.000000 en 0 1627 0 \n",
+ "3 0.350000 en 0 1627 0 \n",
+ "4 0.556250 en 1521 18958 381 \n",
+ "5 0.500000 en 116 48483 36 \n",
+ "6 0.800000 en 1106 4173 411 \n",
+ "7 0.000000 en 10 24102 5 \n",
+ "8 0.000000 en 1221 630 239 \n",
+ "9 0.375000 en 49 107188 25 \n",
+ "\n",
+ " screen_name original_author followers_count friends_count \\\n",
+ "0 i_ameztoy i_ameztoy 20497 2621 \n",
+ "1 ZIisq ZIisq 65 272 \n",
+ "2 Fin21Free Fin21Free 85 392 \n",
+ "3 Fin21Free Fin21Free 85 392 \n",
+ "4 VizziniDolores VizziniDolores 910 2608 \n",
+ "5 GraceCh15554845 GraceCh15554845 207 54 \n",
+ "6 Philipkuma1 Philipkuma1 12 264 \n",
+ "7 nhohn2011 nhohn2011 870 508 \n",
+ "8 ClaudioColomaRI ClaudioColomaRI 127 263 \n",
+ "9 jmarzola1 jmarzola1 213 877 \n",
+ "\n",
+ " possibly_sensitive hashtags \\\n",
+ "0 unknown [{'text': 'City', 'indices': [132, 137]}] \n",
+ "1 unknown [{'text': 'China', 'indices': [18, 24]}, {'tex... \n",
+ "2 unknown [{'text': 'XiJinping', 'indices': [127, 137]}] \n",
+ "3 unknown [{'text': 'XiJinping', 'indices': [9, 19]}] \n",
+ "4 unknown [] \n",
+ "5 0.0 [{'text': 'Taiwan', 'indices': [84, 91]}] \n",
+ "6 unknown [{'text': 'Taiwan', 'indices': [101, 108]}, {'... \n",
+ "7 unknown [{'text': 'PinkFloyd', 'indices': [18, 28]}, {... \n",
+ "8 unknown [{'text': 'Taiwan', 'indices': [80, 87]}] \n",
+ "9 unknown [{'text': 'USA', 'indices': [66, 70]}, {'text'... \n",
+ "\n",
+ " user_mentions place \n",
+ "0 [{'screen_name': 'i_ameztoy', 'name': 'Iban Am... unknown \n",
+ "1 [{'screen_name': 'IndoPac_Info', 'name': 'Indo... unknown \n",
+ "2 [{'screen_name': 'ZelenskyyUa', 'name': 'Волод... Netherlands \n",
+ "3 [] Netherlands \n",
+ "4 [{'screen_name': 'ChinaUncensored', 'name': 'C... Ayent, Schweiz \n",
+ "5 [{'screen_name': 'benedictrogers', 'name': 'Be... Melbourne, Victoria \n",
+ "6 [{'screen_name': 'TGTM_Official', 'name': 'The... unknown \n",
+ "7 [{'screen_name': 'ChinaInfo777', 'name': 'Chin... Florida, USA \n",
+ "8 [{'screen_name': 'AmbQinGang', 'name': 'Qin Ga... El mundo periférico \n",
+ "9 [{'screen_name': 'CGMeifangZhang', 'name': 'Zh... unknown "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tweets_df.head(10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Automation in Action...!!!\n"
+ ]
+ }
+ ],
+ "source": [
+ "cleaner = Clean_Tweets(tweets_df.copy())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Making explorations"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(7440, 17)"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# shape of dataframe\n",
+ "tweets_df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 7440 entries, 0 to 7439\n",
+ "Data columns (total 17 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 created_at 7440 non-null object \n",
+ " 1 source 7440 non-null object \n",
+ " 2 original_text 7440 non-null object \n",
+ " 3 polarity 7440 non-null float64\n",
+ " 4 subjectivity 7440 non-null float64\n",
+ " 5 lang 7440 non-null object \n",
+ " 6 favorite_count 7440 non-null int64 \n",
+ " 7 status_count 7440 non-null int64 \n",
+ " 8 retweet_count 7440 non-null int64 \n",
+ " 9 screen_name 7440 non-null object \n",
+ " 10 original_author 7440 non-null object \n",
+ " 11 followers_count 7440 non-null int64 \n",
+ " 12 friends_count 7440 non-null int64 \n",
+ " 13 possibly_sensitive 7440 non-null object \n",
+ " 14 hashtags 7440 non-null object \n",
+ " 15 user_mentions 7440 non-null object \n",
+ " 16 place 7440 non-null object \n",
+ "dtypes: float64(2), int64(5), object(10)\n",
+ "memory usage: 988.2+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "tweets_df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "created_at 0\n",
+ "source 0\n",
+ "original_text 0\n",
+ "polarity 0\n",
+ "subjectivity 0\n",
+ "lang 0\n",
+ "favorite_count 0\n",
+ "status_count 0\n",
+ "retweet_count 0\n",
+ "screen_name 0\n",
+ "original_author 0\n",
+ "followers_count 0\n",
+ "friends_count 0\n",
+ "possibly_sensitive 0\n",
+ "hashtags 0\n",
+ "user_mentions 0\n",
+ "place 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tweets_df.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
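+ "No NaN values in any column. Note that some columns (e.g. `possibly_sensitive`, `place`) contain the placeholder string 'unknown', which `isna()` does not count."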
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " polarity \n",
+ " subjectivity \n",
+ " favorite_count \n",
+ " status_count \n",
+ " retweet_count \n",
+ " followers_count \n",
+ " friends_count \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " count \n",
+ " 7440.000000 \n",
+ " 7440.000000 \n",
+ " 7440.000000 \n",
+ " 7.440000e+03 \n",
+ " 7440.000000 \n",
+ " 7.440000e+03 \n",
+ " 7440.000000 \n",
+ " \n",
+ " \n",
+ " mean \n",
+ " 0.056783 \n",
+ " 0.295638 \n",
+ " 203.351210 \n",
+ " 4.900565e+04 \n",
+ " 38.713172 \n",
+ " 4.107761e+04 \n",
+ " 1715.558871 \n",
+ " \n",
+ " \n",
+ " std \n",
+ " 0.230156 \n",
+ " 0.287805 \n",
+ " 1655.690148 \n",
+ " 1.432954e+05 \n",
+ " 326.757025 \n",
+ " 4.910108e+05 \n",
+ " 5305.897528 \n",
+ " \n",
+ " \n",
+ " min \n",
+ " -1.000000 \n",
+ " 0.000000 \n",
+ " 0.000000 \n",
+ " 1.000000e+00 \n",
+ " 0.000000 \n",
+ " 0.000000e+00 \n",
+ " 0.000000 \n",
+ " \n",
+ " \n",
+ " 25% \n",
+ " 0.000000 \n",
+ " 0.000000 \n",
+ " 0.000000 \n",
+ " 1.549250e+03 \n",
+ " 0.000000 \n",
+ " 7.275000e+01 \n",
+ " 106.000000 \n",
+ " \n",
+ " \n",
+ " 50% \n",
+ " 0.000000 \n",
+ " 0.250000 \n",
+ " 0.000000 \n",
+ " 7.904000e+03 \n",
+ " 0.000000 \n",
+ " 3.670000e+02 \n",
+ " 440.000000 \n",
+ " \n",
+ " \n",
+ " 75% \n",
+ " 0.136364 \n",
+ " 0.500000 \n",
+ " 4.000000 \n",
+ " 3.510900e+04 \n",
+ " 2.000000 \n",
+ " 1.833000e+03 \n",
+ " 1505.000000 \n",
+ " \n",
+ " \n",
+ " max \n",
+ " 1.000000 \n",
+ " 1.000000 \n",
+ " 65170.000000 \n",
+ " 4.108317e+06 \n",
+ " 17409.000000 \n",
+ " 1.449852e+07 \n",
+ " 208360.000000 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " polarity subjectivity favorite_count status_count retweet_count \\\n",
+ "count 7440.000000 7440.000000 7440.000000 7.440000e+03 7440.000000 \n",
+ "mean 0.056783 0.295638 203.351210 4.900565e+04 38.713172 \n",
+ "std 0.230156 0.287805 1655.690148 1.432954e+05 326.757025 \n",
+ "min -1.000000 0.000000 0.000000 1.000000e+00 0.000000 \n",
+ "25% 0.000000 0.000000 0.000000 1.549250e+03 0.000000 \n",
+ "50% 0.000000 0.250000 0.000000 7.904000e+03 0.000000 \n",
+ "75% 0.136364 0.500000 4.000000 3.510900e+04 2.000000 \n",
+ "max 1.000000 1.000000 65170.000000 4.108317e+06 17409.000000 \n",
+ "\n",
+ " followers_count friends_count \n",
+ "count 7.440000e+03 7440.000000 \n",
+ "mean 4.107761e+04 1715.558871 \n",
+ "std 4.910108e+05 5305.897528 \n",
+ "min 0.000000e+00 0.000000 \n",
+ "25% 7.275000e+01 106.000000 \n",
+ "50% 3.670000e+02 440.000000 \n",
+ "75% 1.833000e+03 1505.000000 \n",
+ "max 1.449852e+07 208360.000000 "
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# stats about numerical columns\n",
+ "tweets_df.describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Univariate"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 i_ameztoy\n",
+ "1 ZIisq\n",
+ "2 Fin21Free\n",
+ "3 Fin21Free\n",
+ "4 VizziniDolores\n",
+ " ... \n",
+ "7435 PelosiLibArmy\n",
+ "7436 SonnyMullins13\n",
+ "7437 TECO_Toronto\n",
+ "7438 samserjio93\n",
+ "7439 ZeitounRimal\n",
+ "Name: original_author, Length: 7440, dtype: object"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tweets_df.original_author"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "TrumpThuan 116\n",
+ "AarianNewsX 57\n",
+ "CGMeifangZhang 43\n",
+ "SoizaDavid 42\n",
+ "doos94619918 36\n",
+ " ... \n",
+ "AoxiPRNew 1\n",
+ "Eloy_Sauvan 1\n",
+ "carnivorecabbie 1\n",
+ "FarmSailing 1\n",
+ "ZeitounRimal 1\n",
+ "Name: original_author, Length: 4624, dtype: int64"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tweets_df.original_author.value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAFhCAYAAACf9rbcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAA4oklEQVR4nO3dd5ikVZn+8e/NjOQMIypIUDFgAHFAEFGCAUUREUFARAzsGlHXhKJgWDMqhlVQQFwBBURBwUCSoAjMwEhGEPEHCIIrWcn3749zqqam6e5pZrpOtVP357r66qq3qvo801Ndz/ue8BzZJiIiAmCxQQcQERFTR5JCRER0JSlERERXkkJERHQlKURERFeSQkREdCUpxL8tSatImlO/bpJ0Q8/9xRfg5z1V0jmS7pX0/hGPbSPpSklXS/rwKK/9Zm33Mkn/6oljx4X5N47Szkcm8+dFjKSsU4hFgaT9gbtsf2khfsajgbWA7YFbOz9L0jTgj8CLgeuB84FdbF82ys9YG/i57WcsaBzzifEu28s+wtdMs/1gP+KJRU+uFGKRImlrSRdKuljSoZKWqMevlfSFevw8SU8a+VrbN9s+H7h/xEMbA1fbvsb2fcAPgVdNIJYTJT2r3r5Q0sfr7U9Kemu9/QFJ50u6SNInel77+hrnHEkHSZom6XPAUvXYEWM9rx6/S9IBkv4AbCrpc/Uq5iJJC5w4Y9GXpBCLkiWB7wE7234mMB14W8/jt9fj3wC++gh+7urAdT33r6/H5ucsYHNJKwAPAJvV45sDZ0p6CbAuJelsADxH0gskPQ3YGdjM9gbAg8Butj8M/Mv2BrZ3G+t5tY1lgHNtrw9cDrwaeLrtZwGffgT/9hgySQqxKJkG/Nn2H+v9w4EX9Dx+VM/3TRvEc1ZtfzPgRGBZSUsD69i+EnhJ/boQuAB4KiVJbA08Bzhf0px6/wmj/Pzxnvcg8ON6+3bgHuAQSTsA/5zUf2UsUqYPOoCIhjzG7fm5AXh8z/016rH5OR+YCVwDnAysCrwVmF0fF/BZ2wf1vkjSu4DDbe8zn5+vcZ53T2ccwfYDkjamJI0dgXcCW00g/hhCuVKIRcmDwNo94wW7A2f0PL5zz/dzHsHPPR9YV9I6dVbT64AT5veiOv5wHfDa2t5ZwPuBM+tTfgW8SdKyAJJWr4PdpwI71ttIWlnSWvU190t6VL093vO66s9fwfZJwHuB9R/Bvz2GTK4UYlFyD7AncIyk6ZQP82/3PL6SpIuAe4FdRr5Y0mOAWcDywEOS3gOsZ/sOSe+kfIhPAw61fekEYzoL2Nr2vySdRbnKOAvA9q/ruMA5kgDuAl5v+zJJ+wK/lrQYZeD7HcBfgIOBiyRdUMcVxnper+WA4yUtSbm6eN8EY48hlCmpMRQkXQvMtP33QccSMZWl+ygiIrpypRAREV25UoiIiK4khYiI6Orb7CNJhwKvAG7u1IGR9EXglcB9wJ+APW3fVh/bB3gzZVrhu23/an5trLrqql577bX7En9ExKJq9uzZf7c9Y7TH+jamIOkFlCl23+9JCi8BTquLaT4PYPtDktajrDLdGHgccArw5PkV8Zo5c6ZnzZrVl/gjIhZVkmbbnjnaY33rPrJ9JvCPEcd+bfuBevf3lDnbUIqL/dD2vbb/DFxNSRAREdHQIMcU3gT8ot5e0IJjERExiQaSFCR9lFI18ogFeO1ekmZJmnXLLbdMfnAREUOseVKQ9EbKAPRunjugMeGCY7YPtj3T9swZM0YdJ4mIiAXUNClI2gb4ILCd7d7yvScAr5O0hKR1KOWDz2sZW0RE9HdK6lHAFsCqkq4H9gP2AZYATq4FwH5v+z9tXyrpaOAySrfSO7J9YEREe//WZS4yJTUi4pEbyJTUiIj495OkEBERXYv+JjtHauFev+u/b/daRMQjlSuFiIjoSlKIiIiuJIWIiOhKUoiIiK4khYiI6EpSiIiIriSFiIjoSlKIiIiuJIWI
iOhKUoiIiK4khYiI6EpSiIiIriSFiIjoSlKIiIiuJIWIiOhKUoiIiK4khYiI6EpSiIiIriSFiIjoSlKIiIiuJIWIiOhKUoiIiK4khYiI6EpSiIiIrr4lBUmHSrpZ0iU9x1aWdLKkq+r3lepxSfqapKslXSRpw37FFRERY+vnlcL3gG1GHPswcKrtdYFT632AlwHr1q+9gG/1Ma6IiBhD35KC7TOBf4w4/Crg8Hr7cGD7nuPfd/F7YEVJj+1XbBERMbrWYwqr2b6x3r4JWK3eXh24rud519djDyNpL0mzJM265ZZb+hdpRMQQGthAs20DXoDXHWx7pu2ZM2bM6ENkERHDq3VS+FunW6h+v7kevwF4fM/z1qjHIiKiodZJ4QRgj3p7D+D4nuNvqLOQNgFu7+lmioiIRqb36wdLOgrYAlhV0vXAfsDngKMlvRn4C7BTffpJwMuBq4F/Anv2K66IiBhb35KC7V3GeGjrUZ5r4B39iiUiIiYmK5ojIqIrSSEiIrqSFCIioitJISIiupIUIiKiK0khIiK6khQiIqIrSSEiIrqSFCIioitJISIiupIUIiKiK0khIiK6khQiIqIrSSEiIrqSFCIioitJISIiupIUIiKiK0khIiK6khQiIqIrSSEiIrqSFCIioitJISIiuuabFCS9VtJy9fa+ko6TtGH/Q4uIiNYmcqXwMdt3Sno+8CLgEOBb/Q0rIiIGYSJJ4cH6fVvgYNsnAov3L6SIiBiUiSSFGyQdBOwMnCRpiQm+LiIi/s1M5MN9J+BXwEtt3wasDHygn0FFRMRgTCQpHGT7ONtXAdi+Edh9YRqV9F5Jl0q6RNJRkpaUtI6kcyVdLelHktJFFRHR2ESSwtN770iaBjxnQRuUtDrwbmCm7WcA04DXAZ8HvmL7ScCtwJsXtI2IiFgwYyYFSftIuhN4lqQ7JN1Z798MHL+Q7U4HlpI0HVgauBHYCji2Pn44sP1CthEREY/QmEnB9mdtLwd80fbytperX6vY3mdBG7R9A/Al4P9RksHtwGzgNtsP1KddD6w+2usl7SVplqRZt9xyy4KGERERo5hI99FHJb1e0scAJD1e0sYL2qCklYBXAesAjwOWAbaZ6OttH2x7pu2ZM2bMWNAwIiJiFBNJCt8ENgV2rffvqscW1IuAP9u+xfb9wHHAZsCKtTsJYA3ghoVoIyIiFsBEksJzbb8DuAfA9q0s3OK1/wdsImlpSQK2Bi4DTgd2rM/Zg4Uft4iIiEdoIknh/jrjyACSZgAPLWiDts+lDChfAFxcYzgY+BDwPklXA6tQymlERERD0+f/FL4G/ARYTdJ/U87m912YRm3vB+w34vA1wAKPVURExMKbb1KwfYSk2ZRuHgHb276875FFRERzE61htCrwT9vfAP4uaZ0+xhQREQMykf0U9qP093fWJjwK+EE/g4qIiMGYyJXCq4HtgLsBbP8VWK6fQUVExGBMJCncZ9vMnX20TH9DioiIQZlIUji67qewoqS3AqcA3+lvWBERMQgTmX30JUkvBu4AngJ83PbJfY8sIiKam29SkPRm4Ezb2VgnImIRN5HFa2sCB0lam1LN9EzgLNtz+hhXREQMwHzHFGzvZ3srymY7Z1G24pzd78AiIqK9iXQf7UupYroscCHwfkpyiIiIRcxEuo92AB4ATgTOAM6xfW9fo4qIiIGYSPfRhpQ9EM4DXgxcLOnsfgcWERHtTaT76BnA5sALgZnAdaT7KCJikTSR7qPPUWYcfQ04v+6WFhERi6CJrGg+xfYXbP+ukxAk7d3nuCIiYgAmkhTeMMqxN05yHBERMQWM2X0kaRdgV2AdSSf0PLQc8I9+BxYREe2NN6bwO+BGygY7B/QcvxO4qJ9BRUTEYIyZFGz/BfgLsGm7cCIiYpAmuh1nREQMgSSFiIjoGjMpSDq1fv98u3AiImKQxhtofqyk5wHbSfohoN4HbV/Q18giIqK58ZLCx4GPAWsAXx7xmIGt+hVUREQM
xnizj44FjpX0MdufahhTREQMyET2aP6UpO2AF9RDv7H98/6GFRERgzDf2UeSPgvsDVxWv/aW9JmFaVTSipKOlXSFpMslbSppZUknS7qqfl9pYdqIiIhHbiJTUrcFXmz7UNuHAtsAr1jIdg8Efmn7qcD6wOXAh4FTba8LnFrvR0REQxNdp7Biz+0VFqZBSStQuqIOAbB9n+3bgFcBh9enHQ5svzDtRETEIzeR/RQ+C1wo6XTKtNQXsHBn8esAtwCHSVofmE3pnlrN9o31OTcBq432Ykl7AXsBrLnmmgsRRkREjDSR7TiPAjYBjgN+DGxq+0cL0eZ0YEPgW7afDdzNiCRj25Rpr6PFc7DtmbZnzpgxYyHCiIiIkSZypUA9gz9hvk+cmOuB622fW+8fS0kKf5P0WNs3SnoscPMktRcRERPUvPaR7ZuA6yQ9pR7amjKr6QRgj3psD+D41rFFRAy7CV0p9MG7gCMkLQ5cA+xJSVBHS3ozpWT3TgOKLSJiaI2bFCRNAy6tU0cnje05wMxRHtp6MtuZMo7U/J8zP7uOOsQSETGpxu0+sv0gcKWkTPOJiBgCE+k+Wgm4VNJ5lJlCANjerm9RRUTEQEwkKXys71FERMSUMJGCeGdIWgtY1/YpkpYGpvU/tIiIaG0iBfHeSllLcFA9tDrw0z7GFBERAzKRdQrvADYD7gCwfRXw6H4GFRERgzGRpHCv7fs6dyRNZ4wSFBER8e9tIknhDEkfAZaS9GLgGOBn/Q0rIiIGYSJJ4cOUqqYXA/8BnATs28+gIiJiMCYy++ghSYcD51K6ja6sVUwjImIRM9+kIGlb4NvAnyj7Kawj6T9s/6LfwUVERFsTWbx2ALCl7asBJD0ROBFIUoiIWMRMZEzhzk5CqK4B7uxTPBERMUBjXilI2qHenCXpJOBoypjCa4HzG8QWERGNjdd99Mqe238DXlhv3wIs1beIIiJiYMZMCrb3bBlIREQM3kRmH61D2Slt7d7np3R2RMSiZyKzj34KHEJZxfxQX6OJiIiBmkhSuMf21/oeSUREDNxEksKBkvYDfg3c2zlo+4K+RRUREQMxkaTwTGB3YCvmdh+53o+IiEXIRJLCa4En9JbPjoiIRdNEVjRfAqzY5zgiImIKmMiVworAFZLOZ94xhUxJjYhYxEwkKezX9yiijSO1cK/fNRXTIxZ1E9lP4YwWgURExOBNZEXznczdk3lx4FHA3baXX5iGJU0DZgE32H5FXTn9Q2AVYDawewa3F0ELe7UCuWKJ6KP5DjTbXs728jUJLAW8BvifSWh7b+DynvufB75i+0nArcCbJ6GNiIh4BCYy+6jLxU+Bly5Mo5LWALYFvlvvi7Lu4dj6lMOB7RemjYiIeOQm0n20Q8/dxYCZwD0L2e5XgQ8Cy9X7qwC32X6g3r8eWH0h24iIiEdoIrOPevdVeAC4FnjVgjYo6RXAzbZnS9piAV6/F7AXwJprrrmgYURExCgmMvtosvdV2AzYTtLLgSWB5YEDgRUlTa9XC2sAN4wRz8HAwQAzZ87MiGNExCQabzvOj4/zOtv+1II0aHsfYJ/axhbA+23vJukYYEfKDKQ9gOMX5OdHRMSCG2+g+e5RvqDMCvpQH2L5EPA+SVdTxhgO6UMbERExjvG24zygc1vScpQppHtSzuQPGOt1j4Tt3wC/qbevATaejJ8bERELZtwxBUkrA+8DdqNME93Q9q0tAouIiPbGG1P4IrADZVD3mbbvahZVREQMxHhXCv9FqYq6L/DRsr4MAFEGmheqzEXEwKTURsSYxhtTeESrnSMi4t9fPvgjIqIrSSEiIrqSFCIiomsitY8ioh8y4B1TUK4UIiKiK0khIiK6khQiIqIrSSEiIrqSFCIioitJISIiujIlNWKYZVpsjJArhYiI6MqVQkQMXq5YpoxcKURERFeSQkREdCUpREREV5JCRER0JSlERERXkkJERHRlSmpEBEydabEDjiNXChER0ZWkEBERXUkK
ERHR1TwpSHq8pNMlXSbpUkl71+MrSzpZ0lX1+0qtY4uIGHaDuFJ4APgv2+sBmwDvkLQe8GHgVNvrAqfW+xER0VDzpGD7RtsX1Nt3ApcDqwOvAg6vTzsc2L51bBERw26gYwqS1gaeDZwLrGb7xvrQTcBqY7xmL0mzJM265ZZb2gQaETEkBpYUJC0L/Bh4j+07eh+zbWDUiba2D7Y90/bMGTNmNIg0ImJ4DCQpSHoUJSEcYfu4evhvkh5bH38scPMgYouIGGaDmH0k4BDgcttf7nnoBGCPensP4PjWsUVEDLtBlLnYDNgduFjSnHrsI8DngKMlvRn4C7DTAGKLiBhqzZOC7bOBsYp7bN0yloiImFdWNEdERFeSQkREdCUpREREV5JCRER0JSlERERXkkJERHQlKURERFeSQkREdCUpREREV5JCRER0JSlERERXkkJERHQlKURERFeSQkREdCUpREREV5JCRER0JSlERERXkkJERHQlKURERFeSQkREdCUpREREV5JCRER0JSlERERXkkJERHQlKURERFeSQkREdCUpRERE15RLCpK2kXSlpKslfXjQ8UREDJMplRQkTQO+CbwMWA/YRdJ6g40qImJ4TKmkAGwMXG37Gtv3AT8EXjXgmCIihoZsDzqGLkk7AtvYfku9vzvwXNvv7HnOXsBe9e5TgCsXstlVgb8v5M9YWFMhBpgacUyFGGBqxDEVYoCpEcdUiAGmRhyTEcNatmeM9sD0hfzBzdk+GDh4sn6epFm2Z07Wz/t3jWGqxDEVYpgqcUyFGKZKHFMhhqkSR79jmGrdRzcAj++5v0Y9FhERDUy1pHA+sK6kdSQtDrwOOGHAMUVEDI0p1X1k+wFJ7wR+BUwDDrV9aZ+bnbSuqIUwFWKAqRHHVIgBpkYcUyEGmBpxTIUYYGrE0dcYptRAc0REDNZU6z6KiIgBSlKIiIiuJIWIiOgayqQgaXVJz5P0gs7XoGOKmCokrTPKsY0GEcugSdp7IscWJUM30Czp88DOwGXAg/WwbW/XoO2vAx+xfeeI408FvmH7Rf2OYSqR9L7xHrf95QYxrAy8E/grcAjwEWBT4HLgM7Zv7XcMNY5lgQ8Cr6Gsz7kP+BPwbdvfaxFDTywXAK+0fUO9/0LK+/OZjdrfcLzHbV/QIo4aywW2Nxxx7ELbz24VQ2tTakpqI9sDT7F97wDavgmYI+ljto+UtDSwP/BqygdCU5LuBEaeFdwOzAL+y/Y1fQ5hufr9KcBGzF2T8krgvD633fED4GLgOcDr6+3PAy8Gvke72ltHAD8BXgrsBCxDqf21r6Qn2/5IozgA/gP4qaRXAhsCnwVe3rD9A+r3JYGZwB8AAc+ivDc37XcAknYBdgXWkdS7Vmo54B/9bn+UeDYBvg48DVicMmX/btvLT3pjtofqC/gFsOwA238CcCJwJnA18Blg6QHF8inKB8BywPKUmlKdK6nfNIzjTGC5nvvLAWc2antO/S7ghtEeaxTHH0bcP79+Xwy4YgDvjU2BiyjJeUbr9msMxwHP7Ln/DODYRm2vBWwBnAO8sOdrQ2D6AH4Xs4AnARdSEsKewGf70dYwXin8k3K2firQvVqw/e5G7T9Uv0+n/OdebvufjdoeaTvb6/fcP1jSHNsfktTyzHQ1SndJx331WAuLSVqJkoiWlbS27WslrUI5I2vlbknPt322pO2oZ6O2H5KkFgFI+hnzXjkuTblyPEQSbtDFOsJTbF/cuWP7EklPa9Gw7b8Af6FelUhanrk9K8szgKsF21dLmmb7QeAwSRcC+0x2O8OYFE5gQKUzJH0M2AP4qO0fSVodOFDSW4C32b6scUj/lLQTcGy9vyNwT73dcrDp+8B5kn5S729P6bpp4bPAFfX2m4DvSjJlP49PNIoB4D9r2+sCl9ZYkDSDssdIC19q1M5EXSzpu5QuPoDdKFcvzdSqzJ+k/F08RLmiNOWKv6V/1tI/cyR9AbiRPk0UGrqB5kGSdCCwrx8+
0Pwy4Mu2m5wF9bT7BOBAytmQgd8D76UUIXyO7bMbxrIhsHm9e6btCxu2PY3yt/CApOnABpSupBtbxTCV1NlHN9q+p95fCljN9rWN41gSeBvQmR14JvCtTlyNYrgK2NT2QMtlS1oL+Bvl6vW9wArA/9i+etLbGrakUM/EPks5E1yyc9x268w/D0lLeDCD3wMjaXnbd9QZQA9ju/klei9JT7V9xfyf2bf2T7O91QDanQU8z2WjK+oZ6m9tN5uWWhP1Kba3bNXmGHH8EthhgF28zQ1j99FhwH7AV4AtKQM2Tddr1Mu/TwP/An5JmVXxXuZeJreKYwbwVmBtet4Ltt/UKIQjgVcAs5m3u2pQl+gj/RpYs0VDkkZ2iwh4cue47We1iKOa3kkIte37amJoxvaDkh6StILt21u2PcI+wO8knctgxiABkLQZZabiWsz7tzrpfyPDmBSWsn2qJNXBpP0lzQY+3jCGl9j+oKRXA9cCO1AujZsmBeB44CzgFOau2WjG9ivq94ctlmpF0tfGeghYsWEo1wJ3MPdkQZT/m1c2jKHjFknb2T4BQNKrGMxuY3dRxhVOBu7uHGz8gXwQcBplqvJD83luPx1COXGcTZ//VocxKdwraTHgqlqm+wZg2cYxdH7v2wLH2L690QSTkZa2/aFBNNyrzgM/Cjh+AJfpewL/Rc9ZYI9dWgVhe7t6knAw8CXbJ0i6v564tPafwBGSvkFJTtcBbxhAHMfVr0F6lO1xF1k2crvtX7RoaBjHFDairFZdkTJPfwXgC7Z/3zCGz1Fm2PwL2LjG8nPbz20VQ43j08DvbJ/Ust1R4nghZW3EtpSNln5I+X30fUBR0mmUwf/fjfLYn1tfxUhahvK+fCJlsH+Nlu2PiGVZANt3DSqGQZP0GcpV3M+Yt/uo6XhX/cyYRkmSvXFM+uruoUsKU0UdXL299p0uQ1m8dVPjGO6krJy9F7if2pfvfqySnFg804CtKOMc27SIo/4/3DPVBhIlrU+Z9fLtAbW/LfB05p2M8cnGMQx8UoikP49y2K0npkg6fYw4Jn0iwtB1H9Vf7sMyYctZHpLOBs4AzpL02zpF9e75vGzS2V5u/s9qo057fCXlimFD4PAW7Q56htNIkmZS9il/EPjNgGL4NmXh2pbAdynrV1qVHek18Ekhgxzv6tVyFtbQXSlIek7P3SUpBcgesN2s9lCdB755/dqEcqZ+lu33toqhJ5aVgHWZ90zszMYxHE3pRvsl8CPgDNtNBvWmSiG62oV2AHAbpQ7Tb4GVKFdwu9u+rmEsF9l+Vs/3ZYFf2N58vi+e3Dhm236OpItdi/F1jjWMYYdRDt8OXGz75gbtv972DzRG8Uj3oWjk0F0p2J494tBvJTU9C7L9Z0n3UD6A7qOcBTVduAZQV1LvTfkwnENJUOdQunBaOgTYpS7fb22qFKL7KmVW2i31pOHLtjeT9GLK7+cljeKAMtYFZRXt44D/Ax7bsP2OqTAp5M2UxZ2d7pstKDOA1pH0Sdv/2+f2l6nfm13VD+OVQu9CqcUoZ2Vfs/2UhjH8iTLF70jKtMM5rc6MR8RxMaU66e9tb6BSwvsztkc7O+p3LM/g4X3H32/Q7h966z9JOt/2RvXD6DLbT+13DLXdizprEerYyvmuJZslXWr76S3iqO19jFKRc2tKiQ0D37Hdctr2VJkU8ivgDbb/Vu+vRinLsgtl5f0zWsXSytBdKTB3oZSAB4A/U84GWvoa8HzKG+vZwBmSzrT9p8Zx3GP7HkmdFdVXSGqWHDsk7Uc5A1sPOAl4GXA25Y+v3wZeiK6aJekQypz47ajjCSrl1ac1jAPbn6o3fyzp58CSg1hAZvv8evMuynjCIDy+kxCqm+uxf0i6v1UQteTHm3n44P+kLzQduqQwFQaObB9IKYS3LOXNvj+lC6fpHz9wvaQVgZ8CJ0u6lVIZsrUdgfWBC23vWc/GWi3k6xSiezJwCfUEQW0L0UEpYf5WSlfFKcCh9bgp
XVvN1A+gt1NOXAycLalpzaEax5OBD/DwVbwtuzd/UxPjMfX+a+qxZSjjP638L6Vw40spBfp2o1xFTbqh6z4CkPQ8Hl7aocVZaaf9Ayh/cMsCv6OcFZ/l/m9qM15ML6Rcnv+yt8RBo7bPs71xXVm+JXAnpaR4k66bmFcd+L+TuYl5V2BF269tHMcfgG8zYhXvKOOC/Wh7Cdv31qvFHSh/r1AmAPzYjT84VXd76xn8fxTlM2OTyW5r6K4UJP0vZWHQHHq246RNV0XHOZS+0b/N95l9VvuvV6N0owE8Bvh/jcOYVa9YvkP5ALiL8jtqQqVa7A7MnQr6R+BI23c0jGEb27+st1cAvkwZ77kEeG/j98ozbK/Xc/90Sa3LukOZFfitAbQL5f23IfB927sDPx5QHB2drqrb6vjbTcCj+9HQ0CUFyvZ+67XO9CMcB+wqaR3bn5K0JvAY201nQUl6F2Ue+N+YW9fFlAJ9zdh+e735bZWqlMvbblI3X9K7KesjzqB8CF9ISQ6/l/R2279pEQdlB75f1tsHUOrlv5KSrA6irIBv5QJJm3QGdCU9l7LzV2s/k/R2yuyw1quJF5e0K/C80aal2m5dfuPgOn18X8p+MMvSp3ptQ9d9JOkY4N0eYK18Sd+ifAhvZftp9T/7125YmrjGcTXwXNv/17LdETFMpwwsd7qKLqd0YT3QqP2LgQ3qyvKlgZNsb1ET9fFutEG7ejaIV9n9boOex+a538cYLqacFDyKsm9254pxTcqWoOuN9do+xTOw1cSSnk/pt9+Jh2/K5X4M8E4VQ3OloLlbDS4HXFbXJvSefbTcavC5tjdU2U4P27eqcWni6jrKQpyBUNl57jTKWfGFlBlhrwAOkLSl7b82CmU6pdtoCeo8eNv/r/bbtvLoukBJwPKS1HM122oV7ysatTMhg5wU4rLB1NmSZtk+ZFBxSDqHslPjaaM8dqrtrSe7zaFJCkytrQbvr335Zcf4MtOl2TqFntWR11BmUpzIvAly0ldJjuG/KTtpfXVEfO+m1LzZo0EM3wXOV6mXvznw+RrDDNruw/sd5i5QOhxYlVLC+jGU8a++c63IKumJwPV1oHULSndiyzE3ahyzKQv3jrR9W+v2q/+t78fO7m9nUFa7t5qOuibwDUknAfuMaHfUzakW1tB0H0n6te2Wq0LHJGk35q3xsyOlUucx475w8trfb7zHbTfZm1jSFWPNMJJ0ZasFhZKeTllRfokHuNPaVCFpDmXsbW3KupHjgafbfnnjOJ5EmbK9M2VM4zBKN2uzDy2VPaIfxdxaXLsDD9p+S6P2L6DMfPoa5fNiF9tXdh7rdDlOaptDlBT68gtcUHX18NaU7oJTbfdlzvEYbS/tMaqC1sHv0fpy+xHHhWP12Y/3WCuSlnWjstF1MPdyl+1JlwI+TPkQuIyyyrxZN1/nb0XSB4F/2f76IP8/VFaXvwL4FqWb7zDgwBYDzhqx4n2sY31sv3esaQdKOZTP2P52v/5Phqn7aMXRZhF0tJ5NUM9IB3VWertKnfhP+OHlNX5M+TBqYYUx/k8EDKR89wiX0Wg7Tspitc4HzYHAPyldWVtTPgRblh65X9IulI11Oju/tRxf6ZL0LMrVwssp780jKGfOpwEbNAjhQUlP7FQbqNOXB1GjC9vH1W7O70l6OX2qAzVMSWEFytnGaKULTIMdnlT2L+hcmqnn9nRgcdut/j+uoazV+K2kXUdcGbQs7XAGY2832aRSq8aoPkn5PbQsvrZYz4yrmT1XtWfX7pyW9qSs9P5vl+KN61BW1DZVxxRuo4wrfNh2Z9zrXJU9i1t4P2WdxjWU98RatC25MU8lVts3AC+W9AH6VCQx3UcDpFLm4h2UEgc/sf1fjdrtdA+8njKg+9HOiu6p+HvqJ5VqtV+k1MEa6b22V2wUxzGU6bCHSToM+KbtWSqlHo5oPV15KpD0hAGv8p8GvBv4H8oUXYAre5JTy1hWaTV1fJiSwsD7qDvq6t33
UC7PjwS+0nKtwIh+yrUpZ4E3UJLT6YNIChrQTl+Sfge8y6OUTpB0ne3H9zuG2tYKlG6jzSkVdDekTBm+jrKu5g8NYjja9k496xW6D1Hm5jdd1FhjGugOcKolWFq1N04cV1FmoR1G2duibx/cw5QUnmH7knr7MZRNXUwpUdxkG0xJq1I2id+Z0of89ZYDiD1xzJMg60DexyhTQJey3bR2vsbY6ct236vXqlSF/T/bfx/lsdXcuBSJpOWBdShdite3bF/SY23fKGmt0R7vTFltGM/A3hc9MXyFMp7yI3p2R3Qf9kaeTxwCXgS8ibLy/mjge7b/OOltDUtS6FDZWObjlIEqAS8EPmn70HFfODlt3w3cQsn2d458vNX6AEmftr3vKMc3Afa3vU2LOHranRI7fU0FdX3EGpTBzGtazX4aJY7VKB8+UD6I+77L2CgxDPx9oYZ7I0+UpC0pxQqXAf5AGW+ZtFphwzTQ3PEB4Nmd7hpJq1AqlfY9KVD6rjtZeGD7I4+WEOrx3wNNE0I1sJ2+VHb0+qHtv9d58YdSFmtdCbzF9sWN4liPMhd9bcqMpwspq5zPAPZuPCV1J8p79TeUE6evS/qA7WNbxVANfAc4N9wbeTz1c+r1lHUSfwPeRSm/sQGlrPfkrf62PVRflASweM/9xYHfDTquAf0uLgYuGvF1FmWj9FUaxvExyu5ar6FUf7wR+FSjti/tuX0i8Op6ewvgtw1/B78HnlJvbwwcXm+/FTi28fviD8Cje+7PAP7QMoZBvy96YliNMvvpF/X+esCbB/C7+GP9fawxymMfmsy2hrH76PvAMymrNA28irkfiLhBF07tIngrD9/ToWmRLUlfoHRTHFkPvY7Sh3sT8HzbY00X7WdMS9Bwp6/eldOqW3H2PNbdIrNBHCO3Be2dDHC57WZ7eEu62PYze+4vRkkKzxznZf2Oqen7oqfdX1C6ez9qe32VAo4Xtv5dSKUWVh1zsu2HdT9PlmHsPvpT/eo4vn5v2Z1zPOWM/BQGtBCmepHnnWl08Yjpqk2o7Fn9Rdvfdpnud6+kn9tuUaDtWEnfo+xm9RNJ76GUat6KtvtK/Ellb+TTKAvV5gCoFOVrVRCv45cqexMfVe/vDPyiVePjLTKVhNsuNF3V9tGS9gGw/YCkQfzNPqdOVV6OkiNuA97kPmw4NHRJwY3q+szH0rY/NOgggGmSNnbdx0Flo/TOlqBNSldX9wNbqpR6+A+Xnd9Wb9Gw7Y9KeiPlA/CJlEqpe1G2KN2tRQzVm4CPAPtQum/2rseXpk1hwC7bH6gfzJ3dxg62/ZOGIRxLSYpz6v3eBZVNFpr2uLv253eKV27CYCoLHwq83fZZNY7nU65gJv1Kdhi7j2YCH+Xh+742m4Mt6dOUcYyTWrU5RhwbUd5sy1L+8O4A3gJcCmxr++hGcfTW2nkN8Frgpx6iRXRTTZ2Wuq7tU1T2mZjWzy6LEW1vT+nKfBLlqvoo21e3aHuUWDYEvg48g7IL3gxgRzfaBKonjgs9Yp1VvxaaDmNSuJIyA+liespVu+EcbJVyF8tQylXfz9zFQQOp91MXTtG6v7an/e4bXtKLgG8AK9vuy3aDE4jn+7bf0LjN4yi1fX5q++75Pb/PsbyVcrW0su0nSlqXUi560mv3zyeOZShjfjsDq1D69c9oGUONYzplRbMoK5pblc3uJCUoC12XolzRmvI7ucf2WGVaFtjQdR8Bt9geuZNSU7YHNh21Vx28ew11wLusj2m7YrTqbitYz0xfSqMuE0kj3wuidGWtWONptfnScyknKV+XdArlj//E2pXW2jsoM6DOBbB9laRBJOh7KF01d1Cu7Jcc/+l9szFzJ4VsWMc1Wu0vccCI+71l7/tyRj+MSWE/lRrppzLvxjJNq6SqbMG5LvMu329SBK7H8ZQ/utn0/C5akfRUl2qxN/ScEXX8vFEYa1CqoX6X8kcmyl4CI/8Y++1m2zvW2SWvosxOO1jSzyndJ79uGMu9
tu/rnCTUM+WWexhsRek+2pgyGeNA24PYIxpJ/0sZa5rD3EkhptGmQx7AOolh7D76AWU/4Evp2ay+5XTQuqp6b8oH0hxgE+AcN14lKekS289o2eaI9g+2vdcgV43W6ZZ7U0ozf8D2HEnXuME+wCPieFj/cB3gfC2wU8v3Rp2qfBuly+JdwNuBy2x/tFH7D1GmiJ9N+QCe50PK9rtbxFFjuRxYz1Pgg1KN6kANY1JotqPXODFcTCkh8HvbG6hsuPMZ2y1r5iPpYEr9pSardqcySWtQFu39DdjOdqt9FDrtn2n7BfN/Zv/VRPlmSmlmAb8Cvtvqg1HSuF2Htg8f7/FJjuUYSkHCG1u1OUYczepADWNSOIwyJ/6yAcZwvu2NVOrkP9dlL9xLbT+9cRyXUWZ4/JnSfTSQaph1Lv7bmLsP7m+Ag1oO6PXEsi2wme2PtG57KpK0MmUVbdPZNqPEsRiwrO07Grd7OqWUxHnM293caqypE0ezOlDDOKawCTBH0iA/CK+vA5k/BU6WdCvQtAJl9bIBtDmab1EqUf5Pvb97PdZkH1yYpxDddcBnWrU7IoaNKe/F81VqIW0DXNF66rKk3wDbUT4fZgM3S/qd7fc2juNIymY/DwLnA8tLOtD2FxuGsX/DtsbTrA7UMCaFQRR8m4ftV9eb+9czkRWAXw4gjr8A1Jklg5rZAbCR593z9jRJfd8/AMYsRDdD0pk0LEQnaT9Kkp4u6WTKbKTTgQ9Lerbt/24RR7WCy17RbwG+b3s/SYO4UlivxrEbZUX1hylJqmVSeBJwpu2rGrY5mp/XE8kvAhdQxlm+24+GhjEpDKy/TNLy9U2+cs/hTn/+skDfNyIfEc92lFk2j6Ns+7cWcDllMKulQe6Deyiwh+0r65n6O2w/t87VP4TSd9vCjpRuiiUotafWqO+VL1GmhrZMCtMlPRbYibLQc1AeVbsWtwe+Yft+Sa3/ftcEDlLZjGo2ZZvYs2zPaRmE7U/Vmz+uM9L6VgdqGJPCicydergkpeTslbT5IDySsk/07J4Yer83nfECfIrSnXaK7Wer1GlvVvOoxweYuw8ulLP2VvvgLmX7SgDb59UBPWx/R2Pv39wPD9h+kNI98KdO37ntf9XZOC19kjK4fHbtynoCMIgz5YOAayllP86sq6ybjinY3g9A0lKUacIfAL7K3HIwzUh6Hj1FNPu1XmLoBppHqvPj3267Sf+1yuTvx9tuWWxtrFhm2Z5Zu2qebfshjajW2ef2NwKus31TXUj3H5SzwqspG4f0/cqpriS+kLmF6Fay/aZ6hnpJq5lqks4FtrT9T0mL2X6oHl+BAW2ROhVJmm67WV0uSfsCm1Gu5C+kTJM9q/VspLHWS/Rjeu7QJwV4eKngRa29ceI4hfIh/FlgVUoX0ka2n9eo/QsolVr/IekFwA8p8+I3AJ5mu+9dN7Wf9iOUOvl/AD5n+876Yfw0l42H+k7SEh5lQ3iVLVwf23LacF2n8GnK4OYvKUXX3mv7B61iqHGsQlnB+3zKlfTZlF0Sm+5nTikOeSJwBmU90SAWejZbLzF0SWFEl8BilA3SV7H90oYxHE7pIz2/VZsj2l+iToNdhlJKQJSKoCsAR7T6o+u9KpH0TUoJkv3r/Tm2N2gRxyhxDWT6Y0/7z6cUozuszopa1vafG7Y/p66feTWlu/N9lMHWJleQPXGcTOnD7ySj3YAtbL+ocRzLU64Wnk9ZTHiz7eeP/6pJj6HZeolhHFPorTvUOQP4ceMYngvsJukvlM3AW0+LPYeSDL9te/d6rNmCoB7TeroDtqYUYeto+t6cItMfO7OQZlIKsB1Gmar7A8qHUiud3/22wDG2b5c03vP75bE9A6wAn5a0c8sAJD0D2Jyyl/tMypTlsxq2/zPKVdJywGWS+r5eYqiSgqRpwHK23z/gUJpdlYxhcUm7As/TKBuauF0dqKOAMyT9ndJV0akV/yTa16yfCtMfAV4NPJsy7RDbf5XUuoDizyVd
Qfk/eVu9WrmncQwAv5b0OqBTwn1HygB4SwdSpgZ/k7Lj2l2N2z+BsiXoyES0OWV70kk3NN1HnTNSSefY3nTQ8cDD1we0Gnyu3RO7UaYcjqwSaretA7UJZRHOr11LRkt6MqXL5IKGcVxKGcs4ktK1d0bLQfeeOM6zvbHm7jGxDKUfu/Uq85WB220/qLKfwvK2b2rU9p3MnZW3DHNrlC0G3OUGJeZVigB+hrL5Uefv8vHM3ZqzyWr7Ov10n5FjSpKeSSmNM+lb5g7TlcJ5lC6TOSrlko+hdN0AbaukDnp9gO2zgbPr7KNDWrQ5TiwPG8i1/ccBhDLw6Y/V0ZIOAlasayXeBHynRcOStrJ9Wu/V44huoyZ/I54apeW/SOmyWcd1c6E6tvCl+rX3OK+dTKuNNsnA9sV17cSkG6Yrhc6Z12E9h7trBBqfHf+BsgfwPOsD3IfiVmO0/7A//l4tE+RU1nr6Y0+7L6anGJ3tkxu1+4m6evmwUR5u+jfSE1NnW1BTpoL+tFG7VwFPHjnbp3ZBX2F73VZxjNWWpKttP2my2xymK4VH15lHlzA3GXS0zoz32/4/SYvVOemnS/pqw/ZfSJmXP9qlZ+s9cKeEOgV1P+YW5TuDsoir+W50NQk0SQQj2t2vfm+1cHBckv6HUmbiqHroPyW92PY7GjTv0aZ/1u60lp8XsyS91fY8V4sqJUhm96PBYUoK05i7F/FIrZPCbSpVDs8CjpB0M9BsAGuq/fFPEYdSThh2qvd3p/QfNyln3tOP/rCHaLRVq+azgtv2l/sdwwhbUdaKGLpTuS9t1PZlkt7gESuGJb0euKJRDADvAX5SJ0B0ksBMYHHKpIRJN3TdR4OOA6AOHv6LMnD2ekp9ldUanQH1xrEaZTDtcbZfplIcbtNBjzMMwmjrIga5VmIQ6nRYKNNhN2LuJIRXUmr3Ny2BUgdZ3+G5hRvXokwCmPTB1VHaXp1yxfwv5v0wXgp4te0b+h3DiHi2BDobYl1q+7R+tTVMVwoDmWg9Gtt3S3o2sCtlMcy1wLEDCOV71NkU9f4fgR9RCsENm39Jen4dhEfSZswtV9yUSumV7ipe2xe2aNf2J2r7ZwIb9gyw7k9Zz9PEiLn5l9e5+aas7zmvRQz1Q/+5KluDdiaAnGT71BbtjxLP6ZSpsX03TElh60EHUKda7lK//k75AJbtLQYU0qq2j5a0D0CdstuqOulU8zbg8Dq2IErF2nF3AOsHSR+nnCh0xnW+J+kY259uGMZqwH099++rx1r5UsO2xlXPyPt2Vj4VDU1ScIPiahNwBWUc4RW2rwaQ1HTjkhHuVqkv0+mz3YQBDKxOBS6lkNev0w4ZVIkLyvqR9W3fAyDpc5QiaC2TwveB8yT9pN5/NQ1XvNs+o3O7dhmta/sUlUqlQ/OZNSiLDTqAIbMDZRXi6ZK+I2lrBtut9T5Kv/ETJf2W8mHwrgHGMzCSVpD0ZcpZ4WmSDqhXDa39lXk3PFoCaNp/7bKhz56Ukh8PAG+03Xw3urpO41jKGhIoO+P9tHUcw2ZoBpqnkjrQ/CpKN9JWlA/jn9j+9QBimU4ZWBRwZauVmlONpB9TZh91zoh3p5yxN5l91BPHTymDvCdTruBeTOlHvx7AfSiVPEoM76bsHXAc5X2xPfAd21/vd9sj4pgDbAyca/vZ9diUqDC8KEtSGDBJK1H6kHe23WTcY6xFax3DuHhtqsw+kjTuOIbtvnfjqGy9uWlP2ZFBldo412UXvAvrIs/pwAWt4xg26Z8bMNu3AgfXr1aOpfRTz6n3Ry7kG7qkwBSZfdTiQ38CxLzboT7IYLo5z5D0EWCpusr77cDPBhDHUMmVwhCStD3wOspq0eOBozoD38NK0vqUbrzOOMKtlL2bm25YL2ldyqZH6zFvscRmW7XWRWx7AJ2B5u2B79n+aqsYahwC3kJPyQ/gu6OtNI7Jk6QwxHrGNnYGVqFUfzxj/FctWkas4u1U5YRSLNGtV/FKOptSbuMr
lEVjewKL2f544zg6ayWg1Bxqslaip/1plEVaT23ZbqT7aNjdQ5mCegelUuuS4z99kdSpyNlZxXs8JTm8nkYLpUZYyvapklRX8u4vaTbQNCm4lC1vVrp8lPYflHSlpDU9BfYzHyZJCkOortJ8HWVmxynAgbZnDTaqwZgqq3h73KuyHehVkt5JmY667ADimApWAi6tK5p7y9xP+m5jMVe6j4aQpIeAiygboZsRhdhaTHucaiRdCTzLdVN2SUsAF9l+SuM4NqLsrbEi8CnKGMcXPMq+E4sqlZ33VuPhJ62bAzcOY22ulnKlMJzeRPvKsFPdyFW821NqQzVl+/x68y7KeMIw+iqj7zb2D0oBxySFPsqVQkRVB1c3r3fPbDm4qrIb4JiGqctE0vm2NxrjsSxe67NcKQwhlT2an9CpFS/pWGDl+vCn+1mWdyob8ODqpsB1lA1lzmUKVfUdgBXHeWypVkEMq9Q+Gk6fAHoHlp8CfADYH/jgIAIKHgN8hFIz/0BKeYu/2z5j2KYJU3cbG3mwn7uNxVzpPhpCIy/PJR3XqfEj6be2NxtcdFEHuXehbB7/CdvfGHBITdXNn35CKdn9sN3GbN80qNiGQZLCEBrEZuAxfzUZbEtJCGtTKtge2nqXr6mi5W5jMVfGFIbTFZK2tT3PPHxJrwCuHFBMQ03S9ykfgCdRrg4uGXBIA9dyt7GYK1cKQ6jOAz8R+B1zB1afAzyPsgHQHwcV27Cqa0c6C7R6/yhFKbexfPuoYhglKQyhmhQeAzyZufvPXkrZo/lG238aVGwRMVhJCkNI0s8ZfXHQM4HP2H7lYCKLiEHLlNThtNrIhABQj63dPpyImCqSFIbTiuM8lsVBEUMsSWE4ZXFQRIwqYwpDKIuDImIsSQpDLIuDImKkJIWIiOjKmEJERHQlKURERFeSQsQoJK0iaU79uknSDT33Fx/jNf8p6Q319hslPa7nsfdIWrpV/BELKmMKEfMhaX/gLttfegSv+Q3wftuz6v1rgZm2//4IfsY02w8+smgjFk6uFCImZjFJswEkrS/Jktas9/8kaWlJ+0t6v6QdKVN8j6hXFnsDjwNOl3R6fc1LJJ0j6QJJx0hath6/VtLnJV0AvHYg/9IYakkKERPzELCkpOUp+zjPAjaXtBZws+1/dp5o+9j6+G62N7B9IPBXYEvbW0paFdgXeJHtDetz39fT1v/Z3tD2D9v80yLmyn4KERP3O2Az4AXAZ4BtKKWtz3qEP2cTYD3gt5KgLBo8p+fxHy10pBELKEkhYuLOpFwlrAUcD3yIsvfBieO9aBQCTra9yxiP3z3G8Yi+S/dRxMSdBbweuMr2Q8A/gJcDZ4/y3DuB5ca4/3tgs7qvBZKWkfTkvkUd8QgkKURMkO1rKWf5Z9ZDZwO32b51lKd/D/h2HWheCjgY+KWk023fArwROErSRZSuo6f2OfyICcmU1IiI6MqVQkREdCUpREREV5JCRER0JSlERERXkkJERHQlKURERFeSQkREdP1/sGpHRnGCXDEAAAAASUVORK5CYII=",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# top 10 authors by number of tweets\n",
+ "fig, ax = plt.subplots()\n",
+ "ax.tick_params(axis='both', labelsize=10)\n",
+ "ax.set_xlabel('Author', fontsize=10)\n",
+ "ax.set_ylabel('Number of tweets', fontsize=10)\n",
+ "ax.set_title('Top 10 Tweeters', fontsize=10)\n",
+ "tweets_df.original_author.value_counts().head(10).plot(ax=ax, kind='bar', color='orange')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Locations"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "unknown 2805\n",
+ "Việt Nam 116\n",
+ "India 107\n",
+ "United States 72\n",
+ "Turn on 🔔 57\n",
+ " ... \n",
+ "New York, New York 1\n",
+ "Fontaines-Saint-Martin, France 1\n",
+ "🇺🇲🇺🇲🇺🇲 1\n",
+ "Lisbon 1\n",
+ "🇺🇲🇷🇺🇺🇦🇫🇷🇦🇪🇮🇱🏳️🌈 1\n",
+ "Name: place, Length: 1809, dtype: int64"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tweets_df[\"place\"].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The most frequent location value is `unknown` (2,805 tweets), well ahead of any actual place."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAFOCAYAAAB3xTGMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAhaklEQVR4nO3deZhkZX328e/NACLrgIwEhmVA8c1rRJCMohKJSGRVUQkKkojEVzBCRM0CqBGCVxQVjGIMiDA6RIEgCA4IEiAs4sYMwzKsMrKEnUEIDCDEgfv94zwNxdDdp7q7qk5V9/25rrrqnOecOufXxdC/ftYj20RERIxmhaYDiIiI/pdkERERtZIsIiKiVpJFRETUSrKIiIhaSRYREVErySImLUkvk3RNed0v6Z6W/ZUncN3XS1om6c9HOP74+KMe9b7TJX2sZX8DSWd0414Ry1PmWcRUIOkI4HHbR0/wOtOAC4GngDm2X/TLWtLjtlefyH1GuPcs4Fzbr+n0tSPqpGYRU4qkHSRdLWmRpDmSXlLK75D05VJ+paRXjnCJvwHOBB4c4323kvRLSddJOkvS2qX8lZIuknStpIWSXiFpdUkXl/1FknYvlzkKeEWpGX1F0ixJ15frrCLpO+X8qyVtX8o/JOmHkn4i6VZJXy7l0yR9V9L15TOfHPOXGVNKkkVMJasA3wXeb3sLYEXgr1uOP1rK/xX42vIfljQTeA9w3DjufTJwiO3XAouAw0v594Fv2t4SeDNwH1Wt5T22twa2B46RJOBQ4De2t7L998td/0DAJf69gbmSVinHtgLeD2wBvF/SRqVspu3XlM98Zxw/U0whSRYxlUwDbrf967I/F9iu5fipLe9vGubzX6P6hf/sWG4qaS1guu3LWu8raQ2qX9hnAdh+yvaTgIAvSLoOuAiYCaxXc5s/Ab5XrnMzcCfwqnLsYtuP2n4KuBHYBLgN2EzSNyTtDDw2lp8ppp4Vmw4goo94hO0hs4HTqj/yWRfYVdIy22d3OI59gBnAH9v+vaQ7qGpF4/V0y/YzwIq2H5G0JbAT8FHgfcBfTeAeMcmlZhFTyTPArJb+iL8ELms5/v6W918s/2Hbm9qeZXsWcAbwsXYShe1HgUckvaX1vraXAndLejeApJdIWhVYC3iwJIrtqWoCAEuBNUa4zU+pkgySXgVsDNwyUkyS1gVWsH0m8Flg67qfI6a21CxiKnkK2A/4gaQVgfnA8S3H1y5NP09TtfuP16qS7m7Z/yqwL3B8SQa3lTigShzfknQk8HtgT6p+jHMkLQIWADcD2P6tpJ+VTu3zgW+23OPfgOPKZ5YBH7L9dKkFDWcm8B1JQ38wHjaBnzemgAydjaAaDQXMtv1Q07FE9KM0Q0VERK3ULCIiolZqFhERUSvJIiIiaiVZRERErUk5dHbdddf1rFmzmg4jImKgXHXVVQ/ZnjHcsUmZLGbNmsWCBQuaDiMiYqBIunOkY2mGioiIWkkWERFRK8kiIiJqJVlEREStJIuIiKiVZBEREbWSLCIiolaSRURE1JqUk/K64pQRHyLTXz6QVYQjovNSs4iIiFpJFhERUSvJIiIiaiVZRERErSSLiIiolWQRERG1kiwiIqJWkkVERNRKsoiIiFpJFhERUSvJIiIiaiVZRERErSSLiIiolWQRERG1kiwiIqJWkkVERNRKsoiIiFpJFhERUatryULSRpIukXSjpBskHVzKj5B0j6RrymvXls8cJmmxpFsk7dRSvnMpWyzp0G7FHBERw+vmM7iXAX9re6GkNYCrJF1Yjv2L7aNbT5b0amAv4I+ADYCLJL2qHP4m8HbgbmC+pHm2b+xi7BER0aJrycL2fcB9ZXuppJuAmaN8ZHfgNNtPA7dLWgy8oRxbbPs2AEmnlXOTLCIieqQnfRaSZgGvA35Vig6SdJ2kOZLWLmUzgbtaPnZ3KRupPCIieqTryULS6sCZwCdsPwYcB7wC2Iqq5nFMh+6z
v6QFkhYsWbKkE5eMiIiiq8lC0kpUieL7tn8IYPsB28/Yfhb4Ns83Nd0DbNTy8Q1L2UjlL2D7BNuzbc+eMWNG53+YiIgprJujoQScBNxk+6st5eu3nPYe4PqyPQ/YS9JLJG0KbA5cCcwHNpe0qaSVqTrB53Ur7oiIeLFujobaFvhLYJGka0rZp4G9JW0FGLgDOADA9g2STqfquF4GHGj7GQBJBwEXANOAObZv6GLcERGxnG6OhroC0DCHzhvlM/8M/PMw5eeN9rmIiOiuzOCOiIhaSRYREVErySIiImolWURERK0ki4iIqJVkERERtZIsIiKiVpJFRETUSrKIiIhaSRYREVErySIiImolWURERK0ki4iIqJVkERERtZIsIiKiVpJFRETUSrKIiIhaSRYREVErySIiImolWURERK0ki4iIqDWmZCFpbUmv7VYwERHRn2qThaRLJa0paR1gIfBtSV/tfmgREdEv2qlZrGX7MeC9wMm2twH+rLthRUREP2knWawoaX3gfcC5XY4nIiL6UDvJ4kjgAmCx7fmSNgNu7W5YERHRT1asO8H2D4AftOzfBuzRzaAiIqK/1CYLSTOAjwCzWs+3/VfdCysiIvpJO81QPwLWAi4CftzyGpWkjSRdIulGSTdIOriUryPpQkm3lve1S7kkHStpsaTrJG3dcq19y/m3Stp3PD9oRESMX23NAljV9iHjuPYy4G9tL5S0BnCVpAuBDwEX2z5K0qHAocAhwC7A5uW1DXAcsE0Zsns4MBtwuc4824+MI6aIiBiHdmoW50radawXtn2f7YVleylwEzAT2B2YW06bC7y7bO9ONTTXtn8JTC+jsHYCLrT9cEkQFwI7jzWeiIgYv3aSxcFUCeMpSUvL67Gx3ETSLOB1wK+A9WzfVw7dD6xXtmcCd7V87O5SNlJ5RET0SDujodaYyA0krQ6cCXzC9mOSWq9tSZ7I9Vvusz+wP8DGG2/ciUtGRETR1tpQkt4l6ejyeke7F5e0ElWi+L7tH5biB0rzEuX9wVJ+D7BRy8c3LGUjlb+A7RNsz7Y9e8aMGe2GGBERbWhnbaijqJqibiyvgyV9sY3PCTgJuMl261pS84ChEU37Uo22Gir/YBkV9Ubg0dJcdQGwY1nEcG1gx1IWERE90s5oqF2BrWw/CyBpLnA1cFjN57YF/hJYJOmaUvZp4CjgdEkfBu6kWkYE4Lxyr8XAk8B+ALYflvR5YH4570jbD7cRd0REdEg7yQJgOjD0C3qtdj5g+wpAIxzeYZjzDRw4wrXmAHPauW9ERHReO8nii8DVki6h+uW/HdXciIiImCLaGQ11qqRLgdeXokNs39/VqCIioq+M2MEt6Q/L+9bA+lTzG+4GNmhdiiMiIia/0WoWn6Kat3DMMMcMvK0rEUVERN8ZMVnY3r9s7mL7qdZjklbpalQREdFX2pmU9/M2yyIiYpIasWYh6Q+o1mB6qaTX8fww2DWBVXsQW0RE9InR+ix2olpOfEOgdQb2UqrJdRERMUWM1mcxF5graQ/bZ/YwpoiI6DPtzLM4U9JuwB8Bq7SUH9nNwCIion+0s5Dg8cD7gb+h6rfYE9iky3FFREQfaWc01JttfxB4xPY/AW8CXtXdsCIiop+0kyx+V96flLQB8HuqGd0RETFFtLOQ4LmSpgNfARZSzd4+sZtBRUREf2mng/vzZfNMSecCq9h+tLthRUREP2mng/vAUrPA9tPACpI+1u3AIiKif7TTZ/ER2/8ztGP7EeAjXYsoIiL6TjvJYlp5njYAkqYBK3cvpIiI6DftdHD/BPgPSd8q+weUsoiImCLaSRaHUCWIvy77F5LRUBERU0o7o6GelXQScAXVsNlbbD/T9cgiIqJv1CYLSW8F5gJ3UC33sZGkfW1f3tXIIiKib7TTDHUMsKPtWwAkvQo4FfjjbgYWERH9o53RUCsNJQoA278GVupeSBER0W/aqVkskHQi8L2yvw+woHshRUREv2knWfw1cCDw
8bL/U+DfuhZRRET0nXZGQz1N9VjVr9adGxERk9OIyULSIqqhssOy/dquRBQREX1ntJrFOyZyYUlzyjUetP2aUnYE1bpSS8ppn7Z9Xjl2GPBh4Bng47YvKOU7A18HpgEn2j5qInFFRMTYjZgsbN85wWt/F/hX4OTlyv/F9tGtBZJeDexF9ZzvDYCLyhBdgG8CbwfuBuZLmmf7xgnGFhERY9BOB/e42L5c0qw2T98dOK30j9wuaTHwhnJsse3bACSdVs5NsoiI6KF25ll02kGSrpM0R9LapWwmcFfLOXeXspHKX0TS/pIWSFqwZMmS4U6JiIhxGjFZSLq4vH+pg/c7DngFsBVwH9Xs8I6wfYLt2bZnz5gxo1OXjYgIRm+GWl/Sm4F3leYftR60vXCsN7P9wNC2pG8D55bde4CNWk7dsJQxSnlERPTIaMnic8A/Uv2CXn6OhYG3jfVmkta3fV/ZfQ9wfdmeB5wi6atUHdybA1dSJajNJW1KlST2Aj4w1vtGRMTEjDYa6gzgDEn/aPvzY72wpFOBtwLrSrobOBx4q6StqJLNHVTPycD2DZJOp+q4XgYcOLQMuqSDgAuohs7OsX3DWGOJiIiJkT3ivLvnT5LeBWxXdi+1fe5o5zdt9uzZXrCgw8tXnaL6c/rBB+r/e0ZEDEfSVbZnD3esdjSUpC8CB1P91X8jcLCkL3Q2xIiI6GftzLPYDdjK9rMAkuYCVwOf7mZgERHRP9qdZzG9ZXutLsQRERF9rJ2axReBqyVdQjU6aTvg0K5GFRERfaWdJcpPlXQp8PpSdIjt+7saVURE9JW21oYqcyPmdTmWiIjoU02sDRUREQMmySIiImqNmiwkTZN0c6+CiYiI/jRqsihLbtwiaeMexRMREX2onQ7utYEbJF0JPDFUaPtdXYsqIiL6SjvJ4h+7HkVERPS1duZZXCZpE2Bz2xdJWpVqBdiIiJgi2llI8CPAGcC3StFM4OwuxhQREX2mnaGzBwLbAo8B2L4VeHk3g4qIiP7STrJ42vb/Du1IWpHq4UURETFFtJMsLpP0aeClkt4O/AA4p7thRUREP2knWRwKLAEWUT0G9Tzgs90MKiIi+ks7o6GeLQ88+hVV89MtbudZrBERMWnUJgtJuwHHA7+hep7FppIOsH1+t4OLiIj+0M6kvGOA7W0vBpD0CuDHQJJFRMQU0U6fxdKhRFHcBiztUjwREdGHRqxZSHpv2Vwg6TzgdKo+iz2B+T2ILSIi+sRozVDvbNl+APjTsr0EeGnXIoqIiL4zYrKwvV8vA4mIiP7VzmioTYG/AWa1np8lyiMipo52RkOdDZxENWv72a5GExERfamdZPGU7WO7HklERPStdobOfl3S4ZLeJGnroVfdhyTNkfSgpOtbytaRdKGkW8v72qVcko6VtFjSda3Xl7RvOf9WSfuO66eMiIgJaSdZbAF8BDiKaoLeMcDRbXzuu8DOy5UdClxse3Pg4rIPsAuweXntDxwHVXIBDge2Ad4AHD6UYCIionfaaYbaE9isdZnydti+XNKs5Yp3B95atucClwKHlPKTy5pTv5Q0XdL65dwLbT8MIOlCqgR06lhiiYiIiWmnZnE9ML1D91vP9n1l+35gvbI9E7ir5by7S9lI5S8iaX9JCyQtWLJkSYfCjYgIaK9mMR24WdJ84OmhwokOnbVtSR1bvdb2CcAJALNnz86quBERHdROsji8g/d7QNL6tu8rzUwPlvJ7gI1aztuwlN3D881WQ+WXdjCeiIhoQzvPs7isg/ebB+xL1Vm+L/CjlvKDJJ1G1Zn9aEkoFwBfaOnU3hE4rIPxREREG9qZwb2U55+5vTKwEvCE7TVrPncqVa1gXUl3U9VQjgJOl/Rh4E7gfeX084BdgcXAk8B+ALYflvR5nl+48Mihzu6IiOiddmoWawxtSxLVyKU3tvG5vUc4tMMw5xo4cITrzAHm1N0vIiK6p53RUM9x5Wxgp+6EExER/aidZqj3tuyuAMwG
nupaRBER0XfaGQ3V+lyLZcAdVE1RERExRbTTZ5HnWkRETHGjPVb1c6N8zrY/34V4IiKiD41Ws3himLLVgA8DLwOSLCIipojRHqt6zNC2pDWAg6nmP5xGtfJsRERMEaP2WZQlwj8F7EO1SuzWth/pRWAREdE/Ruuz+ArwXqrF+baw/XjPooqIiL4y2qS8vwU2AD4L3CvpsfJaKumx3oQXERH9YLQ+izHN7o6IiMkrCSEiImolWURERK0ki4iIqJVkERERtZIsIiKiVpJFRETUSrKIiIhaSRYREVErySIiImolWURERK0ki4iIqJVkERERtZIsIiKiVpJFRETUSrKIiIhaSRYREVErySIiImo1kiwk3SFpkaRrJC0oZetIulDSreV97VIuScdKWizpOklbNxFzRMRU1mTNYnvbW9meXfYPBS62vTlwcdkH2AXYvLz2B47reaQREVNcPzVD7Q7MLdtzgXe3lJ/syi+B6ZLWbyC+iIgpq6lkYeA/JV0laf9Stp7t+8r2/cB6ZXsmcFfLZ+8uZS8gaX9JCyQtWLJkSbfijoiYklZs6L5/YvseSS8HLpR0c+tB25bksVzQ9gnACQCzZ88e02cjImJ0jdQsbN9T3h8EzgLeADww1LxU3h8sp98DbNTy8Q1LWURE9EjPk4Wk1SStMbQN7AhcD8wD9i2n7Qv8qGzPAz5YRkW9EXi0pbkqIiJ6oIlmqPWAsyQN3f8U2z+RNB84XdKHgTuB95XzzwN2BRYDTwL79T7kiIiprefJwvZtwJbDlP8W2GGYcgMH9iC0iIgYQT8NnY2IiD6VZBEREbWSLCIiolaSRURE1EqyiIiIWkkWERFRK8kiIiJqJVlEREStJIuIiKiVZBEREbWSLCIiolaSRURE1EqyiIiIWkkWERFRK8kiIiJqJVlEREStJIuIiKiVZBEREbWSLCIiolaSRURE1EqyiIiIWkkWERFRK8kiIiJqrdh0ADFFnaKmI2jPB9x0BO3J9xldlppFRETUSrKIiIhaaYaKiGiVJr1hpWYRERG1BiZZSNpZ0i2SFks6tOl4IiKmkoFIFpKmAd8EdgFeDewt6dXNRhURMXUMRLIA3gAstn2b7f8FTgN2bzimiIgpY1A6uGcCd7Xs3w1s03qCpP2B/cvu45Ju6VFsE7Eu8FBHr7jPgHTOdUe+z87K99k5g/JdbjLSgUFJFrVsnwCc0HQcYyFpge3ZTccxWeT77Kx8n50zGb7LQWmGugfYqGV/w1IWERE9MCjJYj6wuaRNJa0M7AXMazimiIgpYyCaoWwvk3QQcAEwDZhj+4aGw+qEgWo2GwD5Pjsr32fnDPx3KTsLe0VExOgGpRkqIiIalGQRERG1kiwiIrpI0gqS1mw6jolKsoiBJ2lNSesMvZqOJ0LSKeXf5WrA9cCNkv6+6bgmIh3cPSbpJcAewCxaRqPZPrKpmAaVpAOAfwKeAob+Idv2Zs1FNdgkzQAOoVqDbZWhcttvayyoASTpGttbSdoH2Bo4FLjK9msbDm3cBmLo7CTzI+BR4Crg6YZjGXR/B7zGdmeXUZjavg/8B7Ab8FFgX2BJoxENppUkrQS8G/hX27+XNNB/mSdZ9N6GtnduOohJ4jfAk00HMcm8zPZJkg62fRlwmaT5TQc1gL4F3AFcC1wuaRPgsUYjmqAki977uaQtbC9qOpBJ4DCq7/NXtNTSbH+8uZAG3u/L+32SdgPuBdIPNEa2jwWObSm6U9L2TcXTCemz6DFJNwKvBG6n+gUnqnb2gW3LbIqkK4ErgEXAs0Pltuc2FtSAk/QO4KdUa7F9A1gT+CfbWV5nDCStB3wB2MD2LuX5O2+yfVLDoY1bkkWPleroi9i+s9exDDpJV9t+XdNxRCxP0vnAd4DP2N5S0orA1ba3aDi0cUszVO/9P+By4Oe2n2g6mAF3fnmOyTm8sBnq4eZCGkyS/sH2lyV9g+dHlj0nTXtjtq7t0yUdBs+tb/dM00FNRJJF790G7A0c
K2kpVZX/cts/ajasgbR3eT+spcxAhs6O3U3lfUGjUUweT0h6GSXxSnoj1SjIgZVmqIZI+gPgfVTDP9e2vUbDIUVEh0jamqrP5zVUk/JmAHvavrbRwCYgyaLHJJ1INeHpAapaxRXAQtvLGg1sQEl6DS+eQHZycxENJknnMEzz0xDb7+phOAOvTL59Bvg/VINYbgFWsD2wc6vSDNV7L6N6Jsf/AA8DDyVRjI+kw4G3UiWL84BdqJJvksXYHV3e3wv8AfC9sr831R82MTa/sL018NxzdyQtpJrNPZCSLHrM9nsAJP1fYCfgEknTbG/YbGQD6c+BLalGmexXhit+r+YzMYwyAQ9Jxyz3rOhzJKUfo02leXkm8FJJr6OqVUA1BHnVxgLrgCSLHivj2N8CbAdMB/6Lqjkqxu53tp+VtKys6vkgL3xWe4zdapI2s30bgKRNgdUajmmQ7AR8CNgQ+GpL+VLg000E1ClJFr23M1Vy+Lrte5sOZsAtkDQd+DbVWluPA79oNKLB90ngUkm3Uf1VvAlwQLMhDY4yIXSupD1sn9l0PJ2UDu4GlOaS15fdK20/2GQ8k4GkWcCatq9rOpZBVzpn/7Ds3jzInbJNKsul/BEvHHwxsKtLJ1n0mKQ9qToTL6X6y+0twN/bPqPJuAaJpI1HO277v3sVy2Qk6c28eAn9DBoYA0nHU/VRbA+cSNW/dqXtDzca2AQkWfSYpGuBtw/VJsrzAy6yvWWzkQ0OSYuohnmqpdhUY9lfbntaI4FNApL+HXgFcA3V0E+o1i7LDO4xkHSd7de2vK8OnG/7LU3HNl7ps+i9FZZrdvoteWLhmCy/vk5pgjoE+DOqxdti/GYDr3b+ipyo35X3JyVtQPX/+foNxjNhSRa99xNJFwCnlv33U80RiDGStDnwGWAb4Bjg47Z/P/qnosb1VPMs7ms6kAF3bhl88RVgIVXN98RGI5qgNEM1QNIewLZl96e2z2oynkFTZm1/hqrz8MvAqbYHepG2fiHpEmAr4EpeuDhjZnCPgaSXDA0MKAMGVgGeGuTBAkkWMXDK6p13AT/m+Xb156R9ffwk/elw5UOT9qI9khaWGdyjlg2SNEP1mKT3Al8CXk7VQTv08KM1Gw1ssPxV0wFMVkkKEzOZZ3CnZtFjkhYD77R9U+3JET1Slssf7pdB/pgZA0n7Us3gng3M5/lksRT4ru0fNhTahCVZ9Jikn9netv7MiBhUmcEdEybp61SjTc7mhR2IA/sXR0RUJL0TuG7oMcmSPgfsAdwJHGz79ibjm4iM7++9NYEngR2Bd5bXOxqNaEBJelENbbiyiB76Z2AJPLdo6F9Q9bHNA45vMK4JS82ixySts/wzoiVtOsh/cTRlMo44icEm6dqh1RgkzQFusf2lsj/Q/zYzGqr3zpG0i+3H4LnnWvyA6vGL0QZJbwLeDMyQ9KmWQ2tSPVgqoikqS3s8CewA/FvLsVWG/8hgSLLovS9QJYzdqB65eDKwT7MhDZyVgdWp/v22Prv8MaoF2yKa8jWqdbUeA26yvQCgDKMd6FnxaYZqgKR3A/9A9YtuD9u/bjaiwSRpE9t3SlrV9pNNxxMBIGkm1Tyqa20/W8rWB1Ya5BWRkyx6RNI3eOE49h2A3wB3QGYdj4Uk2XZpjjoJWN32xpK2BA6w/bGGQ4yYdNIM1TvLP8f4qkaiGHBlSffPAx+lqvLvRDXSBNvXStqunLfxIP8VF9Fvkix6pDxuMSZuP+DcoR3bd0mtj7VgWXm/gwwNj+iYJIseK/MAjqB6tvGKPL+cwmZNxjVAjgE+R5Uw7ipPdbOklYCDgZsBbCdRRM9JWme048sPmx8k6bPoMUk3A5+kaoZ6bsVU279tLKgBJWld4OtUDz0S8J9Us2TzXUYjJN3O809x3Bh4pGxPB/7b9qbNRTcxSRY9JulXtrdpOo6I6B5J3wbOsn1e2d8FeLftA5qNbPySLHpM0lFUE8d+yAvXhlrYWFADRtI/2P7yMCPM
gIwsi+ZJWjTM439fVDZI0mfRe0O1ij8u76L6hfe2ZsIZSDeW9+VHmEX0i3slfRb4XtnfB7i3wXgmLMmi9y4dpizVu7HZRdIjGWEWfWxv4HDgLKr/vy8vZQMryaL3Hm/ZXoVqxdk8CGlsfg0cXWbFnk71DO6rG44p4jll1NPBklaz/UTT8XRC+iwaVh7mfoHttzYdy6CRtAmwV3m9FDiVKnFk+ZRoVBnSfSKTaHWBJIuGSVobmG/7lU3HMsjKQm1zgNfazsqz0ShJv6Ja1HKe7deVsuttD+zq0mmG6jFJi3i+j2IaMAM4srmIBpekFYFdqGoWO1D1Bx3RYEgRzxlmdYFnRjp3ECRZ9F7rU/GWAQ/YXjbSyfFikt5O1Vm4K3AlcBqw/2RpG45JYbjVBQa6bzLNUDFwJP0XcApwpu1Hmo4nYnkjrC7w8Sz3ERERz5G0re2f1ZUNkiSLiIgOm4zPh0+fRUREh0zm58MnWUREdM6kfT58mqEiIjps6PnwTcfRSUkWEREdIulrtj8h6RyGXxH5XQ2E1RFphoqI6Jx/L+9HNxpFF6RmERERtVKziIjoMEnbUi09swnV71kBtr1Zk3FNRGoWEREdJulm4JPAVbSsCTXIz4dPzSIiovMetX1+00F0UmoWEREdJukoqkl4PwSeHiq3vbCxoCYoySIiosMkXTJMsW2/refBdEiSRURE1EqfRUREhyy3HhRUE/MeAq6wfXsDIXXMCk0HEBExiayx3GtNYDZwvqS9mgxsotIMFRHRZZLWAS4a5CXKU7OIiOiy8oQ81Z7Yx5IsIiK6TNL2wEA/Ajgd3BERHSJpES9ebXYd4F7gg72PqHPSZxER0SGSNlmuyMBvbT/RRDydlGQRERG10mcRERG1kiwiIqJWkkVEDUkvk3RNed0v6Z6W/ZVH+MxHJX2wbH9I0gYtxz4hadVexR/RCemziBgDSUcAj9tu+7GZki4F/s72grJ/BzDb9kNjuMY028/UnxnRHalZRIzdCpKuApC0pSRL2rjs/0bSqpKOkPR3kv6carmH75eayMHABsAlQyuTStpR0i8kLZT0A0mrl/I7JH1J0kJgT0kfl3SjpOskndbITx5TVuZZRIzds8AqktYE3gIsAN4i6QrgQdtPStVkXdtnSDqIF9YsPglsb/shSesCnwX+zPYTkg4BPgUcWe7126ElIiTdC2xq+2lJ03v200aQZBExXj8HtgW2A74A7Ey1nMNPx3idNwKvBn5WEszKwC9ajv9Hy/Z1VDWUs4GzxxN0xHglWUSMz+VUtYpNgB8Bh1BNwPrxGK8j4ELbe49wvHUy125UyemdwGckbWF72RjvFzEu6bOIGJ+fAn8B3Gr7WeBhYFfgimHOXUq1XPVw+78EtpX0SgBJq0l61fIXkLQCsJHtS6gS01rA6h36WSJqpWYRMQ6271DVbnR5KboC2ND2cIvFfRc4XtLvgDcBJwA/kXSv7e0lfQg4VdJLyvmfBX693DWmAd+TtBZVbeRY2//TyZ8pYjQZOhsREbXSDBUREbWSLCIiolaSRURE1EqyiIiIWkkWERFRK8kiIiJqJVlEREStJIuIiKj1/wG1tv0FCCzQQwAAAABJRU5ErkJggg==",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# top 4 values of `place` by number of tweets\n",
+ "# (the 'unknown' value is counted like any other location)\n",
+ "fig, ax = plt.subplots()\n",
+ "ax.tick_params(axis='both', labelsize=10)\n",
+ "ax.set_xlabel('Location', fontsize=10)\n",
+ "ax.set_ylabel('Number of tweets', fontsize=10)\n",
+ "ax.set_title('Top 4 Locations', fontsize=10)\n",
+ "tweets_df.place.value_counts().head(4).plot(ax=ax, kind='bar', color='orange')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Source"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Twitter Web App 2717\n",
+ "Twitter for Android 2360\n",
+ "Twitter for iPhone 1531\n",
+ "Twitter for iPad 191\n",
+ "TweetDeck 127\n",
+ "Name: source, dtype: int64"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tweets_df[\"source\"].value_counts().head(5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The top five sources of tweets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAFmCAYAAACcOrbXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAngklEQVR4nO3deZgkVZ3u8e9LI7JvgsjeiO2CC4stgrigKCAouKACLsBwRR0YUcYZ0UFB3PCKzMjIoCggXh0QlE1hREBWZWsWWUSkRRBalmYRWhCYhvf+Eaeo7O6qimzIzMiueD/PU09FnIjM/FU93fWLOOcX58g2ERERE1ms6QAiImL4JVlEREStJIuIiKiVZBEREbWSLCIiolaSRURE1Fq86QAiBkXSc4Bzy+7zgCeA2WV/U9uPL+T7bQmcBvypNJ1s++BnHmnE8EmyiNawfR+wEYCkg4C/2T70Gb7tRbbf9gzfY6FIWtz23EF+ZkS6oaLVJG0l6WpJ10k6RtKzS/utkv5vab9c0gue5vtPkfR9SdeX9/pkad9I0qWSrpV0iqSVSvv5kqaX7VUk3Vq2d5d0uqRfAedKWlbSseU9r5X07nLe1pIukXSVpJMkLVvaD5H0u3LuM02Q0UJJFtFmSwLfB95n++VUd9of6zj+YGn/FvAf47zH5pJ+K+l/JL10jOMbAWvafll5r2NL+w+AT9t+BXAdcGAX8W4C7GT7DcDnRuIr7/ErSasABwBvtr0JMAPYr3S/vRN4aTn3S118VsQ8kiyizaYAf7L9h7J/HPD6juPHd3zffIzXXwWsa3tD4D+BU8c45xbg+ZL+U9K2wEOSVgBWtH3BOJ87nrNt31+23wwcMXLA9gPAZsAGwK8lXQPsBqwLPAg8Chwt6V3AI118VsQ8kiwixudxtqsG+yHbfyvbZwLPKlf3nec8AGwInA98FPhezWfOZfT/5ZLzHXu45rWiSigbla8NbO9Zxjc2BX4CvA34Rc37RCwgySLa7Algasd4xAeBCzqOv6/j+yXzv1jS8ySpbG9K9f/pvvnOWQVYzPZPqbqINrH9IPCApNeN8bm3Aq8s2ztNEPvZwN4dn7MScCmwxcjPI2kZSS8s4xYrlIT2SarkFbFQUg0VbfYosAdwkqTFgSuAb3ccX0nStcBjwC5jvH4n4GOS5gJ/B3b2gtM4rwkcK2nkwuwz5ftuwLclLU3VVbVHaT8UOFHSXsAZE8T+JeAISddTJb0v2D5Z0u7A8SMD9VQJag5wmqQlqe4+9pvgfSPGpExRHrGgUoU03fa9TccSMQzSDRUREbVyZxEREbVyZxEREbWSLCIiotakrIZaZZVVPHXq1KbDiIhYpFx55ZX32l51rGOTMllMnTqVGTNmNB1GRMQiRdJt4x1LN1RERNRKsoiIiFpJFhERUSvJIiIiaiVZRERErSSLiIiolWQRERG1kiwiIqJWkkVERNSalE9w98LU/Sdad2Zwbj1k+6ZDiIjInUVERNRLsoiIiFpJFhERUSvJIiIiaiVZRERErSSLiIiolWQRERG1kiwiIqJWkkVERNRKsoiIiFpJFhERUSvJIiIiavUtWUhaW9J5kn4n6QZJ+5b2gyTNknRN+dqu4zWfkTRT0k2Stulo37a0zZS0f79ijoiIsfVz1tm5wD/bvkrScsCVks4ux/7d9qGdJ0vaANgZeCmwBnCOpBeWw0cAbwHuAK6QdLrt3/Ux9oiI6NC3ZGH7TuDOsj1H0o3AmhO8ZEfgBNuPAX+SNBPYtBybafsWAEknlHOTLCIiBmQgYxaSpgIbA5eVpn0kXSvpGEkrlbY1gds7XnZHaRuvff7P2EvSDEkzZs+e3esfISKi1fqeLCQtC/wU+ITth4AjgfWBjajuPL7Ri8+xfZTt6banr7rqqr14y4iIKPq6Up6kZ1Elih/ZPhnA9t0dx78L/LzszgLW7nj5WqWNCdojImIA+lkNJeBo4Ebbh3W0r95x2juB68v2
6cDOkp4taT1gGnA5cAUwTdJ6kpagGgQ/vV9xR0TEgvp5Z7EF8EHgOknXlLbPArtI2ggwcCvwEQDbN0g6kWrgei6wt+0nACTtA5wFTAGOsX1DH+OOiIj59LMa6mJAYxw6c4LXfBn48hjtZ070uoiI6K88wR0REbWSLCIiolaSRURE1EqyiIiIWkkWERFRq68P5cXkMHX/M5oOAYBbD9m+6RAiWit3FhERUSvJIiIiaiVZRERErSSLiIiolWQRERG1kiwiIqJWkkVERNRKsoiIiFpJFhERUSvJIiIiaiVZRERErSSLiIiolWQRERG1kiwiIqJWkkVERNRKsoiIiFpJFhERUSvJIiIiaiVZRERErSSLiIiolWQRERG1kiwiIqJWkkVERNTqW7KQtLak8yT9TtINkvYt7StLOlvSzeX7SqVdkg6XNFPStZI26Xiv3cr5N0varV8xR0TE2Pp5ZzEX+GfbGwCbAXtL2gDYHzjX9jTg3LIP8FZgWvnaCzgSquQCHAi8GtgUOHAkwURExGDUJgtJ75G0XNk+QNLJnVf947F9p+2ryvYc4EZgTWBH4Lhy2nHAO8r2jsAPXLkUWFHS6sA2wNm277f9AHA2sO3C/JAREfHMdHNn8TnbcyS9FngzcDTlqr9bkqYCGwOXAavZvrMcugtYrWyvCdze8bI7Stt47fN/xl6SZkiaMXv27IUJLyIianSTLJ4o37cHjrJ9BrBEtx8gaVngp8AnbD/Uecy2AXf7XhOxfZTt6banr7rqqr14y4iIKLpJFrMkfQd4H3CmpGd3+TokPYsqUfzI9sml+e7SvUT5fs/I5wBrd7x8rdI2XntERAxIN3/03wucBWxj+6/AysC/1L1Ikqi6rG60fVjHodOBkYqm3YDTOto/VKqiNgMeLN1VZwFbS1qpDGxvXdoiImJAFq87wfYjku4BXgvcTFXldHMX770F8EHgOknXlLbPAocAJ0raE7iNKhkBnAlsB8wEHgH2KJ9/v6QvAleU8w62fX8Xnx8RET1SmywkHQhMB14EHAs8C/ghVTIYl+2LAY1zeKsxzjew9zjvdQxwTF2sERHRH910Q70T2AF4GMD2X4Dl+hlUREQMl26SxeOdVUuSlulvSBERMWy6SRYnlmqoFSV9GDgH+G5/w4qIiGHSzQD3oZLeAjxENW7xedtn9z2yiIgYGt0McK8HXDSSICQtJWmq7Vv7HVxERAyHbrqhTgKe7Nh/orRFRERLdJMsFrf9+MhO2e56uo+IiFj0dZMsZkvaYWRH0o7Avf0LKSIihk3tmAXwUeBHkr5V9u+gejI7IiJaYsJkIWkK8DHbm5XZY7H9t4FEFhERQ2PCZGH7ibKORZJERESLddMNdbWk06kqoB4eaeyYcjwiIia5bpLFksB9wJs62gwkWUREtEQ3T3DvMYhAIiJieHXzBPexjLH0qe1/6EtEERExdLrphvp5x/aSVFOW/6U/4URExDDqphvqp537ko4HLu5bRBERMXS6eYJ7ftOA5/Y6kIiIGF7djFnMoRqzUPl+F/DpPscVERFDpJtuqCyhGhHRct0McFMmEnx92T3f9s8nOj8iIiaX2jELSYcA+wK/K1/7SvpKvwOLiIjh0c2dxXbARrafBJB0HHA18Nl+BhYREcOj22qoFTu2V+hDHBERMcS6ubP4KtVkgudRVUS9Hti/r1FFRMRQ6aYa6nhJ5wOvKk2ftn1XX6OKiIih0s0A9xbAQ7ZPB5YH/lXSun2PLCIihkY3YxZHAo9I2hDYD/gj8IO+RhUREUOlm2Qx17aBHYEjbB8B5EG9iIgW6SZZzJH0GeADwBmSFgOeVfciScdIukfS9R1tB0maJema8rVdx7HPSJop6SZJ23S0b1vaZkrKwHpERAO6qYZ6H7ArsKftuyStA3y9i9d9H/gWC3ZZ/bvtQzsbJG0A7Ay8FFgDOEfSC8vhI4C3AHcAV0g63fbvuvj8iJ6buv8ZTYcAwK2HbN90CNEy3VRD3QUc
1rH/Z7oYs7B9oaSpXcaxI3CC7ceAP0maCWxajs20fQuApBPKuUkWERED9HSmKH+m9pF0bemmWqm0rQnc3nHOHaVtvPYFSNpL0gxJM2bPnt2PuCMiWmvQyeJIYH1gI+BO4Bu9emPbR9mebnv6qquu2qu3jYgIJkgWks4t37/Wqw+zfbftJ8o8U99ltKtpFrB2x6lrlbbx2iMiYoAmGrNYXdJrgB3KWIE6D9q+amE/TNLqtu8su+8ERiqlTgf+W9JhVAPc04DLy2dOk7QeVZLYmWqwPSIiBmiiZPF54HNUV/OHzXfMwJsmeuOyVveWwCqS7gAOBLaUtFF5/a3ARwBs3yDpRKqB67nA3rafKO+zD3AWMAU4xvYN3f94ERHRC+MmC9s/AX4i6XO2v7iwb2x7lzGaj57g/C8DXx6j/UzgzIX9/IiI6J1uSme/mJXyIiLarZuJBL9KVsqLiGi1bp7g3p6slBcR0WpZKS8iImplpbyIiKiVlfIiIqJWN3cWlAfpTu9zLBERMaSamEgwIiIWMUkWERFRa8JkIWmKpN8PKpiIiBhOEyaLMj/TTWV1vIiIaKluBrhXAm6QdDnw8Eij7R36FlVERAyVbpLF5/oeRUREDLVunrO4QNK6wDTb50hammq68IiIaIluJhL8MPAT4DulaU3g1D7GFBERQ6ab0tm9gS2AhwBs3ww8t59BRUTEcOkmWTxm+/GRHUmLU610FxERLdFNsrhA0meBpSS9BTgJ+Fl/w4qIiGHSTbLYH5gNXEe1ZvaZwAH9DCoiIoZLN9VQT5YFjy6j6n66yXa6oSIiWqQ2WUjaHvg28Eeq9SzWk/QR2//T7+AiImI4dPNQ3jeAN9qeCSBpfeAMIMkiIqIluhmzmDOSKIpbgDl9iiciIobQuHcWkt5VNmdIOhM4kWrM4j3AFQOILSIihsRE3VBv79i+G3hD2Z4NLNW3iCIiYuiMmyxs7zHIQCIiYnh1Uw21HvBPwNTO8zNFeUREe3RTDXUqcDTVU9tP9jWaiIgYSt0ki0dtH973SCIiYmh1Uzr7TUkHStpc0iYjX3UvknSMpHskXd/RtrKksyXdXL6vVNol6XBJMyVd2/n+knYr598saben9VNGRMQz0k2yeDnwYeAQqgf0vgEc2sXrvg9sO1/b/sC5tqcB55Z9gLcC08rXXsCRUCUX4EDg1cCmwIEjCSYiIganm26o9wDP75ymvBu2L5Q0db7mHYEty/ZxwPnAp0v7D8qcU5dKWlHS6uXcs23fDyDpbKoEdPzCxBIREc9MN3cW1wMr9ujzVrN9Z9m+C1itbK8J3N5x3h2lbbz2BUjaS9IMSTNmz57do3AjIgK6u7NYEfi9pCuAx0Yan2nprG1L6tnstbaPAo4CmD59embFjYjooW6SxYE9/Ly7Ja1u+87SzXRPaZ8FrN1x3lqlbRaj3VYj7ef3MJ6IiOhCN+tZXNDDzzsd2I1qsHw34LSO9n0knUA1mP1gSShnAV/pGNTeGvhMD+OJiIgudPME9xxG19xeAngW8LDt5WtedzzVXcEqku6gukM5BDhR0p7AbcB7y+lnAtsBM4FHgD0AbN8v6YuMTlx48Mhgd0REDE43dxbLjWxLElXl0mZdvG6XcQ5tNca5BvYe532OAY6p+7yIiOifbqqhnuLKqcA2/QknIiKGUTfdUO/q2F0MmA482reIIiJi6HRTDdW5rsVc4FaqrqiIiGiJbsYssq5FRETLTbSs6ucneJ1tf7EP8URExBCa6M7i4THalgH2BJ4DJFlERLTERMuqfmNkW9JywL5Uzz+cQDXzbEREtMSEYxZlivD9gPdTzRK7ie0HBhFYREQMj4nGLL4OvItqcr6X2/7bwKKKiIihMtFDef8MrAEcAPxF0kPla46khwYTXkREDIOJxiwW6unuiIiYvJIQIiKiVpJFRETUSrKIiIhaSRYREVErySIiImolWURERK0ki4iIqJVkERERtZIsIiKiVpJFRETUSrKIiIha
SRYREVErySIiImolWURERK0ki4iIqJVkERERtZIsIiKiVpJFRETUSrKIiIhajSQLSbdKuk7SNZJmlLaVJZ0t6ebyfaXSLkmHS5op6VpJmzQRc0REmzV5Z/FG2xvZnl729wfOtT0NOLfsA7wVmFa+9gKOHHikEREtN0zdUDsCx5Xt44B3dLT/wJVLgRUlrd5AfBERrdVUsjDwS0lXStqrtK1m+86yfRewWtleE7i947V3lLZ5SNpL0gxJM2bPnt2vuCMiWmnxhj73tbZnSXoucLak33cetG1JXpg3tH0UcBTA9OnTF+q1ERExsUbuLGzPKt/vAU4BNgXuHuleKt/vKafPAtbuePlapS0iIgZk4MlC0jKSlhvZBrYGrgdOB3Yrp+0GnFa2Twc+VKqiNgMe7OiuioiIAWiiG2o14BRJI5//37Z/IekK4ERJewK3Ae8t558JbAfMBB4B9hh8yBER7TbwZGH7FmDDMdrvA7Yao93A3gMILSIixjFMpbMRETGkkiwiIqJWkkVERNRKsoiIiFpJFhERUSvJIiIiaiVZRERErSSLiIiolWQRERG1kiwiIqJWkkVERNRKsoiIiFpJFhERUSvJIiIiaiVZRERErSSLiIiolWQRERG1mlhWNSImgan7n9F0CADcesj2TYfQCrmziIiIWkkWERFRK8kiIiJqJVlEREStDHBHRDxDbRjsz51FRETUSrKIiIhaSRYREVErySIiImolWURERK0ki4iIqLXIJAtJ20q6SdJMSfs3HU9ERJssEslC0hTgCOCtwAbALpI2aDaqiIj2WCSSBbApMNP2LbYfB04Admw4poiI1pDtpmOoJWknYFvb/6fsfxB4te19Os7ZC9ir7L4IuGnggS5oFeDepoMYEvldjMrvYlR+F6OG4Xexru1Vxzowaab7sH0UcFTTcXSSNMP29KbjGAb5XYzK72JUfhejhv13sah0Q80C1u7YX6u0RUTEACwqyeIKYJqk9SQtAewMnN5wTBERrbFIdEPZnitpH+AsYApwjO0bGg6rG0PVLdaw/C5G5XcxKr+LUUP9u1gkBrgjIqJZi0o3VERENCjJIiIiaiVZ9IGk5SUt13QcETG8JL1yjLa3NRFLNzJm0UOSXgUcAywHCPgr8A+2r2wyrkGStMlEx21fNahYhoUkAe8Hnm/7YEnrAM+zfXnDoQ2MpHdNdNz2yYOKZVhIugr4kO3ry/4uwCdsv7rZyMaWZNFDkq4F9rZ9Udl/LfBftl/RbGSDI+m8srkkMB34LVXifAUww/bmTcXWFElHAk8Cb7L9EkkrAb+0/aqGQxsYSceWzecCrwF+VfbfCPzG9tBeUfeLpOcDPwF2BV4HfAh4m+0HGw1sHItE6ewi5ImRRAFg+2JJc5sMaNBsvxFA0snAJravK/svAw5qMLQmvdr2JpKuBrD9QHleqDVs7wEg6ZfABrbvLPurA99vMLTG2L5F0s7AqcCfga1t/73ZqMaXZNFbF0j6DnA8YOB9wPkjXTMt64J50UiiALB9vaSXNBlQg/63zJxsAEmrUt1ptNHaI4miuBtYp6lgmiDpOsq/hWJlqufHLpPEsPZEJFn01obl+4HztW9M9Y/jTYMNp1HXSvoe8MOy/37g2gbjadLhwCnAcyV9GdgJOKDZkBpzrqSzqC6ooLqgOqfBeJqwSHa5Zcwi+kLSksDHgNeXpguBI20/2lxUzZH0YmArqvGbc23f2HBIjSmD3a8ruxfaPqXJeJoiaTPgBttzyv7ywEtsX9ZsZGNLsughSc+huqt4LdWdxMXAwbbvazSwaFzphlqNjrt5239uLqJoWhnD2sTlj7CkxaiKQCasKGxKuqF66wSqK+h3l/33Az8G3txYRAMm6UTb7x2jXxZgaPtj+0nSP1FdRNwNPEF1d2GqCrFWKVfT/wm8BFiCqq/+YdvLNxpYM+SOq3XbT0oa2r/JubPoIUnX237ZfG3X2X55UzENmqTVbd8pad2xjtu+bdAxNU3S
TKqKqNbfYUqaQTVr9ElUpdUfAl5o+zONBtaAUjF4PnBkafpH4I2239FUTBPJE9y99UtJO0tarHy9l2qm3NYYqXQpSeFR4OXl6+9tTBTF7cBQ1s43wfZMYIrtJ2wfC2zbdEwN+SjVMyezgDuAVzO62ufQyZ1FD0maAyzDaFfDYsDD5bDbdKtdEuXXqa6cRDWg+S+2f9JkXE2QdDTVUr9nAI+NtNs+rLGgGiLpQqpu2e8BdwF3Arvb3nDCF0bjkiyiLyT9FniL7XvK/qrAOW38oyBp/lJqAGx/YdCxNK1MdXIP1XjFJ4EVqGY5mNloYA2Q9EKqLqjVbL9M0iuAHWx/qeHQxpRk0SeS1gd2AXax/dKm4xm0+cdqSqXHb9s0fjM/ScsC2P5b07EMmqRpwKHA+sB1wKdst3ppZEkXAP8CfMf2xqVtgXHPYZExix6StIak/SRdAdxAVemxc8NhNeUXks6StLuk3am6YM5sOKZGSHpZKZO8AbhB0pWS2nYBcQzwc6pKwauoKqLabukxJpMc2umBhrZMa1EiaS+qu4g1gROBPYHT2tjNAE/Nsno48CqqZ04Ajmrrw1dUy2XuZ/s8AElbAt+lGtxsi+Vsf7dsf73MuNp295YeiJHnLHaiGsMZSkkWvfEt4BJgV9szACS1tn/PtiWdWbqcWjf19BiWGUkUALbPl7RMkwE1YElJG1MVOwAs1bnfsnnTRuxNdSHxYkmzgD9RPZs1lDJm0QPlye33UN1dPI/q7mJ322s3GliDJB0HfMv2FU3H0jRJp1B1vfy/0vQB4JW239lcVIMl6XzGeEizsO02zZs2j3LhsNjItB/DKsmixyStRTU52i5UZbSn2P5ss1ENnqTfAy8AbqMqHxbVH4U2PrW8EvAFRrvkLgIOsv1Ac1FFkyS9iOqZiheXphupumr/0FxUE0uy6KNSGrez7YObjmXQ8gR3dJL0Jtu/Gm/FvDatlCdpc6ru2e8AV1NdSG0MfBh4l+1LGwxvXEkW0VOSVp7ouO37BxXLsCgXDZ8CpjLvRIKt6XqR9AXbB3asmNfJtv9h4EE1RNL/AF+zff587W8A9rf91kYCq5FkET0l6U9UfdOiWtTmgbK9IvBn2+s1F10zygOK3waupHq6H4A2rc0eoyT9wfYLxzl2k+0XDTqmbqQaKnpqJBlI+i7VeM2ZZf+twDsaDK1Jc20fWX9atMREA9kPT3CsUbmz6LHSJ/vUehZtfbZgrNl2WzgD70iX3Mepprg4hXnnhmpdl1yApHuoljNY4BDwXturDTikriRZ9JCk/6KqAOpcMvKPtvduLqpmlKUzL2LeZVVfb3ub5qIarNIlNx7bfv7AghkCZcqXzWz/pulYmiRpt4mO2z5uULEsjCSLHirloi+Zb+WrG2y/pNnIBq9cVR/IvMuqfiFX0+0m6eqReZDaTtJ7bJ9U1zYsMjdUb82kGtQdsXZpax3b99ve1/bG5WvftiUKSdMknSrpeknHS1qz6ZiGwLmS3l2mhGm7sRZ8GtpFoHJn0QOSfkY1RrEC1XxIl5f9VwOX296yueiakXJRkHQR8AOqu6odgM1tj/mcQVvMt+bL3xl9WLNNa728FdgOeC/Vsssjlgc2sL1pI4HVSDVUbxzadABD6CSqctHv0VEu2jKZPG8+tpdrOoYh8BdgBtUFRGf59ByqNT6GUu4seqw8uTzN9jmSlgIWH/Y5X/pB0pW2X9l0HE0qY1i7MDp53o+AXUf2Wzp5HpJ2YHQs63zbP28ynqZIehbVBfs6tm9qOp46SRY9JOnDVPO9rGx7/bLgy7dtb9VwaAMn6SBaXi6ayfMWJOkQqq7aH5WmXYAZtoe2r75fJL2dqldiCdvrSdoIONj2Ds1GNrYkix6SdA2wKXBZx8pXrXq2YMQ4ZaOtKxeNeUm6FtjI9pNlfwpwdUsnmLwSeBPV3dXQ/73ImEVvPWb78ZFCD0mLM/6V5aQ21rQekpZoIpamZPK8ca0IjNxhrtBgHE37
X9sPzlcYNrR/L5IseusCSZ+lWtjlLcA/Aj9rOKZGlRLJN1H11b8NGMqnU/vkDcCvgLePccy0c2GorwJXSzqPauzm9cD+zYbUmBsk7QpMKV3WHweG9oHFdEP1UHkIb09ga6r/CGcB33MLf8mSNqNKEO8AVqZaFez0rOHQTpK2sP1rSc+m+vfwqnLoctt3NRhaYyQtDfwb1d8LqP5efMn2o81FNb4kix6QdB9wGfBrqiuDy2w/0mxUzZD0FapVA/9MNe3JKVQDmK2bbTZGjVTHSbrK9iZNxzNMJC29KPy9SLLoAUnLA5sBrylfr6RaT/fXwK9tn9hgeANVJkn7A/AfwM9sPybplgxst5ukS4Frqe40F5hEz/bHBx1T0yS9huo5pGVtryNpQ+Ajtv+x4dDGlGTRB2VN3T2ATwDr2Z7SbESDU6pb3kJVErkVcB7wZmBt23ObjK0JmTyvImkVqn8HXwM+P//xYZ08r58kXQbsRNU9O1INdb3tlzUb2dgywN0DktZg9K5ipC/2SuAA4JKm4mqC7SeAXwC/KP3TbwOWAmZJOtf2ro0GOGC2n5R0BNWyma1l+17gBEk32v5t0/EMC9u3z1cNNbSzHSRZ9MYdwFXAv1Mti/h4w/EMBduPAT8Fflq66t7RbESNOVfSu4GT21js0CmJYh63l64ol6e59wVubDimcaUbqgfKAuybU91ZrAfcSnVHcQnV4O5j4786JrtMnhdjKV1z36TqnhPwS2Bf2/c1Gtg4kiz6QNJUqtr6fYG1bC/ZbEQRzSvjNzu1qeBjIpKWHNYy2bEkWfSIpBczOm6xBdVTqpdSVUO1albaDOouKJPnVSTNsD296TiGgaSZwN1UK0peRLUM84PNRjW+JIsekHQv1bTDl1CetbDdykWPRmRFtFGZPG9U+V3cS7WOw8Mj7W2aYLKTpHWA11FdYG4H/NX2Ro0GNY4kix6QtMIwXxE0QdKhVMmz9YO6mTxvVCaYHCVpLapE8QZgQ6r5si62/dVGAxtHkkX0RQZ1R5VkseXI1XNZn/z8NiaLGCXpSeAK4Cu2T2s6njpJFhF9JmkX4BCqBxSfmjzP9o8nfOEkVOZD2o9qwZ+9ygR6L2rTGI6kxW3PLU9sv5bq38M6wM3ABbaPbjTAcSRZ9EgqPRbU9kHdTJ63IEk/pnpg9UO2X1aSx2+GtZ++Hzrnx5K0LFXCeB3wAQDb6zYY3riSLHoolR6jMqibyfPGMvJ/pLMAQtJvbW/YdGyDMvLvQdIM4NlUk49eSDVecVuz0Y0vyaKHUukxKoO6mTxvLJJ+QzVn2K/LH8z1geNtb9pwaAMj6Q7gMGAK8GRpfuoPse3DmoirTqb76K33le97d7QZaF2lR7Ei7V4R7W1UT+duQ9X1EnAQ1dxha0v6EVXJ6B6NRjR4U4BlqcavFhm5s4i+yKDuKEkbZk6kUZKeQzWlv4BLyySDrbGodksmWfRQKj0yqBsTKzMPb1XXNpktqg+sphuqt46l6m54TdmfBZwEtCZZAIdTLf50Sbl6Or3heGIISFoSWBpYRdJKjHbBLA+s2VhgzVgkE2OSRW+tb/t9pQsG249ovsnqW+B/JR0FrCXp8PkPtm1QNyXVT/kI1WJga1BdUI38v3gI+FZDMTViUS14SbLorcclLUWpbCiVHm2bnjyDuh3K4kf/CrQ6Wdj+JvBNSR+3Pc9FROmyjCGXMYsekrQ18G/ABlRz028B7GH7vEYDa0AGdUelpHrUWIO7i+qAb9skWfRY2ys9YkGZPA8kPY9qbOKHwK7MO2bxbdsvbiq26E6SRQ+l0iNibJJ2A3YHpgMzOg7NAb5v++Qm4oruJVn0QEelx3nAlsx71fSLtl01ZVB3XimpHiXp3bZ/2nQcsfCSLHpA0r6MVnrMYt5Kj+/ablW1B2SerE6ZPA8kfcD2DyX9Mx1TW4wY1ikuYlSqoXoglR5jOkfSp8igLqSkGqq1TaCa5iIWQbmz6KFUeozKoO6oTJ43StKS
th9tOo5YeLmz6IGOSo+lJG3MvGMWSzcWWINsr9d0DEPkIDJ53ojrJd0NXFS+Ls6SxIuG3Fn0QCo9FpRB3XmlpHqUpHWoFvvZAtgO+Gubxm8WVUkWPZRKj1EZ1B2VkupRktaiShRvADakmsL+YttfbTSwqJVuqB4YqfQApkrab/7jLa30aP2gbibPG9OfgSuAr9j+aNPBRPeSLHojlR4LyjxZmTzvKZIWtz0X2JhqzeldJe0P3AxcYPvoRgOMWumG6qFUeozKPFmjxiuptt2a5NlZFShpWaqE8TrgAwC2120wvOhCkkUPSZoJpNKjyKBuJSXVoz+vpBnAs4HfABdS/R+5rdnoohtJFj2WSo9KBnUzeV4nSXcAh1GtP/1kaX7qj09Lx/UWKRmz6KFS6bEFVbLYELgBuLjRoAYsg7rz2IaqpHotqj+UI+YAn20ioAZNoRrTa1WRw2SSO4sekvQko5UepzUdTxMyT9aCUlLdvm63ySjJogdGKj0kbUg1cPd6YB1aXOmRQd1MntdJ0tW2N246jnj6Fms6gEnicoCyMtxxwLHAr6gePPp8g3E1afcx2i4ZdBAN6yypXm6MrzZpzVjVZJUxix4ao9Lj9W2r9Mg8WaNsf6dsfq3tJdUtnW14Ukk3VA+k0mNU5slaUEqqYzJIsugBSXcCRzJOpYftLww2ouZlUHdeKamORV26oXrjTtsHNx3EMMg8WQtKSXVMBkkWvZHa8VGZJ2tBmTwvFnnphuoBSStnAG9emScrJdUxuSRZRF9kUDeT58Xkkm6o6AvbL+gY1N0eOEJSKwd1U1Idk0HuLKIvsiJaSqpjcsmdRfRLBnUzeV5MIrmziJ7KoO6oTJ4Xk0mSRfRUBnVHZfK8mEzSDRV9kUFdIJPnxSSSO4voqQzqRkxOubOIXsugbsQklDuL6KkM6kZMTln8KHotdxQRk1DuLKKnMk9WxOSUZBEREbXSDRUREbWSLCIiolaSRUQNSc+RdE35ukvSrI79JcZ5zUclfahs7y5pjY5jn5C09KDij+iFjFlELARJBwF/s33oQrzmfOBTtmeU/VuB6bbvXYj3mGL7iYWLNqJ3cmcRsfAWk3QlgKQNJbms3YGkP0paWtJBkj4laSdgOvCjcieyL7AGcJ6k88prtpZ0iaSrJJ1U5tRC0q2SvibpKuA9kj4u6XeSrpV0QiM/ebRWnuCOWHhPAktKWp5qksQZwOskXQzcY/sRqXrcxPZPJO3DvHcWnwTeaPteSasABwBvtv2wpE8D+wEHl8+6r2Nixr8A69l+TNKKA/tpI0iyiHi6fgNsQTUF+1eAbakeSLxoId9nM2AD4NclwSwBXNJx/Mcd29dS3aGcCpz6dIKOeLqSLCKengup7irWBU4DPk01YeIZC/k+As62vcs4xx/u2N6eKjm9Hfg3SS+3PXchPy/iacmYRcTTcxHVGh03236SatnY7YCLxzh3DrDcOPuXAltIegGApGUkvXD+N5C0GLC27fOoEtMKVBM2RgxE7iwingbbt6rqN7qwNF0MrGX7gTFO/z7wbUl/BzYHjgJ+Iekvtt8oaXfgeEnPLucfAPxhvveYAvxQ0gpUdyOH2/5rL3+miImkdDYiImqlGyoiImolWURERK0ki4iIqJVkERERtZIsIiKiVpJFRETUSrKIiIha/x/BeLoI+FpeGQAAAABJRU5ErkJggg==",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# top 5 values of `source` by number of tweets\n",
+ "# (bars use matplotlib's default colour, as in the original cell)\n",
+ "fig, ax = plt.subplots()\n",
+ "ax.tick_params(axis='both', labelsize=10)\n",
+ "ax.set_xlabel('Source', fontsize=10)\n",
+ "ax.set_ylabel('Number of tweets', fontsize=10)\n",
+ "ax.set_title('Top 5 Sources', fontsize=10)\n",
+ "tweets_df.source.value_counts().head(5).plot(ax=ax, kind='bar')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Possibly sensitive"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.0 3866\n",
+ "unknown 3463\n",
+ "1.0 111\n",
+ "Name: possibly_sensitive, dtype: int64"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tweets_df.possibly_sensitive.value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPwAAADnCAYAAAA6ujs/AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAaD0lEQVR4nO3deZhcVZ3G8e+v6nZC1srSkIQEUiEJCQRZREDAFQHRQoiKKK4s4uOoiA8jWCojd2DGp1VERFAZBoy4gSJ7jQqyK6sBlE0lQpFOZyPpcLOntzN/nNvQaXqp213V5966v8/z9FPV1dVdb5S37lLnniPGGJRS6ZBxHUApNXK08EqliBZeqRTRwiuVIlp4pVJEC69UimjhlUoRLbxSKaKFVypFtPBKpYgWXqkU0cIrlSJaeKVSRAuvVIpo4euEiBwnIv8QkWUiUuzj56NF5Prw54+ISN5BTOWYFr4OiEgWuAJ4D7AvcIqI7NvraWcAG4wx84DvAd8a2ZQqDrTw9eFQYJkx5gVjTBtwHXBir+ecCPw0vH8D8C4RkRHMqGJAC18fZgLNPb5fET7W53OMMR1AAEwdkXQqNrTwSqWIFr4+tAB79Ph+VvhYn88REQ/IAetHJJ2KDS18fXgMmC8ic0RkFPAR4NZez7kV+FR4/yTgbqMzmKaO5zqAGj5jTIeIfAH4A5AFrjHGPCMiFwJ/McbcClwN/ExElgGt2DcFlTKib/JKpYfu0iuVIlp4pVJEC69UimjhlUoRPUufEvliaQJ2nP0CYDrQGH5NBSYB44Fx4e1oYDuwFdjS67b7fivwL+B5YBnQUm4q6BngmNOz9HUmXyxNxBZ7UXjbfX+PgX6vCray8xvA88DjwJPlpkJXjV9bVUgLn3D5YikHHAUcE37Nc5vodQLgT8C94dcT5aZCp8tAaaaFT5h8sZQFDgOOxRb8UJJ1aNb9BnAfcHe5qbDUcZ5U0cInQL5YGgWcAJwCvAs7Dr5elIFfA9eVmwpPOM5S97TwMZYvlvbDTlzxcewJtnr3T+BnwJJyU2GF6zD1SAsfM+FJt1OwRT/EcRxXuoA/Aj8Bbiw3Fdoc56kbWviYyBdLBwNnAx8ExjqOEycrgO8AV5WbCttch0k6Lbxj+WLpCOA/gONcZ4m5tdi5+K4oNxU2uQ6TVFp4R/LF0pHAhdiP1FTlXgF+AFxabiq0Os6SOFr4EZYvlg4Avgm813WWhNsM/BhoKjcVdOaeCmnhR0i+WJqFnRr6FEBni62edcBXgJ/o0N7BaeFrLF8sCfBvQBMwwXGcevZn4LPlpsLTroPEmRa+hvLF0kLgf4EjXWdJiQ7gUsAvNxW2OM4SS1r4GsgXSw3Y3czzsVeeqZHVDJxdbirc5DpI3GjhqyxfLB2K3aq/wXUWxS3A6Xo2/zVa+CoJL2r5L+A8dGKROHkJ+FC5qfCY6yBxoIWvgnyxNBW7ntvRrrOoPrUB55SbCle4DuKaFn6Yws/VbwbybpOoClwHnFluKmx2HcQV3fUchnyx9FHgQbTsSfER4LF8sdR7Ke3U0C38EITH698GznGdRQ3JFuAz5abCL10HGWla+IjC4/XrsRNRqGT7armp0OQ6xEjSwkeQL5amA3cD+7jOoqrmUuwJvVQUQQtfoXyxNBNb9r1dZ1FV90vgU+WmQofrILWmha9AvljaA7gHmOs6i6qZW4CT6312HT1LP4h8sTQbO8Oqlr2+nQjclC+WdnEdpJZ0Cz+AfLG0F3bLvqfrLGrE3AmcWK/TaekWvh/5Ymk+dsuuZU+XY4Cf54uluuxGXf6jhqvHCbpZrrMoJz6AnTiz7mjhe8kXS+OB29Cyp905+WLp865DVJsew/cQjqC7BSi4zqJioRNYXG4q3O46SLXoFn5nl6BlV6/JAtfli6U3ug5SLbqFD+WLpVOxK50o1dsq4LByU6HZdZDh0sLz6iw196PTUan+PY0t/VbXQYYj9bv0+WJpGnAjWnY1
sP2A77oOMVypLzywBJjpOoRKhM/mi6VEn+NJdeHzxdIZ6JpuKpqr88XSrq5DDFVqCx9eEHOJ6xwqcaZhZyVOpNQWHrgamOg6hEqkE/LF0pmuQwxFKs/S54ulzwBXus6hEm0LcGC5qbDMdZAoUlf48HLXp9B13tTwPQIcWW4qdLoOUqlU7dKHCztejZZdVcdhwBmuQ0SRqsIDp6GTT6rqujBfLCVmA5KawoczmVzoOoeqO9OwC4cmQmoKD5yFDrBRtXFO+DFv7FVceBEZKyL/ISJXhd/PF5HjaxetevLFUg4ous6h6tYY4JuuQ1Qiyhb+J8AO4PDw+xbsaqlJcB4wxXUIVdc+li+W3uQ6xGCiFH6uMebbQDuAMWYrIDVJVUXh4hFfcp1D1T0hARfXRCl8m4iMAQyAiMzFbvHj7hvAWNchVCq8LV8sneA6xECiFN4Hfg/sISK/AO7C7irHVr5Ymgt82nUOlSqxPmNfceGNMXdgZ/M8FfgV8CZjzL21iVU1RaDBdQiVKkfki6XDB3+aG1HO0t8GHAvca4y53Rizrnaxhi88M3+K6xwqlb7sOkB/ouzSXwy8FXhWRG4QkZNEJM7L8nwCGOc6hEqlxeHhZOxE2aW/zxjzOWAv7JVmJwNraxWsCj7rOoBKrQwx/e8v0ki78Cz9B7H/mEOAn9Yi1HDli6W3Aotc51CpdlocF6aMcgz/a+A54Cjgcuzn8mfVKtgwxfLdVaXKVOxecKx4EZ57NXCKMSbW1/6G842d5DqHUtgNz7WuQ/Q06BZeRI4K744DThSRD/T8qm28ITkNGOU6hFLA4fliKVarD1eyhX87diXV9/XxM4Od0z1OEjUhgap7i4HLXIfoVvEUVyIyxxjz4mCPuZQvlhZhVwhRKi7uKTcVjhr8aSMjyln63/bx2A3VClIli10HUKqXt+WLpamuQ3QbdJdeRBZiP+LK9TpmnwjE7WOHxa4DKNVLFns4vMRxDqCyY/gFwPHAJHY+jt8ExGZu7nyxtDtwsOscSvXh/cSk8FGO4Q83xjxU4zxDli+WTsd+dKhU3GwHGstNhS2ug1SyS39eOPHFR0XkdRejGGO+WJNk0R3rOoBS/dgFu4ZhX+fBRlQlu/TPhbd/qWWQ4cgXSxngaNc5lBrAsSSh8MaY28LbV8fNi0gGGG+M2VjDbFG8ETuUUam4OsR1AIg2lv6XIjJRRMZhP+t+VkTOrV20SI5wHUCpQbwhDhfTRPkcft9wi74Y+B0wB3vNeRwc4DqAUoPwgANdh4hS+AYRacAW/lZjTDvhhJYxoIVXSeB8tz5K4a8EytiLaO4XkdmA82P4fLHkode+q2RwXviKL481xlzGzhcBvCQi76x+pMgWEL8Rf0r1JTmFF5HR2Nlu8r1+z/UCjbo7r5JiQb5YmlhuKjjbM46yS38LcCLQAWzp8eWaFl4lheB4+HeUGW9mGWOOq1mSoTvQdQClIpgL3OPqxaNs4R8UkTfULMnQxTGTUv3Z3eWLR9nCvwU4VURexK4pJ4Axxuxfk2QVyBdLWWCaq9dXaggSU/j31CzF0E0h4lTbSjnmtPBRFqJ4CdgDOCq8vzXK79fIro5fX6moZrp88Shj6S/Aroz51fChBuDntQgVgRZeJU0ytvDYWTtOIPwozhizEphQi1ARaOFV0uwWjg51Ikrh24ydHscAhFfNudboOoBSEWWA6S5fvFK/FpErgUkicibwR+Cq2sSqmG7hVRI5K3yUsfQXi8gx2AtmFgDfMMbcWbNkldHCqyQa4+qFo4ylHwfcbYy5U0QWAAtEpCG8TNaVyQ5fW6mhSsQx/P3AaBGZCfweO/nFklqEiqDL8esrNRQNrl44SuHFGLMV+ADwI2PMh3B/HXqb49dXaiicbeGjvLCIyOHAx3htwcZs9SNF4vJwIvGmsLF1rqxcszDTHCyUl3bMz7TITFm3yxQ2TRbax6/xvKDZa9i0vGHUtuUNXnuL55nVXjbbms2O3pSR8dsz5Low
47HDrFWlukYZKAz6NBG5BrsIzFpjzH59/FyA7wPvxQ6EO9UY8/hAfzNK4c/GDrq5yRjzjIjshcOrfkK6hR9AI6+sm5dpWbtAmoN9pLltXqZFZsrLYyazefJo2qeLMAU7PLlPszvbZ8zubIcdW/t9jR3C9jVZb12L573S3OBtafa8tpYGr2tVNptZ72VHb8xkxm0XmdQJjdg5FVS2rdI3yCXA5fS/xvx7gPnh12HAj8LbfkU5S38/9ji++/sXgFcXoRCRHxhjzqr071VJqrfwu9H68vzMyrULZfnGBbJ8x7zMyuxMWTdmElsmj6J9hgiN1HiswmjDLnt2dMzas6Nj1uHbB35ukMkEq7zs+hbP29jseVub7V4Da72s15rJjtmckQltIpMNTMVuvepVRf/dGmPuF5H8AE85Ebg2HB/zsIhMEpEZxphV/f1CNY8ljqzi36pUHRfemBm0rp2XaXl5H1m+cUGmuW2urMzuLuvGTmLLlAY6ZoiwKwn6aDLX1ZXLtXXlFrYN/H9bB3Ssy2bXr/KyG1Y0eJubvYbtzQ1ex0ovm3k5mx0VZDJjt2UyuXb7xjB+hOJXU/+7TNHMBJp7fL8ifGxECu9CYnfpha6u3Vm/Zl6mZd0+snzTwszy9r1kVWaGtI7LsXlqA53TRZhGCi//9cCb3tk5bXpn57SDdgz8f/FWkS1r7F5DsMLztjQ3eG3huYbMumx29KZMZsJ2kcld9s3B2dnxXja7euGkFz62W/gMXZ0zZd3q+bJi/T6yfNOCTHPHXrIqM11ax+fYMtWjc4YIM4AZrrMm2Vhjxs1p7xg3p71jwOcZMBsymdZVntfa4mU3NTc0bGv2vPaVXpa1XrZhQzY7ZovIhDaRKQYm1/iQolpz2rVgr2DtNit8rF/VLLyLYy5nc+pl6ezYQ9auni8t6xbK8i0LM80dc2SVN102jJvA1kbPbqFn4vhySGUJyJSurilT2tqmLGoD2Nbvc9ug7WUvu36l520I9xq2r/C8rlWeJ+uymVFBJjtuW0ZyHfZE5NghxNkw1H9HL7cCXxCR67An64KBjt8h2ki7NxhjnhrgKd+v9G9V0YD/uOHw6GjfU9aumi8r1u+TWb5lgTR3zpHV3jTZMH4CWxuzdE0XYRb2XVXVkVEwamZH54yZHZ0zDmHHgM/dLLJ5jZddt8LzNtpPKRraWxqyZk3Wy7ZmM7tsymTG77CHFI2IZIH2pz711CuV5BCRXwHvABpFZAVwAeGgHWPMj4H/w34ktwx7XuC0wf5mlC38D8OpqpcAvzDGBD1/aIxZEuFvVcuKof5iAx1ts2X1qgWyYv3CzPIte0tz1xxZ3TBNXpkwjm2NWbqmibAnsGcV86o6M96Y8ePbO8bPbe8YaKeBLuhqzWbWrfS8f1T6t40xr1uevdfPDfD5isMS7WO5t4rIfOB0YKmIPAr8xPEFNP0er4ymbXteVq/aW1a07pNZvnVvae7My5pRu8krE8exrTGDmSbCbGD2COZVKZWBTGNnV2NjZ9u/XOaIdAxvjHleRM7HrhV/GXBQONrna8aYG2sRcBArT8g8+JdFmfK2+bKic7asGb2bvDJxLNt3y2AaRZiDXfRSqbgY8l5pNUQ5ht8fe4xQAO4E3meMeVxEdgceAka88OWmQhv+R2egJ8ZUcjQP/pTaiXLxzA+AJ4ADjDGf7x6zG051dX4twlVomcPXViqqF1y+eJRj+LcP8LOfVSfOkCwD+s2mVMw86fLFBy28iDxF3+vAO1+IIvS849dXqlIG+KvLAJVs4Y+veYrhecJ1AKUq9CJ+4GzlWKig8OGiEwCIyHTgUOw71WPGmNU1zFapR7B56vnqKlUfnnQdIMpCFJ8GHsXOeHMS9nK802sVrGJ+EAB/dx1DqQo43xuN8jn8ucBBxpj1ACIyFXgQuKYWwSJ6GNjHdQilBvGk6wBRPpZbD2zq8f2m8LE4eNh1AKUq8KTrAJWcpT8nvLsMeEREbsEeM58I/K2G2aLQ
wqu4ewk/cDrKDirbpe9eP+5f4Ve3W6ofZ8iexk4qkMTZT1Q6/N51ALBTT7vOUB1+7g7gGNcxlOrHYvzA+Uaykl36S40xXxKR2+hjAI4x5oSaJIvuNrTwKp7agLtch4DKdum7h81eXMsgVXAz9go+peLmT/iBs3nsehr0LL0xZml4e1/3F/Zk3Ybwfjz4QTP2sl2l4uZ3rgN0izLw5l4RmSgiU4DHgatE5JLaRRuSm1wHUKoPySs8kDPGbMSOtLvWGHMYcHRtYg2ZFl7FzXL84BnXIbpFKbwnIjOAk4Hba5RnePzgOaDiOcOUGgEuLx1/nSiFvxD4A7DMGPNYuLZcHC9N1a28igtDPIaev6p+Pofv5ucWYQfiKOXavfjBO12H6CnKSbtvhyftGkTkLhF5WUQ+XstwQ2KPl+Lz6YFKs6tdB+gtyi79seFJu+OBMjAPewVdHF3uOoBKvQD4resQvUU6aRfeFoDf9F6IImZuZpA1tpSqsV/iBwMsTeFGlMLfLiJ/Bw4G7hKRXYFBVgR3xA86gCtdx1CpFrvdeYh40i4cdBMYYzrFLqI3MSbTXL2en5sGLAdGuY6iUufP+MFbXIfoS5STdg3Ax4HrReQG4AziMwHG6/nBGmJ4DKVS4ULXAfoTZZf+R9jd+R+GX28MH4uz77kOoFLnYfzgDtch+lPxLr2I/NUYc8Bgj8WOn7sJWOw6hkqN9+IHsRk731uULXyniMzt/iYcaddZ/UhV93WSkVMl32NxLjtEK/y5wD3hVXP3AncD/16TVNXkB88Ss/HMqm7F9ti9W5TC/xn7UVcX0Bref6gWoWrgAmCH6xCqri3FD+J5UVkPUQp/LXat9YuwK8nuRVK2nH6wHHuiUalaKboOUIkoJ+2eNcbsO9hjseXnGrGz7k50HUXVnevwg1Nch6hElC384yLy5u5vROQwkjSllB+sA77pOoaqOxuBcwZ9VkxEKfzBwIMiUhaRMvb4/RAReUpE4rIgxWC+SwzW91J15Xz8YJXrEJWKsks/e6Cf91xlNtb83AHAY0CD6ygq8R4HDsUPEvOxb/1NgFEJP3cRcL7rGCrRuoA34wePuQ4SRZRd+npyEfCs6xAq0a5MWtkhrYX3gzbgNHQEnhqafwDnuQ4xFOksPIAfPApc6jqGSpztwMlxWUkmqvQW3vo6SfpoUcXB2fhBUj6Vep10nrTryc/tASwFdnUdRcVeYgbY9CftW/juNek+jB7Pq4E9D3zGdYjh0sID+ME9JPQkjBoRO7DH7ZtcBxkuLXw3P7gE+JXrGCqWPosfPOk6RDVo4Xf2aexS2Ep1+zp+sMR1iGrRk3a9+bk88AAwy3ES5d7l+MFZrkNUk27he/ODMnYZ7LWOkyi3fgOc7TpEtekWvj9+bn/gHmCK6yhqxN0LHIcf1N0sSVr4gfi5Q4C7gAmuo6gR8zfgbfhBnJdSGzLdpR+IvTiiAGx1HUWNiGeAd9dr2UELPzg/eAA7r33d7d6pnTwOvAM/iOfSaVWiha+EH9yJ3dInfuCF6tODwFHhNGh1TQtfKT+4C3gbUNdbgBQqAcfU8258T1r4KOxoqyOw10Or5LsGWIwfpOYcjZ6lHwo/Nxm4ATjKdRQ1JAa4CD+4wHWQkaaFHyo/14Bd3OLTrqOoSDYAn0zCKjG1oIUfLj/3eeBiYBfXUdSglgIfwg9edB3EFT2GHy4/uAI4FPsZroqv/wGOTHPZQbfw1ePndsEudPE511HUTrZiL29NxjqINaaFrzY/dwL27O9U11EUTwKfwA+edh0kLnSXvtr84FZgf+wYfOXGVuBc4E1a9p3pFr5W/JwAnwC+BUx3nCZNfgd8LrzMWfWiha81PzcR+AbwRXQ9u1pag51C+nrXQeJMCz9S/NxC4DLgGNdR6kwncDVQxA82uA4Td1r4kebn3g9cAuQdJ0m6LuA64D/xg3+6DpMUWngX/Nwo7PH9V4D5jtMkjcEOa/bxA10QNCItvEt+LgN8EPgqcJDj
NElwM3BBkpd6ck0LHxd+7t3A17CX4KrXbAWuB36AHzzhOkzSaeHjxs8dDpwJnES659L7G3Y47M/Tcq36SNDCx5WfG4OdWuuT2DP7Wad5Rkb31vxK/OAR12HqkRY+CfzcdOCj2PIf4DhNtW0C7gBuA27WrXltaeGTxs/Nw27xjwbeCUx2G2hIXgBux5b8fvygzXGe1NDCJ5k9y38wr70BHAGMdpqpby9hr0V/CCjhB885zpNaWvh6Yo/79+vxtSi8nTmCKZZjy70U+AuwNA2zwSaFFj4N/NwkbPn3BWYAjeHX1B73G4Gx/fyFLmA7sA07a28LsKKP22b8oLVW/ww1fFp49Ro/Nxrwej3ahh+0u4ijqk8Lr1SK6AQYSqWIFl6pFNHCK5UiWnhVdSJyqohc7jqHej0tvFIpooVXgxKRvIg83eP7L4uILyL3isi3RORREfmniLy1j98tiMhDItIoIktE5DIReVBEXhCRk8LniIh8R0SeFpGnROTD4eNXiMgJ4f2bROSa8P7pIvLfYa7nROQqEXlGRO4QkTEj879KMmnh1XB5xphDgS8BOy3OKCLvB4rAe40x3aPtZgBvAY4HmsLHPgAciL0w6GjgOyIyA3gA6H4TmYkdOET42P3h/fnAFcaYRcAr2AlFVD+08Gq4bgxvl7LzPH1HYafwKhhjek4uebMxpssY8ywwLXzsLcCvjDGdxpg1wH3AIYSFF5F9gWeBNeEbweHAg+HvvmiMebKfDKoXLbyqRAc7/7fSc+HMHeFtJzuP0vsXdgKPvXv9rR097stAL2qMaQEmAcdht+gPACcDm40xm/r4e70zqF608KoSa4DdRGSqiIzG7o4P5iXs7vW1IrJokOc+AHxYRLIisit2mq9Hw589jD1c6C78l8NbNQRaeDUoY0w7cCG2hHcCf6/w9/4OfAz4jYjMHeCpN2GntPorcDdwnjFmdfizB7DnCZYBjwNT0MIPmY6lVypFdAuvVIpo4ZVKES28UimihVcqRbTwSqWIFl6pFNHCK5UiWnilUkQLr1SKaOGVShEtvFIpooVXKkW08EqliBZeqRTRwiuVIv8PA7aRwIUDwgEAAAAASUVORK5CYII=",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "tweets_df[\"possibly_sensitive\"].value_counts().plot(kind=\"pie\");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Polarity and subjectivity"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " 0.000000 2894\n",
+ "-0.100000 269\n",
+ " 0.500000 225\n",
+ "-0.050000 188\n",
+ " 0.200000 178\n",
+ " ... \n",
+ " 0.151667 1\n",
+ "-0.190000 1\n",
+ "-0.140136 1\n",
+ " 0.013624 1\n",
+ " 0.207143 1\n",
+ "Name: polarity, Length: 760, dtype: int64"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tweets_df[\"polarity\"].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "##### Sentiments"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAE2CAYAAACQtL4gAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAhlElEQVR4nO3deZxcVZn/8c+XALJvksEQAoEYxKgYYk8AVxCH1WFRJCxCQJ2MP0GRxTG4DKg4g/gTXIkGiQEFYhAYwiIYQhABWTp7QlhiCJIYIRAIAUY04Zk/7ulwaar7Vne66lZ1f9+vV73q3nO3p2469dS559xzFRGYmZl1ZoOyAzAzs8bnZGFmZoWcLMzMrJCThZmZFXKyMDOzQk4WZmZWyMnCkPRVSQskzZU0W9Le3dzPcEmH5uYPlzS25yKteMz9JL23lsdIx7lTUks3tvu5pGFVrjtR0tFdj67r0nm7qR7HKojjPElnd3GbdX9Xko6s9vza+tmw7ACsXJL2BT4KjIiIVyRtD2zczd0NB1qAWwAiYgowpSfi7MR+wIvAvTU+TrdExGfKjqE3kbRhu7+rI4GbgIdKC6qPcM3CBgDPRMQrABHxTET8BUDSeyT9XtIMSbdJGpDK75T0HUkPSHpU0gckbQx8ExiVaiejJJ0s6cdpm4mSxkm6T9Li9Mt2gqSFkia2BSPpQEl/lDRT0jWStkjlSyR9I5XPk7SHpMHAZ4Ez0jE/kP9gkkamfc2SdK+kt6XykyVdJ+lWSY9JujC3zThJramm9Y32J0vSpyR9Pzf/b5IulrS5pJslzZE0X9Ko3LlqkdQvnYP5Kf4zOvj3+Eg6/qOSPpr2MVjSH9Jnn9lWk5I0QNJd6bPPb/v8nZzDgyU9LGkm8LFKB5e0iaRfpBhnSdq/6Jy1236JpAvT9g9IemvuM9yhrPY6TdLOFbb9N0kPpnN4raTNUvlEST+VdD9wYdvfVToPhwPfTedgSPpsbfsbmp+39RQRfvXhF7AFMBt4FLgE+FAq34js13r/ND8KmJCm7wS+l6YPBW5P0ycDP87te908MBGYBAg4AngBeBfZD5YZZLWS7YG7gM3TNl8G/jNNLwE+n6Y/B/w8TZ8HnN3BZ9sK2DBNfwS4NhfXYmBrYBPgCWBQWrZdeu+XPueeuc/cks7Xn4CNUvm96XN8HLg0d+yt2233HmBqbvk2FeKdCNyazslQYGmKbzNgk7TOUKA1TZ8FfDUX75YdncO0nyfT9gImAzdViOGs3L/zHsCf07YdnrN22y/JxXRS2zGAG4HRafpTwP+0//cD3pzbz/m5f++JZLWHfh38XR2d2246MDxN/1fbPvxa/5cvQ/VxEfGipPcAHwD2B36t7HpwK/BOYKokyL6Mluc2vS69zwAGV3m4GyMiJM0DnoqIeQCSFqR97AQMA+5Jx9wY+GMHx6z4y7idrYHLJQ0FgiwBtpkWEavS8R8CdiH7Mj1G0hiyS7QDUjxz2zZK5+sO4KOSFpIljXmSXgG+J+k7ZF+Qf2gXy2JgN0k/Am4GftdBzJMj4lXgMUmLyb6wHwd+LGk4sBbYPa37IDBB0kZkX76zJX2IyudwD+DxiHgsfeZfAWMqHP/9wI/SZ31Y0hO543V0ztq7Ovd+cZrel9f+zX4JVKqZvFPS+cA2ZEn5ttyyayJibYVt2vs5cIqkM8l+4IysYhurgpOFkf4T3gncmb7IR5N9IS+IiH072OyV9L6W6v+O2rZ5NTfdNr9h2tfUiDiuh475LWB6RByVLlndWWFf6/YnaVfgbOCfI+I5ZZfHNqmw358DXwEeBn4BEBGPShpBVtM6X9K0iPhm2wZpf+8GDiK7dHYM2S/s9toP1hbAGcBTwLvJah1/S/u8S9IHgcOAiZIuAp6jwjlMiWZ9veGcdbBedDBdZCJwZETMkXQyWXtUm5eq3Me1wLnAHcCMiHi2C8e3TrjNoo+T9Lb0y7vNcLJLDI8A/ZU1gCNpI0nv
KNjdarJLId11H/C+3HXuzSXtXrBNZ8fcGliWpk+u4vhbkX0prZK0A3BIpZUi4n5gEHA86Ve0pB2BlyPiV8B3gRH5bZR1HNggIq4FvtZ+ec4nJG0gaQiwG9m/w9bA8lTjOJGsloekXchqaJeSJbARdHwOHwYGp/0CdJSQ/wCckLbdHdg5xdAVo3LvbTXDe4Fj0/QJ6TjtbQksTzWlE6o81uv+/SPib2Q1knGkRG49w8nCtiC7VPOQpLlklzDOi4i/A0cD35E0h6xdo6iL6nRgWGpsHFWw7htExAqyL/WrUyxtl086cyNwlCo0cJNd6vhvSbOooiYSEXOAWWRfrFcB93Sy+mTgnoh4Ls2/C3hA0myyX7bnt1t/IFnNbTbwK+CcDvb7Z+AB4LfAZ9OX3yXA6PTvsAev/creD5iTPt8o4AcdncO0nzHAzanR9+kOjn8JsEGqYf4aODlS54cu2DYd+3SyWhHA58kuD80lS3inV9ju68D9ZOf94SqPNQn4UmqMb0uEV5LVVju61GfdoNQQZGZdoOwehYsjYlrZsTQSSUuAloh4psQYzibrYPD1smLojdxmYdYFkrYh++U/x4mi8Ui6HhgCfLjsWHob1yzMzKyQ2yzMzKyQk4WZmRWqWbJIwwY8kG7dXzd0gqRdJd0vaZGkXysbJgJJb0rzi9Lywbl9nZPKH5F0UK1iNjOzymrWZqHs9tHN0x2vGwF3k3WXOxO4LiImSfopWUPhOEmfIxta4bOSjgWOiohRykaUvJrsTswdgduB3Tu7m3P77bePwYMH1+RzmZn1VjNmzHgmIvpXWlaz3lCRZaEX0+xG6RVkvRSOT+WXk40NM45svKDzUvlvyIY3aBtHaFLq6/24pEVkiSM/DMTrDB48mNbW1p78OGZmvV4a3qWimrZZKBtpczbZDUBTyQZgez4i1qRVlpLdrER6fxIgLV8FvDlfXmGb/LHGKButs3XFihU1+DRmZn1XTZNFRKyNiOFkA8SNpPhu3PU51viIaImIlv79K9aizMysm+rSGyoinicbCmJfYBtJbZe/duK1sXuWkY23Q1q+NfBsvrzCNmZmVge17A3VP93tiqRNgX8BFpIljbZHR44GbkjTU9I8afkdqd1jCnBs6i21K9l4/A/UKm4zM3ujWg73MYBsgLp+ZElpckTclMbBn5TGrZ8FXJbWvwz4ZWrAXkkaoTIiFkiaTPbYxDXAqVWOa29mZj2kVw730dLSEu4NZWbWNZJmRERLpWW+g9vMzAo5WZiZWSEPUV6lwWNvLjuEqiy54LCyQzCzXsg1CzMzK+RkYWZmhZwszMyskJOFmZkVcrIwM7NCThZmZlbIycLMzAo5WZiZWSHflGel8E2OZs3FNQszMyvkZGFmZoWcLMzMrJCThZmZFXKyMDOzQk4WZmZWyMnCzMwKOVmYmVkhJwszMyvkZGFmZoWcLMzMrJCThZmZFXKyMDOzQk4WZmZWyMnCzMwK1SxZSBokabqkhyQtkHR6Kj9P0jJJs9Pr0Nw250haJOkRSQflyg9OZYskja1VzGZmVlktH360BjgrImZK2hKYIWlqWnZxRPz//MqShgHHAu8AdgRul7R7WvwT4F+ApcCDkqZExEM1jN3MzHJqliwiYjmwPE2vlrQQGNjJJkcAkyLiFeBxSYuAkWnZoohYDCBpUlrXycLMrE7q0mYhaTCwF3B/KjpN0lxJEyRtm8oGAk/mNluayjoqb3+MMZJaJbWuWLGipz+CmVmfVvNkIWkL4FrgixHxAjAOGAIMJ6t5fK8njhMR4yOiJSJa+vfv3xO7NDOzpJZtFkjaiCxRXBkR1wFExFO55ZcCN6XZZcCg3OY7pTI6KTczszqoZW8oAZcBCyPiolz5gNxqRwHz0/QU4FhJb5K0KzAUeAB4EBgqaVdJG5M1gk+pVdxmZvZGtaxZvA84EZgnaXYq+wpwnKThQABLgH8HiIgFkiaTNVyvAU6NiLUAkk4DbgP6ARMiYkEN4zYz
s3Zq2RvqbkAVFt3SyTbfBr5dofyWzrYzM7Pa8h3cZmZWyMnCzMwKOVmYmVkhJwszMyvkZGFmZoWcLMzMrJCThZmZFXKyMDOzQk4WZmZWyMnCzMwKOVmYmVkhJwszMyvkZGFmZoWcLMzMrJCThZmZFXKyMDOzQk4WZmZWyMnCzMwKOVmYmVkhJwszMyvkZGFmZoWcLMzMrJCThZmZFXKyMDOzQk4WZmZWyMnCzMwKOVmYmVmhmiULSYMkTZf0kKQFkk5P5dtJmirpsfS+bSqXpB9KWiRprqQRuX2NTus/Jml0rWI2M7PKalmzWAOcFRHDgH2AUyUNA8YC0yJiKDAtzQMcAgxNrzHAOMiSC3AusDcwEji3LcGYmVl91CxZRMTyiJiZplcDC4GBwBHA5Wm1y4Ej0/QRwBWRuQ/YRtIA4CBgakSsjIjngKnAwbWK28zM3qgubRaSBgN7AfcDO0TE8rTor8AOaXog8GRus6WprKPy9scYI6lVUuuKFSt69gOYmfVxNU8WkrYArgW+GBEv5JdFRADRE8eJiPER0RIRLf379++JXZqZWVLTZCFpI7JEcWVEXJeKn0qXl0jvT6fyZcCg3OY7pbKOys3MrE5q2RtKwGXAwoi4KLdoCtDWo2k0cEOu/KTUK2ofYFW6XHUbcKCkbVPD9oGpzMzM6mTDGu77fcCJwDxJs1PZV4ALgMmSPg08ARyTlt0CHAosAl4GTgGIiJWSvgU8mNb7ZkSsrGHcZmbWTs2SRUTcDaiDxQdUWD+AUzvY1wRgQs9FZ2ZmXeE7uM3MrJCThZmZFepyskgNzXvWIhgzM2tMVSULSXdK2ioNvTETuFTSRUXbmZlZ71BtzWLrdEPdx8iG5Ngb+EjtwjIzs0ZSbbLYMN1AdwxwUw3jMTOzBlRtsvgG2Y1wiyLiQUm7AY/VLiwzM2sk1d5nsTwi1jVqR8Rit1mYmfUd1dYsflRlmZmZ9UKd1iwk7Qu8F+gv6czcoq2AfrUMzMzMGkfRZaiNgS3Selvmyl8Ajq5VUGbWNYPH3lx2CFVZcsFhZYdg3dRpsoiI3wO/lzQxIp6oU0xmZtZgqm3gfpOk8cDg/DYR8eFaBGVmZo2l2mRxDfBT4OfA2tqFY2ZmjajaZLEmIsbVNBIzM2tY1XadvVHS5yQNkLRd26umkZmZWcOotmbR9hjUL+XKAtitZ8MxM7NGVFWyiIhdax2ImZk1rmqHKN9M0tdSjygkDZX00dqGZmZmjaLaNotfAH8nu5sbYBlwfk0iMjOzhlNtshgSERcC/wCIiJcB1SwqMzNrKNUmi79L2pSsURtJQ4BXahaVmZk1lGp7Q50L3AoMknQl8D7g5FoFZWZmjaXa3lBTJc0E9iG7/HR6RDxT08jMzKxhVHsZCmAg2bDkGwMflPSx2oRkZmaNpqqahaQJwJ7AAuDVVBzAdTWKy8zMGki1NYt9IqIlIkZHxCnp9anONpA0QdLTkubnys6TtEzS7PQ6NLfsHEmLJD0i6aBc+cGpbJGksV3+hGZmtt6qTRZ/lDSsi/ueCBxcofziiBieXrcApH0fC7wjbXOJpH6S+gE/AQ4BhgHHdSMOMzNbT9X2hrqCLGH8lazLrICIiD072iAi7pI0uMr9HwFMiohXgMclLQJGpmWLImIxgKRJad2HqtyvmZn1gGqTxWXAicA8Xmuz6K7TJJ0EtAJnRcRzZI3n9+XWWZrKAJ5sV773eh7fzMy6qNrLUCsiYkpEPB4RT7S9unG8ccAQYDiwHPheN/ZRkaQxklolta5YsaKndmtmZlRfs5gl6SrgRnJ3bkdEl3pDRcRTbdOSLgVuSrPLgEG5VXdKZXRS3n7f44HxAC0tLdGVuMzMrHPVJotNyZLEgbmyLnedlTQgIpan2aOAtp5SU4CrJF0E7AgMBR4gaxsZKmlXsiRxLHB8V45pZmbrr9o7uE/p6o4lXQ3sB2wvaSnZkCH7SRpOlmiWAP+e9r9A0mSyhus1wKkRsTbt5zTg
NrIbAidExIKuxmJmZuun02Qh6T8i4kJJPyINIpgXEV/oaNuIOK5C8WWdrP9t4NsVym8BbuksTjMzq62imsXC9N5a60DMzKxxdZosIuLGNPlyRFyTXybpEzWLyszMGkq1XWfPqbLMzMx6oaI2i0OAQ4GBkn6YW7QVWUO0mZn1AUVtFn8ha684HJiRK18NnFGroMzMrLEUtVnMAeZIuioi/lGnmMzMrMFUe1PeSEnnAbukbdoGEtytVoGZmVnj6MpAgmeQXYpaW7twzMysEVWbLFZFxG9rGomZmTWsapPFdEnfJRsLKj+Q4MyaRGVmZg2l2mTR9gyJllxZAB/u2XDMzKwRVTuQ4P61DsTMzBpXVXdwS9pB0mWSfpvmh0n6dG1DMzOzRlHtcB8TyYYJ3zHNPwp8sQbxmJlZA6o2WWwfEZNJz9+OiDW4C62ZWZ9RbbJ4SdKbSc+0kLQPsKpmUZmZWUOptjfUmWSPPh0i6R6gP3B0zaIyM7OG0mnNQtI/S3pLup/iQ8BXyO6z+B2wtA7xmZlZAyi6DPUz4O9p+r3AV4GfAM8B42sYl5mZNZCiy1D9ImJlmh4FjI+Ia4FrJc2uaWRmZtYwimoW/SS1JZQDgDtyy6pt7zAzsyZX9IV/NfB7Sc8A/wv8AUDSW3FvKDOzPqPo4UffljQNGAD8LiIiLdoA+HytgzMzs8ZQeCkpIu6rUPZobcIxM7NGVO1NeWZm1oc5WZiZWSEnCzMzK1SzZCFpgqSnJc3PlW0naaqkx9L7tqlckn4oaZGkuZJG5LYZndZ/TNLoWsVrZmYdq2XNYiJwcLuyscC0iBgKTEvzAIcAQ9NrDDAOsuQCnEv2pL6RwLltCcbMzOqnZskiIu4CVrYrPgK4PE1fDhyZK78iMvcB20gaABwETI2IlRHxHDCVNyYgMzOrsXq3WewQEcvT9F+BHdL0QODJ3HpLU1lH5W8gaYykVkmtK1as6Nmozcz6uNIauNMNflG4YvX7Gx8RLRHR0r9//57arZmZUf9k8VS6vER6fzqVLwMG5dbbKZV1VG5mZnVU72QxBWjr0TQauCFXflLqFbUPsCpdrroNOFDStqlh+8BUZmZmdVSzkWMlXQ3sB2wvaSlZr6YLgMmSPg08ARyTVr8FOBRYBLwMnAIQESslfQt4MK33zdyQ6WZmVic1SxYRcVwHiw6osG4Ap3awnwnAhB4MzczMush3cJuZWSEnCzMzK+RkYWZmhZwszMyskJOFmZkVcrIwM7NCThZmZlbIycLMzAo5WZiZWSEnCzMzK+RkYWZmhZwszMyskJOFmZkVcrIwM7NCThZmZlbIycLMzAo5WZiZWSEnCzMzK+RkYWZmhZwszMyskJOFmZkVcrIwM7NCThZmZlbIycLMzAo5WZiZWSEnCzMzK+RkYWZmhUpJFpKWSJonabak1lS2naSpkh5L79umckn6oaRFkuZKGlFGzGZmfVmZNYv9I2J4RLSk+bHAtIgYCkxL8wCHAEPTawwwru6Rmpn1cY10GeoI4PI0fTlwZK78isjcB2wjaUAJ8ZmZ9VllJYsAfidphqQxqWyHiFiepv8K7JCmBwJP5rZdmspeR9IYSa2SWlesWFGruM3M+qQNSzru+yNimaR/AqZKeji/MCJCUnRlhxExHhgP0NLS0qVtzcysc6XULCJiWXp/GrgeGAk81XZ5Kb0/nVZfBgzKbb5TKjMzszqpe7KQtLmkLdumgQOB+cAUYHRabTRwQ5qeApyUekXtA6zKXa4yM7M6KOMy1A7A9ZLajn9VRNwq6UFgsqRPA08Ax6T1bwEOBRYBLwOn1D9kM7O+re7JIiIWA++uUP4scECF8gBOrUNoZmbWgUbqOmtmZg3KycLMzAo5WZiZWSEnCzMzK+RkYWZmhZwszMyskJOFmZkVcrIwM7NCThZmZlbIycLMzAo5WZiZWSEnCzMzK+RkYWZmhZwszMyskJOFmZkVcrIwM7NCThZmZlbIycLMzAo5WZiZWSEn
CzMzK+RkYWZmhZwszMyskJOFmZkVcrIwM7NCThZmZlbIycLMzAo5WZiZWaGmSRaSDpb0iKRFksaWHY+ZWV/SFMlCUj/gJ8AhwDDgOEnDyo3KzKzv2LDsAKo0ElgUEYsBJE0CjgAeKjUqM+t1Bo+9uewQqrLkgsPqejxFRF0P2B2SjgYOjojPpPkTgb0j4rTcOmOAMWn2bcAjdQ+067YHnik7iF7E57Nn+Xz2nGY5l7tERP9KC5qlZlEoIsYD48uOoysktUZES9lx9BY+nz3L57Pn9IZz2RRtFsAyYFBufqdUZmZmddAsyeJBYKikXSVtDBwLTCk5JjOzPqMpLkNFxBpJpwG3Af2ACRGxoOSwekJTXTZrAj6fPcvns+c0/blsigZuMzMrV7NchjIzsxI5WZiZWSEnCzMzK+RkYWavI2lTSW8rO45mJWnrTpY17b0WThbW1JT5pKT/TPM7SxpZdlzNStK/ArOBW9P8cEnupt41t0vatn2hpAOB60uIp0c4WdSJpNWSXqjwWi3phbLja2KXAPsCx6X51WSDTlr3nEc2FtvzABExG9i1vHCa0nhguqR1w2ZIOh74GVDfAZ16UFPcZ9EbRMSWZcfQS+0dESMkzQKIiOfSjZvWPf+IiFWS8mXuX98FEXGppL8Bd6TaxCjgs8D+EbGk1ODWg5NFSST9E7BJ23xE/LnEcJrZP9IQ9gGQfs29Wm5ITW1B+hXcT9JQ4AvAvSXH1HQi4pcpYcwC/gy8PyKaYSDBDvmmvDqTdDjwPWBH4GlgF2BhRLyj1MCalKQTyH65jQAuB44GvhYR15QaWJOStBnwVeDAVHQbcH5E/K28qJqLpHlkP15E9v97BfBSmo+I2LPE8LrNyaLOJM0BPgzcHhF7Sdof+GREfLrk0JqWpD2AA8j+M06LiIUlh9S0JI2IiJllx9HMJO3S2fKIeKJesfQkJ4s6axuqOCWNvSLiVUlzIuLdZcfWjCT9EJgUEb5U0gMkTQfeAvwG+HVEzC85JGsQbrOov+clbQHcBVwp6WmyKqp1zwzga+m+gOvJEkdryTE1rYjYX9JbgGOAn0naiixpnF9yaE1D0uO8vlOAcvMREUPqH9X6c82iziRtDvwvWbflE4CtgSsj4tlSA2tykrYDPk42fP3OETG05JCanqR3Af8BjIoI9zCrkqQ3tyvagCz5ng3MjIiP1z+q9eeaRR2lXjs3RcT+ZD12Li85pN7krcAepA4DJcfStCS9nazDwMeBZ4FfA2eVGlSTafvhJ2kD4ETgS2Q3Oh4WEQ+VGNp6cbKoo4hYK+lVSVtHxKqy4+kNJF0IHAX8ieyL7VsR8XypQTW3CWTn8aCI+EvZwTQjSRsBnwLOAO4GjoyIReVGtf6cLOrvRWCepKnk2ioi4gvlhdTU/gTs2+x92BtFROxbdgy9wOPAGuD7ZPdY7ClpXXfZiLiupLjWi9ss6kzS6ArFERFX1D2YJiZpj4h4WNKISsvd/bNrJE2OiGNy9wisW0QT3xtQBkm/SJPtzyNk5/JTdQ6pR7hmUX/bRMQP8gWSTi8rmCZ2JjCG7AbH9oLsXharXtvf4EdLjaJ3mM9rN+WRplcAd0fE46VFtZ5cs6gzSTMjYkS7slkRsVdZMTUzSZu0v7u4UplVR9J3IuLLRWXWMUnnVijeDjgIOC8iJtU5pB7hZFEnko4DjgfeD/wht2hL4NWIOKCUwJpcB8n3DWVWnQ7O51xfhlp/qXv37c36t+nLUPVzL7Ac2J7XXzpZDcwtJaImlm4cGwhsKmkvXqvybwVsVlpgTUrS/wM+B+wmKf/3uCVwTzlR9S4RsVLthvNtJq5ZWFNKHQVOBlqA/B3bq4GJzdrjpCzp6W7bAv8NjM0tWh0RK8uJqndJ48B9PSKasj3NyaLOJK3mtV4SGwMbAS9FxFblRdW8JH08Iq4tO47exkPod1+FHmWQtVn8BTgpIh6uf1Trz5eh6iz/EKRUJT0C2Ke8iJqTpE9GxK+AwZLO
bL88Ii4qIaymlx6rehHthtAHPIR+9dr3KAvg2Yho6jHg/FjVEkXmf8h6SVjXbJ7etyC7rt7+Zd1zPtmPl0cjYleyod/vKzek5hIRT7R7/bnZEwX4MlTdSfpYbnYDsmvuH/Kds9YIPIS+dcSXoervX3PTa4AlZJeirBvS2FDnk43keyuwJ3BGukRlXech9K0i1yysqUmaHRHDJR1Fdq34TOAu/xLunjSE/t/IuiJ7CH1bxzWLOpO0OzAO2CEi3pkGGDvcD5fptra/4cOAayJiVRN3ZS9du2vrHkLf1nEDd/1dCpwD/AMgIuaSPbDHuucmSQ8D7wGmSepP9svYukHSakkvtHs9Kel6SbuVHZ+VxzWL+tssIh5o9+t3TVnBNLuIGJvaLVal54W8hNuA1sf3gaXAVWSXoo4FhgAzyZ51sV9ZgVm5nCzq7xlJQ0g37Ug6mmwYEOuG9KCZTwIfTAn498BPSw2quR3err1nfGoX+rKkr5QWlZXOyaL+TgXGA3tIWkb2oJQTyg2pqY0juwv+kjR/Yir7TGkRNbeXJR0D/CbNH81rl/XcG6YPc2+oOpP0JrL/gIPJhgB4gez+vG+WGVezqnQPgO8L6L7ULvEDYF+y5HAf2eNBlwHviYi7SwzPSuSaRf3dADxPdg3Yzzhef2slDYmIP8G6L7u1JcfUtCJiMa+/FyjPiaIPc7Kov50i4uCyg+hFvgRMl7Q4zQ8GTikvnObmrt3WEXedrb97Jb2r7CB6kXuAnwGvAivT9B9Ljai5uWu3VeSaRf29HzhZ0uPAK2TdE8NPIuu2K8jafb6V5o8Hfgl8orSImpu7dltFThb1d0jZAfQy74yIYbn56ZIeKi2a5ueu3VaRk0WdRcQTZcfQy8yUtE9E3AcgaW9e/+Q86xp37baK3HXWmpqkhcDbgLYnue0MPEJ26cSX97rIXbutI65ZWLNzz7Ke5a7dVpFrFma2jqT5EfHOsuOwxuOus2aW567dVpFrFma2TupJ9layhm137bZ1nCzMbB1Ju1Qqdy8+c7IwM7NCbrMwM7NCThZmZlbIycKsCyRNl3RQu7IvShrXwfp3SmqpT3RmteNkYdY1V/PGUViPTeVmvZaThVnX/AY4TNLGAJIGAzsCx0lqlbRA0jcqbSjpxdz00ZImpun+kq6V9GB6vS+Vf0jS7PSaJWnLGn82sw55uA+zLoiIlZIeIBs9+AayWsVk4L/Ssn7ANEl7pmdBVOMHwMURcbeknYHbgLcDZwOnRsQ9krbgtWdhm9WdaxZmXZe/FNV2CeoYSTOBWcA7gGEdbFvJR4AfS5oNTAG2SsnhHuAiSV8AtokIP1fCSuNkYdZ1NwAHSBoBbEb2hL6zgQPSnc43A5tU2C5/U1N++QbAPhExPL0GRsSLEXEB8BlgU+AeSXvU4sOYVcPJwqyLIuJFYDowgaxWsRXwErBK0g50/ICrpyS9XdIGwFG58t8Bn2+bkTQ8vQ+JiHkR8R3gQcDJwkrjZGHWPVcD7waujog5ZJefHgauIrt8VMlY4CbgXl7/9LkvAC2S5qaxmT6byr8oab6kuWTPxP5tz38Ms+p4uA8zMyvkmoWZmRVysjAzs0JOFmZmVsjJwszMCjlZmJlZIScLMzMr5GRhZmaFnCzMzKzQ/wHz3USypeKHFQAAAABJRU5ErkJggg==",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "pols = cleaner.text_category(series= tweets_df.polarity)\n",
+ "pols = pd.Series(pols)\n",
+ "\n",
+ "# bar chart of how often each polarity category occurs\n",
+ "fig, ax = plt.subplots()\n",
+ "ax.tick_params(axis='x', labelsize=10)\n",
+ "ax.tick_params(axis='y', labelsize=10)\n",
+ "ax.set_xlabel('Values', fontsize=10)\n",
+ "ax.set_ylabel('Sentiments' , fontsize=10)\n",
+ "ax.set_title('Sentiment analysis based on polarity', fontsize=10)\n",
+ "pols.value_counts().plot(ax=ax, kind='bar')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.000000 2314\n",
+ "0.500000 435\n",
+ "0.100000 292\n",
+ "1.000000 255\n",
+ "0.400000 231\n",
+ " ... \n",
+ "0.301667 1\n",
+ "0.500168 1\n",
+ "0.417857 1\n",
+ "0.343750 1\n",
+ "0.421429 1\n",
+ "Name: subjectivity, Length: 710, dtype: int64"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tweets_df.subjectivity.value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAExCAYAAACNsY6YAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAbFElEQVR4nO3de7ildV338fcHUPHAUUbEAR0UEvGRkyNgVioYBw+hSYRiDoZOPQ+p5WOFZpmahvYkSaVJSo6mIkoEj1o4IWhKCMNpOBojQjKSDAwiqJHgtz/u34bFZu+518Csvfbs/X5d17rWff/uw/quPXPtz75/v/uQqkKSpHXZZNwFSJJmP8NCktTLsJAk9TIsJEm9DAtJUi/DQpLUy7DQRivJHyS5MsnKJJcm2a9n/T9O8uYp2p+Q5HMPsoajkzxhYP4jSXbv2ea89r4oySsfzOeuR33PS/L5UX6G5ofNxl2A9GAkeTbwYmCfqroryXbAwx/Mvqrqu8DhD7KUo4ErgO+2fb12iM/72Ta5CHgl8KkH+dnSjPHIQhurHYBbquougKq6pf3SJ8n1LTxIsjjJuQPb7Znk35Jcm+R1bZ1FSa5o05sm+bMkF7Yjlt+Y2DDJ7ye5PMllSY5PcjiwGPhkO7J5ZJJz22f+ZpI/G9j26CR/1abvbM3HAz/ftv2dJF9NstfANl9Lsufgl05yfpKnD8xPfN6+7XtdkuS8JE+d/AObfGSV5Ioki9r0q5Jc0Gr5cPs5bJrkY229y5P8ztD/OppzDAttrL4E7JTk35N8MMlzh9xuD+AA4NnAHw12ITXHALdX1bOAZwGvS7JzkkOBw4D9qmpP4H1V9TlgBXBUVe1VVT8e2M9pwMsG5n8VOGXSZx0H/Gvb9gTgo3RHKiT5GWDzqrps0jafAY5o6+wA7FBVK4BrgJ+vqr2BPwLeM+TPgyRPa/U9p6r2Au4BjgL2AhZW1f+qqmcAfzfsPjX3GBbaKFXVncAzgaXAGuAzSY4eYtMzqurHVXULcA6w76TlBwGvTnIp8A3gscCuwAuAv6uqH7XPX9tT3xrguiT7J3kssBvw9Z7aPgu8OMnDgF8HPjbFOqdyX5fZEcDEWMtWwGfbEdIJwNOn2HY6B9L9LC9s3/tA4MnAdcCTk/xlkkOAH6zHPjXHOGahjVZV3QOcC5yb5HJgCd0v2Lu57w+hzSdv1jMf4PVVddb9GpODH0SJp9D9Qr8GOL16bsRWVT9KspzuCOYIul/gk9dZneTWJHvQHQ38Zlv0LuCcqnpZ61o6d4qPGPy5wH0/mwDLquotkzdo3WAHt885gi7ENA95ZKGNUpKnJtl1oGkv4IY2fT33/aJ9+aRND0uyeftr/3nAhZOWnwX87/bXPUl+JsmjgeXAa5I8qrVv29a/A9himjJPp/vF/woe2AU13bYfAU4ELqyq26bZ72eA3wO2qqqVrW0rYHWbPnqa7a4H9mn17wPs3NrPBg5P8ri2bNskT2rjPptU1WnA2ya21fxkWGhj9RhgWZKrkqwEdgf+uC17B/CBJCvo+t8HraTrfjofeNfEoDj3HWF8BLgKuLh16XwY2Kyq/hk4E1jRumomBoo/BvzNxAD34Ae1X/ZXA0+qqgum+A4rgXvagPnvtG0uouvuWdf4wOeAI+m6pCa8D/jTJJcwfY/BacC2Sa4Efgv49/aZV9GFwZfaz3I53QkEC+mO2i4F/h54wJGH5o94i3LNd0meCby/qoYdJB9lLU+g60Larap+OuZypHt5ZKF5Lcli4NPAB2ZBLa+mG1T/A4NCs41HFpKkXh5ZSJJ6GRaSpF5z8jqL7bbbrhYtWjTuMiRpo3LRRRfdUlULplo2J8Ni0aJFrFixYtxlSNJGJckN0y2zG0qS1MuwkCT1MiwkSb0MC0lSL8NCktTLsJAk9TIsJEm9DAtJUi/DQpLUa05ewb2xWHTcF8Zdwpxy/fEvGncJ0pzlkYUkqddIwyLJ9Ukub4+c
XNHatk2yPMm17X2b1p4kJyZZlWRle0bwxH6WtPWvTbJklDVLkh5oJo4snl9Ve1XV4jZ/HHB2Ve1K96D441r7ocCu7bUU+BB04QK8HdgP2Bd4+0TASJJmxji6oQ4DlrXpZcBLB9o/Xp3zga2T7AAcDCyvqrVVdRvdw+QPmeGaJWleG3VYFPClJBclWdratq+qm9r0fwLbt+mFwHcGtr2xtU3Xfj9JliZZkWTFmjVrNuR3kKR5b9RnQ/1cVa1O8jhgeZJrBhdWVSXZIA8Br6qTgJMAFi9e7IPFJWkDGumRRVWtbu83A6fTjTl8r3Uv0d5vbquvBnYa2HzH1jZduyRphowsLJI8OskWE9PAQcAVwJnAxBlNS4Az2vSZwKvbWVH7A7e37qqzgIOSbNMGtg9qbZKkGTLKbqjtgdOTTHzOp6rqn5NcCJya5BjgBuCItv4XgRcCq4AfAa8BqKq1Sd4FXNjWe2dVrR1h3ZKkSUYWFlV1HbDnFO23AgdO0V7AsdPs62Tg5A1doyRpOF7BLUnqZVhIknoZFpKkXoaFJKmXYSFJ6mVYSJJ6GRaSpF6GhSSpl2EhSeplWEiSehkWkqRehoUkqZdhIUnqZVhIknoZFpKkXoaFJKmXYSFJ6mVYSJJ6GRaSpF6GhSSpl2EhSeplWEiSehkWkqRehoUkqZdhIUnqZVhIknoZFpKkXoaFJKmXYSFJ6mVYSJJ6GRaSpF4jD4skmya5JMnn2/zOSb6RZFWSzyR5eGt/RJtf1ZYvGtjHW1r7N5McPOqaJUn3NxNHFm8Erh6Yfy9wQlXtAtwGHNPajwFua+0ntPVIsjtwJPB04BDgg0k2nYG6JUnNSMMiyY7Ai4CPtPkABwCfa6ssA17apg9r87TlB7b1DwNOqaq7qurbwCpg31HWLUm6v1EfWfwF8HvAT9v8Y4HvV9Xdbf5GYGGbXgh8B6Atv72tf2/7FNvcK8nSJCuSrFizZs0G/hqSNL+NLCySvBi4uaouGtVnDKqqk6pqcVUtXrBgwUx8pCTNG5uNcN/PAX4pyQuBzYEtgQ8AWyfZrB097AisbuuvBnYCbkyyGbAVcOtA+4TBbSRJM2BkRxZV9Zaq2rGqFtENUH+5qo4CzgEOb6stAc5o02e2edryL1dVtfYj29lSOwO7AheMqm5J0gON8shiOr8PnJLkT4BLgI+29o8Cn0iyClhLFzBU1ZVJTgWuAu4Gjq2qe2a+bEmav2YkLKrqXODcNn0dU5zNVFX/BfzKNNu/G3j36CqUJK2LV3BLknoZFpKkXoaFJKmXYSFJ6mVYSJJ6GRaSpF6GhSSpl2EhSeplWEiSehkWkqRehoUkqZdhIUnqZVhIknoZFpKkXoaFJKmXYSFJ6mVYSJJ6GRaSpF6GhSSpl2EhSeo1VFgkecaoC5EkzV7DHll8MMkFSf5Pkq1GWpEkadYZKiyq6ueBo4CdgIuSfCrJL460MknSrDH0mEVVXQu8Dfh94LnAiUmuSfLLoypOkjQ7DDtmsUeSE4CrgQOAl1TV09r0CSOsT5I0C2w25Hp/CXwEeGtV/Xiisaq+m+RtI6lMkjRrDNsNdXpVfWIwKJK8EaCqPjGSyiRJs8awYfHqKdqO3oB1SJJmsXV2QyV5BfBKYOckZw4s2gJYO8rCJEmzR9+YxXnATcB2wJ8PtN8BrBxVUZKk2WWdYVFVNwA3AM+emXIkSbPROscsknytvd+R5AcDrzuS/KBn283bVd+XJbkyyTta+85JvpFkVZLPJHl4a39Em1/Vli8a2NdbWvs3kxz8kL+1JGm9rDMsqurn2vsWVbXlwGuLqtqyZ993AQdU1Z7AXsAhSfYH3gucUFW7ALcBx7T1jwFua+0ntPVIsjtwJPB04BC6W49s+iC+qyTpQRr2orwTk6xXV1R17myzD2uvoruQ73OtfRnw0jZ9WJunLT8wSVr7KVV1V1V9G1gF7Ls+tUiSHpphT529CPjDJN9K8v+SLB5moySbJrkUuBlYDnwL
+H5V3d1WuRFY2KYXAt8BaMtvBx472D7FNoOftTTJiiQr1qxZM+TXkiQNY9gbCS6rqhcCzwK+Cbw3ybVDbHdPVe0F7Eh3NLDbQ6i177NOqqrFVbV4wYIFo/oYSZqX1vfhR7vQ/cJ/EnDNsBtV1feBc+jOqto6ycRZWDsCq9v0arq72tKWbwXcOtg+xTaSpBkw7JjF+9qRxDuBK4DFVfWSnm0WJNm6TT8S+EW6GxGeAxzeVlsCnNGmz2zztOVfrqpq7Ue2s6V2BnYFLhju60mSNoRhbyT4LeDZVXXLeux7B2BZO3NpE+DUqvp8kquAU5L8CXAJ8NG2/keBTyRZRXd1+JEAVXVlklOBq4C7gWOr6p71qEOS9BD13e5jt6q6BrgQeGKSJw4ur6qLp9u2qlYCe0/Rfh1TnM1UVf8F/Mo0+3o38O511SpJGp2+I4s3AUu5/60+JkycBitJmuP6bvextE0e2v7yv1eSzUdWlSRpVhn2bKjzhmyTJM1BfWMWj6e7AO6RSfYG0hZtCTxqxLVJkmaJvjGLg+kecrQj3bjFRFj8AHjr6MqSJM0mfWMWy+hOf315VZ02QzVJkmaZYccsnjlxgR1Akm3adRKSpHlg2LA4tN2yA4Cqug144UgqkiTNOsOGxaZJHjEx027f8Yh1rC9JmkOGvd3HJ4Gzk/xdm38N9z17QpI0xw0VFlX13iSXAS9oTe+qqrNGV5YkaTYZ9sgCujvG3l1V/5LkUUm2qKo7RlWYJGn2GPYW5a+je9Tph1vTQuAfR1STJGmWGXaA+1jgOXQX41FV1wKPG1VRkqTZZdiwuKuq/ntipj3JrkZTkiRpthk2LL6S5K1094j6ReCzwP8fXVmSpNlk2LA4DlgDXA78BvBF4G2jKkqSNLsMe+rsT4G/bS9J0jzTd4vyU6vqiCSX88AxiqJ7VvZfVNUZoypQkjR+fUcWb2zvL55m+XZ0V3cbFpI0h61zzKKqbmrvNwB3AXsCe9CdHXVDVV0EHDXyKiVJYzXsRXmvBS4Afhk4HDg/ya8DtMCQJM1hw97u43eBvavqVoAkj6V7BvfJoypM0ngtOu4L4y5hzrj++BeNu4SHbNhTZ28FBu8DdUdrkyTNA31nQ72pTa4CvpHkDLqzoA4DVo64NknSLNHXDbVFe/9We03w7CdJmkfWGRZV9Y6ZKkSSNHsNNcCd5BymuHFgVR2wwSuSJM06w54N9eaB6c2BlwN3b/hyJEmz0bD3hpp8LcXXk1wwgnokSbPQsN1Q2w7MbgIsBrYaSUWSpFln2G6oi7hvzOJu4HrgmFEUJEmafdZ5UV6SZyV5fFXtXFVPBt4BXNNeV/Vsu1OSc5JcleTKJG9s7dsmWZ7k2va+TWtPkhOTrEqyMsk+A/ta0ta/NsmSh/qlJUnrp+8K7g8D/w2Q5BeAPwWWAbcDJ/Vsezfwf6tqd2B/4Ngku9M9SOnsqtoVOLvNAxwK7NpeS4EPtc/dFng7sB+wL/D2iYCRJM2MvrDYtKrWtulfBU6qqtOq6g+BXda1YVXdVFUXt+k7gKuBhXRXfy9rqy0DXtqmDwM+Xp3zga2T7AAcDCyvqrVVdRuwHDhkfb6kJOmh6Q2LJBPjGgcCXx5YNux4B0kWAXsD3wC2n7j1OfCfwPZteiHwnYHNbmxt07VP/oylSVYkWbFmzZphS5MkDaEvLD4NfKXdE+rHwL8CJNmFriuqV5LHAKcBv11VPxhcVlXFFBf7PRhVdVJVLa6qxQsWLNgQu5QkNX23+3h3krOBHYAvtV/u0IXM6/t2nuRhdEHxyar6h9b8vSQ7VNVNrZvp5ta+GthpYPMdW9tq4HmT2s/t+2xJ0obTe4vyqjq/qk6vqh8OtP37xHjEdJIE+ChwdVW9f2DRmcDEGU1LuO+mhGcCr25nRe0P3N66q84CDkqyTRvYPqi1SZJmyNDjDg/Cc4BfAy5PcmlreytwPHBqkmOAG4Aj2rIvAi+kux36j4DXAFTV2iTvAi5s671z
YNBdkjQDRhYWVfU1INMsPnCK9Qs4dpp9nYxP5ZOksRn2SXmSpHnMsJAk9TIsJEm9DAtJUi/DQpLUy7CQJPUyLCRJvQwLSVIvw0KS1MuwkCT1MiwkSb0MC0lSL8NCktTLsJAk9TIsJEm9DAtJUi/DQpLUy7CQJPUyLCRJvQwLSVIvw0KS1MuwkCT1MiwkSb0MC0lSL8NCktTLsJAk9TIsJEm9DAtJUi/DQpLUy7CQJPUyLCRJvUYWFklOTnJzkisG2rZNsjzJte19m9aeJCcmWZVkZZJ9BrZZ0ta/NsmSUdUrSZreKI8sPgYcMqntOODsqtoVOLvNAxwK7NpeS4EPQRcuwNuB/YB9gbdPBIwkaeaMLCyq6qvA2knNhwHL2vQy4KUD7R+vzvnA1kl2AA4GllfV2qq6DVjOAwNIkjRiMz1msX1V3dSm/xPYvk0vBL4zsN6NrW269gdIsjTJiiQr1qxZs2GrlqR5bmwD3FVVQG3A/Z1UVYuravGCBQs21G4lScx8WHyvdS/R3m9u7auBnQbW27G1TdcuSZpBMx0WZwITZzQtAc4YaH91Oytqf+D21l11FnBQkm3awPZBrU2SNIM2G9WOk3waeB6wXZIb6c5qOh44NckxwA3AEW31LwIvBFYBPwJeA1BVa5O8C7iwrffOqpo8aC5JGrGRhUVVvWKaRQdOsW4Bx06zn5OBkzdgaZKk9eQV3JKkXoaFJKmXYSFJ6mVYSJJ6GRaSpF6GhSSpl2EhSeplWEiSehkWkqRehoUkqZdhIUnqZVhIknoZFpKkXoaFJKmXYSFJ6mVYSJJ6GRaSpF6GhSSpl2EhSeplWEiSehkWkqRehoUkqZdhIUnqZVhIknoZFpKkXoaFJKmXYSFJ6mVYSJJ6GRaSpF6GhSSpl2EhSeplWEiSem00YZHkkCTfTLIqyXHjrkeS5pONIiySbAr8NXAosDvwiiS7j7cqSZo/NoqwAPYFVlXVdVX138ApwGFjrkmS5o3Nxl3AkBYC3xmYvxHYb3CFJEuBpW32ziTfnKHa5oPtgFvGXUSfvHfcFWgM/L+5YT1pugUbS1j0qqqTgJPGXcdclGRFVS0edx3SZP7fnDkbSzfUamCngfkdW5skaQZsLGFxIbBrkp2TPBw4EjhzzDVJ0ryxUXRDVdXdSX4LOAvYFDi5qq4cc1nzid17mq38vzlDUlXjrkGSNMttLN1QkqQxMiwkSb0MC0lSL8NCktTLsNCU0nlVkj9q809Msu+465I0Hp4NpSkl+RDwU+CAqnpakm2AL1XVs8ZcmuapJHcAU/3CClBVteUMlzSvbBTXWWgs9quqfZJcAlBVt7ULIqWxqKotxl3DfGZYaDo/abeGL4AkC+iONKRZIcnjgM0n5qvqP8ZYzpznmIWmcyJwOvC4JO8Gvga8Z7wlSZDkl5JcC3wb+ApwPfBPYy1qHnDMQtNKshtwIF2f8NlVdfWYS5JIchlwAPAvVbV3kucDr6qqY8Zc2pxmN5SmlORE4JSq+utx1yJN8pOqujXJJkk2qapzkvzFuIua6wwLTeci4G1JnkrXHXVKVa0Yc00SwPeTPAb4KvDJJDcDPxxzTXOe3VBapyTbAi+nuy38E6tq1zGXpHkuyaOBH9ONuR4FbAV8sqpuHWthc5xHFuqzC7Ab3eMWHbPQWLUz9D5fVc+nOztv2ZhLmjc8G0pTSvK+dsbJO4ErgMVV9ZIxl6V5rqruAX6aZKtx1zLfeGSh6XwLeHZV3TLuQqRJ7gQuT7KcgbGKqnrD+Eqa+xyz0P0k2a2qrkmyz1TLq+rima5JGpRkyRTNVVUfn/Fi5hGPLDTZm4ClwJ9Psazozm+XxmnrqvrAYEOSN46rmPnCIwtNKcnmVfVffW3STEtycVXtM6ntkqrae1w1zQceWWg65wGTu6KmapNmRJJXAK8Edk5y5sCiLYC146lq/jAsdD9JHg8sBB6ZZG+6W30AbAk8amyFSd0fKzcB23H/btI7gJVjqWgesRtK
99MGD48GFgODV2zfAXysqv5hHHVJGi/DQlNK8vKqOm3cdUiTTXoI0sOBhwE/9OFHo2U3lO4nyauq6u+BRUneNHl5Vb1/DGVJ9xp8CFKSAIcB+4+vovnBK7g12aPb+2PoBg4nv6RZozr/CBw87lrmOruhJG1UkvzywOwmdONrz62qZ4+ppHnBIwtNqd0basskD0tydpI1SV417rok4CUDr4PpTr44bKwVzQMeWWhKSS6tqr2SvAx4Md2V3V+tqj3HXJqkMfDIQtOZOPnhRcBnq+r2cRYjTUjyM+1o94o2v0eSt427rrnOsNB0Pp/kGuCZwNlJFgDe6kOzwd8CbwF+AlBVK+kezqURMiw0pao6DvhZuudY/ITuVtD2C2s2eFRVXTCp7e6xVDKPeJ2FppTkYcCrgF/oTmXnK8DfjLUoqXNLkqfQLsxLcjjdbUA0Qg5wa0pJPkJ3ZezEYyt/Dbinql47vqokSPJk4CS6I9/bgG8DR1XVDWMtbI4zLDSlJJdNPvNpqjZppiV5BHA4sAjYFvgB3fV57xxnXXOd3VCazj1JnlJV34J7/5q7Z8w1SQBnAN8HLga+O95S5g/DQtP5XeCcJNe1+UXAa8ZXjnSvHavqkHEXMd94NpSm83Xgw8BP6R4s82Hg38ZakdQ5L8kzxl3EfOOYhaaU5FS6vuBPtqZX0j37+FfGV5UESa4CdqEb2L6L7gFdVVV7jLWwOc6w0JSSXFVVu/e1STMtyZOmavdsqNFyzELTuTjJ/lV1PkCS/bj/k/OksTAUxsMjC00pydXAU4H/aE1PBL5Jd6Wsh/zSPGNYaErTHepP8K87aX4xLCRJvTx1VpLUy7CQJPUyLKT1kOScJAdPavvtJB+aZv1zkyyemeqk0TEspPXzaR74oJ0jW7s0ZxkW0vr5HPCiJA8HSLIIeALwiiQrklyZ5B1TbZjkzoHpw5N8rE0vSHJakgvb6zmt/blJLm2vS5JsMeLvJk3Li/Kk9VBVa5NcABxKd/fTI4FTgfe0ZZvSPYZ2j/a4z2F8ADihqr6W5InAWcDTgDcDx1bV15M8Bh9rqzHyyEJaf4NdURNdUEckuRi4BHg6sD63RXkB8FdJLgXOBLZs4fB14P1J3kB3Xy4fHaqxMSyk9XcGcGCSfYBH0d2V983Age3K9i8Am0+x3eBFTYPLNwH2r6q92mthVd1ZVccDrwUeCXw9yW6j+DLSMAwLaT1V1Z3AOcDJdEcVWwI/BG5Psj1dF9VUvpfkaUk2AV420P4l4PUTM0n2au9PqarLq+q9wIWAYaGxMSykB+fTwJ7Ap6vqMrrup2uAT9F1H03lOODzwHnATQPtbwAWJ1nZbr/9m639t5NckWQl8BPgnzb815CG4+0+JEm9PLKQJPUyLCRJvQwLSVIvw0KS1MuwkCT1MiwkSb0MC0lSr/8BUAzAbLw3aFcAAAAASUVORK5CYII=",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "subs = cleaner.text_category(series= tweets_df.subjectivity)\n",
+ "subs = pd.Series(subs)\n",
+ "\n",
+ "# bar chart of how often each subjectivity category occurs\n",
+ "fig, ax = plt.subplots()\n",
+ "ax.tick_params(axis='x', labelsize=10)\n",
+ "ax.tick_params(axis='y', labelsize=10)\n",
+ "ax.set_xlabel('Values', fontsize=10)\n",
+ "ax.set_ylabel('Subjectivity' , fontsize=10)\n",
+ "ax.set_title('Subjectivity values', fontsize=10)\n",
+ "subs.value_counts().plot(ax=ax, kind='bar')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Hashtags"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 [{'text': 'City', 'indices': [132, 137]}]\n",
+ "1 [{'text': 'China', 'indices': [18, 24]}, {'tex...\n",
+ "2 [{'text': 'XiJinping', 'indices': [127, 137]}]\n",
+ "3 [{'text': 'XiJinping', 'indices': [9, 19]}]\n",
+ "4 []\n",
+ " ... \n",
+ "7435 [{'text': 'China', 'indices': [29, 35]}, {'tex...\n",
+ "7436 [{'text': 'exactly', 'indices': [29, 37]}, {'t...\n",
+ "7437 [{'text': 'Taiwan', 'indices': [168, 175]}, {'...\n",
+ "7438 [{'text': 'China', 'indices': [17, 23]}, {'tex...\n",
+ "7439 [{'text': 'Pelosi', 'indices': [16, 23]}]\n",
+ "Name: hashtags, Length: 7440, dtype: object"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tweets_df.hashtags"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[] 527\n",
+ "[{'text': 'Taiwan', 'indices': [0, 7]}] 62\n",
+ "[{'text': 'ThankYou', 'indices': [0, 9]}, {'text': 'JoeBiden', 'indices': [20, 29]}, {'text': 'Nides', 'indices': [42, 48]}, {'text': 'Pelosi', 'indices': [63, 70]}, {'text': 'IsraelHasTheRightToDefendItself', 'indices': [72, 104]}, {'text': 'IAmAGoodJew', 'indices': [107, 119]}] 20\n",
+ "[{'text': 'Taiwan', 'indices': [36, 43]}] 20\n",
+ "[{'text': 'Taiwan', 'indices': [44, 51]}] 18\n",
+ " ... \n",
+ "[{'text': 'China', 'indices': [25, 31]}, {'text': 'Taiwan', 'indices': [32, 39]}, {'text': 'US', 'indices': [40, 43]}, {'text': 'TechStocks', 'indices': [111, 122]}] 1\n",
+ "[{'text': 'Taiwan', 'indices': [42, 49]}, {'text': 'Chinese', 'indices': [92, 100]}, {'text': 'France', 'indices': [115, 122]}] 1\n",
+ "[{'text': 'Baerbock', 'indices': [0, 9]}, {'text': 'BaerbockRuecktritt', 'indices': [10, 29]}, {'text': 'pelositaiwan', 'indices': [30, 43]}, {'text': 'pelosivisittotaiwan', 'indices': [44, 64]}, {'text': 'CNN', 'indices': [186, 190]}] 1\n",
+ "[{'text': 'BREAKING', 'indices': [14, 23]}, {'text': 'Taiwan', 'indices': [25, 32]}, {'text': 'Chinese', 'indices': [80, 88]}] 1\n",
+ "[{'text': 'China', 'indices': [17, 23]}, {'text': 'Taiwan', 'indices': [45, 52]}] 1\n",
+ "Name: hashtags, Length: 5697, dtype: int64"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tweets_df.hashtags.value_counts()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As we can see, some rows have no hashtags (an empty list, `[]`)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Removing tweets with no hashtags"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 [{'text': 'City', 'indices': [132, 137]}]\n",
+ "1 [{'text': 'China', 'indices': [18, 24]}, {'tex...\n",
+ "2 [{'text': 'XiJinping', 'indices': [127, 137]}]\n",
+ "3 [{'text': 'XiJinping', 'indices': [9, 19]}]\n",
+ "4 []\n",
+ " ... \n",
+ "7435 [{'text': 'China', 'indices': [29, 35]}, {'tex...\n",
+ "7436 [{'text': 'exactly', 'indices': [29, 37]}, {'t...\n",
+ "7437 [{'text': 'Taiwan', 'indices': [168, 175]}, {'...\n",
+ "7438 [{'text': 'China', 'indices': [17, 23]}, {'tex...\n",
+ "7439 [{'text': 'Pelosi', 'indices': [16, 23]}]\n",
+ "Name: hashtags, Length: 7440, dtype: object"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Keep only the tweets that have hashtags. The column stores each list as its string\n",
+ "# repr, so a tweet without hashtags is \"[]\"; comparing against \" \" kept every row.\n",
+ "hashtags_list_df = tweets_df.loc[tweets_df[\"hashtags\"] != \"[]\", \"hashtags\"]\n",
+ "hashtags_list_df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### flatten the hashtags"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " hashtag \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " [{'text': \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 'City', \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 'indices': \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " [132, \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 137]}] \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " hashtag\n",
+ "0 [{'text':\n",
+ "1 'City',\n",
+ "2 'indices':\n",
+ "3 [132,\n",
+ "4 137]}]"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Create a dataframe where each hashtag gets its own row. Every cell is the string repr of a\n",
+ "# list of {'text': ..., 'indices': ...} dicts, so parse it rather than splitting on spaces.\n",
+ "import ast\n",
+ "flattened_hashtags = []\n",
+ "for hashtags_list in hashtags_list_df:\n",
+ "    flattened_hashtags.extend(tag['text'] for tag in ast.literal_eval(hashtags_list))\n",
+ "flattened_hashtags_df = pd.DataFrame(flattened_hashtags, columns=['hashtag'])\n",
+ "flattened_hashtags_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "hashtag \n",
+ "'indices': 27668\n",
+ "{'text': 20755\n",
+ "[{'text': 6913\n",
+ "'Taiwan', 5063\n",
+ "'China', 2439\n",
+ " ... \n",
+ "'himalayas', 1\n",
+ "'OPERATIVES', 1\n",
+ "'historical', 1\n",
+ "'ONEPIECE1056', 1\n",
+ "'antiwhitism', 1\n",
+ "Length: 5709, dtype: int64"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "flattened_hashtags_df.value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAASsAAADnCAYAAABG+XDPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAA2xklEQVR4nO2dd3ic1ZX/P2eqimW5V4FlXMAYYwO2AYMLZgMkIrAQQkggcTAkIQFCIOwvStdmyaIUYJcQQk1IIZuQLGHZiEBYEhdMsXHBchGhWO69SFabkWbO74/7CsayyjTpnRndz/PoYfTOfd97JEZf33vuKaKqWCwWS6bjcdsAi8ViiQcrVhaLJSuwYmWxWLICK1YWiyUrsGJlsViyAitWFoslK7BiZbFYsgIrVhaLJSuwYmWxWLICK1YWiyUrsGJlsViyAitWFoslK7BiZbFYsgIrVhaLJSuwYmWxWLICK1YWiyUrsGJlsViyAitWFoslK7BiZckoRCRfRJaKyAQRWeJcmyki9yf4nAoRudN5/T0R+ac02lgbx5gfi8jCdM1pAZ/bBlgsHVgMPA1E2i+o6hvAG8k+UFW/kwa7EuUnwKPA31yYOyexKytLpnEt8D8YsToEICILROTPzusKEfm5iCwRkfdE5MvtN4rIN0XkHyLyMnByzPUnROQq5/UsEXlFRN4UkZUiUiQiXhH5kYisEpH1IvIFZ+xoEVkmIutEZIOIzHUeub+nH0JVtwJDRWRUen4tFruysmQMIhIATlLVWufSlV0MPQW4ACgC3hKRnwGnA9cAMzCf6zXA6k6e/3vgE6q6SkQGAs3ADUCdqs4SkSCwQkT+6sz/gqp+X0S8QAGAqs6KeeZzwI2quqsTO9cA5wH/ndAvwtIpVqwsmcQw4Egc46pUNQSERGQfMBKYC/xJVZsAROTZTu47GditqqsAVLXeGXsRcHr76gsoBiYBq4Cfi4gfeEZV13V8oKp+pBs79wFj4vh5LHFgxcqSSTQDeXGMC8W8jpD651iAW1X1hePeEJkHlAFPiMi9qvqrBJ6bh/mZLGnA+qwsGYOqHga8IhKPYHVkGfDPzmliEfDRTsa8BYwWkVkAjr/KB7wAfNFZQSEik0WkUETGAXtV9VHgMeDM7gwQkbtF5IqYS5OBDUn8LJZOsGJlyTT+Cpyf6E2qugbjj3oT+AtmC9dxTBj4BPATEXkTeBGz+nkM2ASsEZENwMOY1doC4E0RWevc958dnykiz4lI+1ZvGrDHue4HJpLCKablWMS2j7dkEiJyJnC7qn7abVsSRUReUNWLnddXAGeq6rddNitnsD6rfkppedUYzKnaOGBoF1+FgALRTr5aMA7kvZ187QTeqa0si5AgqrpGRP4uIl5VTfh+N2kXKgcfcI9btuQidmWV45SWV5UAM4EpGHFq/xrYy1OHgBqgGuO32QBU11aWbevleS05ihWrHKO0vGoSMC/mq9RVg47nMPAysBRYAqytrSyLumqRJSuwYpXllJZXFQOXYY7X5wGj3bUoYQ5hUlJeAJ6vrSzb4bI9lgzFilUW4gjU5cDHgYuAgLsWpQ0FVgC/BZ6qrSw76LI9lgzCilWWUFpeVQBcBVwNfIjcEaiuaMWEFvwWeKa2sqzRZXssLmPFKsMpLa86FbgJ+AwmDaQ/0gQ8BdxXW1m23m1jLO5gxSoDKS2vEuAjwO3AhS6bk2m8BNwL/KW2ssx+ePsRVqwyiNLyKh/wWeBOYkqcWDplE/AfwK9rK8taXLbF0gdYscoAnJXU1cC/YbL9LfGzD/N7e7i2sqzVbWMsvYcVK5cpLa/6MPB94Ay3bcly3ga+UVtZ9ke3DbH0DlasXKK0vGoOcDcmNsqSPl4F7qytLHvFbUMs6cWKVR9TWl41GPgxpta4pfd4GrijtrJsq9uGWNKDFas+pLS8qr3MyEi3beknNAD/D3jInhxmP1as+oDS8qoTgAeBS922pZ/yd+DG2sqy99w2
xJI8Vqx6mdLyqpsxvqkit23p5zQC3wB+YldZ2YkVq16itLxqEPAEJofPkjksB66trSzb7rYhlsSwYtULlJZXnQX8ARjvti2WTjkAXFNbWfaS24ZY4sfWYE8zpeVVX8RUDrBClbkMA14oLa/6mtuGWOLHrqzSRGl51QDgEeCTbttiSYingc/WVpYdddsQS/dYsUoDTj3zv2C6AluyjxrgytrKss1uG2LpGitWKVJaXnUK8Dym8YIlezkClNnI98zF+qxSoLS86hxMPXErVNnPIODF0vKqS9w2xNI5VqySpLS8qgxTW2mo27ZY0kYB8GxpedU1bhtiOR4rVklQWl51PfAM5sNtyS38wJOl5VU3uW2I5VisWCVIaXnVIuBxbIPYXMYD/Ky0vOobbhti+QDrYE+A0vKqjwG/B7xu22LpM26vrSz7D7eNsFixipvS8qqLgWfJ/a4ylmNR4NO1lWVPum1If8eKVRyUllfNxYQnWB9V/6QVuLy2suwvbhvSn7Fi1QNOnt/fgIFu22JxlSbgwtrKstfcNqS/YsWqG0rLq8YCq7HF8iyGQ8Dc2sqyTW4b0h+xp4FdUFpeFcTkjVmhsrQzBHiutLxqmNuG9EesWHXNg8Bst42wZBzjgN+VllfZE+E+xopVJ5SWV30J29DB0jUXAj9w24j+hvVZdcA5+XsJE8lssXTHlbWVZX9y24j+ghWrGErLq0YB67B+Kkt8HAHOrK0s2+K2If0Buw08lkewQmWJn0HA70vLq+wqvA+wYuXg5Px91G07LFnHLODrbhvRH7DbQN6Pp9qA+ZfSYkmUMHCGjb/qXezKyvAYVqgsyRMAHi8tr7J/T71Iv//llpZX3QjY6pCWVDkHuM1tI3KZfr0NdLZ/m7B5f5b00AScZk8He4f+vrL6AVaoLOmjAHjUbSNylX67siotrzoX04xU3LbFknNcXltZ9qzbRuQa/XNlVVEsp0rt17FCZekd/t3mDqaf/ilW8PGqwDc+/FTgX5cOoe6g28ZYco6pwGfcNiLX6H/bwIpiH7ARmAygSt3vIwvWfqft+nPD+IPuGmfJIbYDk2sry1rcNiRX6I8rq8U4QgUgQvE1viULNgYX71vs/YvtxmtJFycAt7htRC7Rv1ZWFcX5wNvA2K6G1GtB9Zdab+Pl6LRpfWeYJUc5BJxUW1lW57YhuUB/W1l9gW6ECmCgNE37TeDuaX8L3PFqqeze3kd2WXKTIcCtbhuRK/SflVVFsQA1xGwBe0KV0PLotNduab11Rj0DinvPOEsOsxsYV1tZ1uq2IdlOf1pZLSQBoQIQITjPWz1/XfALke/6frnUR5v9wFkSZTRwtdtG5AL9SaxuSvZGj+iQ630vzN8UXLzjGu/fXk+nUZZ+gc0ZTAP9YxtYUTwK2EaaShUf1gHrPh++I7hKT5mSjudZ+gXn1VaW2dPmFOgvK6sbSGNN9cHSMOOpwPdOeSHw/1aMZf/udD3XktN8xW0Dsp3cX1lVFHuALcCJvfF4VZpfjJ71+u2tX5rZSP6A3pjDkhNEgNLayrIdbhuSrfSHldVH6CWhAhAh/yLv6gXrgzc2/4vvd8s9RCO9NZclq/EC17htRDbTH8Qqacd6InhFh9/se3bupuD1713uWfFGX8xpyTo+4bYB2UxubwMrik/EbAH7XJT3afHqG8N3DlyvEyb19dyWjGZibWXZu24bkY3k+srqM7j0M46QurP+J/Dtk54NfHP5SA7tc8MGS0ZiV1dJkutidbGbk4vgPd2zZe5rwVsK7vf/ZEk+oSY37bFkBFaskiR3t4EVxUWYRFKf26a0E1HP7vvaPvbeA5F/ngNiC//1X06trSzb7LYR2UYur6zmk0FCBeCV6Og7/X84b2Nwcc2HPG+sc9sei2t8zG0DspFcFqsPuW1AVxRKaMqjgXtnrAjeuvJk2WY7ofQ/FrptQDaSy9vATUDGp8Oo0rZKT15xU/grpx2ieKjb9lj6hBZgUG1lWchtQ7KJ3FxZ
VRSPJQuECkAE32zPW/NXB7/oq/Q9siRAq/0A5z55mKaolgTITbHK4C1gV8SUV95vyyv3Cy5w24Bsw4pVhuGXSMl3/L+esz54w4bzPdXVbttj6TUWuG1AtpF7PitTEXQPMMJtU9LBlujIVxe3/svYLTqm1/IbLa4QwvitbPebOMnFldVUckSoAMZ79p77t8Cdo37tv3vpQBps44HcIQjMdNuIbCIXxSrnutKIEJhryyvnIlPdNiCbyEWxmui2Ab1FbHnlT3pfsuWVsx8rVglgxSoLCUjb+Lv9j5+9Nvj5dbOkxqZtZC+num1ANmHFKouJLa9cIvt3uW2PJWHsyioBrFhlOSLIyZ4d5y0P3Db4Uf89SwtpbnDbJkvcjCotrxrithHZQm6Jlam0kDMngYkgQv6HvKvn2/LKWYfdCsZJbolVP1tVdUZMeeUttrxyVpBQ493+TEpiJSLni8j1zuvhIjI+PWYlTb8Xq3bypHXifwZ+OnNl8IurT5d333bbHkuX9MudQDIkLVYi8l3ga8DXnUt+4DfpMCoFrFh1wJZXzniGu21AtpDKyuoK4DKgEUBVdwFF6TAqBaxYdYItr5zR2JVVnKQiVmE1iYUKICKF6TEpJUrcNiCTEWHAZd5XF2wI3lB3i/dPL0OuJYZmJXZlFSepiNVTIvIwMEhEPgf8H/BoesxKGtsROQ6c8srnbwoufusiz6q1btvTz7FiFSdJ1yhX1R+LyIeAeuBk4Duq+mLaLEuOfJfnzyoKJHTKI4H72KlDVy4O/8vwt/REtw9I+iNWrOIkpYYKjji5LVCxFLhtQDYyVg7Ofj5Q3vaGTl72hfDtU2155T7F/q7jJJXTwCtF5G0RqRORehE5KiL16TQuCezKKklE8M3y/GOeLa/c5wTcNiBbSMVn9UPgMlUtVtWBqlqkqgPTZViS5Lk8f9YTW175Bu9zr7ptTz/Am8hgEckXkaUiMkFEljjXPisiD3QYt0REZjqvU0rBEpHnRGRQkvdWiMhnexhzqYh8r6dnpSJWe1U10zL+cy0i3zX8Ein5tv8351YHb9g417PellfuPaS0vCqRz+1i4GkgbelUItKtO0hVP6KqR9I1XydUAR8VkW7dOKn4rN4Qkd8Dz2BKtAKgqk+n8MxUsUfxaaZImqf+IlDZ9tA7F/3BM2IObYNCEYo8nnCBz1sXDGlDoNUT8ke8bX4PbT6PJ+LzeCJejz8kQQmRRwt5EiJPQgQ8YYLSSsDbKn5fGz5fGz5fBK8/ijcQFQkoniBmW9S/ulVLQt25rwU+hRGrQwlOMwz4X+AuTHzkvwGHgVOAySLyDHACZofyn6r6iHNfLaaq6QDgL8DLwBxgJ3C5qjaLyATgp5gDgybgc6paAzQAzd3ZparqrBIvBZ7qalwqYjXQMeqi2Hkxqu8WURfnzln2+bz7n5m5+dx7H30nsHHqDZsOFA0dVdD21o7xfk9kVN7I4MDgSH/Y7ymo87S0HfIcbTosjTT56jyat9srgYagL9BEMNgYCgab2gLBpmgg0Nzm97eIz9ca8Hja8jyeSDHKYBF8USTaSqAlRKAlRF4oRDAUIi/cQl5rC/mtzeS3tZDf1kx+pIW8aDP50RbytcUIIyHyJEzQEybgCRPwtuL3OsLof18Y8QQVCSqSB+Qj4vaKPK7PrYgEgJNUtda5dGW8E4jISOBZ4Fuq+qKILADOBE5T1fZGu4tV9ZCI5AOrROS/VfVgh0dNAj6pqp8Tkacw3aV/AzwC3KSqb4vI2cCDwEJV/XGcJr4BzKU3xEpVr0/23l7Erqx6ge0+36G9g2XqilNDSxes/+mCo4Vj3103/dYhe3xFZ+4K1e2K1K9+OxKuociXP2xswcTdJ+VPbB0cPHWorykwSUT8IVrrjkjj7oOehiOH5Ghoj6dRj0qLN0TrgAjRMYg5GPH5QvV+f8uRYLDpaCDY1BQMNoaDwca2QYFmDQQOe/3+loDX25rv9bYNEYkWA4NFUt/6t6kvHCLQ
HCYYaiGvXRxjBTLiCGS0hTxHHNsFMijh91eOAU8rAV+bEUh/2wfi+L5AYsQx1qke2XPBjHg/tyXAQGcV87iqLnD8QdcBNe2DnFVKEaAishhzSr4V2MWxJ+YrgT+JSI2qXgN8WUSuwKyuAhhh6ihWW1R1nYjcglkBl4rIAMxK6w8xi8RgnD9TO/uAMd0NSFqsRKQE+AlwnnNpOXCbqu5I9plpwIpVL1Dr9x8FePjDnjlzNkfeKWrcOXHuK+XsGDvv1X9MvGqcp+CC+f6CCwhHmw69E1q/q+bon/OI1o/3iDcyPO+Et0oKJh8cmT8uMNw35kSPeMbGPltRbSK877CnYe+h1ob6g6GjrUcam7z10jIgRNsoRUcjXX1ONerzhQ4HAs1HgsGmhkCwqdkIXFMkGGjCH2jx+n2hoNfXmu/xRIpEooOAgSLHbjN9tAV8tAUK6ZsspKhKNGzEsaWFvEMQ9znGlZgtVUd/VQswuMO1gZi/72869/wRIwjrY8YIxsE/V0QuAf4JOBezKppB5wdW7S6fn2PygjdifMVHVHVGvD9IJ+TRw3YxlW3gL4DfAh93vr/OueZmz76jwGgX589J3vP7wwARr/jvucLTWP6HqApIyc5l547e/VrTxlOvX3Jg6LQ54ikY4ss/5zxf/jmotjZFwpve3NeyLrK3+a/TgUEA+d4Be8cUTKwdUzCxeWhw9OCAJ39ioQRHFEaDI0oYetyfYRSNNEjzjsPSeOCgHG045Gloq5Mmf6OEilppG9HWmjeyrS1vcFNTx7/Vroi2+QMtRwKB5rpgoKkhaASuNWi2p/gDLT6fLxT0elsLjMDpIJH05rx6UE8eocI8QoUDqU+k+ceVGLHwcay/ai9woYiMUtU9mFWVH2jD/E2MxDjm/wBcDfzAuW8M8Bime/mHgMOq2iRmW9xtR3Nn3CFgrKrWi8gWEfm4qv5BzPLqdFV9M/YeZzWGqj7grOBmq2p7IYTJwIbu5kxFrIar6i9ivn9CRL6SwvPSwU5sfaC0s83ve9+nsnaiZ/rWEdGXS/dxPoA3Gi44fcPDCxoLRtWunX7rgXBwkHNc7i/wBaef4wtORzXaFm19Z01by5qG5sjuye8eXXf2u0fXASBIZEhw9D/GFkzaOyp/vAwMDB3jwTve+cDjQbwDtaBkoBaUjGP4cWLWRqSlXpp3H5KGgwc9R5sOS0O0XpqDTRIe1EZkFNJxxeHxtYYLhrWGC4Y1xhmPKRIJ+/0thwPB5vpgoKkxGGxsDgabWoPBRg0EmsUIXDjP620r8HgiA0EHi8Qd8xdXe7V2fxXm5KxUVWP9Vc3AbcBzjtCMB74ErMMI2ekYUfo1cJOIHAU2YcTqdxgH+22AR0Q2A28R33JvB9Dez/Ja4Gci8i2MUP4OeLPD+FOAFc7rCZjsl3Yu4IMKLp2SilgdFJHrgP9yvv8kx+9v+xo3t6A5y26fzx/7/V3XeKc8en/ksMRsPQqb9pSe/+o3S3eOPm/lPyZ9Yox6vO8nlYt4fN7A5DO9gcmoqkbbdmyMhN7YH23dNk6JjD8Y2jX5YGjX5PWHlwLg9wTrRuef9M7YgklHh+WVDMj3DpggIp0unXx484bogPFDdMD4idFRx70fpq3+iDTuOeRpOHxQGloOexo4Ks35LbQOdvxlPSbgq3oD4XDhyHC4cGS8AUseT1uzP9B8OBhornf8by3BYFNbMNCkgUCz+Pwtfp8vnCei2+J85DDgCObE7XZMLi44rg9V/R/gf+B9n1WNqkac7d0s4ELgR8BvVPVBJwbrTVXdJiI7Mdu601X1uBNGVS11Xh4ATot562mMAOE46S/p4WcoBe5wXs9wfo5253++qnYbIpOKWC3G+Kzuw/zCXgHcdrrvdHn+nOSg13PMH3R9oQx9eo4s/9grOrfj2LG7V8wetXdl86ZTFi3dP3zG2Ygc4/cQEfH6T5jq9Z8AQLRt33ttoTXbouG3R0DrFEBao6HibY2bz9rW+EEY30D/sNqx
BRN3jimYECkOjBjhE//EnuKDAAL4Bo7Q4oEjIsWdvt9M+MBhT8PeQ9JQd9DTED4ijd4GaSkI0Tosio5B8Hd6Yw9Eo778UEtRfqilqFunMbDzkovjemQzJkPjPmCliNRhVikPAieIyDpgoqoOAIZghAWMyH1PVS8RkRcxrpoKzOLiFCcsAYyP62MkVoygRz9TLKp6aczr62LeOhH4ak/3p3IauBVTzyqTsCurXqDR4zluVfP7eZ7zL1kdqS4MHd9U1httzZ+26bH5Tfkjtq2dfuueUN6Q2V092+MbcVLAd8lJFF6CRup3t4XWvB0Jbx6ANp9OzOezvvVAaX3dgdLNda+ZOcTXPCLvxE1jCyYdHpE/LljoKx7nEU/C/sp8AsPyo0OGjWHIcVtMRaMN0rLrsDTuPyhHjx7yNETqpMnbKC1FYdqGK4wi9dPIvfEMUtXDIjIQE36wGpiiqpc6q5IHgHOAd5wVUxDYLiJjgFHAbhE5D+OH2upsFa8Gpjl16BCRC4Bv00GsRORuYKWq/inW5+S8PZkPtnVJo6qr4hmXsFiJyHe6n1f/LdFnphErVr1Aa2cF4kTkrk968/79iUibdPE5Kmjed+J5r337xN0jZ6+qOfnaEerxjetuHvEOHO0vWDDaX7AAjTYfjoTWb4yEq/0arZ9GhyT1iLbl725+7/Tdze99MJ934J4xBRNqxxRMDA0Jjh4c8ORNcmKGkkIQT5HmjynS/DEnMuw4MYsQDddL8+7D0nDgoOdo0yFpiNRLc6BJQsWtREYiDItjmkR2A2FgT+wFVd0rIrcBz2FWXv+BiYOKiogf+DFGVD4KrAVuwsQz7WwXKodlwKkiMlpVd8dcn4YRSDjW5wQmEqAiAftTQjTB+msi0tlyrRC4ARjqLEPdoaJ4JhCXSlvio87jOXL+uJJBXb3/zd9Flk7fovN7ek5UfKHNp1z36t4RM88mQQExJ4ubqyOhda0aOTCV44/pO0WQtqHBse+MLZy0b1T+eG+Rf/BYr/hKE5k7FVppazgiTbuNv+xo82Fp5KinOa+Z8OAI0dGYU8bPVVRUPNbTsxwH+y5MBPnjwJ2x2ypnTENnf38iMhZ4XlWPWwXHMe8Lqnqx8/rPwJWqGhaRM4A7VPXTiT4zWRIWq2NuFinCnCLcgIk8vUdV3avzXVE8Ctjd4zhL3GwMBN6+ZuyoSV29Hwxr4xP3Ruq82n1AXzvNecN2rJ1+646W/GHnJGOPOVl8t7ottKZe23ZNBk1o6xfw5B8anT/+vbGFkxqHBUsG5HkLJ4pI5w6tXqaF8OFGCV0x/e6PLO1prLOl+xumgEAtRijiFSs/sEdV01aOxqll93ZMNH2vk5TPSkSGYLz61wK/BM5U1cPpNCxJ9gKtkJxT1HI8tX5ft2V/QgEpfPxiz4bPPx+NS6zyWw6UzHn9uyV7h5+1etOUTw9Rjz+hgn/mZHHSGd7AJFRVtW3HprbQ6n3R1q0nQGRCT/eHo81DtjZuGrK1cVP7JR0UGPHemIKJu8bkT9DiwPCRXvFNEJGEqiEkQx6BwXka2Brn8GYgT1V/7qTKJDZVAo7weHCj0GYy28AfYYLTHgF+qqqZ1QG4ongrH8R+WFLkgUHFyx8eXHzcqV9HHnygbeWwo3TpSO+MqHjDb03+5Ku7R51zFiZlIyWikf1bIi2rt0XC7wyD8KkkmRDtFX/jyPxxb48tmFQ3Iu/EvELfwPEint5o7NACFJZUzu0xN9DxvR0G/hkTjd6kqiOc9xYB38LELl2PCWv4lape6Lx/FvB9Vb3E+f7HwHOq+re0/0S9SDJiFcVE0bZxbHqLYBzs7ta0qiheDiZg0ZI6/zJ86JLnBxQu6GncyEO64/6HI0MkiWqtLcHBu9dOv3VLc8HIOUkZ2Qkard/d1rLWOVlsmkaKq+1C36CdYwombBtbMDE8ODBqqN8TnCQiiea/dWRdSeXcM+IZKCI3Y7JE/ghcBRx0TgOHYJKAZwLbMCk1NwDlMb6m
O4GQqv7E+X4c8KiqXnT8TJlLwttAVXU7Q70n1mDFKm3s8Pvi2g7tHSIlS6fJkgXVuiDROfJCh0efu/J7o/cNm75205TrB0a9/h63cz0hnoGj/QXzR/sL5qPRlrpIeP2GSKjaq9G600lCUBvbjox9u3712LfrVwPgwRMemjd2U0nB5AMj80t9Rf7BJR7xJrqi7za9pAPXYkq73Nbh+sXAi061BDBlxj+DCR5t5zLg8vZvVHWriAyNSc/JClKqwZ6hvAx82W0jcoX9Xm/c1Vcf/rDnvDmbIu8GIiQlNiMOvHnGsJfvaPvHxKuX7hpz/pmYA5yUEU9esS9v9nm+vNmotjZHwjUrI6F1YY3sPxUTQJkwUaKB/S3bT93fsv39a0FPwYHRBSe9N7ZgUvOwvLEDg56CCU5sVFes7+a9D+z/oDRMlYjMw/xjfJqIPIdxujc6QaF7MeE7zar6rHPvcODeTnzKazChB/8d1w+cAeSqWFnSRJ3HE/dJWcQr/nuv8DSU/zH5smIejfpOeft388dv/cu+ddNvWd9YOOa8nu+KHxF/vi84bbYvOA3VaCTa+t66ttCaOm3bORF0bM9P6JpQtGlYbcOGYbUN7y+YooMDI98ZWzBpz+iCk3Sgf9gox3nfvjt5I85Ht6fagAldmKqq55mfR+4EDrVXPBCRb8feqKr7MQUyO9JjSZZMI/fEqqJuNxXF72GSPi0pEhJJ6Lh7zSTP9NoR0RWl+0hJZILhuhFnr/r+iANDpr65YeqN+VFvIO0J6iIerzcwcYY3YBp5R1t3bDYni7VjIZKO7t6ew+G9Ew+H907ccMT8G+qTwNGR+ePeLSk8+XDpgKnxilUzXfcX2AksiPm+BFgSxzPTfkLY22S6/ylZlrttQC4QhpAmKFYAd13jPUXNyVXKDDu0cfr85XdMKNnx96WoxlWhIFk8/pIpgQGXz88bfNvEwMBFtd7A1CVIYANprJPWpuGinU1vz3h9/5+HlVTOPRrPPc4Wzisd8iwdXgAuEpHBTrL3Rc41RORXIjLbeX23U5alnR5LsmQauSpWdiuYBnb7fEkF+NYXytA/zZG0/SEI6p38zh/nn/fK18MDjm5/mVQimePE4x1a6i+8eEHeoFtOCxZ/bp83OHMZUrAaE8eXDhLNqfsrnRwcOVUS/g2TubEKk7TcXjnhdEzUO5i0mT3wfpDoROLfhmYEVqwsXbLV70t6dfS7eZ7zG4OktStOsPXo8NmrK8+fsf6BDd5IqM86K4mnaKS/YN68vEE3nRUs/lKTL3/uCvEUv4ppupAsf49rbqf1Fqb8y2+cy0PF9Otc59SfOkFVJzpfv3DuG4iJMN8hIhXAJFVtr1F1KfBHVW3rYe7LRKTceZ22llrJkptiVVFXwwclMixJUuv3Jx/wKyJ3XeMNqonHSytDDtdMm7f8qyefuO3FZfRx5oQ5WZx1XrD4hnODg77s9RVcvFK8I5aTWC03xZzixUN76631GCd7K6a4Xshxqs8ErhORM4+ZQLVeVT8ecym2moIPuKdHI1WfVdXKOO2EOFtqJUtuipUh5dIV/Z0tfl9KW553x8jk9aXSK/8fBPVMfO+Zeee/8jUtqq9djmqfdzYS8eX5glNnBwdeNzc46CuD/IWXr/P4SpaB9FT9Y+1Xf//neP8xvRazqooAm1R1BXANptkDqtqIKRkzUUzj0+dFZLWILBeRU463WWZgakctE5E/tRc1FJEvi8gmEVkvIr9zrsU2T42rpRbGuX9pd+OSJZfFym4FU2S7P/UUy3s+5jkrIuzqeWRyBFobh8xa86O5Z6z7jxpvW/PG3pqnJ8zJ4oQZgaKr5+UNvr0kUHRNjcc/cQn43u5k+PPxPfOD1luqur1DKeP2MUMxtaw2YlLgblXVs4A7MYX5OvIr4GuqejpQDXzXuV4OnOFcv6njTar6Y1X9fRxmt7fUSju5LFZ/cduAbGe3zxvoeVT3tARkwOMXeeIt3Zs0g+veOXXey3eeWlr7
3HI06nZ5bTy+MacEBly2IG/wlycFBi7a6g1MW4oEqzE9Ap/t6X6H2PiqjswVkbUYx3slptVWezusdcDDdGie4lSXGKSq7VUefgnMc16vB550SpWnsnXvtfit3BWrirqNmChdS5Ic9njTUpvs/870nHOgyGxbehMBOam2au7cFV/zFte9uwzVtLVYTwWPd+g4f+GH5ucNunlasPjGDRD376K7+KrlqnqGqp6lqg8R0w4r5qvbDjUdKMOk6JyJaXCabAxmr8Vv5a5YGX7ltgHZTJOn8yYNyfCvn/KOUfqmMZ+/rWnQWWvvnXfW2nve8bU2pfVEMlXEM3DJV3//57hCL3qIr+o4th7YIiIfB1PrXkSmdxhTBxwWkfZt2qeBpU5E/Qmq+nfga0AxplV85z+DyC3tJY5F5Aqn9HE7vRa/leti9VvSFxfTr1DQiOk3lxb2DpGSZadJn1ZxLa7fcvLcFf9y2knv/e8KNLq/L+fuhnj8PrF0Gl/VBdcCN4jImxgf1uWdjFkE/EhE1mM6zHwP0+j0NyJSjSl9fL+qHulmnlP44PSzs5ZaVXHamxApVQrNCiqKn8XUn7YkwH6v58DCE0viqSEeN96Itv7ynsi2ZBOdU6HVm19Xfdrn3zwyaNIckt/ipMp2YNzNDy2M+4/OCUm4vS/LB/dEh/LGv8HYt99pXvHb9jpa6SbXV1ZgnIiWBNnu86V9JdKe6Jzu58aDP9JcfOab/zlv5pofbvG1NnZsvtlX/DERoQJQ1TXA3/uicmm8qOqlqhp2Xl/nJEtDnC21kqU/iNX/cmyrbUscbPX748pbS5Q1kzzTt45wL6xk4NFtk+at+H/TJ7z79CtotK9rOf2i5yHHo6o/1ww5LOgOVV2lqut66/m5L1YVdWES9xP0e7b4/S299ex0Jjony7jtL82Z9/KdhUMObVqKal/4NZfe/NDCjHL2Zxu5L1YGuxVMkFq/r9ecmXWFMiydic7J4ouEimas/+n8WW/cvc0fPtrbYS4/6eXn5zz9Q6wq6l4H3nLbjGxily++csbJ0huJzslS1LhzwtxXys+c9PZTr6GR3oi2307nBfAsCdA/xMrwkNsGZBMHvN6kOxnHRS8mOifLCTuXnjN/+Z3FQw9UL8FxIKeJn9380MKM9zllOv1JrB7BpAJY4uCoxzOot+fozUTnZPFGw4XTNzy04OxVd+0KhI6ko97TUeBn8QxsLwfjJCQvibk+W0SWichbIrJWRB4TkQKnbMudXTzrlWQNFpEneupN6ASGLk52jmToP2JVUddEHGUxLIawMLwv5untROdkKWzaU3r+q9+cefJbv10p0UhPVRS646GbH1p4JM6x7eVg3l+FObFLf8AkH5+sqmdgEqG7baahqmlra9YFPwdu7eU5jqH/iJXhp9g6Vz3SKNJA911Z0kZfJTony9jdK2bPe/mrQ4fvX7sE1URPSEPAfQmMjy0H0x5uczPwy5jCeajqH1V1r/PtqSKyRETeE5H3uzqJSIPz3wXO+38UkRoReVKcnl0i8h0RWSUiG0TkkfbrQB3Q7TZYVZuA2vayyX1B/xKrirpGEvvw9Et29kJAaHf0VaJzsnijrfnTNj624JyV39sXbDmUiJ1P3PzQwt3xDOymHMxpmHpVXXEKpnfgbOC7TsnijpwBfAU4FdNIpb2ZxwOqOktVTwPycepQqeptqhrPNrLXysF0Rv8SK8P9mP5qli7Y5vcd6es5+zLROVkKmvedeN5r3549ZfOvVkm0bWsPw0OY0i3x0l05mO6oUtWQqh7A+GQ7y+dcqao71BQoXAeUOtcvEJHXnZzAhcDUBOfu03ZecYlVjOPP63xfJCIrnBrQ7deWiEip8/obyRokIqUi8qmY7xeIyBM93BNwHJA953xV1DVgkjctXbDF7+9z0XAj0TlZRu99fdb85V8dNXLvqiWodlUO5b6bH1pYm8BjuyoHsxE4q5v7QjGvI3TeXu+4MU4lhweBq1R1GqbscdwNbR36
tJ1XvCurxcDTMSH/C4GdTs2czo5kkxYrjOp/qqdBsTh5Si8Bn4jzlkeAzio4WoBav8+VcIKHPuKZE/byrhtzJ4pH24JTNz+x4NzXKw7mNR94rcPbe4B/T+R53ZSDeQBYJCJnt18QkSsdx3sqtM9zQEQGAFd1NkhiWnjFloZx6NN2XvGKVbvjr51BHB8GcAiIiEglkO+sup4EEJHrRGSlc+1hEfGKyCyn3nOeiBSKyEYROQ2zdJ7rjL0d4+iLp1/cM46dPVNR10ZqgprTbPf7pOdR6cfNROdkyW85UDLn9e+eM3Xjz1dLtG2Lc/lbNz+0MJncyuPKwTiO9GuAHzuhC5sxPqqUcjedEjCPYsTmBUwbr854v4UXx5aGAeP7ejEVOxKhxxIxjuNvm6qOirm2GJihql/u4p4GVR3gvJ4C/BBTUqJVRB4EXlPVX4nIXRiFzwd2qOrdTnzHnap6XNF5EZkJ3KSqN3bynhfYo6rxH7lXFL8CnBv3+H7CxSVjXt/l953d88je4UePt708bl/cNZwyhqh4w29Nvuap3aPnLLr5oYUJN7DI0HIwL6jqxc7r2NIwZwB39KWt8aysOnP8zQDijT25ELPnXiWmNvSFfNDa/XvAhzDthH7Y04NU9Y3OhMp5LwKERaTb+JMOfI5j9/MWoM7rSeR3mHYyIdE5GTwa8U1568n7kxEqyNhyMBfHvH6/NAxGF77dl7bEI1bHOP5E5GXMsvS3cc4hmDiR9rrQJ6tqhfPeUEz51CISd+51RhCIPxbG1Gn/VhrmzSmak2gZn04yJdE5CR6cUrM5pUOCLCoH86Kq1vblnD2KVUfHn6qeDzwOfL6b21pj4j1eAq4SkREAIjJERMY57z2MUecngR84147SQ3RuOyJSE/N6KHBAEy/3cS+wLMF7cpY2aIvSN9Hr3ZFJic5xsgv4pttG5DLxOtg7Ov7eAoZ0M/4RYL2IPKmqmzCrl786dZ9fBEaLyGeAVlX9LcapPktEFmJaAkVE5E3Hwf4+IjJTRB5zXg/DrNraSa72c0VdFPgspoljv2efz7sf00DAXTIw0bkHbptSs7m+52GWZImrBntHx5+IXI2Jz7i6l+3rzqZLMRG/9zvfPw2Uq+o/knpgRfHnMCLbr3k9L7jxxtEjEw0O7DW+9V+RpafX6ny37eiBJ6fUbL7ObSNynbj+Be3E8fd/wKDYoNC+RlX/HCNUAeCZpIUKoKLuUeC5NJmXtWzppXLGyfJjk+gcV8qKS2yge5eIJU3EvdyPdfyp6iFVvaiboNA+RVXDqpqOHoE30s/rtW/x+zPqdLQlIAN+nrmJzvXAx6bUbM7oNKFcwX3fRCZRUbcb+KLbZrjJtl4sZ5wsL57pOftgUZdBi25y/ZSazcmv5i0JYcWqIxV1TwG/cdsMt9jt83WWte86FZ/yjtY+zEOLg3um1Gx+2m0j+hNWrDrnc+Beuyg3Oej1dNk23E2cROfX3bbDYRlQ7rYR/Q0rVp1RUdeCab1d09PQXKOhD8oZJ8tDH/GclwGJznuAT0yp2ZwtIRU5gxWrrqioOwRcAhl9EpV22mCE2zZ0RcQr/vuu8Lh5WtmGEaq+bo5qwYpV91TUbQXKSDHDPVuo83iOINK7XW1SZPUkz4ytw3GrycRXptRsttkOLmHFqicq6tZiav30RddeV9nex+WMk+WuT3pPdiHR+VtTajb/tI/ntMRgxSoeKur+inG65zRb/b546oa5Tl2hDHvm3D5NdP7BlJrN3+/D+SydYMUqXirqfkkfl8Toa7b4/ZkUGtAt/zW/zxKdH5xSs9me/GUAVqwSoaLuLkzd6pxkq9/nejZC3PRNovOjwC09jrL0CVasEqWi7mZytOHEDr8vY4q+xcO7Y2Ryde91dP4p8IUpNZszLqK/v2LFKhkq6r6LySPMqVibfV5vOgog9im9lOh835SazbdYocosrFglS0Xd48BHyaE6WPUeT590YU4nTqJzTz38EqFySs3mO+IdHNOm
boLTju5ipxrJOhFpcJo8rBORThPtReQxETk1feYf8+x42tgNF5Hne2P+dGPFKhUq6p4H5vNB94+sJmQKGmYdL57pOScNic4tmMTkryd432LgaUw/PlT1hfYS3piOxdc633+ms5tV9UanQKUrqOp+YLeInNfjYJexYpUqFXVrMB1ysjo1Jwwhdbn2eir866e8o1JIdN4OzJ1Ss/mJJO5tb1MXoZvyQiLyMxF5w2k5968x15c4FXA/LiL3OtduE5H3nNcniRi/nIh8R0RWicgGEXlERCTmGT9w2t39Q0TaW7qnv42di1ixSgcVdbWYHmpuRVanzC6fr2MfyKxizxA5YdlpsjKJW5cAZ02p2fxGojc6RR9PUtVaVd2uqld2M/ybqjoTOB2YLyKnd3h/OdAuMnOBgyIy1nndHjX/gKrOUtXTMO3rYtvV+VR1NvAV4LsAqvqKqt4Wx4/yRszcGYsVq3Rhcgn/CfiD26Ykwza/L+uLDibR0fk+4ENTajYnG7nfWZu6rrhaRNYAa4GpwDF+KlXdAwxwWsmdgOkeNQ8jIsudYReIyOsiUo3pih5bfrq9XM1qTFfzRNgHjEnwnj7HilU6qahroaLuakwBv6wJsASo9fsb3bYhVRJIdG4Grp1Ss/mOFKsnHNOmritEZDxwJ3Chqp6OaWzS2X2vANdjGrK0r7TOBVY43aUexPQ+mIaJAYt9RnuF1wjgS/DnyCMLPq9WrHqDirqHMI1d17lsSdxs8ftyIvfRSXTurhZZLTBnSs3mePtedknHNnXdMBBoBOpEZCTw4S7GLceI2jLMCuwCIKSqdXwgTAdEZAAmXzVuRGR2+4mkiIwVkZdi3p6MqSWf0Vix6i0q6jYDZ2P6EmZ8vM42f0YWCE2Kuz7pPUU73549BcycUrN5XRqn69im7jhU9U2M+NRgtndd+TaXY7aAy5zeBttxikCq6hHMamoD8AIkfPp5Ih+snkZzbIxgcm3s+pi4WnFZUqSieB7wGDDJbVO6oqxk9Kvb/P5z3bYjXXxySWT5Fa9qu9N4J/DFKTWb/zfd83RsU5epiMiPgF+r6noRuQXYpqrPOu8tAy53VooZixWrvqKiOB+TpnM7kHFpLXNOLKk+6vVMc9uOtKGqv7gvsqEwxMtAeW82IBWRxcAvM6HTU6KIyHDgPFV9xm1besKKVV9TUTwLeBzIKGGYUXrCjohIidt2pJF1g4/qF5fdsuE1tw2xpAfrs+prKupWATOAz4Dr9cQBUNAIjHTbjjRxBFMpYaYVqtzCrqzcpKLYhzmq/jbGseoK+72e/QtPLBnu1vxpogG4H7inelF11seMWY7HilUmUFEcxLQg/wYwqq+nXxMMbF40ZtSUvp43TTQCDwA/ql5UfdBtYyy9hxWrTMI44W8GvoaJju4T/jSgcOV3hg+d3VfzpYlmTJDkD6oXVWdF7XhLalixykQqigdgcrxupQ9aY90zeNDyJwYNzPjcMIcdmHijh6sXVe912xhL32HFKpMxPq1LMM74y4Bgb0zz5RHDlvy9sGBBbzw7TSgm+PJnwJ+rF1VnXYiAJXWsWGULFcWDgKuBRcCcdD76qjGjXn4rGOg2Ctsl9gO/wKyi3nPbGIu7WLHKRiqKJ2BWW58Gxqf6uAUnjF190Oc9K2W70sM2TH2lPwHL7SrK0o4Vq2ymolgwmfmXYfLTzgQSTvI7a9wJ74Y9MiHN1iXCBhyBql5UvcZFOywZjBWrXMKcJp6NEa7zMeVFeqyrPq30hHpE+rL+ensJlGXAsupF1emsoW7JUaxY5TIVxR5MWk+7eJ2Dyb5/P3OhUaThnNITBvSSBc2YKP23MVUHVgIrqxdVZ3TCrCUzsWLV36go9mMEazwwfovfN/KykjETMcGoozDxXXlAIOZLOjylAVPbu/2rHpPmshUjTO84XzurF1XbD5glLVixsvTItF9O82FEywc0Wqe3xQ2sWFkslqzAVl2wWCxZgRUri+vEdDW+UETqROS5Du8X
icgKp7Ox17m2RERKndffSGHuUhH5VMz38XQxDojIMhFJtDGDJQWsWFkygdiuxstV9SMd3l8I7HQ6G3fmL0tarDBtqz7V06BYVDUMvAR8IoV5LQlixcqSCbR3Ne6KQZjedrEcAiIiUgnkO6uuJwFE5DqnO/E6EXlYRLwiMktE1otInogUOp2RTwMqgbnO2NvJsS7GuYR1sFtcxelqvE1VR4nIAuBOVb20w5jFwAxV/XIXz2hQ1QHO6ynAD4ErVbVVRB4EXlPVX4nIXZiwjHxgh6re3dWczrNmAjep6o2dvOcF9qhqthctzBrsntviNvF0NZ6BKQ0TDxdiejauEhEwwtS+KvsepoVVC9Cp8MWiqm8AxwmV815ERMIiUqSq8TRWtaSIFSuL23Tb1VhEXsY04TwzzucJptPM1zt5bygwAJM/mYepMpoKQYzwWfoA67OyuEpPXY1V9XxMN6DPd/OYVhFpT+B+CbhKREYAiMgQERnnvPcwpt79k8APnGtHgaJ4bBWRmpjXQ4EDqpoTnayzAStWlkygp67GbwFDunn/EWC9iDypqpuAbwF/FZH1wIvAaBH5DNCqqr/FONVnichCYD3GUf+m42B/HxGZKSKPOa+HcWzaUVZ0Mc4lrIPd4jrtXY0xK6jOHOxXA1ep6tVu2OfYcClwkqre73z/NFCuqv9wy6b+hl1ZWVxHVdcAfwdagdM6BoUC/wcMig0K7WtU9c8xQhUAnrFC1bfYlZXFYskK7MrKYrFkBVasLBZLVmDFymKxZAVWrCwWS1ZgxcpisWQFVqwsFktWYMXKYrFkBVasLBZLVmDFymKxZAVWrCwWS1ZgxcpisWQFVqwsFktWYMXKYrFkBVasLBZLVmDFymKxZAX/H6nU/Gs+WyjPAAAAAElFTkSuQmCC",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "flattened_hashtags_df.value_counts().head(10).plot(kind=\"pie\");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### User mentions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 [{'screen_name': 'i_ameztoy', 'name': 'Iban Am...\n",
+ "1 [{'screen_name': 'IndoPac_Info', 'name': 'Indo...\n",
+ "2 [{'screen_name': 'ZelenskyyUa', 'name': 'Волод...\n",
+ "3 []\n",
+ "4 [{'screen_name': 'ChinaUncensored', 'name': 'C...\n",
+ " ... \n",
+ "7435 [{'screen_name': 'metesohtaoglu', 'name': 'Met...\n",
+ "7436 [{'screen_name': 'NEVERBOW', 'name': 'P K', 'i...\n",
+ "7437 [{'screen_name': 'BBCNews', 'name': 'BBC News ...\n",
+ "7438 []\n",
+ "7439 [{'screen_name': 'Reuters', 'name': 'Reuters',...\n",
+ "Name: user_mentions, Length: 7440, dtype: object"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tweets_df[\"user_mentions\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 [{'screen_name': 'i_ameztoy', 'name': 'Iban Am...\n",
+ "1 [{'screen_name': 'IndoPac_Info', 'name': 'Indo...\n",
+ "2 [{'screen_name': 'ZelenskyyUa', 'name': 'Волод...\n",
+ "3 []\n",
+ "4 [{'screen_name': 'ChinaUncensored', 'name': 'C...\n",
+ " ... \n",
+ "7435 [{'screen_name': 'metesohtaoglu', 'name': 'Met...\n",
+ "7436 [{'screen_name': 'NEVERBOW', 'name': 'P K', 'i...\n",
+ "7437 [{'screen_name': 'BBCNews', 'name': 'BBC News ...\n",
+ "7438 []\n",
+ "7439 [{'screen_name': 'Reuters', 'name': 'Reuters',...\n",
+ "Name: user_mentions, Length: 7440, dtype: object"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Keep only the tweets that mention someone. The column stores each list as its string\n",
+ "# repr, so a tweet without mentions is \"[]\"; comparing against \" \" kept every row.\n",
+ "user_mentions_list_df = tweets_df.loc[tweets_df[\"user_mentions\"] != \"[]\", \"user_mentions\"]\n",
+ "user_mentions_list_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " user_mentions \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " [{'screen_name': \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 'i_ameztoy', \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 'name': \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 'Iban \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " Ameztoy', \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user_mentions\n",
+ "0 [{'screen_name':\n",
+ "1 'i_ameztoy',\n",
+ "2 'name':\n",
+ "3 'Iban\n",
+ "4 Ameztoy',"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Create a dataframe where each mentioned account gets its own row. Every cell is the string repr\n",
+ "# of a list of {'screen_name': ..., 'name': ..., ...} dicts, so parse it and keep the screen name.\n",
+ "import ast\n",
+ "flattened_user_mentions = []\n",
+ "for user_mentions_list in user_mentions_list_df:\n",
+ "    flattened_user_mentions.extend(m['screen_name'] for m in ast.literal_eval(user_mentions_list))\n",
+ "flattened_user_mentions_df = pd.DataFrame(flattened_user_mentions, columns=['user_mentions'])\n",
+ "flattened_user_mentions_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "user_mentions \n",
+ "'id': 6521\n",
+ "'name': 6521\n",
+ "'id_str': 6521\n",
+ "'indices': 6521\n",
+ "[{'screen_name': 4150\n",
+ " ... \n",
+ "'Scientists 1\n",
+ "'ScottLucas_EA', 1\n",
+ "'ScottishSun', 1\n",
+ "'ScottsPassage', 1\n",
+ "🪙', 1\n",
+ "Length: 15428, dtype: int64"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "flattened_user_mentions_df.value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVIAAADnCAYAAABMpd6dAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAouklEQVR4nO3deXxV1bn/8c+TBMKYwzwkIEEEEjEICKKIoHGovdTellurP60Xa1s7xNpaO6S3raZiK2q1Wm2NVVRabbWt4zW9BbVlVgGZDhomMU4gqEAYhADnPL8/9qbGmOGcnGGd4Xm/Xudl3Geftb8J4WHtvddeS1QVY4wx7ZfjOoAxxqQ7K6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBMjK6TGGBOjrCmkItJZRBaIyDARme8wR10E+zwiIsOTEMcYEwdZU0iBy4HHgZDrIBG4G/ih6xDGmMjkuQ6QRJcAF+MV0p0AInIZ8FmgCzAMeEJVf+i/dzcwAegM/E1Vr/O31wF/Bj4NHAGuAG4EjgNuUdVqf78fAF8E8v12r/NzvBdB1kXAgyKSp6pHYvquY1EV6AoMAYr91xCgEOiG9zNr+uoENAD7gX3+q/HX24HXgS3+f9+kqj4d/mEzplVZUUhFpCNwrKrW+ZumN3p7DDAWrwBsEJE7VfUt4CequlNEcoHnRWS0qq71P/Omqo4RkV8DDwKn4RWRdUC1iJwLDAdOBgR4WkSmqOpCVZ3QKNdqVR3TNK+qhkVkM3Ai8HJ8fgqtqAp08I81wc88ChgK9EnwkY9QFXgLr7BuxPteVwCvUFXv7h8QY6KUFYUUryDsbuG951W1HkBEXsXrdb0FfFFErsD7GQ0EjgeOFtKn/f8GgW6quhfYKyINItIDONd/rfL364ZXWBc2PnBzRbSRHXi9v/gX0qrAYGAqXtE8Ge8fk/y4H6dteXgFeyhw1tGNhzV3y/DKmq3AYv+1pG7WtN0O8hkTkWwppAfweozNaWj0dQjIE5GhwPeBCaq6S0QebPL5o58JN/l8GO9nKsCNqnpPDJk7+bljVxUQvIJ5vv8aHZd2E+Rt7bsdmOy/AELFlTVLgCeBp+pmTdviKpsxzcmKQuoXw1wR6aSqByP4SAHetb16EemPdz10fhSHnAvMFJGHVXWfiBQBh1V1R0sfEJE/AHep6jJ/0wi8SwXtUxXogtcrPh+YBvRvd1tJtiQ86lCTTbnAFP91W3FlTRB4Cq+orkh2PmOayopC6puH18N5rq0dVXWNiKwC1uOd5i+J5kCqOk9ESoEXRAS8Gy1fwjtd/7cm10hHA1v97f2BA6r6bjTHBaAqMBbvBtjFeP8gpJ254Qm929ilzH/9tLiy5k3gAeDeulnT3kl4OGOaIarqOkNSiMg44GpVvdR1lqZEpACYraoX+P9/NbBHVWdH1EBVoDte4fwacFKiciaDKodKGh4MN9CxpUsxLQkBzwDVwNy6WdOy4xfbpISsKaQAInI5MEdVU3rIjYh8Gfhjm0OfqgJlwHeAi4CuSYiWcPu006snNNx/fIzNbAHuBWbXzZoWyXAzY2KSVYU0Y1QFJgI/AT6Dd2MrY7wULll44aFrp8SpuQN4DzfcVDdrWovXp42JlRXSdOIV0JnAOa6jJMovDl+y9N7QtElxbnY/8Fvg5rpZ0z6Ic9vGWCFNC1WBE4Eb8HqgGe2MhlvfrtOBgxLU/F7gTuBXdbOm7UrQMUwWskKayqoCPfEeP/0aWTAvQkjlvWEND/dNwqHqgeuBO+pmTUvp6+UmPWT8X860VBUQqgKXARuAr5Mlf07b6P16kg4VAG4FVhRX1kxM0jFNBsuKv6Bp
xbsTvxBvbGQyemcp46VwaXye5IrcGGBpcWXN3cWVNT2SfGyTQayQpoqqQBeqArcCK/no0cisMjc0IeDgsDnAN4D1xZU1Fzs4vskAdo00FVQFRgOPAKWuo7iiSuiEhtkH9tO5m+Mo/wtcVjdr2k7HOUwasR6pa1WBCuAlsriIAhyk42spUETBm5tgdXFlzWmug5j0YT1SV7w78rOBz7uOkgpWh4ct+tyhmae7ztHIEeBaYJY9bmraYj1SF6oCk4HVWBH9t3+GxqTaE1p5wC+B/yuurOnnOoxJbVZIk60q8E28KfmOcZwkpTwbHl/oOkMLPoV3qn+q6yAmddmpfbJ4kyvfBPzAdZRUo0r90IaHC0BSrVfa2AHgkrpZ055wHcSkHuuRJkNVIB/vrrwV0Wa8R2BzihdR8BdBLK6sudJ1EJN6rJAmWlWgN/A83oqiphkrwiP3uc4QoRzgzuLKmluKK2tSvfCbJLJCmkhVgWHAUrxVRk0L5oYmpNtcqt8H/lRcWdMx1oZEpLOILBCRYSIy3982XkR+08L+dSLS5+jXEbT/iIgMjzWnaZ0V0kTxiuhCvLWXTAtU0fnhE49znaMdLsK7o985xnYuBx7Hm+EfAFVdoapXxdjuUXcDP4xTW6YFVkgToSowBPgn3nLKphWHyKurp1sP1znaqRx4PMae6SV4C/mFgJ0AInKGiDzjf91bROaJyCsich8fn8g7ktn/FwFni0g2rc+WdFZI460qMAiviNrwpgi8poVbXWeI0XnAI8WVNVEXKhHpCByrqnWq+paqTm9mt+uAxao6CniCRr9XqjqhUVurmzuGqoaBzcCJ0eYzkbNCGk9VgQF4N5aOdR0lXSwInxh2nSEOPg/MKa6sifbvUx9gdxv7TAEeAlDVGqDZCakbrUbbnB3Y2VFCWSGNl6pAX7wiatdEozAvNL6/6wxxcjFwT5R38w8A0a6W2h6d/GOZBLFCGg9Vgc7A/wGxrn6ZVVTZv0aHDXOdI46+CtwW6c6qugvIFZHWiulCvCKNiHwa6NlWuyLyBxE5udGmEcC6SHOZ6FkhjY8HSPP15F3YRbfNYXJyXeeIs+8WV9ZURLH/PFqff/bnwBQReQWYDrzZ3E5NrpGOBrb62/sDB1T13SgymShZIY1VVeB/gAtdx0hHq8PH7XadIUFuL66sOSvCfX8LzGi8QVXnq+pn/K8/UNVzVXWUqn5NVYeo6vtNGzl6jVRECoBNqvq2/9bFwD3t/UZMZKyQxqIq8Fm81T1NOzwbPinWMZipKg/4a3FlTZuXLVR1JfAvEYlLz1xV96jqBY027QbmxKNt0zKbtKS9qgKjgBeA7q6jpKuJB+/asZ1emTxF3Vrg1LpZ0z50HcQklvVI26Mq0AtvELUV0XY6ojnvZHgRBe9a5e9dhzCJZ4W0fe4BMuluc9K9of2bvWmSgS4prqz5husQJrGskEarKnAJ8AXXMdLd4vAJh11nSKJfRXK91KQvK6TRqAoUAXe5jpEJ5oYn9HadIYm6Ag+048knkybsDzY69wM9XIdId6o0LA+XZNvUbqcD33EdwiSGFdJIVQW+BZzrOkYm2EvnTYfJi3kuzzT0i+LKGnuEOANZIY1EVeA44BbXMTLFuvDQD1xncKQz8GBxZU2mPc2V9ayQRuYuoIvrEJniufBJ2dgbPepU4LuuQ5j4skLalqrAf+AtyWvi5LnwuGyfq/Xa4sqavq5DmPixQtqaqkAecKvrGJkkpLL9Te1f5DqHYwVAlesQJn6skLbuG0CJ6xCZZKv2qXOdIUVcUVxZY79bGcIKaUuqAj2xXkPcvRA+/qDrDCkiD7jZdQgTH1ZIW3YtkE2DxpNibnh8D9cZUsj5xZU1Z7gOYWJnsz81pyowFNgAdHAdJZOoEjq+4YGGA+TbCIiPrATG182aZn8R05j1SJv3I6yIxt0B8jdbEf2EccB/ug5hYmOFtKmqwEDgMtcxMtF6HbzDdYYU9X3XAUxsrJA28WrHDlcA
+a5zZKJ/hsba71vzTiuurJnoOoRpP/vFbqRsTln3C4sGfu/0Y4pWP9O1ywrXeTLNvPB4W1u9Zde4DmDazwrpx30VKNidmzvmx/36jJ8wZNDGewMFSw5DNs2dmRBhZddGHTzUdY4UNr24ssZ+PmnKCqmvbE5ZLnBV420Hc3JG/KZXj9MmFA9+b2bvngv2i+xzFC/t7aDna64zpLhc7Bn8tGWF9COfBoqbeyMkUviXgu5TTxkyKHRl/z4L3svNeS+50dLfsnDJftcZ0sDlxZU1AdchTPSskH7k0jb3EAks6NJlavngooKLCvsv2tyhw+tJyJUR5oXG20KBbesGXNDmXiblWCEFyuaUFQDnR/wBkfxX8vNP/3zRgCGfGlT40oud8tclLl36U0UXhEfbmkWRucR1ABM9K6Se/8KbdDc6IjlbO+RN/NrA/iecdkzR2se7dV2mYE+oNHGIDlv20tVOWSMzpbiyJttnx0o7Vkg9X4q1gT25uaOv69v75PFDBm/5XY/A4kNwKB7BMsFmLdzmOkMayQEuch3CRCfrC2nZnLIi4Ix4tXcoR4bd3TMweULx4J3X9um1YK/Inni1na7mh8dYLz06F7sOYKKT9YUU75c27j+HsMiAJ7p3mzppyCC+3r/vgm25uVnbK5sXGj/AdYY0M87mKk0vVkhhekJbFylY2qXz1HMHF/b+QuGAxbUdO2TVeEpV9q3VoXajKXp2ep9GsrqQls0pCwATknIwkY4b8jtO/mLhgGPPHly4fEHnTmuTclzHdtJ9s5KT1b9n7XSe6wAmctn+C34m3hMlySMi2/PyJlw5oN/oU4cMWvdo924vhiGc1AxJtCo8vN51hjQ1vriypsB1CBOZbC+k57g8+L6cnBNu6NPrlPHFg9+8vWdgUYOQcctwzA2Pj35YmQHvH/iprkOYyGR7IT3bdQCAwyLFs3sETp8wZPDeyr69F9Tn5Ox2nSle/hkae6zrDGnsLNcBTGSytpCWzSkbDIxwnaMxFelb063r1MnHFOVdPqDfwrfzct9xnSkWhzX3zQ8I9HGdI42Vuw5gIpO1hRTHp/WtEum2vHOnKZ8eVNj/c0UDlgY7dtzoOlJ71OmAt11nSHMnFFfW9HMdwrQtmwvpZNcB2iSS91rHjpMuLhow4szBRS8/26XzKteRorEoXHbEdYY0J3g3RE2Ky+ZCeqLrANF4Py/3pO/17zt24pBBtX8s6L40BCHXmdoyNzTeTutjN851ANO2rCyk/iTOx7vO0R4f5uSU3ty756TxxYO33tyrx8IDIh+6ztQcVQ6+rCOGu86RAU5wHcC0LSsLKTAc6OQ6RCyOiAz+Y6BgysQhgw5c06/Pgp05OR+4ztTYHrpsOkKeLWkdOyukaSBbC+lo1wHiRUV6z+vaZerUY4q6XDqw/8K6vLw3XWcCWBs+dqfrDBnimOLKGpsUO8VZIc0UIp1Xd8qfcv6ggUWfGTTwhZfz82tdxnkufJItaR0/1itNcVZIM41I7hsdOpx6WWH/0inHFK2q6drlZRcxng+PG+LiuBlqlOsApnXZWkhLXQdIhl25uWMr+/U5acKQQRtnBwqWHoGkDEcKac62t7XvwGQcK0tYjzTFZWshLXQdIJkO5uSMuL1Xj0njiwdv/0USlpV+W/u8kcj2s5Ctd5/iEl5IRWSyiHzZ/7qviDj9pSibU9Yd6OIygyshkaJHCrpPPXXIoCPf7pe4ZaVfCI9qSES7WcyebkpxCS2kInId8CPgx/6mDsBDiTxmBLJ+tnYV6TG/a5ep5YOLuv+/BCwr/Y/whJ7xbM9YIU11ie6Rfh74LLAfQFW3Aq6HcvR3fPzUIdJpnb+s9HmDCl96qVP+K7E2qcrhF8OlNhA/vqyQprhEF9JDqqr4SxSLSNcEHy8SWd8j/QSRnHc65E386sD+oyYfU7T2yRiWld5Pp80Hybc5SOOrW3Fljf1MU1iiC+lfROQeoIeIfA14Drg3wcdsixXSVtTn5o7+mb+s
9N09CqJeVrpWhyTkuquxXmkqS2ghVdVfAX8DHgNGAteq6p2JPGYE7BcyAodyZNjvevaYPKF48AfX9ek1P9JlpZ8Pjc1LdLYs1dd1ANOyhP/Sq+qzwLOJPk4U0voZ+2QLiwx8vHu3gY9367rntAMHF1S9v3PkgFCoxV79s+GTBiUzXxbp5TqAaVmi79pPF5FNIlIvIntEZK9E2LNJoGwdOxsbkYIlXTpPPWdwYa8LCgcsXt/MstJhlQ9e06JjXMTLAh1dBzAtS3RRuRn4rKoGVLVAVburquuVEa2QxkKk4/r8jpMvKBxw7DmDC5cvbLSs9HZ6bnEZLcMld7VbE5VEn9pvV1Wnk2c0wwppPIjIu3l5EyoG9KNbKBy8etfu/Tm7Sw+4jpXB7NpzCkv0H84KEXkUeBL499Muqvp4go/bGiukcbYvN6fs9i49d/6spueq74/o9B7We4q7Q2mwIkI2S3QhLQA+BM5ttE0BK6SZRFVvmR3a0mvfyrMOdx664K3B5bYee5zlt3Ncr0mOhBZSVf1yIttvJyukcfatmvCCXvs4A2D4a49N3dt98ILdPYZbMY2vw64DmJYl+q79IBF5QkR2+K/HRMT18JiDjo+fUcZtCq+ZGtTTG28bu/qO0/MP7lzmKlOGskKawhLdO3sAeBpv2rpC4H/9bS6l1NpG6SywT9/7wWPh/tLkmqigOacsmzkqJ9Sw3lW2DGSFNIUlupD2VdUHVPWI/3oQ909ovO/4+BlBVMO33B96M1ebf+Q2N3yo6ynLZgbQ8LZkZ8tQ9uhtCkt0If1ARL4kIrn+60u47xG6Pn5GuOqp8KIe+zmptX06NewaeNKq2/agmtCJpLPEO64DmJYlupBeDnwReBfYBnwBcH0DynqkMTp5Q3jVpNqPXxdtSWDP6yNL1/+xFlUbvtN++yqqy3e7DmFalui79m/gzUeaSqxHGoOee3XH954ID5Io/hEeuP2lCXu7D17w9qAz7U5++1hvNMUlpJCKyLWtvK2qOjMRx42Q9UjbKSesoVtmh7bmKGOi/eyIzX+buq/bIBsW1T5WSFNcok7t9zfzAvgK3tIjLr2PPSXSLt97Iryo4ED0RfSosavvmJx/cJcNi4re264DmNYlpJCq6q1HX8Dvgc5410YfAY5NxDEjFZwRPAzY5BpRmvRq+OUJG3VKLG0Imjtx+czjc0ING+KVK0tYIU1xCbvZJCK9ROQGYC3eJYRxqvojVd2RqGNG4VXXAdJJ73rddtVT4SHRXBdtSV6oodspy2YW2LCoqNipfYpLSCEVkVuA5cBeoExVq1R1VyKO1U5WSCOUE9Yjt9wfei8H+sSrzU4NuwaOW3VbvQ2Lipj1SFNconqk1+A9yfRTYKs/qXOqTOwMkGpT+6WsH/4tvLjbQUbHu90ee14vKd3w0Ks2LCoidikkxSXqGmmOqnY+OpFzo1cqTOwM1iONyNRgePnY1zRhd9kHvvviyYPeWbA4Ue1niJ3ARtchTOuydSakWmxaslb1263vfOuZ8HECksjjjNj816k9dm9akMhjpLllFdXl9rua4rKykAZnBD8E6lznSFW5IT180/2hXQI9k3E8f1jU8mQcKw296DqAaVtWFlKf/YK24H8eDS/t2sAJyTqePyyq1IZFNesF1wFM27K5kNq1uWactSr8Utkbibsu2pJGw6LeTfaxU5gCL7kOYdoWUSEVkc4iskBEhonIfH/beBH5TQv714lIVMNlROS7ItKllffni0hxG21cKSKXR3jIRdHkywYDdupbV/wjXOLq+P6wqN2o7m9776ywvqK6vN51CNO2SHukl+Ots/TvoSqqukJVr4pjlu8CzRZSEYl0MbX7gW9HuO86bAKTf8sL6aGbHgjtFQi4zNFjz+slJRseegXVsMscKcIuP6WJSAvpJcBTeIV0J4CInCEiz/hf9xaReSLyiojcRyt3ekWkq4jUiMgaEVknIheKyFV4407/JSL/8vfbJyK3isga4FT/uK2O
OVTVD4E6ETm5rW8oOCOowL8i+N6zws/+FHqh8yGOd50DoPDdF08uemeBnTFYIU0bbRZSEekIHKuqdar6lqpOb2a364DFqjoKeAI4ppUmzwO2quqJqnoC8A9V/Q2wFThTVc/09+sKvOTvt1hVp6vqW36m+0RkfAvtrwAimisTeC7C/TLaeSvCL5S+TUrNyjRy81+nBnZvzvZhUfNdBzCRiaRH2gfY3cY+U4CHAFS1BmjtcdAgcI6I3CQip6tqS9eAQsBjzb2hql9V1RUtfG4HXu82Es9GuF/GKnpf3/jys+FRrnM0Z9zq2yfnN+zO1mFRayuqy20gfpqIpJAeADrF64CquhEYh1dQb2hl7tKD2r7HBzvhZW5TcEZwi58jK3U4ogdvfDB0UCAVnjb7BEFzJy67PluHRf3FdQATuTYLqT/ZSK6ItFZMFwIXA4jIp2llILeIFAIfqupDwC14RRW8CU66R5i7cXtXisiVjTaNwLuRFKlHoj1mpqh6OLSs02FGus7RmrxQQ7eJy27IxmFRVkjTSKQ3m+YBk1t5/+fAFBF5BZgOvNnKvmXAMhFZjXdt9QZ/+++Bfxy92dSaJtdIS/j43ffTiO6UPSsL6fkvhpcM30pM84smS+eGnQPHrfr1riwaFrW6orp8k+sQJnKi2vZjvCIyDrhaVS9NfKTo+CMHpqvqIREZC3wv2pxlc8qWARMSEjAFDd6hr/9qdqivQDfXWaKxdcCpy9aPvGQ8Ipn+IMlPKqrLf+k6hIlcRL+QqroSb2hSpOM5k0ZVP6Oqh/z/7QP8rB3NZE2vtONhPfDLOaEj6VZEAQrffeHkoq0Ls2FYlJ3Wp5mIeqTtalikN/B8M2+dpaopNRC+bE5ZEfAWCZ7pKBXcdP+RxUO3t3qZJuW9PPZ7C+sDw9LiskQ7rKqoLh/X9m4mlSTsFElVP1DVMc28UqqIAgRnBN8hCx4Znb4knPZFFGDcql+flsHDoqw3moYy/VpTNKpdB0ikoe/q5gsXhjOip+MPiyrJCR3KtHGWR4CHXYcw0bNC+pG/kqFr4+Qf0v0z/xASaWEug3SUF2roPnHZzO4ZNizq8Yrq8rdchzDRs0LqC84IHgHucp0jEX45J7S6Y4hhrnPEWwYOi/q16wCmfayQftzvgUz5SwnAhQtCiwa/z2mucyRKjz1bSks2PJwJs0W9VFFdbpOUpCkrpI0EZwR3AXNc54iX497RDdOXasaPj/WGRS1K95uFt0WyU6O5gc8SkXoR+XuT97uLyBIRWX10uGIkc/lmGxG5TESq2tinr4j8I5L2rJB+0u1kwMJ4nRt0788fCuVLHOdJSGUjNz06NVD/2kLXOdppI/C3CPdtPDfwIlX9jybvlwPv+CNk4rLUtYjkxaOddKOq7wHbRKTNMzorpE0EZwQ3AU+7zhGrGx8MBTuEKXadI5nGrfr1aR0bdrc0K1gqu7GiujzSSxNH5wZuSQ+8GdAa2wmERCRXRB705wEOisjVACJynIg8588RvNJfCeMMEVkkIk8Dr/qfvUVElovIWhH5+tHGReQHjbb/3N9WLCK1InKvP0/xPBHp3FJov9d8k4gsE5GNInJ6o3YW+blWisgkf/sZfs/8KRHZIiKzROQS//NBERnm79dXRB7z8y1vVBQPAPsi+Hk/6f/MW2WFtHk/oY1JpFPZl54PLSzcySTXOZJN0NxTll0/Ms2GRb2BPwVlWxrPDdzKbrnAx4pyo7l8xwBFqnqCqpYBD/i7PAz8VlVPBCYB2/zt44DvqOoI4CtAvapOwHuc+msiMlREzgWGAyf77Z8kIkcflhjutzsKbyrO/2rjW8xT1ZPxVsu4zt+2AzhHVccBFwKNlzc6EfgGUApcCozwP38fH62UcQfwaz/3f/nvoaqPquqvAETksyJyfQuZIprf2AppM4Izgq+QptdKR76ltecv04muc7iSF2roPnH5zG5oeLvrLBGaVVFdfiTCfSOZG3gMLQ/j2wIcKyJ3ish5wB4R
6Y5XXJ8AUNWD/koTAMtU9XX/63OB//YnG3oJ6I1XKM/1X6uAlXiTCA33P/O6qq72v34Z2jxDeryZfTsA94pIEG+IYuNVHJar6jZVbQBew5tcCbypMY9+/mzgLj/300CBiHzs8WhVfVpVW5rOM6L5ja2QtuxnwIdt7pVCuhzU+uv+FOoqkO86i0udD+4sHLf69p18VBBS1Vrg3ij2b3VuYBFZDFwE/Km59/0pMU/Em3n/G/i9s1Y0HsEiwLcbPaE4VFXn+dtvbLT9OFWd7X+modHnQ0Bb11qP7t9436uB7X7u8UDHZvYHrxfe0Ojro5/PAU5plK9IVSM5pT8qovmNrZC2IDgjuJU0G9d30wOh2rxwq8u8ZI0e9a+Vlmz8UzDFh0VdWVFdHvElpLbmBlbVycBs4Irm3hdvZd8cVX0M+CkwTlX3Am+LyOf8ffKl+dV85wLfFJEO/n4jRKSrv/3yo708ESkSkX6Rfk8RCADb1PtzvBTv0kU05tFoQUwRGdPazn7+xnOERDS/sRXS1t0EvOc6RCS+PC+0oP9uTnGdI5UUbls6sXDr4lQdFvVQRXV5e7K1NTfwBqBXC+8VAfP909yHgB/72y8FrhKRtcBSYEAzn70PeBVYKSLrgHvwrmnOw+sBv+Cffv+NdkzQ3orfATPEWwSzhOjHeV8FjPdvhL2K1xP/mCbXSAfiPap71JlATVsHSdjsT5mibE5ZBSn+xNOoN8KvXPun8AjxrieZJlaMvWbhnsCxqTRb1B5gZEV1edSPtx6dGxiv5/l9Vf1Mk/e/CHxBVb8Yl6RZxl9t401Vfdr//4XAf/pnAy2yHmnb7sG7iJ6Suh3Q3T99JBywItqyk1bdlmrDoq5rTxGFj+YGBg4DJzQdkI+3Mm6PxgPyTeRU9a5GRbQvcFtbRRSsRxqRsjllo/GGQaRWsVLV3/0utLzPHk52HSXVHcnN37t40qxt4dyOIxxHWQeMjeJOfUYRkd/CJx5ZvkNVH2hu/3RhhTRCZXPKrgOqXOdo7Iq/hxacvUZTaj36VHagU6+tL0z8eS6S099hjKkV1eXp+gSWaYGd2kful8Aa1yGOGr0lHDxrjWbsZCSJkALDoh6yIpqZrJBGKDgjeBi4jI/f0XOiYL9+8OO/hPtI2+PyTBM96l8rHbnxzy6GRb0GXNnmXiYtWSGNQnBGcDVwo9MQqnrz/aHXc5WBTnOksaJtS5I9LKoBuKCiurw+icc0SWSFNHozgRdcHbzimfDCXvsY7+r4maJk0yNTC+pfT9Zp9tUV1eWrknQs44AV0ij5p/hfAJK+xMVJm8Krp6zTtF+8LlWMW33bpI4N9YkeFvVIRXX53Qk+hnHMCmk7+I+PXoA3li8pAvv0vR88Fh4o0T8iZ1qQo+G8U5ZdPyIndGhTgg6xkRYe1zSZxQppOwVnBBcD1yTjWKIavmV26K0cxeWwnYyUFzpYMHH5DV3QcNM5PGN1AO+66N44t2tSkBXSGARnBO8E/pjo43znyfCiHh+SEUspp6LOBz8oGrv6jvfjPCzqqorq8rVxbM+kMCuksfs63lyMCTFxfXjlqeu1zYllTWx61m8+Po7Dou6sqC5va4o6k0GskMYoOCN4AJiGN04wrnru1R1XPxkeLPbnlBRF25ZMLNy2JNZhUXOA78Qjj0kf9hc0DoIzgtvwZuJ+J15t5oQ1dMvs0NYcpW+82jRtK9n456kFe9o9LOoJ4CsV1eX23HWWsUIaJ8EZwTrgHOD9eLR3zePhRQUHGBOPtkx0xq1q17CoZ4GLopmo2WQOK6RxFJwRrAXOw5tvst1OeyW8Yvwmm4zElXYMi1oKfK6iuvxQInOZ1GWFNM6CM4IvA58hgnVemtOnXrd9++nwUPHWwjGORDEsajUwraK6PNXXhzIJZIU0AYIzgouAzxLZutn/lhPWIzfPDr2X463QaBzzhkX9prVhURuAT1VUl+9OYiyTgqyQJkhwRvA54Ay85Vwj8qO/hhd3a2B0wkKZ
qPWs33T8iE2Prm1mWNQq4IyK6vJ4D+Q3acgKaQL5p/mTiGBo1Blrw8vGbLHroqlo0NZFpxRuW9p4WNSzeBM0J32+BZOabIb8JCibU9YP+DtwUnPv99ut79x5d6iLQM/kJjPRWD7uB4v2FhTX4Q1xSto8Cyb1WSFNkrI5Zd2Ax4BzG2/PDenh2beHNnY5xCg3yUyENCy5VaNq113f9q4m29ipfZIEZwT34d3N/9iUaj95NLzUimjKOwBcZEXUtMR6pA6UzSm7FLjn7JXhNVfMDZ/iOo9pVR1wQen62lRaztmkGOuROhCcEfwjcMpXng2n1vLOpqk/AydaETVtsR6pQ7Ulpd2A3wL/7TqL+Zi9wJWl62v/4DqISQ9WSFNAbUnpl4C7gIDrLIZlwMWl62vjPpuXyVx2ap8CStfXPgSUAn91nSWLhfBWiD3NiqiJlvVIU0xtSek04HfAMa6zZJHFeKfya1wHMenJCmkKqi0p7QpcjzdBsC12lzjvAj8sXV+b8OViTGazQprCaktKx+FdOz3VdZYMcwTv53pd6framKY8NAaskKYF/3R/JjDWdZYMMA+4pnR97TrXQUzmsEKaJmpLSgWYjnfKf7zjOOnoGWBm6fraZa6DmMxjhTTN1JaU5gAXA9cBxzmOk+oUbx2lG0rX1yZspVdjrJCmqdqS0lzgfOBbeAvv2Yz6HzmCN5TsF6Xra19xHcZkPiukGaC2pHQ48E3gMrJ7Kr4NwGzgD6Xra7e7DmOyhxXSDFJbUtoZuAj4OjDRcZxk2Q/8BZhdur52ieswJjtZIc1QtSWlxwCfw7tBNZnMGo96APgn8CTwaOn62r1u45hsZ4U0C9SWlPbBW4xvOt711Hy3idqlDqjxX/8qXV970G0cYz5ihTTL1JaUdgFOxltLahLeYP9eTkM1721gBd6a8TWl62tfdZzHmBZZIc1y/vjUEj4qqqV4w6r6JTHGdryiudz/7wq7WWTSiRVS0yx/rtTjGr2GAQOAgmZeHZtpIox3I2g/3vye7wJvNnq9dfTr0vW19Yn8XoxJNCukJma1JaX5QBe8qeiOAEdK19cecpvKmOSxQmqMMTGyiZ2NMSZGVkiNMSZGVkiNMSZGVkhNQolIZxFZICLDRGS+v228iPwmynaqROT7/tfXi8jZccxYF8E+vxKR8ngd02SWPNcBTMa7HHgc744+AKq6Am+8aLuo6rVxyBWtO4F78R5NNeZjrEdqEu0S4Cm8QroTQETOEJFn/K+rROR+EZkvIltE5KqjHxSRn4jIRhFZDIxstP1BEfmC//UEEVkqImtEZJmIdBeRXBG5RUSWi8haEfm6v+9AEVkoIqtFZJ2InO43+V5b34SqvgH0FpEB8fmxmExiPVKTMCLSEThWVev8TdNb2LUEOBPoDmwQkbuB0XgzWY3B+z1dCbzcTPuPAheq6nIRKcCb0OQrQL2qThCRfGCJiMzzjz9XVX8hIrl4Y19R1QmN2vw78FVV3dpMzpXAacBjUf0gTMazQmoSqQ+wO4L9alS1AWgQkR1Af+B04AlV/RBARJ5u5nMjgW2quhxAVff4+54LjD7aawUCwHC8R1DvF5EOwJOqurppg6r6H63k3AEURvD9mCxjhdQk0gGgUwT7NTT6OkTsv5cCfFtV537iDZEpwDTgQRG5TVX/EEW7nfC+J2M+xq6RmoRR1V1ArohEUkybWgh8zr/r3x1vWZWmNgADRWQCgH99NA+YC3zT73kiIiNEpKuIDAG2q+q9wH3AuNYCiMiNIvL5RptGALb6qPkEK6Qm0ebhTSwdFVVdiXf9cw3wf3in5U33OQRcCNwpImuAZ/F6jfcBrwIrRWQdcA9eL/cMYI2IrPI/d0fTNkXk7yJy9PS9DG+yFfyifBwxjDYwmcuetTcJJSLjgKtV9VLXWaIlInNV9VP+158HxqnqzxzHMinIeqQmofye5b/8u+Rp5WgR9eUBt7rKYlKb9UiNMSZG1iM1xpgYWSE1xpgYWSE1
xpgYWSE1xpgYWSE1xpgYWSE1xpgYWSE1xpgYWSE1xpgYWSE1xpgYWSE1xpgYWSE1xpgYWSE1xpgYWSE1xpgY/X94EM/IlO9NngAAAABJRU5ErkJggg==",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "flattened_user_mentions_df.value_counts().head(5).plot(kind=\"pie\");"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.10.5 64-bit",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.5"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "a265634967a27dd555e8346f2355ee703e655fd7f0a0d20c168527cd0a3d5707"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/pre_process.ipynb b/notebooks/pre_process.ipynb
new file mode 100644
index 0000000..1d54880
--- /dev/null
+++ b/notebooks/pre_process.ipynb
@@ -0,0 +1,1198 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "import pandas as pd\n",
+ "import sys"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# import custom libraries and scripts\n",
+ "# sys.path.append(os.path.abspath(os.path.join(\"../..\")))\n",
+ "sys.path.append(\".\")\n",
+ "sys.path.append(\"..\")\n",
+ "\n",
+ "from defaults import *\n",
+ "from extract_dataframe import read_json\n",
+ "from extract_dataframe import TweetDfExtractor\n",
+ "from clean_tweets_dataframe import Clean_Tweets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " created_at \n",
+ " source \n",
+ " original_text \n",
+ " polarity \n",
+ " subjectivity \n",
+ " lang \n",
+ " favorite_count \n",
+ " status_count \n",
+ " retweet_count \n",
+ " screen_name \n",
+ " original_author \n",
+ " followers_count \n",
+ " friends_count \n",
+ " possibly_sensitive \n",
+ " hashtags \n",
+ " user_mentions \n",
+ " place \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2022-08-07 22:31:20+00:00 \n",
+ " <a href=\"http://twitter.com/download/android\" ... \n",
+ " RT @i_ameztoy: Extra random image (I):\\n\\nLets... \n",
+ " -1.250000e-01 \n",
+ " 0.190625 \n",
+ " en \n",
+ " 4 \n",
+ " 8097 \n",
+ " 2 \n",
+ " i_ameztoy \n",
+ " i_ameztoy \n",
+ " 20497 \n",
+ " 2621 \n",
+ " NaN \n",
+ " [{'text': 'City', 'indices': [132, 137]}] \n",
+ " [{'screen_name': 'i_ameztoy', 'name': 'Iban Am... \n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2022-08-07 22:31:16+00:00 \n",
+ " <a href=\"http://twitter.com/download/android\" ... \n",
+ " RT @IndoPac_Info: #China's media explains the ... \n",
+ " -1.000000e-01 \n",
+ " 0.100000 \n",
+ " en \n",
+ " 691 \n",
+ " 5831 \n",
+ " 201 \n",
+ " ZIisq \n",
+ " ZIisq \n",
+ " 65 \n",
+ " 272 \n",
+ " NaN \n",
+ " [{'text': 'China', 'indices': [18, 24]}, {'tex... \n",
+ " [{'screen_name': 'IndoPac_Info', 'name': 'Indo... \n",
+ " \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2022-08-07 22:31:07+00:00 \n",
+ " <a href=\"http://twitter.com/download/android\" ... \n",
+ " China even cut off communication, they don't a... \n",
+ " 0.000000e+00 \n",
+ " 0.000000 \n",
+ " en \n",
+ " 0 \n",
+ " 1627 \n",
+ " 0 \n",
+ " Fin21Free \n",
+ " Fin21Free \n",
+ " 85 \n",
+ " 392 \n",
+ " NaN \n",
+ " [{'text': 'XiJinping', 'indices': [127, 137]}] \n",
+ " [{'screen_name': 'ZelenskyyUa', 'name': 'Волод... \n",
+ " Netherlands \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2022-08-07 22:31:06+00:00 \n",
+ " <a href=\"http://twitter.com/download/android\" ... \n",
+ " Putin to #XiJinping : I told you my friend, Ta... \n",
+ " 1.000000e-01 \n",
+ " 0.350000 \n",
+ " en \n",
+ " 0 \n",
+ " 1627 \n",
+ " 0 \n",
+ " Fin21Free \n",
+ " Fin21Free \n",
+ " 85 \n",
+ " 392 \n",
+ " NaN \n",
+ " [{'text': 'XiJinping', 'indices': [9, 19]}] \n",
+ " [] \n",
+ " Netherlands \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 2022-08-07 22:31:04+00:00 \n",
+ " <a href=\"http://twitter.com/download/iphone\" r... \n",
+ " RT @ChinaUncensored: I’m sorry, I thought Taiw... \n",
+ " -6.938894e-18 \n",
+ " 0.556250 \n",
+ " en \n",
+ " 1521 \n",
+ " 18958 \n",
+ " 381 \n",
+ " VizziniDolores \n",
+ " VizziniDolores \n",
+ " 910 \n",
+ " 2608 \n",
+ " NaN \n",
+ " [] \n",
+ " [{'screen_name': 'ChinaUncensored', 'name': 'C... \n",
+ " Ayent, Schweiz \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 2022-08-07 22:31:02+00:00 \n",
+ " <a href=\"http://twitter.com/download/android\" ... \n",
+ " RT @benedictrogers: We must not let this happe... \n",
+ " 2.000000e-01 \n",
+ " 0.500000 \n",
+ " en \n",
+ " 116 \n",
+ " 48483 \n",
+ " 36 \n",
+ " GraceCh15554845 \n",
+ " GraceCh15554845 \n",
+ " 207 \n",
+ " 54 \n",
+ " 0.0 \n",
+ " [{'text': 'Taiwan', 'indices': [84, 91]}] \n",
+ " [{'screen_name': 'benedictrogers', 'name': 'Be... \n",
+ " Melbourne, Victoria \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 2022-08-07 22:30:59+00:00 \n",
+ " <a href=\"http://twitter.com/download/android\" ... \n",
+ " RT @TGTM_Official: What kind of country can co... \n",
+ " 1.583333e-01 \n",
+ " 0.800000 \n",
+ " en \n",
+ " 1106 \n",
+ " 4173 \n",
+ " 411 \n",
+ " Philipkuma1 \n",
+ " Philipkuma1 \n",
+ " 12 \n",
+ " 264 \n",
+ " NaN \n",
+ " [{'text': 'Taiwan', 'indices': [101, 108]}, {'... \n",
+ " [{'screen_name': 'TGTM_Official', 'name': 'The... \n",
+ " \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 2022-08-07 22:30:59+00:00 \n",
+ " <a href=\"http://twitter.com/download/android\" ... \n",
+ " RT @ChinaInfo777: #PinkFloyd singer Roger Wate... \n",
+ " 0.000000e+00 \n",
+ " 0.000000 \n",
+ " en \n",
+ " 10 \n",
+ " 24102 \n",
+ " 5 \n",
+ " nhohn2011 \n",
+ " nhohn2011 \n",
+ " 870 \n",
+ " 508 \n",
+ " NaN \n",
+ " [{'text': 'PinkFloyd', 'indices': [18, 28]}, {... \n",
+ " [{'screen_name': 'ChinaInfo777', 'name': 'Chin... \n",
+ " Florida, USA \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 2022-08-07 22:30:50+00:00 \n",
+ " <a href=\"http://twitter.com/download/android\" ... \n",
+ " RT @AmbQinGang: China's SC&FM Wang Yi elab... \n",
+ " 0.000000e+00 \n",
+ " 0.000000 \n",
+ " en \n",
+ " 1221 \n",
+ " 630 \n",
+ " 239 \n",
+ " ClaudioColomaRI \n",
+ " ClaudioColomaRI \n",
+ " 127 \n",
+ " 263 \n",
+ " NaN \n",
+ " [{'text': 'Taiwan', 'indices': [80, 87]}] \n",
+ " [{'screen_name': 'AmbQinGang', 'name': 'Qin Ga... \n",
+ " El mundo periférico \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 2022-08-07 22:30:45+00:00 \n",
+ " <a href=\"https://mobile.twitter.com\" rel=\"nofo... \n",
+ " RT @CGMeifangZhang: Chinese ambassador to the ... \n",
+ " 2.000000e-01 \n",
+ " 0.375000 \n",
+ " en \n",
+ " 49 \n",
+ " 107188 \n",
+ " 25 \n",
+ " jmarzola1 \n",
+ " jmarzola1 \n",
+ " 213 \n",
+ " 877 \n",
+ " NaN \n",
+ " [{'text': 'USA', 'indices': [66, 70]}, {'text'... \n",
+ " [{'screen_name': 'CGMeifangZhang', 'name': 'Zh... \n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " created_at \\\n",
+ "0 2022-08-07 22:31:20+00:00 \n",
+ "1 2022-08-07 22:31:16+00:00 \n",
+ "2 2022-08-07 22:31:07+00:00 \n",
+ "3 2022-08-07 22:31:06+00:00 \n",
+ "4 2022-08-07 22:31:04+00:00 \n",
+ "5 2022-08-07 22:31:02+00:00 \n",
+ "6 2022-08-07 22:30:59+00:00 \n",
+ "7 2022-08-07 22:30:59+00:00 \n",
+ "8 2022-08-07 22:30:50+00:00 \n",
+ "9 2022-08-07 22:30:45+00:00 \n",
+ "\n",
+ " source \\\n",
+ "0 \n",
+ "Int64Index: 22000 entries, 0 to 21999\n",
+ "Data columns (total 17 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 created_at 22000 non-null datetime64[ns, UTC]\n",
+ " 1 source 22000 non-null object \n",
+ " 2 original_text 22000 non-null object \n",
+ " 3 polarity 22000 non-null float64 \n",
+ " 4 subjectivity 22000 non-null float64 \n",
+ " 5 lang 22000 non-null object \n",
+ " 6 favorite_count 22000 non-null int64 \n",
+ " 7 status_count 22000 non-null int64 \n",
+ " 8 retweet_count 22000 non-null int64 \n",
+ " 9 screen_name 22000 non-null object \n",
+ " 10 original_author 22000 non-null object \n",
+ " 11 followers_count 22000 non-null int64 \n",
+ " 12 friends_count 22000 non-null int64 \n",
+ " 13 possibly_sensitive 6191 non-null float64 \n",
+ " 14 hashtags 22000 non-null object \n",
+ " 15 user_mentions 22000 non-null object \n",
+ " 16 place 22000 non-null object \n",
+ "dtypes: datetime64[ns, UTC](1), float64(3), int64(5), object(8)\n",
+ "memory usage: 3.0+ MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "global_data.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " polarity \n",
+ " subjectivity \n",
+ " favorite_count \n",
+ " status_count \n",
+ " retweet_count \n",
+ " followers_count \n",
+ " friends_count \n",
+ " possibly_sensitive \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " count \n",
+ " 22000.000000 \n",
+ " 22000.000000 \n",
+ " 22000.000000 \n",
+ " 2.200000e+04 \n",
+ " 22000.000000 \n",
+ " 2.200000e+04 \n",
+ " 22000.000000 \n",
+ " 6191.000000 \n",
+ " \n",
+ " \n",
+ " mean \n",
+ " 0.061325 \n",
+ " 0.283839 \n",
+ " 852.137318 \n",
+ " 5.446036e+04 \n",
+ " 176.750182 \n",
+ " 1.796764e+04 \n",
+ " 1563.114455 \n",
+ " 0.037151 \n",
+ " \n",
+ " \n",
+ " std \n",
+ " 0.223701 \n",
+ " 0.290963 \n",
+ " 3106.077645 \n",
+ " 1.454120e+05 \n",
+ " 498.435765 \n",
+ " 3.030478e+05 \n",
+ " 4358.651264 \n",
+ " 0.189146 \n",
+ " \n",
+ " \n",
+ " min \n",
+ " -1.000000 \n",
+ " 0.000000 \n",
+ " 0.000000 \n",
+ " 1.000000e+00 \n",
+ " 0.000000 \n",
+ " 0.000000e+00 \n",
+ " 0.000000 \n",
+ " 0.000000 \n",
+ " \n",
+ " \n",
+ " 25% \n",
+ " 0.000000 \n",
+ " 0.000000 \n",
+ " 2.000000 \n",
+ " 2.105750e+03 \n",
+ " 2.000000 \n",
+ " 5.700000e+01 \n",
+ " 137.000000 \n",
+ " 0.000000 \n",
+ " \n",
+ " \n",
+ " 50% \n",
+ " 0.000000 \n",
+ " 0.200000 \n",
+ " 115.000000 \n",
+ " 1.038750e+04 \n",
+ " 38.000000 \n",
+ " 2.840000e+02 \n",
+ " 487.000000 \n",
+ " 0.000000 \n",
+ " \n",
+ " \n",
+ " 75% \n",
+ " 0.133333 \n",
+ " 0.468824 \n",
+ " 655.000000 \n",
+ " 4.526150e+04 \n",
+ " 187.000000 \n",
+ " 1.324500e+03 \n",
+ " 1599.000000 \n",
+ " 0.000000 \n",
+ " \n",
+ " \n",
+ " max \n",
+ " 1.000000 \n",
+ " 1.000000 \n",
+ " 65170.000000 \n",
+ " 4.108317e+06 \n",
+ " 17409.000000 \n",
+ " 1.449852e+07 \n",
+ " 208360.000000 \n",
+ " 1.000000 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " polarity subjectivity favorite_count status_count \\\n",
+ "count 22000.000000 22000.000000 22000.000000 2.200000e+04 \n",
+ "mean 0.061325 0.283839 852.137318 5.446036e+04 \n",
+ "std 0.223701 0.290963 3106.077645 1.454120e+05 \n",
+ "min -1.000000 0.000000 0.000000 1.000000e+00 \n",
+ "25% 0.000000 0.000000 2.000000 2.105750e+03 \n",
+ "50% 0.000000 0.200000 115.000000 1.038750e+04 \n",
+ "75% 0.133333 0.468824 655.000000 4.526150e+04 \n",
+ "max 1.000000 1.000000 65170.000000 4.108317e+06 \n",
+ "\n",
+ " retweet_count followers_count friends_count possibly_sensitive \n",
+ "count 22000.000000 2.200000e+04 22000.000000 6191.000000 \n",
+ "mean 176.750182 1.796764e+04 1563.114455 0.037151 \n",
+ "std 498.435765 3.030478e+05 4358.651264 0.189146 \n",
+ "min 0.000000 0.000000e+00 0.000000 0.000000 \n",
+ "25% 2.000000 5.700000e+01 137.000000 0.000000 \n",
+ "50% 38.000000 2.840000e+02 487.000000 0.000000 \n",
+ "75% 187.000000 1.324500e+03 1599.000000 0.000000 \n",
+ "max 17409.000000 1.449852e+07 208360.000000 1.000000 "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "global_data.describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## EDA"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Remove duplicated rows"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Automation in Action...!!!\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "(22000, 17)"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "clean_tweets = Clean_Tweets(global_data)\n",
+ "clean_tweets.df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(22000, 17)"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "clean_tweets.df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(7440, 17)"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "clean_tweets = clean_tweets.drop_duplicate(global_data)\n",
+ "clean_tweets.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As we can see, the dataset contained many duplicates: only 7,440 of the 22,000 rows remain."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Remove tweets that are not English"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "en 7440\n",
+ "Name: lang, dtype: int64"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "clean_tweets.lang.value_counts()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "All tweets are in English"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Missing values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Int64Index: 7440 entries, 0 to 21997\n",
+ "Data columns (total 17 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 created_at 7440 non-null datetime64[ns, UTC]\n",
+ " 1 source 7440 non-null object \n",
+ " 2 original_text 7440 non-null object \n",
+ " 3 polarity 7440 non-null float64 \n",
+ " 4 subjectivity 7440 non-null float64 \n",
+ " 5 lang 7440 non-null object \n",
+ " 6 favorite_count 7440 non-null int64 \n",
+ " 7 status_count 7440 non-null int64 \n",
+ " 8 retweet_count 7440 non-null int64 \n",
+ " 9 screen_name 7440 non-null object \n",
+ " 10 original_author 7440 non-null object \n",
+ " 11 followers_count 7440 non-null int64 \n",
+ " 12 friends_count 7440 non-null int64 \n",
+ " 13 possibly_sensitive 3977 non-null float64 \n",
+ " 14 hashtags 7440 non-null object \n",
+ " 15 user_mentions 7440 non-null object \n",
+ " 16 place 7440 non-null object \n",
+ "dtypes: datetime64[ns, UTC](1), float64(3), int64(5), object(8)\n",
+ "memory usage: 1.0+ MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "clean_tweets.info()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Only the `possibly_sensitive` feature has missing values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.0 3866\n",
+ "1.0 111\n",
+ "Name: possibly_sensitive, dtype: int64"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "clean_tweets.possibly_sensitive.value_counts()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As we can see, there are 3,866 non-sensitive and 111 sensitive tweets.\n",
+ "Only 3,977 of the 7,440 tweets have a recorded sensitivity value."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Handling missing values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "created_at 0\n",
+ "source 0\n",
+ "original_text 0\n",
+ "polarity 0\n",
+ "subjectivity 0\n",
+ "lang 0\n",
+ "favorite_count 0\n",
+ "status_count 0\n",
+ "retweet_count 0\n",
+ "screen_name 0\n",
+ "original_author 0\n",
+ "followers_count 0\n",
+ "friends_count 0\n",
+ "possibly_sensitive 0\n",
+ "hashtags 0\n",
+ "user_mentions 0\n",
+ "place 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "clean_tweets = Clean_Tweets.fill_missing(clean_tweets, df = clean_tweets, column=\"possibly_sensitive\", value = \"unknown\")\n",
+ "clean_tweets.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['created_at', 'source', 'original_text', 'polarity', 'subjectivity',\n",
+ " 'lang', 'favorite_count', 'status_count', 'retweet_count',\n",
+ " 'screen_name', 'original_author', 'followers_count', 'friends_count',\n",
+ " 'possibly_sensitive', 'hashtags', 'user_mentions', 'place'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "clean_tweets.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " retweet_count \n",
+ " source \n",
+ " original_text \n",
+ " hashtags \n",
+ " place \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2 \n",
+ " <a href=\"http://twitter.com/download/android\" ... \n",
+ " RT @i_ameztoy: Extra random image (I):\\n\\nLets... \n",
+ " [{'text': 'City', 'indices': [132, 137]}] \n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 201 \n",
+ " <a href=\"http://twitter.com/download/android\" ... \n",
+ " RT @IndoPac_Info: #China's media explains the ... \n",
+ " [{'text': 'China', 'indices': [18, 24]}, {'tex... \n",
+ " \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 0 \n",
+ " <a href=\"http://twitter.com/download/android\" ... \n",
+ " China even cut off communication, they don't a... \n",
+ " [{'text': 'XiJinping', 'indices': [127, 137]}] \n",
+ " Netherlands \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 0 \n",
+ " <a href=\"http://twitter.com/download/android\" ... \n",
+ " Putin to #XiJinping : I told you my friend, Ta... \n",
+ " [{'text': 'XiJinping', 'indices': [9, 19]}] \n",
+ " Netherlands \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 381 \n",
+ " <a href=\"http://twitter.com/download/iphone\" r... \n",
+ " RT @ChinaUncensored: I’m sorry, I thought Taiw... \n",
+ " [] \n",
+ " Ayent, Schweiz \n",
+ " \n",
+ " \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " \n",
+ " \n",
+ " 21974 \n",
+ " 3 \n",
+ " <a href=\"https://mobile.twitter.com\" rel=\"nofo... \n",
+ " RT @metesohtaoglu: 📌📸 Map of #China's possible... \n",
+ " [{'text': 'China', 'indices': [29, 35]}, {'tex... \n",
+ " Seattle, WA \n",
+ " \n",
+ " \n",
+ " 21987 \n",
+ " 1 \n",
+ " <a href=\"http://twitter.com/download/iphone\" r... \n",
+ " RT @NEVERBOW: China is doing #exactly what #Ru... \n",
+ " [{'text': 'exactly', 'indices': [29, 37]}, {'t... \n",
+ " \n",
+ " \n",
+ " \n",
+ " 21989 \n",
+ " 0 \n",
+ " <a href=\"http://twitter.com/download/iphone\" r... \n",
+ " Minister Wu is crystal clear in his @BBCNews i... \n",
+ " [{'text': 'Taiwan', 'indices': [168, 175]}, {'... \n",
+ " Toronto, Canada \n",
+ " \n",
+ " \n",
+ " 21991 \n",
+ " 0 \n",
+ " <a href=\"http://twitter.com/download/android\" ... \n",
+ " Reports say that #China is planning to seize #... \n",
+ " [{'text': 'China', 'indices': [17, 23]}, {'tex... \n",
+ " \n",
+ " \n",
+ " \n",
+ " 21997 \n",
+ " 0 \n",
+ " <a href=\"http://twitter.com/download/android\" ... \n",
+ " @Reuters Thanks #Pelosi smart move. \n",
+ " [{'text': 'Pelosi', 'indices': [16, 23]}] \n",
+ " 🇺🇲🇷🇺🇺🇦🇫🇷🇦🇪🇮🇱🏳️🌈 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
7440 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " retweet_count source \\\n",
+ "0 2 \n",
+ "Int64Index: 7440 entries, 0 to 21997\n",
+ "Data columns (total 17 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 created_at 7440 non-null datetime64[ns, UTC]\n",
+ " 1 source 7440 non-null object \n",
+ " 2 original_text 7440 non-null object \n",
+ " 3 polarity 7440 non-null float64 \n",
+ " 4 subjectivity 7440 non-null float64 \n",
+ " 5 lang 7440 non-null object \n",
+ " 6 favorite_count 7440 non-null int64 \n",
+ " 7 status_count 7440 non-null int64 \n",
+ " 8 retweet_count 7440 non-null int64 \n",
+ " 9 screen_name 7440 non-null object \n",
+ " 10 original_author 7440 non-null object \n",
+ " 11 followers_count 7440 non-null int64 \n",
+ " 12 friends_count 7440 non-null int64 \n",
+ " 13 possibly_sensitive 7440 non-null object \n",
+ " 14 hashtags 7440 non-null object \n",
+ " 15 user_mentions 7440 non-null object \n",
+ " 16 place 7440 non-null object \n",
+ "dtypes: datetime64[ns, UTC](1), float64(2), int64(5), object(9)\n",
+ "memory usage: 1.0+ MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "clean_tweets['created_at'] = pd.to_datetime(clean_tweets['created_at'])\n",
+ "clean_tweets.info()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### extract source of tweets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#clean_tweets[\"source\"] = clean_tweets[\"source\"].apply(Clean_Tweets.extract_device_name(self = clean_tweets, source='source'))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### save current dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "clean data saved successfully\n"
+ ]
+ }
+ ],
+ "source": [
+ "clean_tweets.to_csv('../data/clean_data.csv', index = False)\n",
+ "print('clean data saved successfully')"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.10.5 64-bit",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.5"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "a265634967a27dd555e8346f2355ee703e655fd7f0a0d20c168527cd0a3d5707"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/requirements.txt b/requirements.txt
index d017ed3..15b377b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,2 @@
-pandas>=1.1.0
+pandas>=1.1.0
textblob>=0.15.3
\ No newline at end of file
diff --git a/tests/test_extract_dataframe.py b/tests/test_extract_dataframe.py
index 8d5f30d..522c2e7 100644
--- a/tests/test_extract_dataframe.py
+++ b/tests/test_extract_dataframe.py
@@ -1,100 +1,251 @@
-import unittest
-import pandas as pd
-import sys, os
-
-sys.path.append(os.path.abspath(os.path.join("../..")))
-
-from extract_dataframe import read_json
-from extract_dataframe import TweetDfExtractor
-
-# For unit testing the data reading and processing codes,
-# we will need about 5 tweet samples.
-# Create a sample not more than 10 tweets and place it in a json file.
-# Provide the path to the samples tweets file you created below
-sampletweetsjsonfile = "" #put here the path to where you placed the file e.g. ./sampletweets.json.
-_, tweet_list = read_json(sampletweetsjsonfile)
-
-columns = [
- "created_at",
- "source",
- "original_text",
- "clean_text",
- "sentiment",
- "polarity",
- "subjectivity",
- "lang",
- "favorite_count",
- "retweet_count",
- "original_author",
- "screen_count",
- "followers_count",
- "friends_count",
- "possibly_sensitive",
- "hashtags",
- "user_mentions",
- "place",
- "place_coord_boundaries",
-]
-
-
-class TestTweetDfExtractor(unittest.TestCase):
- """
- A class for unit-testing function in the fix_clean_tweets_dataframe.py file
-
- Args:
- -----
- unittest.TestCase this allows the new class to inherit
- from the unittest module
- """
-
- def setUp(self) -> pd.DataFrame:
- self.df = TweetDfExtractor(tweet_list[:5])
- # tweet_df = self.df.get_tweet_df()
-
- def test_find_statuses_count(self):
- self.assertEqual(
- self.df.find_statuses_count(),
- )
-
- def test_find_full_text(self):
- text =
-
- self.assertEqual(self.df.find_full_text(), text)
-
- def test_find_sentiments(self):
- self.assertEqual(
- self.df.find_sentiments(self.df.find_full_text()),
- (
- ,
- ,
- ),
- )
-
-
- def test_find_screen_name(self):
- name =
- self.assertEqual(self.df.find_screen_name(), name)
-
- def test_find_followers_count(self):
- f_count =
- self.assertEqual(self.df.find_followers_count(), f_count)
-
- def test_find_friends_count(self):
- friends_count =
- self.assertEqual(self.df.find_friends_count(), friends_count)
-
- def test_find_is_sensitive(self):
- self.assertEqual(self.df.is_sensitive(), )
-
-
- # def test_find_hashtags(self):
- # self.assertEqual(self.df.find_hashtags(), )
-
- # def test_find_mentions(self):
- # self.assertEqual(self.df.find_mentions(), )
-
-
-
-if __name__ == "__main__":
- unittest.main()
-
+import os
+import sys
+import unittest
+import pandas as pd
+
+# sys.path.append(os.path.abspath(os.path.join("../..")))
+# sys.path.append(".")
+sys.path.append(".")
+from defaults import *
+
+from extract_dataframe import read_json
+from extract_dataframe import TweetDfExtractor
+
+# For unit testing the data reading and processing code,
+# we need about 5 tweet samples.
+# The sample file should hold no more than 10 tweets in JSON format.
+# Its path is read from `processed_global_data` (presumably defined in `defaults`).
+
+_, tweet_list = read_json(processed_global_data)
+
+columns = [
+ "created_at",
+ "source",
+ "original_text",
+ "clean_text",
+ "sentiment",
+ "polarity",
+ "subjectivity",
+ "lang",
+ "favorite_count",
+ "retweet_count",
+ "original_author",
+ "screen_count",
+ "followers_count",
+ "friends_count",
+ "possibly_sensitive",
+ "hashtags",
+ "user_mentions",
+ "place",
+ "place_coord_boundaries",
+]
+
+
+class TestTweetDfExtractor(unittest.TestCase):
+ """
+    A class for unit-testing the TweetDfExtractor methods in the extract_dataframe.py file
+
+ Args:
+ -----
+        unittest.TestCase: base class from the unittest module
+                               that this test class inherits from
+ """
+
+ def setUp(self) -> pd.DataFrame:
+ self.df = TweetDfExtractor(tweet_list[:5])
+ # tweet_df = self.df.get_tweet_df()
+
+ def test_find_status_count(self):
+ """
+ Test case for the find status count method
+ """
+        # superseded assertion, kept commented out for reference
+ # self.assertEqual(self.df.find_statuses_count(),
+ # [204051, 3462, 6727, 45477, 277957])
+
+        # corrected assertion with the updated expected values
+ self.assertEqual(self.df.find_status_count(),
+ [40, 40, 40, 40, 40])
+
+ def test_find_full_text(self):
+ """
+        Test case for the find full text method
+ """
+        # superseded expected values (assigned but never asserted)
+ error_text = ['🚨Africa is "in the midst of a full-blown third wave" of coronavirus, the head of @WHOAFRO has warned\n\nCases have risen across the continent by more than 20% and deaths have also risen by 15% in the last week\n\n@jriggers reports ~ 🧵\nhttps://t.co/CRDhqPHFWM', 'Dr Moeti is head of WHO in Africa, and one of the best public health experts and leaders I know. Hers is a desperate request for vaccines to Africa. We plead with Germany and the UK to lift patent restrictions and urgently transfer technology to enable production in Africa. https://t.co/sOgIroihOc', "Thank you @research2note for creating this amazing campaign & turning social media #red4research today. @NHSRDFORUM is all about sharing the talent, passion & commitment of individuals coming together as a community for the benefit of all. You've done this. Well done 👋", 'Former Pfizer VP and Virologist, Dr. Michael Yeadon, is one of the most credentialed medical professionals speaking out about the dangers of the #Covid19 vaccines, breaks down his “list of lies” that keeps him up at night. https://t.co/LSE8CrKdqn', 'I think it’s important that we don’t sell COVAX short. It still has a lot going for it and is innovative in its design. But it needs more vaccines to share. We’re hoping our low cost @TexasChildrens recombinant protein COVID19 vaccine with @biological_e will help fill some gaps']
+
+        # corrected expected values, checked by the assertion below
+ text = ['RT @nikitheblogger: Irre: Annalena Baerbock sagt, es bricht ihr das Herz, dass man nicht bedingungslos schwere Waffen liefert.\nMir bricht e\u2026',
+ 'RT @sagt_mit: Merkel schaffte es in 1 Jahr 1 Million \"Fl\u00fcchtlinge\" durchzuf\u00fcttern, jedoch nicht nach 16 Jahren 1 Million Rentner aus der Ar\u2026',
+ 'RT @Kryptonoun: @WRi007 Pharma in Lebensmitteln, Trinkwasser, in der Luft oder in der Zahnpasta irgendwo muss ein Beruhigungsmittel bzw. Be\u2026',
+ 'RT @WRi007: Die #Deutschen sind ein braves Volk!. Mit #Spritpreisen von 2 Euro abgefunden. Mit #inflation abgefunden. Mit h\u00f6heren #Abgaben\u2026',
+ 'RT @RolandTichy: Baerbock verk\u00fcndet mal so nebenhin in Riga das Ende der Energieimporte aus Russland. Habeck rudert schon zur\u00fcck, Scholz sc\u2026']
+ self.assertEqual(self.df.find_full_text(), text)
+
+ def test_find_sentiments(self):
+ """
+ Test case for the find sentiments method
+ """
+        # superseded expected values (assigned but never asserted)
+ error_sentiment_values = ([0.16666666666666666, 0.13333333333333333,
+ 0.3166666666666667, 0.08611111111111111,
+ 0.27999999999999997],
+ [0.18888888888888888, 0.45555555555555555,
+ 0.48333333333333334, 0.19722222222222224,
+ 0.6199999999999999])
+
+        # corrected expected values, checked by the assertion below
+ sentiment_values = ([0.0, 0.0, 0.0, 0.0, 0.0],
+ [0.0, 0.0, 0.0, 0.0, 0.0])
+ self.assertEqual(self.df.find_sentiments(self.df.find_full_text()),
+ sentiment_values)
+
+ def test_find_created_time(self):
+ """
+ Test case for the find created time method
+ """
+        # superseded expected values (assigned but never asserted)
+ created_at = ['Fri Jun 18 17:55:49 +0000 2021',
+ 'Fri Jun 18 17:55:59 +0000 2021',
+ 'Fri Jun 18 17:56:07 +0000 2021',
+ 'Fri Jun 18 17:56:10 +0000 2021',
+ 'Fri Jun 18 17:56:20 +0000 2021']
+
+        # corrected expected values, checked by the assertion below
+ really_created_at = ['Fri Apr 22 22:20:18 +0000 2022',
+ 'Fri Apr 22 22:19:16 +0000 2022',
+ 'Fri Apr 22 22:17:28 +0000 2022',
+ 'Fri Apr 22 22:17:20 +0000 2022',
+ 'Fri Apr 22 22:13:15 +0000 2022']
+ self.assertEqual(self.df.find_created_time(), really_created_at)
+
+ def test_find_source(self):
+ """
+ Test case for the find source method
+ """
+        # superseded expected values (assigned but never asserted)
+ error_source = ['Twitter for iPhone ', 'Twitter Web App ', 'Twitter for iPhone ', 'Twitter Web App ', 'Twitter for Android ']
+
+        # corrected expected values, checked by the assertion below
+ source = ['Twitter for Android ', 'Twitter for Android ', 'Twitter for Android ', 'Twitter for Android ', 'Twitter for Android ']
+ self.assertEqual(self.df.find_source(), source)
+
+ def test_find_screen_name(self):
+ """
+ Test case for the find screen name method
+ """
+        # superseded expected values (assigned but never asserted)
+ error_name_test_Case = ['ketuesriche', 'Grid1949',
+ 'LeeTomlinson8', 'RIPNY08', 'pash22']
+        # corrected expected values, checked by the assertion below
+ name = ['McMc74078966', 'McMc74078966', 'McMc74078966',
+ 'McMc74078966', 'McMc74078966']
+ self.assertEqual(self.df.find_screen_name(), name)
+
+ def test_find_followers_count(self):
+ """
+ Test case for the find followers count method
+ """
+        # superseded expected values (assigned but never asserted)
+ error_f_count = [551, 66, 1195, 2666, 28250]
+
+        # corrected expected values, checked by the assertion below
+ f_count = [3, 3, 3, 3, 3]
+ self.assertEqual(self.df.find_followers_count(), f_count)
+
+ def test_find_friends_count(self):
+ """
+ Test case for the find friends count method
+ """
+        # superseded expected values (assigned but never asserted)
+ error_friends_count = [351, 92, 1176, 2704, 30819]
+
+        # corrected expected values, checked by the assertion below
+ friends_count = [12, 12, 12, 12, 12]
+ self.assertEqual(self.df.find_friends_count(), friends_count)
+
+ def test_find_is_sensitive(self):
+ self.assertEqual(self.df.is_sensitive(),
+ [None, None, None, None, None])
+
+ def test_find_hashtags(self):
+ """
+ Test case for the find hashtags method
+ """
+ hashtags = [[], [], [], [{'indices': [16, 26], 'text': 'Deutschen'},
+ {'indices': [54, 67], 'text': 'Spritpreisen'},
+ {'indices': [95, 105], 'text': 'inflation'},
+ {'indices': [130, 138], 'text': 'Abgaben'}],
+ []]
+ self.assertEqual(self.df.find_hashtags(), hashtags)
+
+ def test_find_mentions(self):
+ """
+ Test case for the find mentions method
+ """
+ mentions = [[{"screen_name": "nikitheblogger",
+ "name": "Neverforgetniki", "id": 809188392089092097,
+ "id_str": "809188392089092097", "indices": [3, 18]}],
+ [{"screen_name": "sagt_mit",
+ "name": "Sie sagt es mit Bildern",
+ "id": 1511959918777184256,
+ "id_str": "1511959918777184256",
+ "indices": [3, 12]}],
+ [{"screen_name": "Kryptonoun",
+ "name": "Kryptoguru", "id": 951051508321345536,
+ "id_str": "951051508321345536", "indices": [3, 14]},
+ {"screen_name": "WRi007", "name": "Wolfgang Berger",
+ "id": 1214543251283357696,
+ "id_str": "1214543251283357696", "indices": [16, 23]}],
+ [{"screen_name": "WRi007",
+ "name": "Wolfgang Berger", "id": 1214543251283357696,
+ "id_str": "1214543251283357696", "indices": [3, 10]}],
+ [{"screen_name": "RolandTichy", "name": "Roland Tichy",
+ "id": 19962363, "id_str": "19962363", "indices": [3, 15]}
+ ]]
+ self.assertEqual(self.df.find_mentions(), mentions)
+
+ def test_find_location(self):
+ """
+ Test case for the find location method
+ """
+        # superseded expected values (assigned but never asserted)
+ error_locations = ['Mass', 'Edinburgh, Scotland', None, None,
+ 'United Kingdom']
+
+        # corrected expected values, checked by the assertion below
+ locations = ['', '', '', '', '']
+ self.assertEqual(self.df.find_location(), locations)
+
+ def test_find_lang(self):
+ """
+ Test case for the find lang method
+ """
+ langs = ['de', 'de', 'de', 'de', 'de']
+ self.assertEqual(self.df.find_lang(), langs)
+
+ def test_find_retweet_count(self):
+ """
+ Test case for the find retweet count method
+ """
+        # superseded expected values (assigned but never asserted)
+ error_retweets_test_Case = [612, 92, 1, 899, 20]
+
+        # corrected expected values, checked by the assertion below
+ retweets = [355, 505, 4, 332, 386]
+ self.assertEqual(self.df.find_retweet_count(), retweets)
+
+ def test_find_favorite_count(self):
+ """
+ Test case for the find favorite count method
+ """
+        # superseded assertion, kept commented out for reference
+ # self.assertEqual(self.df.find_favorite_count(),
+ # [548, 195, 2, 1580, 72])
+
+        # corrected assertion with the updated expected values
+ self.assertEqual(self.df.find_favorite_count(),
+ [2356, 1985, 16, 1242, 1329])
+
+if __name__ == "__main__":
+ unittest.main()