Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 28 additions & 12 deletions fix_clean_tweets_dataframe.py → clean_tweets_dataframe.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pandas as pd

class Clean_Tweets:
"""
The PEP8 Standard AMAZING!!!
Expand All @@ -15,22 +17,33 @@ def drop_unwanted_column(self, df:pd.DataFrame)->pd.DataFrame:
df.drop(unwanted_rows , inplace=True)
df = df[df['polarity'] != 'polarity']

return df
return



def drop_duplicate(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    Drop duplicate rows from the DataFrame.

    Parameters:
        df (pd.DataFrame): DataFrame from which to remove duplicates.

    Returns:
        pd.DataFrame: A new DataFrame with duplicate rows removed; the
        first occurrence of each duplicated row is kept.
    """
    # drop_duplicates returns a new frame by default, so the caller's
    # DataFrame is left untouched and only the local name is rebound.
    df = df.drop_duplicates(keep='first')

    print("Duplicate rows removed from the DataFrame.")

    return df


def convert_to_datetime(self, df:pd.DataFrame)->pd.DataFrame:
"""
convert column to datetime
"""
----

----
df['created_at'] = pd.to_datetime(df['created_at'])

df = df[df['created_at'] >= '2020-12-31' ]

Expand All @@ -41,18 +54,21 @@ def convert_to_numbers(self, df:pd.DataFrame)->pd.DataFrame:
convert columns like polarity, subjectivity, retweet_count
favorite_count etc to numbers
"""
df['polarity'] = pd.----

----
----
df['polarity'] = pd.to_numeric(df['polarity'])
df['subjectivity'] = pd.to_numeric(df['subjectivity'])
df['retweet_count'] = pd.to_numeric(df['retweet_count'])
df['favorite_count'] = pd.to_numeric(df['favorite_count'])
df['retweet_count'] = pd.to_numeric(df['retweet_count'])
df['followers_count'] = pd.to_numeric(df['followers_count'])

return df



def remove_non_english_tweets(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    Remove non-English tweets based on the 'lang' column.

    Parameters:
        df (pd.DataFrame): DataFrame containing a 'lang' column.

    Returns:
        pd.DataFrame: A DataFrame holding only the rows whose 'lang'
        value equals 'en'.
    """
    # pandas has no top-level `pd.drop`; a boolean mask keeps exactly the
    # English rows (the earlier expression targeted the 'en' rows for
    # removal, which is the opposite of what this method promises).
    df = df[df['lang'] == 'en']

    return df
Loading