Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 26 additions & 12 deletions fix_clean_tweets_dataframe.py → clean_tweets_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,33 @@ def drop_unwanted_column(self, df:pd.DataFrame)->pd.DataFrame:
df.drop(unwanted_rows , inplace=True)
df = df[df['polarity'] != 'polarity']

return df
return



def drop_duplicate(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    Drop duplicate rows from the DataFrame.

    Parameters:
        df (pd.DataFrame): DataFrame from which to remove duplicates.

    Returns:
        pd.DataFrame: A DataFrame with duplicates removed; the first
        occurrence of each duplicated row is kept.
    """
    # drop_duplicates returns a new frame by default, so the caller's
    # DataFrame is left untouched.
    df = df.drop_duplicates(keep='first')

    print("Duplicate rows removed from the DataFrame.")

    return df


def convert_to_datetime(self, df:pd.DataFrame)->pd.DataFrame:
"""
convert column to datetime
"""
----

----
df['created_at'] = pd.to_datetime(df['created_at'])

df = df[df['created_at'] >= '2020-12-31' ]

Expand All @@ -41,18 +52,21 @@ def convert_to_numbers(self, df:pd.DataFrame)->pd.DataFrame:
convert columns like polarity, subjectivity, retweet_count
favorite_count etc to numbers
"""
df['polarity'] = pd.----

----
----
df['polarity'] = pd.to_numeric(df['polarity'])
df['subjectivity'] = pd.to_numeric(df['subjectivity'])
df['retweet_count'] = pd.to_numeric(df['retweet_count'])
df['favorite_count'] = pd.to_numeric(df['favorite_count'])
df['retweet_count'] = pd.to_numeric(df['retweet_count'])
#df['followers_count'] = pd.to_numeric(df['followers_count'])

return df



def remove_non_english_tweets(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    Remove non-English tweets based on the 'lang' column.

    Parameters:
        df (pd.DataFrame): DataFrame containing a 'lang' column.

    Returns:
        pd.DataFrame: Only the rows whose 'lang' value is 'en'.
    """
    # Filter with a boolean mask: pandas exposes no top-level `drop`
    # function, and the rows where lang == 'en' are the ones to keep,
    # not the ones to discard.
    df = df[df['lang'] == 'en']

    return df
Empty file added eda.ipynb
Empty file.
12 changes: 11 additions & 1 deletion fix_extract_dataframe.py → extract_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,26 @@ def find_statuses_count(self)->list:
statuses_count

def find_full_text(self) -> list:
    """
    Collect the full text of every tweet in self.tweets_list.

    Returns:
        list: One entry per tweet, taken from its 'full_text' key; None
        for a tweet that has no such key.
    """
    # NOTE(review): assumes each item in self.tweets_list is a dict with a
    # top-level 'full_text' key, mirroring how find_created_time reads
    # its key -- confirm against the actual tweet schema.
    text = [tweet.get('full_text', None) for tweet in self.tweets_list]
    return text


def find_sentiments(self, text)->list:
    """
    Return a (polarity, subjectivity) pair.

    NOTE(review): nothing in the visible body assigns `polarity` or
    `self.subjectivity`, and `text` is never read, so the sentiment
    computation appears to be missing here -- confirm before relying on
    this method. Despite the `list` annotation, the return statement
    yields a tuple.
    """

    return polarity, self.subjectivity

def find_created_time(self) -> list:
    """
    Collect the creation timestamp of every tweet in self.tweets_list.

    Returns:
        list: One entry per tweet, taken from its 'created_at' key; None
        for a tweet that has no such key.
    """
    # The key must be spelled 'created_at'; a misspelled key never
    # matches, which makes every entry silently fall back to None.
    created_at = [tweet.get('created_at', None) for tweet in self.tweets_list]

    return created_at




def find_source(self)->list:
source =
Expand Down