# keep an explicit copy so new columns can be added to df_attacks later without warnings
df_attacks = attacks.loc[attacks["to_journalist"].isin([USERNAME])].copy()
print(f"Number of attacks: {len(df_attacks)}")
Number of attacks: 409
Attack proportion = (Number of attacks / Number of mentions) * 100
journalist_mentions = len(df.loc[df["to_journalist"].isin([USERNAME])])
journalist_attacks = len(df_attacks)
percentage_attacks = (journalist_attacks / journalist_mentions) * 100
proportion = (percentage_attacks / 100) * 10
proportion_rounded = round(proportion, 1)
print(
    f"Approximately {proportion_rounded} out of every 10 posts mentioning {USERNAME} are attacks"
)
Approximately 1.1 out of every 10 posts mentioning @Angelalerena are attacks
Attack proportion per follower = Number of attacks / Number of followers
For every 1K followers, there were approximately 1.42 attacks on @Angelalerena
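The code behind this per-follower figure is not shown above; a minimal sketch, assuming the journalist's follower count is supplied separately (the helper name and the follower_count variable are illustrative, not from the original analysis):

def attacks_per_1k_followers(n_attacks, n_followers):
    # scale the raw attack count to a rate per 1,000 followers
    return round(n_attacks / n_followers * 1000, 2)

# usage (follower_count is a hypothetical variable holding the journalist's follower total):
# attacks_per_1k_followers(journalist_attacks, follower_count)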
women 191
politics 132
appearance 69
class 54
racism 14
lgbti 9
criminal 4
calls 2
dtype: int64
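The breakdown above comes from code not shown here; a minimal sketch, assuming df_attacks has one binary flag column per attack type (these column names are an assumption based on the output):

attack_types = [
    "women", "politics", "appearance", "class",
    "racism", "lgbti", "criminal", "calls",
]
# sum the one-hot attack-type columns and order them by frequency
df_attacks[attack_types].sum().sort_values(ascending=False)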
journalist_posts = df.loc[df["from_journalist"].isin([USERNAME])]
journalist_posts = journalist_posts.dropna(subset=["from_journalist"])
eventos = [
    "1er debate",
    "2do debate",
    "elecciones generales",
    "debate balotaje",
    "elecciones balotaje",
]
colors = ["green", "purple", "orange", "red", "blue"]
eventos_count = {}
fig = px.line()
for i, evento in enumerate(eventos):
    evento_data = journalist_posts.loc[journalist_posts["event"].isin([evento])]
    evento_count = evento_data.groupby("dt_date").size().reset_index(name="count")
    eventos_count[evento] = evento_count
    fig.add_scatter(
        x=evento_count["dt_date"],
        y=evento_count["count"],
        name=evento,
        line=dict(color=colors[i]),
        hovertemplate="posts: %{y}",
    )
fig.update_layout(title=f"Posts by {USERNAME}", width=1000)
fig.update_xaxes(type="category")
fig.update_yaxes(range=[0, 100])
fig.show()
df_attacks["hashtags"] = df_attacks["text"].apply(
lambda x: (
np.nan
if pd.isnull(x) or not isinstance(x, str) or len(re.findall(r"#\w+", x)) == 0
else re.findall(r"#\w+", x)
)
)
df_attacks["hashtags"] = df_attacks["hashtags"].apply(
lambda x: ", ".join(x) if isinstance(x, list) else x
)
# convert dataframe column to list
hashtags = df_attacks["hashtags"].unique()
# remove nan items from list
hashtags = [x for x in hashtags if not pd.isna(x)]
# split items into a list based on a delimiter
hashtags = [x.split(",") for x in hashtags]
# flatten list of lists
hashtags = [item for sublist in hashtags for item in sublist]
# remove whitespaces
hashtags = list(map(lambda x: x.replace(" ", ""), hashtags))
# count items on list
hashtags_count = pd.Series(hashtags).value_counts()
hashtags_count
#viv 1
Name: count, dtype: int64
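As a side note, a similar frequency table can be built more directly with pandas string methods; a minimal sketch (unlike the steps above, which work from the unique joined strings, this counts every hashtag occurrence across attack posts):

hashtags_count = (
    df_attacks["text"]
    .str.findall(r"#\w+")  # list of hashtags per post (NaN rows stay NaN)
    .explode()             # one hashtag per row
    .value_counts()
)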
df_attacks["mentions"] = df_attacks["text"].apply(
lambda x: (
np.nan
if pd.isnull(x) or not isinstance(x, str) or len(re.findall(r"@(\w+)", x)) == 0
else re.findall(r"@(\w+)", x)
)
)
df_attacks["mentions"] = df_attacks["mentions"].apply(
lambda x: ", ".join(x) if isinstance(x, list) else x
)
# convert dataframe column to list
mentions = df_attacks["mentions"].unique()
# remove nan items from list
mentions = [x for x in mentions if not pd.isna(x)]
# split items into a list based on a delimiter
mentions = [x.split(",") for x in mentions]
# flatten list of lists
mentions = [item for sublist in mentions for item in sublist]
# remove whitespaces
mentions = list(map(lambda x: x.replace(" ", ""), mentions))
# count items on list
mentions_count = pd.Series(mentions).value_counts()
mentions_count
Angelalerena 1
diegobranca 1
MalenaGalmarini 1
JMilei 1
Name: count, dtype: int64
# load the spacy model for Spanish
nlp = spacy.load("es_core_news_sm")
# load stop words for Spanish
STOP_WORDS = nlp.Defaults.stop_words
# Function to filter stop words
def filter_stopwords(text):
    # lowercase the text before tokenizing
    doc = nlp(text.lower())
    # keep only alphabetic tokens that are not stop words
    tokens = [
        token.text
        for token in doc
        if not token.is_stop and token.text not in STOP_WORDS and token.is_alpha
    ]
    return " ".join(tokens)
# apply function to dataframe column
df_attacks["text_pre"] = df_attacks["text"].apply(filter_stopwords)
# count the 20 most frequent tokens in the preprocessed text
token_counts = df_attacks["text_pre"].str.split(expand=True).stack().value_counts()[:20]
token_counts
sos 42
vos 42
q 36
laburo 31
mierda 28
vas 27
amor 20
laburar 19
orto 19
buscar 18
curro 16
anda 16
zurda 15
puta 15
años 14
kuka 14
gente 14
voto 14
cara 13
trabajo 12
Name: count, dtype: int64