Code
# Attacks addressed to the journalist under analysis. The explicit copy makes
# df_attacks an independent frame, so columns can be added to it later without
# pandas warning about writes to a slice of `attacks`.
df_attacks = attacks.loc[attacks["to_journalist"].isin([USERNAME])].copy()
print(f"Número de ataques: {len(df_attacks)}")
Número de ataques: 399
Fernanda Aguirre
January 16, 2024
Proporción de ataques = (Número de ataques / Número de menciones) * 100
# Share of the posts mentioning USERNAME that are attacks, reported as
# "N out of every 10 posts".
journalist_mentions = int(df["to_journalist"].isin([USERNAME]).sum())
journalist_attacks = len(df_attacks)

# A journalist without any mention would make the division fail, so the
# percentage falls back to 0.0 in that case.
if journalist_mentions:
    percentage_attacks = (journalist_attacks / journalist_mentions) * 100
else:
    percentage_attacks = 0.0

# Rescale the percentage (0-100) to a 0-10 scale and keep one decimal.
proportion = (percentage_attacks / 100) * 10
proportion_rounded = round(proportion, 1)
print(
    f"Aproximadamente {proportion_rounded} de cada 10 publicaciones que mencionan a {USERNAME} son ataques"
)
Aproximadamente 0.9 de cada 10 publicaciones que mencionan a @edufeiok son ataques
Proporción de ataques por seguidor = Número de ataques / Número de seguidores
appearance 115
women 112
politics 108
racism 48
lgbti 9
calls 9
criminal 8
class 7
dtype: int64
# Posts authored by USERNAME; rows without an author value are discarded.
journalist_posts = df.loc[df["from_journalist"].isin([USERNAME])].dropna(
    subset=["from_journalist"]
)

# Events to plot, each paired by position with the colour of its trace.
eventos = [
    "1er debate",
    "2do debate",
    "elecciones generales",
    "debate balotaje",
    "elecciones balotaje",
]
colors = ["green", "purple", "orange", "red", "blue"]

# Per-event table of post counts by date, keyed by event name.
eventos_count = {}
fig = px.line()
for evento, color in zip(eventos, colors):
    # Count the journalist's posts per date for this event.
    evento_data = journalist_posts.loc[journalist_posts["event"] == evento]
    evento_count = evento_data.groupby("dt_date").size().reset_index(name="count")
    eventos_count[evento] = evento_count

    # One trace per event, drawn in the event's colour.
    fig.add_scatter(
        x=evento_count["dt_date"],
        y=evento_count["count"],
        name=evento,
        line={"color": color},
        hovertemplate="posts: %{y}",
    )

fig.update_layout(title=f"Publicaciones de {USERNAME}", width=1000)
# Dates are shown as discrete categories and the y axis is fixed to 0-100.
fig.update_xaxes(type="category")
fig.update_yaxes(range=[0, 100])
fig.show()
def _extract_hashtags(text):
    """Return the hashtags found in *text* joined by ", ", or NaN if there are none.

    Non-string values (including missing text) also yield NaN.
    """
    if not isinstance(text, str):
        return np.nan
    found = re.findall(r"#\w+", text)
    return ", ".join(found) if found else np.nan


# One comma-separated string of hashtags per attack (NaN when the text has none).
df_attacks["hashtags"] = df_attacks["text"].apply(_extract_hashtags)

# Flatten the column into one list entry per hashtag occurrence. Rows are not
# de-duplicated first: collapsing identical rows would make a hashtag that
# appears in many attacks count only once.
hashtags = [
    tag.replace(" ", "")
    for joined in df_attacks["hashtags"].dropna()
    for tag in joined.split(",")
]

# Frequency of each hashtag across all attacks, most frequent first.
hashtags_count = pd.Series(hashtags).value_counts()
hashtags_count
#PatoBullrichPresidente2023 1
#PatoPresidente 1
#PatriciaBullrich 1
#ZurdosHijosDePuta 1
#Colombia 1
#NegroFuturo 1
#bipolaridad 1
#argentina 1
#Son30Mil 1
#fraude 1
Name: count, dtype: int64
def _extract_mentions(text):
    """Return the usernames mentioned in *text* joined by ", ", or NaN if there are none.

    The leading "@" is not part of the captured name. Non-string values
    (including missing text) also yield NaN.
    """
    if not isinstance(text, str):
        return np.nan
    found = re.findall(r"@(\w+)", text)
    return ", ".join(found) if found else np.nan


# One comma-separated string of mentioned users per attack (NaN when there are none).
df_attacks["mentions"] = df_attacks["text"].apply(_extract_mentions)

# Flatten the column into one list entry per mention occurrence. Rows are not
# de-duplicated first: collapsing identical rows would make an account that is
# mentioned in many attacks count only once.
mentions = [
    user.replace(" ", "")
    for joined in df_attacks["mentions"].dropna()
    for user in joined.split(",")
]

# Frequency of each mentioned account across all attacks, most frequent first.
mentions_count = pd.Series(mentions).value_counts()
mentions_count
edufeiok 1
myriambregman 1
r 1
JonatanViale 1
CarlosMaslaton 1
QuintelaRicardo 1
ursuvargues 1
Name: count, dtype: int64
# load the spacy model for Spanish
nlp = spacy.load("es_core_news_sm")
# load stop words for Spanish
STOP_WORDS = nlp.Defaults.stop_words


def filter_stopwords(text):
    """Return *text* lower-cased with stop words and non-alphabetic tokens removed.

    The text is tokenized with the ``nlp`` pipeline; a token is kept only if it
    is not flagged as a stop word, is not listed in ``STOP_WORDS`` and is purely
    alphabetic. The surviving tokens are joined with single spaces.

    Non-string input (for example a missing value) yields an empty string
    instead of raising on ``.lower()``.
    """
    if not isinstance(text, str):
        return ""
    # lower text
    doc = nlp(text.lower())
    # filter tokens
    tokens = [
        token.text
        for token in doc
        if not token.is_stop and token.text not in STOP_WORDS and token.is_alpha
    ]
    return " ".join(tokens)
# Store the stop-word-filtered version of every attack text in a new column.
df_attacks["text_pre"] = df_attacks["text"].map(filter_stopwords)
# Split each cleaned text on whitespace, pool all tokens into one Series and
# keep the 20 most frequent.
token_counts = (
    df_attacks["text_pre"]
    .str.split(expand=True)
    .stack()
    .value_counts()
    .head(20)
)
token_counts
vos 48
q 45
enano 32
sos 28
edu 24
asco 22
trompas 20
ligó 17
facho 14
país 14
mierda 14
viejo 14
massa 13
das 12
milei 12
vas 11
años 10
argentina 10
votar 9
mujer 9
Name: count, dtype: int64