Code
# Keep only the attacks addressed to the selected journalist. The explicit
# copy makes df_attacks an independent frame, so columns assigned to it later
# are not written to a slice of `attacks` (avoids SettingWithCopyWarning).
df_attacks = attacks.loc[attacks["to_journalist"].isin([USERNAME])].copy()
print(f"Número de ataques: {len(df_attacks)}")
Número de ataques: 712
Fernanda Aguirre
January 16, 2024
Proporción de ataques = (Número de ataques / Número de menciones) * 100
# Share of the posts mentioning the journalist that were classified as attacks.
journalist_mentions = len(df.loc[df["to_journalist"].isin([USERNAME])])
journalist_attacks = len(df_attacks)

# A journalist without any mention would otherwise raise ZeroDivisionError;
# report a proportion of zero in that case.
if journalist_mentions:
    percentage_attacks = (journalist_attacks / journalist_mentions) * 100
else:
    percentage_attacks = 0.0

# Re-express the percentage on a "per 10 posts" scale for the summary sentence.
proportion = (percentage_attacks / 100) * 10
proportion_rounded = round(proportion, 1)
print(
    f"Aproximadamente {proportion_rounded} de cada 10 publicaciones que mencionan a {USERNAME} son ataques"
)
Aproximadamente 1.0 de cada 10 publicaciones que mencionan a @diegobranca son ataques
Proporción de ataques por cada 1K seguidores = (Número de ataques / Número de seguidores) * 1000
Por cada 1K seguidores, aproximadamente hubo 1.78 ataques para @diegobranca
appearance 309
politics 222
women 110
class 44
lgbti 44
racism 25
criminal 24
calls 6
dtype: int64
# Posts authored by the selected journalist; rows whose author is missing are
# discarded.
authored_mask = df["from_journalist"].isin([USERNAME])
journalist_posts = df.loc[authored_mask].dropna(subset=["from_journalist"])

# Events to plot; each one is paired by position with a line colour.
eventos = [
    "1er debate",
    "2do debate",
    "elecciones generales",
    "debate balotaje",
    "elecciones balotaje",
]
colors = ["green", "purple", "orange", "red", "blue"]

# Per-event frames of (dt_date, count), kept so they can be inspected later.
eventos_count = {}
fig = px.line()
for i, evento in enumerate(eventos):
    in_event = journalist_posts["event"].isin([evento])
    evento_data = journalist_posts.loc[in_event]
    # Number of posts for each `dt_date` value within this event.
    evento_count = evento_data.groupby("dt_date").size().reset_index(name="count")
    eventos_count[evento] = evento_count
    # One trace per event, drawn in that event's colour.
    fig.add_scatter(
        x=evento_count["dt_date"],
        y=evento_count["count"],
        name=evento,
        line={"color": colors[i]},
        hovertemplate="posts: %{y}",
    )

fig.update_layout(title=f"Publicaciones de {USERNAME}", width=1000)
# Treat the dates as discrete categories and pin the y axis to 0-100.
fig.update_xaxes(type="category")
fig.update_yaxes(range=[0, 100])
fig.show()
# Extract the hashtags of every attack and count how often each one appears.
_HASHTAG_RE = re.compile(r"#\w+")


def _find_hashtags(text):
    """Return the hashtags found in ``text``; an empty list for non-string input."""
    return _HASHTAG_RE.findall(text) if isinstance(text, str) else []


# One list of hashtags per post (the regex now runs once per row).
hashtags_per_post = df_attacks["text"].apply(_find_hashtags)

# Column value: comma-separated hashtags, or NaN when the post has none.
df_attacks["hashtags"] = hashtags_per_post.apply(
    lambda tags: ", ".join(tags) if tags else np.nan
)

# Flatten over ALL posts. The previous version went through `.unique()`, which
# collapsed posts carrying the same hashtag set into a single entry and so
# undercounted every hashtag that was repeated across posts.
hashtags = [tag for tags in hashtags_per_post for tag in tags]

# count items on list
hashtags_count = pd.Series(hashtags, dtype="object").value_counts()
hashtags_count
#ElPeorGobiernoDeLaHistoria 2
#KirchnerismoNuncaMas 2
#SeVannnnnn 1
#EsAhoraYParaSiempre 1
#NoVasASerPresidente 1
#NoAl5toGobiernoK 1
#elsi 1
#Sellamaperiodista 1
#Milita60depobrezaydolara1200 1
#Noquierequeseterminelapauta 1
#Nolepaganconladeellos 1
#RataInmunda 1
#SeVanParaSiempre 1
#Repugnante 1
#PatriciaBullrichPresidente2023 1
#Chaukukas 1
#pelotudo 1
#gobiernodevagosycorruptos 1
#Afip 1
#tugo 1
#MileiPresidente 1
Name: count, dtype: int64
# Extract the accounts mentioned in every attack and count how often each one
# appears.
# The lookbehind rejects an "@" that sits inside a word (e.g. "tod@s" or an
# e-mail address), which the bare pattern reported as mentions such as "s".
_MENTION_RE = re.compile(r"(?<!\w)@(\w+)")


def _find_mentions(text):
    """Return the mentioned usernames (without "@") in ``text``; [] for non-strings."""
    return _MENTION_RE.findall(text) if isinstance(text, str) else []


# One list of mentioned usernames per post (the regex now runs once per row).
mentions_per_post = df_attacks["text"].apply(_find_mentions)

# Column value: comma-separated usernames, or NaN when the post mentions nobody.
df_attacks["mentions"] = mentions_per_post.apply(
    lambda users: ", ".join(users) if users else np.nan
)

# Flatten over ALL posts. The previous version went through `.unique()`, which
# collapsed posts with the same mention set into a single entry and so
# undercounted every account mentioned in more than one post.
mentions = [user for users in mentions_per_post for user in users]

# count items on list
mentions_count = pd.Series(mentions, dtype="object").value_counts()
mentions_count
SergioMassa 2
SergioChouza 1
minsaurralde 1
T 1
PatoBullrich 1
diegobranca 1
rd 1
s 1
JMilei 1
herlombardi 1
MunicipioPilar 1
Name: count, dtype: int64
# Load the spaCy pipeline "es_core_news_sm" (Spanish); it is used below to
# tokenize the attack texts.
nlp = spacy.load("es_core_news_sm")
# Stop-word set exposed by the loaded pipeline's language defaults.
STOP_WORDS = nlp.Defaults.stop_words
# Function to filter stop words
def filter_stopwords(text):
    """Return ``text`` lowercased, without stop words or non-alphabetic tokens.

    Args:
        text: Raw post text. Non-string values (e.g. NaN or None for a
            missing text) are treated as empty.

    Returns:
        The surviving tokens joined by single spaces, or "" when nothing
        survives or ``text`` is not a string.
    """
    # A missing text has no .lower(); the hashtag and mention extraction guard
    # against non-string values in the same way.
    if not isinstance(text, str):
        return ""
    # lower text
    doc = nlp(text.lower())
    # keep alphabetic tokens that neither spaCy nor STOP_WORDS flags as stop words
    tokens = [
        token.text
        for token in doc
        if not token.is_stop and token.text not in STOP_WORDS and token.is_alpha
    ]
    return " ".join(tokens)
# Store the stop-word-free version of every attack text in a new column.
df_attacks["text_pre"] = df_attacks["text"].apply(filter_stopwords)

# Split each cleaned text on whitespace (one token per cell) and flatten the
# resulting table into a single Series of tokens.
tokens = df_attacks["text_pre"].str.split(expand=True).stack()
# Keep the 20 most frequent tokens.
token_counts = tokens.value_counts().head(20)
token_counts
gordo 172
vos 144
sos 98
q 71
vas 39
gordito 35
k 32
mierda 32
massa 27
pelotudo 26
gente 24
tenes 24
peronchos 23
branca 21
culo 21
laburar 19
ensobrado 17
anda 17
enano 17
orto 17
Name: count, dtype: int64