
Data Science Project WS24 - Dream Team¶

Import of required libraries¶

In [8]:
import spacy
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
from spacy import displacy
from transformers import pipeline
from germansentiment import SentimentModel
from datasets import Dataset
from tqdm import tqdm
import time
import numpy as np

import ast
from itertools import combinations
import networkx as nx
from pyvis.network import Network
import datetime

Import of scraped articles file¶

In [26]:
# Path to the CSV file
#file_path = "../data/articles_24.csv"
file_path = "articles_24.csv"
# Read the CSV file into a DataFrame
df = pd.read_csv(file_path)

Data Cleaning¶

File Information¶
In [27]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47784 entries, 0 to 47783
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   title         47784 non-null  object
 1   authors       47784 non-null  object
 2   date_publish  47784 non-null  object
 3   maintext      47368 non-null  object
 4   url           47784 non-null  object
 5   newspaper     47784 non-null  object
dtypes: object(6)
memory usage: 2.2+ MB
In [28]:
df.head()
Out[28]:
title authors date_publish maintext url newspaper
0 Yvonne Woelke wollte ein Baby: Kehrtwende! „Ki... ['Tanja May'] 2024-01-01 22:00:02 „Ich wünsche mir nichts sehnlicher, als endlic... https://www.bild.de/bild-plus/unterhaltung/leu... bild.de
1 RB Leipzig: Es wurde über Instagram öffentlich... ['Yvonne Gabriel'] 2024-01-01 22:36:21 Stammplatz weg, ungewisse Zukunft. Sportlich l... https://www.bild.de/sport/fussball/rb-leipzig/... bild.de
2 Apple Watch 8 mit Alu-Sportband beim BILDplus ... ['BILD'] 2024-01-02 08:06:51 Frohes Neues liebe BILDplus-User! Damit der St... https://www.bild.de/gewinnspiele/bildplus-akti... bild.de
3 Werder Bremen: „1000-mal totgesagt“ - Ein Prof... ['Markus Balczuweit'] 2024-01-01 22:12:40 Christian Groß (34) ist unverwüstlich.\r\nWerd... https://www.bild.de/sport/fussball/werder-brem... bild.de
4 Angelique Kerber: Kaum zu glauben, wer in der ... ['SEBASTIAN KAYSER'] 2024-01-01 22:43:31 Tennis-Start ins Neue Jahr! In Australien spie... https://www.bild.de/sport/mehr-sport/tennis/an... bild.de
Delete articles that can't meet the requirements:¶
  • Must have a string object in the maintext
  • Must have a "maintext" of more than 300 words (threshold based on observation)
  • Must not have the same sentence repeated consecutively (based on observation)
In [29]:
# Function to check if maintext is valid
def is_valid_article(text):
    # Ensure the text is a string
    if not isinstance(text, str):
        return False

    # Check word count
    word_count = len(text.split())
    if word_count < 300:
        return False

    # Check for repeated sentences
    sentences = text.split('. ')
    for i in range(len(sentences) - 1):
        if sentences[i].strip() == sentences[i + 1].strip():
            return False

    return True

# Apply the function to filter the DataFrame
df = df[df["maintext"].apply(is_valid_article)]

# Reset the index
df = df.reset_index(drop=True)
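The repeated-sentence check above only compares adjacent sentences after splitting on `'. '`. A minimal standalone sketch of that logic (the helper name `has_adjacent_repeat` is hypothetical):

```python
def has_adjacent_repeat(text):
    # Mirrors the check in is_valid_article: split on '. ' and
    # flag any sentence that immediately repeats itself
    sentences = [s.strip() for s in text.split('. ')]
    return any(a == b for a, b in zip(sentences, sentences[1:]))

print(has_adjacent_repeat("Der Satz wiederholt sich. Der Satz wiederholt sich. Ende"))  # True
print(has_adjacent_repeat("Erster Satz. Zweiter Satz. Ende"))  # False
```

Note that non-adjacent duplicates (e.g. a sentence repeated at the start and end of an article) pass this check.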
From 47,784 articles down to 11,242, ensuring article quality¶
In [30]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11242 entries, 0 to 11241
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   title         11242 non-null  object
 1   authors       11242 non-null  object
 2   date_publish  11242 non-null  object
 3   maintext      11242 non-null  object
 4   url           11242 non-null  object
 5   newspaper     11242 non-null  object
dtypes: object(6)
memory usage: 527.1+ KB
Standardizing the date format, and reformatting the authors column, in case they are needed for future analysis.¶
In [31]:
# Standardize date format
df['date_publish'] = pd.to_datetime(df['date_publish'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
In [32]:
# Function to format authors
def format_authors(authors):
    if not isinstance(authors, str):
        return authors  # Skip non-string values

    # Convert to title case
    formatted = authors.title()

    # Normalize separators between author names
    formatted = formatted.replace(", ", "', '")   # fix the comma and quote positions
    formatted = formatted.replace(" Und", "', '") # handle 'und' as a separator
    formatted = formatted.replace(" &", "', '")   # handle '&' as a separator
    formatted = formatted.replace("''", "'")      # collapse doubled quotes

    # Remove "Von" or "von" from the start of words
    if formatted.startswith("['Von"):
        formatted = "['" + formatted[5:].strip()

    return formatted

# Apply the function to the 'authors' column
df["authors"] = df["authors"].apply(format_authors)
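Since `authors` stores stringified Python lists, `ast.literal_eval` (imported at the top but otherwise unused) can turn them into real lists if that is needed for later analysis. A sketch with a hypothetical `parse_authors` helper:

```python
import ast

def parse_authors(value):
    # Parse the stringified list; fall back to an empty list on malformed input
    if not isinstance(value, str):
        return []
    try:
        authors = ast.literal_eval(value)
    except (ValueError, SyntaxError):
        return []
    return [a.strip() for a in authors]

print(parse_authors("['Max Schrader', ' Jonas Ortmann']"))  # ['Max Schrader', 'Jonas Ortmann']
```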
In [33]:
df.head(15)
Out[33]:
title authors date_publish maintext url newspaper
0 RB Leipzig: Es wurde über Instagram öffentlich... ['Yvonne Gabriel'] 2024-01-01 22:36:21 Stammplatz weg, ungewisse Zukunft. Sportlich l... https://www.bild.de/sport/fussball/rb-leipzig/... bild.de
1 Apple Watch 8 mit Alu-Sportband beim BILDplus ... ['Bild'] 2024-01-02 08:06:51 Frohes Neues liebe BILDplus-User! Damit der St... https://www.bild.de/gewinnspiele/bildplus-akti... bild.de
2 Angelique Kerber: Kaum zu glauben, wer in der ... ['Sebastian Kayser'] 2024-01-01 22:43:31 Tennis-Start ins Neue Jahr! In Australien spie... https://www.bild.de/sport/mehr-sport/tennis/an... bild.de
3 FC Bayern: Ungewohntes Programm! So starten di... ['Nico Linner'] 2024-01-01 22:15:11 Dieser Start in 2024 hat es in sich!\r\nHeute ... https://www.bild.de/sport/fussball/bayern-muen... bild.de
4 Putin gefährlich wie nie: Brauchen wir jetzt s... ['Sebastian Geisler'] 2024-01-01 21:53:49 Muss Europa JETZT drastisch aufrüsten?\r\nKrem... https://www.bild.de/politik/ausland/politik-au... bild.de
5 CDU-Generalsekretär Carsten Linnemann fordert:... ['Bild'] 2024-01-01 21:50:22 Klare Worte von Carsten Linnemann (46)!\r\nDer... https://www.bild.de/politik/inland/politik-inl... bild.de
6 Darts-WM: Wunderkind Luke Littler macht seine ... ['Max Schrader', ' Jonas Ortmann'] 2024-01-01 21:33:47 Luke Littler (16) steht sensationell im Halbfi... https://www.bild.de/sport/mehr-sport/darts/dar... bild.de
7 Praktikum bei Preußen: Daube macht den Sportfa... ['Joachim Schuth'] 2024-01-01 20:29:11 Vor genau 233 Tagen hatte Dennis Daube (34) se... https://www.bild.de/sport/fussball/fussball/pr... bild.de
8 Gunnar Schupelius - Mein Ärger: Die Regierung ... ['Gunnar Schupelius'] 2024-01-01 20:27:57 BERLIN – Die Klimapolitik ist einem Automatism... https://www.bild.de/regional/berlin/berlin-akt... bild.de
9 Darts-WM: Wunderkind Luke Littler steht im Hal... ['Max Schrader', ' Jonas Ortmann'] 2024-01-01 20:21:05 Er macht die Sport-Welt sprachlos.\r\nDer 16-j... https://www.bild.de/sport/mehr-sport/darts/dar... bild.de
10 Carmen Nebel: Jetzt hat sich auch ihr Sohn (38... ['Iris Rosendahl'] 2024-01-01 20:18:11 Süßer die Hochzeitsglocken bald klingen …\r\nE... https://www.bild.de/unterhaltung/leute/leute/c... bild.de
11 Silvester-Bilanz: 54 verletzte Polizisten, 30 ... ['Axel Lier'] 2024-01-01 19:46:53 Berlin – Ein Großaufgebot der Polizei verhinde... https://www.bild.de/regional/berlin/berlin-akt... bild.de
12 Betrug mit Bitcoins: Rentner verlor seine ganz... ['Karsten Kehr'] 2024-01-01 19:58:31 Buttstädt (Thüringen) – „Bei diesen Summen wir... https://www.bild.de/regional/thueringen/thueri... bild.de
13 Bürgergeld statt Krieg: 200 000 potentielle Uk... ['Marc Oliver Rühle'] 2024-01-01 19:32:18 Boss von Milliarden-Konzern vor Luxus-Hotel in... https://www.bild.de/politik/inland/politik-inl... bild.de
14 Nationalmannschaft: Deutet Toni Kroos hier DFB... ['Kilian Gaffrey'] 2024-01-01 17:52:20 Welche geheime Botschaft steckt hinter diesem ... https://www.bild.de/sport/fussball/nationalman... bild.de

German sentiment classification with BERT: the oliverguhr/german-sentiment-bert model from Hugging Face adds a sentiment value to each news article in the dataframe.¶

Results are saved to a CSV file, because the classification process takes too long to rerun in every work session.¶
In [ ]:
# 1. Load the file
df = pd.read_csv(file_path)
###df = df.head(100)  # Use only the first 100 rows for a quick test

# 2. Load the sentiment analyzer
sentiment_analyzer = pipeline("sentiment-analysis", model="oliverguhr/german-sentiment-bert")

# 3. Apply sentiment analysis to the 'maintext' column
#    (guard against NaN maintext, since the raw CSV is reloaded here;
#     truncate to 512 characters as a rough bound for the model's input limit)
df['sentiment'] = df['maintext'].apply(
    lambda x: sentiment_analyzer(x[:512])[0]['label'] if isinstance(x, str) else None
)

# 4. Inspect the results
print(df[['maintext', 'sentiment']].head())

# 5. Save the file with the results
df.to_csv("out_with_sentiment_2.csv", index=False)
print("File saved as 'out_with_sentiment_2.csv'.")
Dataframe with now added Sentiment Value¶
In [67]:
df_sentiment = pd.read_csv("out_with_sentiment_2.csv")
In [68]:
df_sentiment.head(15)
Out[68]:
title authors date_publish maintext url newspaper sentiment
0 RB Leipzig: Es wurde über Instagram öffentlich... ['Yvonne Gabriel'] 2024-01-01 22:36:21 Stammplatz weg, ungewisse Zukunft. Sportlich l... https://www.bild.de/sport/fussball/rb-leipzig/... bild.de negative
1 Apple Watch 8 mit Alu-Sportband beim BILDplus ... ['Bild'] 2024-01-02 08:06:51 Frohes Neues liebe BILDplus-User! Damit der St... https://www.bild.de/gewinnspiele/bildplus-akti... bild.de neutral
2 Angelique Kerber: Kaum zu glauben, wer in der ... ['Sebastian Kayser'] 2024-01-01 22:43:31 Tennis-Start ins Neue Jahr! In Australien spie... https://www.bild.de/sport/mehr-sport/tennis/an... bild.de negative
3 FC Bayern: Ungewohntes Programm! So starten di... ['Nico Linner'] 2024-01-01 22:15:11 Dieser Start in 2024 hat es in sich!\nHeute ha... https://www.bild.de/sport/fussball/bayern-muen... bild.de negative
4 Putin gefährlich wie nie: Brauchen wir jetzt s... ['Sebastian Geisler'] 2024-01-01 21:53:49 Muss Europa JETZT drastisch aufrüsten?\nKreml-... https://www.bild.de/politik/ausland/politik-au... bild.de neutral
5 CDU-Generalsekretär Carsten Linnemann fordert:... ['Bild'] 2024-01-01 21:50:22 Klare Worte von Carsten Linnemann (46)!\nDer C... https://www.bild.de/politik/inland/politik-inl... bild.de neutral
6 Darts-WM: Wunderkind Luke Littler macht seine ... ['Max Schrader', ' Jonas Ortmann'] 2024-01-01 21:33:47 Luke Littler (16) steht sensationell im Halbfi... https://www.bild.de/sport/mehr-sport/darts/dar... bild.de negative
7 Praktikum bei Preußen: Daube macht den Sportfa... ['Joachim Schuth'] 2024-01-01 20:29:11 Vor genau 233 Tagen hatte Dennis Daube (34) se... https://www.bild.de/sport/fussball/fussball/pr... bild.de neutral
8 Gunnar Schupelius - Mein Ärger: Die Regierung ... ['Gunnar Schupelius'] 2024-01-01 20:27:57 BERLIN – Die Klimapolitik ist einem Automatism... https://www.bild.de/regional/berlin/berlin-akt... bild.de neutral
9 Darts-WM: Wunderkind Luke Littler steht im Hal... ['Max Schrader', ' Jonas Ortmann'] 2024-01-01 20:21:05 Er macht die Sport-Welt sprachlos.\nDer 16-jäh... https://www.bild.de/sport/mehr-sport/darts/dar... bild.de positive
10 Carmen Nebel: Jetzt hat sich auch ihr Sohn (38... ['Iris Rosendahl'] 2024-01-01 20:18:11 Süßer die Hochzeitsglocken bald klingen …\nErs... https://www.bild.de/unterhaltung/leute/leute/c... bild.de neutral
11 Silvester-Bilanz: 54 verletzte Polizisten, 30 ... ['Axel Lier'] 2024-01-01 19:46:53 Berlin – Ein Großaufgebot der Polizei verhinde... https://www.bild.de/regional/berlin/berlin-akt... bild.de neutral
12 Betrug mit Bitcoins: Rentner verlor seine ganz... ['Karsten Kehr'] 2024-01-01 19:58:31 Buttstädt (Thüringen) – „Bei diesen Summen wir... https://www.bild.de/regional/thueringen/thueri... bild.de negative
13 Bürgergeld statt Krieg: 200 000 potentielle Uk... ['Marc Oliver Rühle'] 2024-01-01 19:32:18 Boss von Milliarden-Konzern vor Luxus-Hotel in... https://www.bild.de/politik/inland/politik-inl... bild.de neutral
14 Nationalmannschaft: Deutet Toni Kroos hier DFB... ['Kilian Gaffrey'] 2024-01-01 17:52:20 Welche geheime Botschaft steckt hinter diesem ... https://www.bild.de/sport/fussball/nationalman... bild.de neutral

Entity Extraction with spaCy¶

With the spaCy Python library, the entities of each news article are extracted and added as an extra column to the dataframe.¶
In [24]:
# Load language model
nlp = spacy.load("de_core_news_md")
In [36]:
# Function to extract entities from text
def extract_entities(text):
    doc = nlp(text)  # Process text with spaCy
    entities = [(ent.text, ent.label_) for ent in doc.ents]  # Extract entities and labels
    return entities

# Function to process data with progress tracking for entity extraction
def process_with_progress(df):
    start_time = time.time()  # Track the start time
    all_entities = []

    # Initialize tqdm with time tracking for each entry
    with tqdm(total=len(df), desc="Processing Text", ncols=100) as pbar:
        for index, row in df.iterrows():  # Use iterrows() to iterate over DataFrame rows
            text = row['maintext']
            # Extract entities for each row
            entities = extract_entities(text)

            # Append results
            all_entities.append(entities)

            # Update progress bar and calculate time left
            elapsed_time = time.time() - start_time
            estimated_time_left = (elapsed_time / (index + 1)) * (len(df) - (index + 1))
            pbar.set_postfix({"Time Left": f"{estimated_time_left / 60:.2f} min"})
            pbar.update(1)

    # Create a new dataframe copy called df_pipeline
    df_pipeline = df.copy()

    # Add the results to the dataframe
    df_pipeline['entities'] = all_entities

    return df_pipeline

# Apply the function to process the dataframe with progress tracking
df_pipeline = process_with_progress(df)
Processing Text: 100%|████████████████████| 11242/11242 [12:50<00:00, 14.60it/s, Time Left=0.00 min]
In [69]:
df_sentiment = df_sentiment.join(df_pipeline[["entities"]])
Distribution of Sentiment Values.¶
In [70]:
if 'sentiment' in df_sentiment.columns:
    sentiment_counts = df_sentiment['sentiment'].explode().value_counts()
    print(sentiment_counts)
else:
    print("The 'sentiment' column does not exist in df_sentiment.")
sentiment
neutral     8328
negative    2643
positive     271
Name: count, dtype: int64
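From the counts above, the class shares work out to roughly three quarters neutral:

```python
import pandas as pd

# Counts taken from the value_counts() output above
counts = pd.Series({'neutral': 8328, 'negative': 2643, 'positive': 271})
shares = (counts / counts.sum() * 100).round(1)
print(shares.to_dict())  # {'neutral': 74.1, 'negative': 23.5, 'positive': 2.4}
```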

Extraction of Entities mapped to Sentiment Value¶

In [71]:
entity_sentiment_mapping = df_sentiment[['entities', 'sentiment']].copy()
In [72]:
entity_sentiment_mapping.head()
Out[72]:
entities sentiment
0 [(Timo Werner, PER), (Paula Lense, PER), (Dres... negative
1 [(Frohes, PER), (Neues liebe BILDplus-User!, M... neutral
2 [(Tennis-Start ins Neue Jahr, MISC), (Australi... negative
3 [(FC Bayern, ORG), (Harry Kane, PER), (Thomas ... negative
4 [(Kreml-Tyrann Wladimir Putin, PER), (Ukraine,... neutral

Mapping each entity to the sentiment value of its article.¶

Assuming that the article's sentiment may be assigned to each of its entities. Needed for future analysis.¶
In [41]:
# Create an empty list to store the new rows
new_rows = []

# Iterate over the rows of the original DataFrame
for index, row in entity_sentiment_mapping.iterrows():
    # Check that the 'entities' column holds a list (isinstance already excludes None/NaN)
    if isinstance(row['entities'], list):
        # Iterate over the entities in the list (duplicates kept so mention counts are preserved)
        for entity in row['entities']:
            # Create a new row with the entity and the original sentiment
            new_row = {
                'entity': entity,
                'sentiment': row['sentiment']
            }
            new_rows.append(new_row)

# Create a new DataFrame from the new rows
entity_sentiment_mapping = pd.DataFrame(new_rows)
entity_sentiment_mapping
Out[41]:
entity sentiment
0 (Timo Werner, PER) negative
1 (Paula Lense, PER) negative
2 (Dresdnerin, LOC) negative
3 (Dieses Jahr, MISC) negative
4 (Sieben Jahre, MISC) negative
... ... ...
428877 (Steffen Bilger, PER) negative
428878 (BaWü, ORG) negative
428879 (BILD, ORG) negative
428880 (Der Vorfall, MISC) negative
428881 (Deutschland, LOC) negative

428882 rows × 2 columns
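The iterrows loop above can also be written with pandas' `DataFrame.explode`, which expands each list element into its own row. A sketch on toy data (not the project dataframe):

```python
import pandas as pd

toy = pd.DataFrame({
    'entities': [[('Timo Werner', 'PER'), ('Dresdnerin', 'LOC')], [('Iran', 'LOC')]],
    'sentiment': ['negative', 'neutral'],
})

# explode() repeats the row's sentiment for every entity in its list
exploded = (toy.explode('entities')
               .rename(columns={'entities': 'entity'})
               .reset_index(drop=True))
print(len(exploded))  # 3
```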

Collapsing repeated rows into a count per (entity, sentiment) pair, so the count represents the weight of each value.¶

In [42]:
entity_sentiment_mapping = entity_sentiment_mapping.groupby(['entity', 'sentiment']).size().reset_index(name='count')
entity_sentiment_mapping = entity_sentiment_mapping.sort_values(by='count', ascending=False)
entity_sentiment_mapping.head()
Out[42]:
entity sentiment count
9725 (BILD, ORG) neutral 8589
111646 (Scholz, PER) neutral 4772
18596 (Bundestag, ORG) neutral 4706
30195 (Deutschland, LOC) neutral 3120
62520 (Iran, LOC) neutral 2326
Distribution of sentiment values (based on entity mentions)¶
In [43]:
sentiment_counts = entity_sentiment_mapping.groupby('sentiment')['count'].sum()
sentiment_counts
Out[43]:
count
sentiment
negative 94320
neutral 324923
positive 9639
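As a quick consistency check, the three sums add up to the 428,882 entity mentions created earlier, with negative mentions at about 22%:

```python
# Sums taken from the groupby output above
counts = {'negative': 94320, 'neutral': 324923, 'positive': 9639}
total = sum(counts.values())
print(total)  # 428882
print(round(counts['negative'] / total * 100, 1))  # 22.0
```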

Visualization of Results¶

In [44]:
import matplotlib.pyplot as plt
import seaborn as sns

# Define the custom color palette for each sentiment
colors = {'positive': '#66c2a5', 'negative': '#fc8d62', 'neutral': '#8da0cb'}

# Group the data for visualization
sentiment_entity_counts = entity_sentiment_mapping.groupby(['sentiment', 'entity'])['count'].sum().reset_index()

# Create a plot for each sentiment value
for sentiment in sentiment_entity_counts['sentiment'].unique():
    # Filter data for the current sentiment
    sentiment_data = sentiment_entity_counts[sentiment_entity_counts['sentiment'] == sentiment]

    # Sort the entities by count
    sentiment_data = sentiment_data.sort_values('count', ascending=False).head(20)  # Limit to top 20 for clarity

    plt.figure(figsize=(12, 6))  # Adjust figure size for better visualization

    # Convert 'entity' column to string type before plotting
    sentiment_data['entity'] = sentiment_data['entity'].astype(str)

    # Create the bar plot with custom color
    sns.barplot(
        x='entity',
        y='count',
        data=sentiment_data,
        color=colors[sentiment]  # Use the sentiment-specific color
    )

    # Add title and labels with enhanced formatting
    plt.title(f'Top Entities for Sentiment: {sentiment}', fontsize=16, pad=20)
    plt.xlabel('Entity', fontsize=14)
    plt.ylabel('Count', fontsize=14)

    # Rotate x-axis labels for better readability
    plt.xticks(rotation=45, ha='right', fontsize=12)
    plt.yticks(fontsize=12)

    # Adjust layout to prevent overlap
    plt.tight_layout()

    # Show the plot
    plt.show()
[Figure: bar plots of the top 20 entities for each of the three sentiment categories]
In [45]:
import matplotlib.pyplot as plt
import seaborn as sns

# Define the custom color palette
colors = ['#66c2a5', '#fc8d62', '#8da0cb']

# Group sentiment counts and reset index
sentiment_counts = entity_sentiment_mapping.groupby('sentiment')['count'].sum().reset_index()

# Bar plot: Total Count per Sentiment Category
plt.figure(figsize=(8, 6))
sns.barplot(
    x='sentiment',
    y='count',
    data=sentiment_counts,
    hue='sentiment',   # assign hue to avoid seaborn's palette deprecation warning
    palette=colors,    # apply custom colors
    legend=False
)
plt.title('Total Count per Sentiment Category', fontsize=16, pad=20)
plt.xlabel('Sentiment Category', fontsize=14)
plt.ylabel('Sum of Count', fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.tight_layout()
plt.show()

# Scatter plot: Sentiment vs. Count (Individual Entity Counts)
plt.figure(figsize=(8, 6))
sns.scatterplot(
    x='sentiment',
    y='count',
    data=entity_sentiment_mapping,
    hue='sentiment',
    palette=colors,  # Apply custom colors
    s=100,  # Set marker size for better visibility
    alpha=0.8  # Slight transparency for overlapping points
)
plt.title('Sentiment vs. Count (Individual Entity Counts)', fontsize=16, pad=20)
plt.xlabel('Sentiment Category', fontsize=14)
plt.ylabel('Count', fontsize=14)
plt.legend(title='Sentiment', fontsize=12, title_fontsize=14, loc='upper right')
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.tight_layout()
plt.show()
[Figure: bar plot of total count per sentiment category]
[Figure: scatter plot of sentiment vs. individual entity counts]
In [46]:
import matplotlib.pyplot as plt
import seaborn as sns

# Define the custom color palette
colors = ['#66c2a5', '#fc8d62', '#8da0cb']  # Matching the color palette

# Create the bar plot
plt.figure(figsize=(8, 6))
sns.barplot(
    x='sentiment',
    y='count',
    data=sentiment_counts,
    hue='sentiment',   # assign hue to avoid seaborn's palette deprecation warning
    palette=colors,    # apply custom colors
    legend=False
)

# Set y-axis to log scale
plt.yscale('log')

# Add title and axis labels with enhanced formatting
plt.title('Total Count per Sentiment Category (Log Scale)', fontsize=16, pad=20)
plt.xlabel('Sentiment Category', fontsize=14)
plt.ylabel('Sum of Count (Log Scale)', fontsize=14)

# Adjust tick label sizes
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

# Improve layout
plt.tight_layout()

# Show the plot
plt.show()
[Figure: bar plot of total count per sentiment category, log scale]
In [47]:
import matplotlib.pyplot as plt

# Define the custom color palette
colors = ['#66c2a5', '#fc8d62', '#8da0cb']  # Matching the pie chart colors

# Group by entity and sentiment, then unstack for a stacked bar plot
entity_sentiment_counts = entity_sentiment_mapping.groupby(['entity', 'sentiment'])['count'].sum().unstack()

# Filter for entities with a large number of mentions (e.g., top 10)
top_entities = entity_sentiment_counts.sum(axis=1).nlargest(10).index
filtered_entity_sentiment_counts = entity_sentiment_counts.loc[top_entities]

# Create the stacked bar plot
ax = filtered_entity_sentiment_counts.plot(
    kind='bar',
    stacked=True,
    figsize=(12, 6),
    color=colors  # Apply custom colors
)

# Set title and labels with enhanced formatting
plt.title('Sentiment Distribution for Top Entities', fontsize=16, pad=20)
plt.xlabel('Entity', fontsize=14)
plt.ylabel('Count', fontsize=14)

# Rotate x-axis labels for better readability
plt.xticks(rotation=45, ha='right', fontsize=12)
plt.yticks(fontsize=12)

# Add a legend with a title
plt.legend(title='Sentiment', fontsize=12, title_fontsize=14, loc='upper right')

# Improve layout spacing
plt.tight_layout()

# Show the chart
plt.show()
[Figure: stacked bar chart of sentiment distribution for the top 10 entities]
In [53]:
import matplotlib.pyplot as plt
import pandas as pd

# Data
sentiment_counts = pd.DataFrame({
    'sentiment': ['positive', 'negative', 'neutral'],
    'count': [271, 2643, 8328]
})

# Create the figure
fig, ax = plt.subplots(figsize=(8, 8))  # Set figure size

# Define colors for the pie chart
colors = ['#66c2a5', '#fc8d62', '#8da0cb']  # Custom colors for better aesthetics

# Create the pie chart
wedges, texts, autotexts = ax.pie(
    sentiment_counts['count'],
    labels=sentiment_counts['sentiment'],
    autopct='%1.1f%%',
    startangle=90,
    colors=colors,
    textprops={'fontsize': 12}  # Font size for labels
)

# Add a legend showing absolute counts
ax.legend(
    wedges,
    [f"{label}: {count}" for label, count in zip(sentiment_counts['sentiment'], sentiment_counts['count'])],
    title="Sentiment Counts",
    loc="center left",
    bbox_to_anchor=(1, 0, 0.5, 1),  # Positioning the legend outside the pie chart
    fontsize=12
)

# Set title with increased font size
ax.set_title('Proportion of Sentiment Categories', fontsize=16, pad=20)

# Ensure pie is drawn as a circle
ax.axis('equal')

# Adjust layout for better spacing
plt.tight_layout()

# Show the chart
#plt.show()
plt.savefig("pieGraph.svg", format="svg")
[Figure: pie chart of sentiment category proportions]

Topic Extraction¶

In [84]:
# Define topic keywords
topic_keywords = {
    "Economy": ["Wirtschaft", "Finanzen", "Markt", "Börse", "Inflation", "Investitionen"],
    "Politics": ["Politik", "Regierung", "Partei", "Gesetz", "Minister"],
    "Climate": ["Klima", "Umwelt", "Energie", "Treibhausgase", "Klimawandel"],
    "Technology": ["Technologie", "Digitalisierung", "KI", "Cybersecurity"],
    "Sports": ["Sport", "Fußball", "Olympia", "Tor", "Mannschaft"],
    "Health": ["Gesundheit", "Medizin", "Pandemie", "Krankenhaus", "Pflege"],
    "Culture": ["Kultur", "Kino", "Musik", "Theater", "Kunst"],
    "Crime": ["Kriminalität", "Polizei", "Verbrechen", "Gericht", "Mord"],
    "Beauty": ["Schönheit", "Kleidung", "Schminken", "Make-up", "Schmuck"]
}

# Function to assign topics
def assign_topic(text):
    for topic, keywords in topic_keywords.items():
        if any(keyword in text for keyword in keywords):
            return topic
    return "other"

# Create a new dataframe with topics
df_topic_sentiment = df_sentiment[['maintext', 'sentiment']].copy()
df_topic_sentiment['topic'] = df_topic_sentiment['maintext'].apply(assign_topic)

# Inspect the new dataframe
df_topic_sentiment.head()
Out[84]:
maintext sentiment topic
0 Stammplatz weg, ungewisse Zukunft. Sportlich l... negative Sports
1 Frohes Neues liebe BILDplus-User! Damit der St... neutral Sports
2 Tennis-Start ins Neue Jahr! In Australien spie... negative Sports
3 Dieser Start in 2024 hat es in sich!\nHeute ha... negative Sports
4 Muss Europa JETZT drastisch aufrüsten?\nKreml-... neutral Economy
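Note that `assign_topic` uses plain substring matching, so a keyword like "Tor" also fires inside "Toronto". A hedged variant with regex word boundaries (the name `assign_topic_strict` and the reduced keyword dict are illustrative, not part of the notebook):

```python
import re

topic_keywords_small = {
    "Sports": ["Sport", "Fußball", "Tor"],
    "Politics": ["Politik", "Regierung"],
}

def assign_topic_strict(text):
    # \b word boundaries prevent substring hits such as "Tor" inside "Toronto"
    for topic, keywords in topic_keywords_small.items():
        if any(re.search(rf"\b{re.escape(k)}\b", text) for k in keywords):
            return topic
    return "other"

print(assign_topic_strict("Ein Wochenende in Toronto"))    # other
print(assign_topic_strict("Ein Tor in der 90. Minute"))    # Sports
```

As in the original function, the first matching topic wins, so the dict order acts as a priority list.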
In [90]:
import pandas as pd

# Create a new dataframe with the desired structure
df_topic_sentiment_pivot = pd.pivot_table(df_topic_sentiment,
                                          index='topic',
                                          columns='sentiment',
                                          values='maintext',
                                          aggfunc='count',
                                          fill_value=0) #fill_value fills NaN with 0

# Reset the index to make 'topic' a regular column
df_topic_sentiment_pivot = df_topic_sentiment_pivot.reset_index()

# Display the resulting DataFrame
df_topic_sentiment_pivot
Out[90]:
sentiment topic negative neutral positive
0 Beauty 26 65 4
1 Climate 144 394 8
2 Crime 178 808 3
3 Culture 100 413 26
4 Economy 256 895 14
5 Health 136 437 9
6 Politics 306 1337 9
7 Sports 885 2392 120
8 Technology 47 97 3
9 other 565 1490 75
In [95]:
import matplotlib.pyplot as plt
import pandas as pd

# Assuming df_topic_sentiment_pivot is already defined
# Calculate total sentiment counts for each topic
df_topic_sentiment_pivot['total'] = df_topic_sentiment_pivot['negative'] + df_topic_sentiment_pivot['neutral'] + df_topic_sentiment_pivot['positive']

# Calculate percentages for each sentiment
df_topic_sentiment_pivot['negative_percent'] = (df_topic_sentiment_pivot['negative'] / df_topic_sentiment_pivot['total']) * 100
df_topic_sentiment_pivot['neutral_percent'] = (df_topic_sentiment_pivot['neutral'] / df_topic_sentiment_pivot['total']) * 100
df_topic_sentiment_pivot['positive_percent'] = (df_topic_sentiment_pivot['positive'] / df_topic_sentiment_pivot['total']) * 100

# Define colors for better aesthetics
colors = ['#fc8d62', '#8da0cb', '#66c2a5']

# Create the figure
fig, ax = plt.subplots(figsize=(12, 8))  # Set figure size

# Bar width and index for grouping
bar_width = 0.25
index = range(len(df_topic_sentiment_pivot))

# Create bars for each sentiment
bars_negative = ax.bar(index, df_topic_sentiment_pivot['negative_percent'], bar_width, label='Negative', color=colors[0])
bars_neutral = ax.bar([i + bar_width for i in index], df_topic_sentiment_pivot['neutral_percent'], bar_width, label='Neutral', color=colors[1])
bars_positive = ax.bar([i + 2 * bar_width for i in index], df_topic_sentiment_pivot['positive_percent'], bar_width, label='Positive', color=colors[2])

# Set axis labels and title
ax.set_xlabel('Topic', fontsize=14)
ax.set_ylabel('Sentiment Percentage (%)', fontsize=14)
ax.set_title('Sentiment Analysis per Topic', fontsize=16, pad=20)

# Set x-ticks and rotate for readability
ax.set_xticks([i + bar_width for i in index])
ax.set_xticklabels(df_topic_sentiment_pivot['topic'], rotation=45, ha='right', fontsize=12)

# Set y-axis limit to 100
ax.set_ylim(0, 100)

# Add legend
ax.legend(title="Sentiment", fontsize=12, title_fontsize=14, loc="upper left", bbox_to_anchor=(1, 1))

# Adjust layout for better spacing
plt.tight_layout()

# Show the chart
plt.savefig("barGraphPercentage.svg", format="svg")  # Save the chart as an SVG
plt.show()
[Figure: grouped bar chart of sentiment percentages per topic]
In [94]:
import matplotlib.pyplot as plt
import pandas as pd

# Assuming df_topic_sentiment_pivot is already defined
# Calculate total sentiment counts for each topic
df_topic_sentiment_pivot['total'] = df_topic_sentiment_pivot['negative'] + df_topic_sentiment_pivot['neutral'] + df_topic_sentiment_pivot['positive']

# Define colors for better aesthetics
colors = ['#fc8d62', '#8da0cb', '#66c2a5']

# Create the figure
fig, ax = plt.subplots(figsize=(12, 8))  # Set figure size

# Bar width and index for grouping
bar_width = 0.25
index = range(len(df_topic_sentiment_pivot))

# Create bars for each sentiment
bars_negative = ax.bar(index, df_topic_sentiment_pivot['negative'], bar_width, label='Negative', color=colors[0])
bars_neutral = ax.bar([i + bar_width for i in index], df_topic_sentiment_pivot['neutral'], bar_width, label='Neutral', color=colors[1])
bars_positive = ax.bar([i + 2 * bar_width for i in index], df_topic_sentiment_pivot['positive'], bar_width, label='Positive', color=colors[2])

# Set axis labels and title
ax.set_xlabel('Topic', fontsize=14)
ax.set_ylabel('Sentiment Counts', fontsize=14)
ax.set_title('Sentiment Analysis per Topic', fontsize=16, pad=20)

# Set x-ticks and rotate for readability
ax.set_xticks([i + bar_width for i in index])
ax.set_xticklabels(df_topic_sentiment_pivot['topic'], rotation=45, ha='right', fontsize=12)

# Add legend
ax.legend(title="Sentiment", fontsize=12, title_fontsize=14, loc="upper left", bbox_to_anchor=(1, 1))

# Adjust layout for better spacing
plt.tight_layout()

# Show the chart
plt.savefig("barGraph.svg", format="svg")  # Save the chart as an SVG
plt.show()
[Figure: grouped bar chart of sentiment counts per topic]

NER Network Analysis Section¶

What's going on here¶

These three functions are the foundations of the NER network analysis. Here's a brief explanation:

Extract_entities¶

This function is called once over the whole dataframe and extracts all of the entities present in the newspaper articles. It returns a list of entities.

Extract_entity_pairs¶

This function combines the entities into pairs, extracting what will become the edges of the graph.

Generate_network_graph¶

The main function, which builds a graph of the entities. It works as follows:

First two filters¶

A first filter is applied if an entity to analyze is given, narrowing down the entire dataframe. A second filter is then applied according to the conditions, a lambda passed as a parameter. The conditions will be specified in later functions.

Pair the entities¶

We pair the entities using the extract_entity_pairs function above, which leaves us with a list of entity pairs.
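For illustration, `itertools.combinations` pairs every entity with every later one in the list:

```python
from itertools import combinations

ents = ['Scholz', 'Bundestag', 'Berlin']
print(list(combinations(ents, 2)))
# [('Scholz', 'Bundestag'), ('Scholz', 'Berlin'), ('Bundestag', 'Berlin')]
```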

Create a new dataframe out of the list, sort the pairs in alphabetical order, then add a weight to the graph¶

This means that the weight of an edge grows the more often the relationship between the two entities is mentioned.
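A toy sketch of that weighting step: each pair is sorted into canonical (alphabetical) order so that ('B', 'A') and ('A', 'B') collapse onto the same edge, then the pairs are grouped and their counts summed:

```python
import numpy as np
import pandas as pd

pairs = [('B', 'A'), ('A', 'B'), ('C', 'A')]
edges = pd.DataFrame(pairs, columns=['source', 'target'])
# Sort each row alphabetically so reversed pairs become identical
edges = pd.DataFrame(np.sort(edges.values, axis=1), columns=edges.columns)
edges['value'] = 1
weighted = edges.groupby(['source', 'target'], as_index=False).sum()
print(weighted.values.tolist())  # [['A', 'B', 2], ['A', 'C', 1]]
```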

Visualize the graph¶

First we create a networkx graph; for a better interactive experience we visualize it with the pyvis library.

Add degree centrality¶

To get better insight into how connected a node is, we include degree centrality. The resulting bar plot is sorted starting from the most connected entity.
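Degree centrality in networkx is simply a node's degree divided by n - 1, so a node connected to every other node scores 1.0. A tiny sketch:

```python
import networkx as nx

G = nx.Graph()
G.add_edges_from([('A', 'B'), ('A', 'C'), ('B', 'C'), ('A', 'D')])
dc = nx.degree_centrality(G)
# 'A' touches all 3 other nodes: centrality = 3 / (4 - 1)
print(dc['A'])
```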

In [100]:
def extract_entities(text):
    doc = nlp(text)
    sent_entities=[]
    entity_labels = ['GPE', 'ORG', 'PER', 'LOC']  # focus on these entity types (note: the German spaCy model mainly emits PER, ORG, LOC, MISC)
    for sent in doc.sents:
        sent_entities += [ent.text for ent in sent.ents if ent.label_ in entity_labels]
    return sent_entities


def extract_entity_pairs(sent_entities):
    entity_pairs = list(combinations(sent_entities, 2))
    return entity_pairs


def generate_network_graph(conditions, entity=None):
    filtered_df = df
    if entity is not None:
        filtered_df = df[df['entities'].apply(lambda x: entity in x if isinstance(x, list) else False)]
    gen = filtered_df.loc[filtered_df.apply(conditions, axis=1), 'entities']

    entity_pairs = []
    for entities in gen:
        if isinstance(entities, list):  # Make sure we handle lists correctly
            entity_pairs += extract_entity_pairs(entities)


    entity_pairs_df = pd.DataFrame(entity_pairs, columns=["source","target"])
    entity_pairs_df = pd.DataFrame(np.sort(entity_pairs_df.values, axis = 1), columns = entity_pairs_df.columns)
    # Let's add the value column so to add weight
    entity_pairs_df["value"] = 1
    entity_pairs_df = entity_pairs_df.groupby(["source","target"], sort=False, as_index=False).sum()

    # Create a graph from a pandas dataframe
    G = nx.from_pandas_edgelist(entity_pairs_df,
                                source = "source",
                                target = "target",
                                edge_attr = "value",
                                create_using = nx.Graph())


    net = Network(notebook = True, width="1000px", height="700px", bgcolor='#222222', font_color='white', cdn_resources='remote', select_menu=True)

    node_degree = dict(G.degree)

    #Setting up node size attribute
    nx.set_node_attributes(G, node_degree, 'size')

    net.from_nx(G)
    net.show("mygraph.html")  # show() also writes mygraph.html to disk

    # Degree centrality highlights the most connected (most talked-about) entities

    degree_dict = nx.degree_centrality(G)
    degree_df = pd.DataFrame.from_dict(degree_dict, orient='index', columns=['centrality'])
    degree_df.sort_values('centrality', ascending=False)[0:9].plot(kind="bar")

The get_network and get_entity functions¶

These functions help us retrieve information more easily. Here we specify the conditions used in the generate_network_graph function. They help us retrieve, from top to bottom:

  • A network of entities mentioned by a particular journalist;
  • A network of entities mentioned in a span of time;
  • A network of entities mentioned in a span of time, with an additional entity filter on top;
  • A network of entities related to a particular topic.

The last function doesn't generate a network graph; instead, it plots how many times an entity was mentioned per week.
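The weekly counting inside get_entity_over_time relies on pandas' Grouper; here is a minimal sketch with invented dates:

```python
import pandas as pd

# Three hypothetical mentions spread over two calendar weeks
mentions = pd.DataFrame({
    "date_publish": pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-10"])
})

# freq='W' buckets rows into weeks ending on Sunday
weekly_counts = mentions.groupby(pd.Grouper(key="date_publish", freq="W")).size()
print(weekly_counts)
# → week ending 2024-01-07: 2 mentions, week ending 2024-01-14: 1 mention
```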

In [109]:
from matplotlib.ticker import MaxNLocator  # needed for the integer y-axis ticks below

df_network = df.copy()
def get_network_by_author(author):
    conditions = lambda row: author in row['authors']
    generate_network_graph(conditions)

def get_network_by_date(date_from= None, date_to=None ):
    conditions = lambda row: date_from <= row['date_publish'] <= date_to
    generate_network_graph(conditions)

def get_entity_by_date(date_from = None, date_to = None, entity = None):
    conditions = lambda row: date_from <= row['date_publish'] <= date_to
    generate_network_graph(conditions, entity)

def get_network_by_topic(topic):
    conditions = lambda row: topic in row['topic']
    generate_network_graph(conditions)

def get_entity_over_time(entity):
    filtered_df = df_network[df_network['entities'].apply(lambda x: entity in x if isinstance(x, list) else False)]

    weekly_counts = filtered_df.groupby(pd.Grouper(key='date_publish', freq='W')).size()

    # Plot the data
    plt.figure(figsize=(10, 6))
    weekly_counts.plot(kind='line', marker='o', label=entity, color='blue')
    plt.gca().yaxis.set_major_locator(MaxNLocator(integer=True))


    # Add labels and title
    plt.title(f"Occurrences of {entity} Over Time", fontsize=16)
    plt.xlabel("Date", fontsize=14)
    plt.ylabel("Count", fontsize=14)
    plt.grid(True)
    plt.legend()

    return filtered_df
In [117]:
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.ticker import MaxNLocator

# Define the custom color palette for consistency
custom_colors = ['#66c2a5', '#fc8d62', '#8da0cb']

df_network = df.copy()

def get_network_by_author(author):
    conditions = lambda row: author in row['authors']
    generate_network_graph(conditions)

def get_network_by_date(date_from=None, date_to=None):
    conditions = lambda row: date_from <= row['date_publish'] <= date_to
    generate_network_graph(conditions)

def get_entity_by_date(date_from=None, date_to=None, entity=None):
    conditions = lambda row: date_from <= row['date_publish'] <= date_to
    generate_network_graph(conditions, entity)

def get_network_by_topic(topic):
    conditions = lambda row: topic in row['topic']
    generate_network_graph(conditions)

def get_entity_over_time(entity):
    filtered_df = df_network[df_network['entities'].apply(lambda x: entity in x if isinstance(x, list) else False)]

    weekly_counts = filtered_df.groupby(pd.Grouper(key='date_publish', freq='W')).size()

    # Plot the data
    plt.figure(figsize=(10, 6))
    weekly_counts.plot(kind='line', marker='o', label=entity, color=custom_colors[0])  # Use custom palette for line color
    plt.gca().yaxis.set_major_locator(MaxNLocator(integer=True))

    # Add labels and title
    plt.title(f"Occurrences of {entity} Over Time", fontsize=16)
    plt.xlabel("Date", fontsize=14)
    plt.ylabel("Count", fontsize=14)
    plt.grid(True, linestyle='--', alpha=0.7)  # Match grid style with the pie chart
    plt.legend()

    # Save the plot as SVG
    plt.tight_layout()
    plt.savefig(f"{entity}_occurrences_over_time.svg", format="svg")

    return filtered_df

As mentioned before, we extract every entity present in the dataframe once; this step is expensive and only needs to run a single time.

In [110]:
df['entities'] = df['maintext'].apply(extract_entities)

Now, let's say we want to research Olaf Scholz: how much Bild talked about him, when, and in connection with whom. We can start our analysis by looking at how often he was mentioned in articles over time.

In [113]:
df_network['entities'] = df['entities']
In [120]:
get_entity_over_time('Scholz')
Out[120]:
title authors date_publish maintext url newspaper topic entities
60 DFB: Barbie-Pink! Erstes Foto von unserem EM-T... ['Marcel Graus'] 2024-01-02 21:20:13 Scholz im Kreuzverhör live bei BILD: Jetzt gri... https://www.bild.de/sport/fussball/nationalman... bild.de other [Scholz, BILD, Bundestag, Scholz, BILD, Bundes...
87 Pistorius statt Scholz? Wildes Gerücht um Kanz... ['Bild'] 2024-01-02 14:09:35 Katerstimmung bei der SPD beim Blick auf die U... https://www.bild.de/politik/inland/politik-inl... bild.de Economy [SPD, INSA-Umfrage, BILD, Union, Olaf Scholz, ...
96 Scholz’ Deich-Besuch: Wie gerecht ist die Krit... ['Hans-Jörg Vehlewald', ' Stefanie Walter'] 2024-01-02 11:29:59 Es gibt Stimmungslagen, da kann ein Regierungs... https://www.bild.de/politik/inland/politik-inl... bild.de Politics [Olaf Scholz, Union, Kanzler-Image, Scholz, Sc...
114 Bundesliga-Star liebt Berliner Model ['Yvonne Gabriel'] 2024-01-02 02:56:35 Scholz im Kreuzverhör live bei BILD: Jetzt gri... https://www.bild.de/sport/fussball/fussball/sp... bild.de Sports [Scholz, BILD, Bundestag, Scholz, BILD, Bundes...
118 Jetzt auch Grünen-Chefin in der Flut: Macht RI... ['Daniel Puskepeleitis', 'Hans-Jörg Vehlewald'... 2024-01-03 22:02:29 Nach dem bräsigen Silvester-Flut-Trip von Kanz... https://www.bild.de/politik/inland/politik-inl... bild.de Politics [Olaf Scholz, Verden, Aller, Halbschuhen, Rica...
... ... ... ... ... ... ... ... ...
11128 Ukraine-Krieg: Scholz erlaubt Einsatz deutsche... ['Angelika Hellemann'] 2024-05-31 13:21:19 Jetzt ist die Waffenwende von Kanzler Olaf Sch... https://www.bild.de/politik/krieg-in-der-ukrai... bild.de Politics [Olaf Scholz, SPD, Ukraine, Russland, Charkiw,...
11140 ZDF: Shakuntala Banerjee bekommt Politikchef-J... ['Catharina Steiner', 'Tanja May'] 2024-05-31 11:53:17 Jetzt wird SIE die mächtigste Polit-Journalist... https://www.bild.de/unterhaltung/stars-und-leu... bild.de Politics [SIE, Fernsehen!, Matthias Fornoff, Belästigun...
11175 DIESE Briefe nicht beantwortet: Scholz-Boykott... ['Sebastian Geisler'] 2024-05-31 07:13:33 In Afrika wächst der Ärger über die Bundesregi... https://www.bild.de/politik/ausland-und-intern... bild.de Politics [Afrika, Botswanas, Mokgweetsi Masisi, Olaf Sc...
11185 Gabriel bei Illner: Wir müssen die Russen nied... ['Josef Nyary'] 2024-05-30 23:53:01 Wahrheit und Klarheit! Der frühere SPD-Chef, V... https://www.bild.de/politik/inland/gabriel-bei... bild.de Politics [SPD-Chef, Sigmar Gabriel, Maybrit Illner, Ola...
11187 Annalena Baerbock: Mega-Gerücht um Top-Job in ... ['Janne Hoppe'] 2024-06-01 21:18:23 Die Gerüchteküche in Brüssel brodelt! Plötzlic... https://www.bild.de/politik/ausland-und-intern... bild.de Politics [Brüssel, Ursula von der Leyen, CDU, EU-Kommis...

615 rows × 8 columns

[Image output: weekly occurrences of "Scholz" line plot]

We notice that the counts are fairly stable at around 10-20 mentions per week until we reach a peak from the end of February until mid-March. Now let's search for the entities relevant around him in that span of time.

In [115]:
start = pd.Timestamp(2024, 2,20)
end = pd.Timestamp(2024, 3, 21)
get_entity_by_date(start, end, 'Scholz')
mygraph.html
[Image output: pyvis network graph and degree-centrality bar chart]

Looking at the node degree centrality, we see that the most frequently named entities (apart from Scholz himself, since graph nodes can point to themselves, and the newspaper, both of which we exclude from our considerations) are all related to politics, as expected.

What we get from this is that there may have been events involving these entities that Bild covered repeatedly. For example, on February 16, 2024 (when the curve starts to rise), Chancellor Scholz met with Ukrainian President Zelensky in Berlin, and the two leaders signed a bilateral agreement on long-term support for Ukraine.

This is just one example of why the newspaper kept reporting on him during that period.

In [2]:
!python -m spacy download de_core_news_md
Collecting de-core-news-md==3.7.0
  Downloading https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.7.0/de_core_news_md-3.7.0-py3-none-any.whl (44.4 MB)
Installing collected packages: de-core-news-md
Successfully installed de-core-news-md-3.7.0
✔ Download and installation successful
You can now load the package via spacy.load('de_core_news_md')
⚠ Restart to reload dependencies
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
In [3]:
!pip install wordcloud
Requirement already satisfied: wordcloud in /usr/local/lib/python3.11/dist-packages (1.9.4)
In [4]:
!pip install datasets
Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Installing collected packages: xxhash, fsspec, dill, multiprocess, datasets
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.
Successfully installed datasets-3.2.0 dill-0.3.8 fsspec-2024.9.0 multiprocess-0.70.16 xxhash-3.5.0
In [5]:
!pip install datasets
Requirement already satisfied: datasets in /usr/local/lib/python3.11/dist-packages (3.2.0)
In [6]:
!pip install germansentiment
Collecting germansentiment
  Downloading germansentiment-1.1.0-py3-none-any.whl.metadata (4.4 kB)
Downloading germansentiment-1.1.0-py3-none-any.whl (5.8 kB)
Installing collected packages: germansentiment
Successfully installed germansentiment-1.1.0
In [7]:
!pip install pyvis
Collecting pyvis
  Downloading pyvis-0.3.2-py3-none-any.whl.metadata (1.7 kB)
Collecting jedi>=0.16 (from ipython>=5.3.0->pyvis)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading pyvis-0.3.2-py3-none-any.whl (756 kB)
Downloading jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)
Installing collected packages: jedi, pyvis
Successfully installed jedi-0.19.2 pyvis-0.3.2
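With `networkx` and `pyvis` installed, the co-occurrence network used later in the notebook can be built by treating every unordered pair of entities that appear in the same article as a weighted edge. A minimal sketch of that idea, using a small hypothetical list of per-article entity lists (the names below are illustrative, not taken from the dataset):

```python
from itertools import combinations
import networkx as nx

# Hypothetical example data: one list of extracted entities per article
articles_entities = [
    ["Scholz", "Habeck", "Lindner"],
    ["Scholz", "Merz"],
    ["Habeck", "Lindner"],
]

G = nx.Graph()
for entities in articles_entities:
    # Every unordered pair of distinct entities in the same article co-occurs;
    # repeated co-occurrences across articles increment the edge weight.
    for a, b in combinations(sorted(set(entities)), 2):
        if G.has_edge(a, b):
            G[a][b]["weight"] += 1
        else:
            G.add_edge(a, b, weight=1)

print(G.number_of_nodes(), G.number_of_edges())  # 4 4
print(G["Habeck"]["Lindner"]["weight"])  # 2
```

The resulting `nx.Graph` can then be handed to `pyvis.network.Network.from_nx()` for interactive rendering; the weighting scheme (raw counts here) is one assumption among several possible choices.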
In [ ]:
!pip list