From 1c182f1d6b5a5c91cdb4098ebd761dfa31df3aeb Mon Sep 17 00:00:00 2001
From: Alexandre <44178713+alexbelgium@users.noreply.github.com>
Date: Sat, 7 Jun 2025 14:30:10 +0200
Subject: [PATCH] Update and rename generate_stargazer_map.py to generate_map.py

---
Review revision of the original commit: generate_map.py below carries the
fixes listed here, so the commit id in the first line and the blob id on the
"index" line predate it. Leading whitespace was rebuilt from the code
structure.

 - build_choropleth(): an empty mapping failed inside zip() unpacking; it now
   raises a clear ValueError and main() skips rendering when there is no data.
 - main(): the cache is saved even when a lookup aborts the run, statistics
   count current stargazers only, and repeated logins are dropped.
 - username_to_country(): a 404 profile no longer aborts the run; geocoding
   failures are reported on stderr.
 - HTTP requests get a timeout; load_cache() stores "" for a missing country.

 .github/generate_map.py           | 202 ++++++++++++++++++++++++++++++
 .github/generate_stargazer_map.py | 104 ---------------
 2 files changed, 202 insertions(+), 104 deletions(-)
 create mode 100644 .github/generate_map.py
 delete mode 100644 .github/generate_stargazer_map.py

diff --git a/.github/generate_map.py b/.github/generate_map.py
new file mode 100644
index 000000000..cf8e251dd
--- /dev/null
+++ b/.github/generate_map.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python3
+"""
+Generate a static PNG world map colour-coded by the percentage of your
+stargazers that come from each country. The script maintains a CSV
+`.github/stargazer_countries.csv` cache so that locations are only looked
+up once (unless the country entry is blank).
+"""
+
+import csv
+import json
+import os
+import sys
+import time
+from collections import Counter
+from pathlib import Path
+
+import plotly.express as px
+import pycountry
+import requests
+from geopy.geocoders import Nominatim
+
+# ---- Configuration ----------------------------------------------------------
+REPO = os.getenv("REPO")                  # expected "owner/repo"
+GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")  # provided by workflow
+CSV_PATH = Path(".github/stargazer_countries.csv")
+PNG_PATH = Path(".github/stargazer_map.png")
+
+HEADERS = {
+    "Authorization": f"token {GITHUB_TOKEN}",
+    "Accept": "application/vnd.github.v3+json",
+}
+GEOL = Nominatim(user_agent="gh-stargazer-map")
+HTTP_TIMEOUT = 30  # seconds; requests never times out unless told to
+
+# -----------------------------------------------------------------------------
+
+
+def github_paginated(url):
+    """Yield every item of a paginated GitHub list endpoint.
+
+    Pages of 100 items are requested from page 1 onward until an empty page
+    comes back. Raises ``requests.HTTPError`` on an error status.
+    """
+    page = 1
+    while True:
+        resp = requests.get(
+            url,
+            params={"per_page": 100, "page": page},
+            headers=HEADERS,
+            timeout=HTTP_TIMEOUT,
+        )
+        resp.raise_for_status()
+        data = resp.json()
+        if not data:
+            break
+        yield from data
+        page += 1
+
+
+def fetch_stargazer_usernames():
+    """Return the login of every stargazer of ``REPO``."""
+    url = f"https://api.github.com/repos/{REPO}/stargazers"
+    return [s["login"] for s in github_paginated(url)]
+
+
+def load_cache():
+    """Return the cached ``{username: country}`` mapping ({} if no CSV yet)."""
+    if not CSV_PATH.exists():
+        return {}
+    with CSV_PATH.open(newline="", encoding="utf-8") as f:
+        # A short row yields None for its missing country field; store ""
+        # so the entry is still recognised as "needs a lookup".
+        return {
+            row["username"]: row["country"] or ""
+            for row in csv.DictReader(f)
+        }
+
+
+def save_cache(cache):
+    """Write ``cache`` to CSV_PATH as ``username,country`` rows, sorted."""
+    CSV_PATH.parent.mkdir(parents=True, exist_ok=True)
+    with CSV_PATH.open("w", newline="", encoding="utf-8") as f:
+        w = csv.writer(f)
+        w.writerow(["username", "country"])
+        for user, country in sorted(cache.items()):
+            w.writerow([user, country or ""])
+
+
+def username_to_country(login):
+    """Return readable country name or '' if unknown."""
+    resp = requests.get(
+        f"https://api.github.com/users/{login}",
+        headers=HEADERS,
+        timeout=HTTP_TIMEOUT,
+    )
+    if resp.status_code == 404:
+        # Account deleted or renamed since the stargazer list was fetched;
+        # one missing profile must not abort the whole run.
+        return ""
+    resp.raise_for_status()
+    loc = (resp.json() or {}).get("location") or ""
+    if not loc.strip():
+        return ""
+    try:
+        g = GEOL.geocode(loc, language="en", timeout=10)
+    except Exception as exc:
+        # Geocoding is best effort, but say why it failed instead of hiding it
+        print(f"Geocoding failed for {login}: {exc}", file=sys.stderr)
+        return ""
+    if not g or "display_name" not in g.raw:
+        return ""
+    # take the last comma-separated component that matches a country
+    for part in reversed(g.raw["display_name"].split(",")):
+        try:
+            return pycountry.countries.lookup(part.strip()).name
+        except LookupError:
+            continue
+    return ""
+
+
+def build_choropleth(percent_by_iso):
+    """Render ``{ISO-3 code: percentage}`` as a choropleth PNG at PNG_PATH.
+
+    Raises ``ValueError`` when ``percent_by_iso`` is empty.
+    """
+    if not percent_by_iso:
+        # zip(*{}.items()) yields nothing to unpack; fail with a clear message
+        raise ValueError("no country data to plot")
+    iso, vals = zip(*percent_by_iso.items())
+    fig = px.choropleth(
+        locations=list(iso),
+        locationmode="ISO-3",
+        color=list(vals),
+        color_continuous_scale="Greens",
+        range_color=(0, max(vals)),
+    )
+    fig.update_layout(
+        coloraxis_colorbar={"title": "% stargazers"},
+        margin=dict(l=0, r=0, t=0, b=0),
+    )
+    PNG_PATH.parent.mkdir(parents=True, exist_ok=True)
+    fig.write_image(str(PNG_PATH), scale=2)
+
+
+def main():
+    """Refresh the country cache, then render the stargazer map."""
+    if not REPO or not GITHUB_TOKEN:
+        sys.exit("REPO and GITHUB_TOKEN env vars are required")
+
+    print("Fetching stargazer list…")
+    # dict.fromkeys drops repeats (a list that shifts while it is being paged
+    # can return the same login twice) while keeping the original order.
+    users = list(dict.fromkeys(fetch_stargazer_usernames()))
+    print(f"Total stargazers: {len(users)}")
+
+    cache = load_cache()
+
+    # Determine which usernames need a lookup
+    to_lookup = [u for u in users if not cache.get(u)]
+    print(f"Need geocode for {len(to_lookup)} users")
+
+    try:
+        for i, login in enumerate(to_lookup, 1):
+            country = username_to_country(login)
+            cache[login] = country
+            print(f"{i}/{len(to_lookup)}: {login:<20} -> {country}")
+            # Nominatim polite usage
+            time.sleep(1)
+    finally:
+        # Ensure all stargazers are in cache (even those with blank location)
+        # and keep what was resolved so far if a lookup aborted the run.
+        for u in users:
+            cache.setdefault(u, "")
+        save_cache(cache)
+
+    # Build stats from the current stargazers only: the cache also remembers
+    # accounts that have since removed their star.
+    counts = Counter(cache[u] for u in users if cache[u])
+    total = sum(counts.values()) or 1
+
+    # convert to ISO-3 for plotly
+    pct_by_iso = {}
+    for c, n in counts.items():
+        try:
+            iso = pycountry.countries.lookup(c).alpha_3
+        except LookupError:
+            print("Skip unknown country:", c)
+            continue
+        # two spellings of one country share an ISO code, so accumulate
+        pct_by_iso[iso] = pct_by_iso.get(iso, 0) + n / total * 100
+
+    if not pct_by_iso:
+        print("No country data – cache saved, map not rendered")
+        return
+
+    print("Rendering PNG map…")
+    build_choropleth(pct_by_iso)
+    print("Done – files saved:", CSV_PATH, PNG_PATH, sep="\n• ")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/generate_stargazer_map.py b/.github/generate_stargazer_map.py
deleted file mode 100644
index c4cf53ea3..000000000
--- a/.github/generate_stargazer_map.py
+++ /dev/null
@@ -1,104 +0,0 @@
-import os
-import requests
-import time
-import pandas as pd
-import geopandas as gpd
-import matplotlib.pyplot as plt
-
-REPO = os.environ.get("REPO")
-GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
-HEADERS = {"Authorization": f"token {GITHUB_TOKEN}"}
-
-GITHUB_DIR = ".github"
-COUNTRY_FILE = os.path.join(GITHUB_DIR, "stargazers_countries.csv")
-PNG_FILE = os.path.join(GITHUB_DIR, "stargazer_map.png")
-SHAPEFILE = os.path.join(GITHUB_DIR, "ne_110m_admin_0_countries.shp")
-
-def get_stargazers(repo):
-    users = []
-    page = 1
-    while True:
-        url = f"https://api.github.com/repos/{repo}/stargazers?per_page=100&page={page}"
-        r = requests.get(url, headers={**HEADERS, "Accept": "application/vnd.github.v3.star+json"})
-        if r.status_code != 200:
-            raise RuntimeError(f"GitHub API error: {r.status_code}")
-        data = r.json()
-        if not data:
-            break
-        users += [user['user']['login'] for user in data]
-        page += 1
-    return users
-
-def get_user_country(login):
-    url = f"https://api.github.com/users/{login}"
-    r = requests.get(url, headers=HEADERS)
-    profile = r.json()
-    location = profile.get('location')
-    country = None
-    if location:
-        try:
-            import pycountry
-            from geopy.geocoders import Nominatim
-            geolocator = Nominatim(user_agent="github-stargazer-map")
-            geo = geolocator.geocode(location, language="en", timeout=10)
-            if geo and geo.raw.get("display_name"):
-                parts = geo.raw["display_name"].split(",")
-                for part in reversed(parts):
-                    try:
-                        country_obj = pycountry.countries.search_fuzzy(part.strip())
-                        country = country_obj[0].name
-                        break
-                    except LookupError:
-                        continue
-        except Exception:
-            pass
-        time.sleep(1)
-    return country
-
-def main():
-    # Step 1: Load or create user-country cache
-    if os.path.exists(COUNTRY_FILE):
-        df = pd.read_csv(COUNTRY_FILE)
-    else:
-        users = get_stargazers(REPO)
-        countries = []
-        for i, user in enumerate(users):
-            print(f"{i+1}/{len(users)}: {user}")
-            country = get_user_country(user)
-            print(f" => {country}")
-            countries.append((user, country or "Unknown"))
-            # Save progress
-            os.makedirs(GITHUB_DIR, exist_ok=True)
-            pd.DataFrame(countries, columns=["user", "country"]).to_csv(COUNTRY_FILE, index=False)
-        df = pd.DataFrame(countries, columns=["user", "country"])
-
-    # Step 2: Calculate stargazer percentages per country
-    country_counts = df['country'].value_counts()
-    total = country_counts.sum()
-    country_perc = (country_counts / total * 100).to_dict()
-
-    # Step 3: Plot map with colored countries
-    if not os.path.exists(SHAPEFILE):
-        raise FileNotFoundError(f"Shapefile {SHAPEFILE} not found! Download it first.")
-    world = gpd.read_file(SHAPEFILE)
-    world['country'] = world['NAME'] if 'NAME' in world.columns else world['name']
-    world['stargazer_perc'] = world['country'].map(country_perc).fillna(0)
-
-    fig, ax = plt.subplots(figsize=(18, 9))
-    world.plot(column='stargazer_perc',
-               ax=ax,
-               cmap='Greens',
-               linewidth=0.8,
-               edgecolor='0.8',
-               legend=True,
-               legend_kwds={'label': "Stargazers per country (%)", 'shrink': 0.6})
-
-    ax.set_title(f"GitHub Stargazers by Country: {REPO}", fontsize=18)
-    ax.axis('off')
-    plt.tight_layout()
-    plt.savefig(PNG_FILE, dpi=200)
-    print(f"Map saved to {PNG_FILE}")
-    print(f"Countries saved to {COUNTRY_FILE}")
-
-if __name__ == "__main__":
-    main()