Update and rename generate_stargazer_map.py to generate_map.py

This commit is contained in:
Alexandre
2025-06-07 14:30:10 +02:00
committed by GitHub
parent f9b23d74b1
commit 1c182f1d6b
2 changed files with 149 additions and 104 deletions

149
.github/generate_map.py vendored Normal file
View File

@@ -0,0 +1,149 @@
#!/usr/bin/env python3
"""
Generate a static PNG world map colour-coded by the percentage of your
stargazers that come from each country. The script maintains a CSV
`.github/stargazer_countries.csv` cache so that locations are only looked
up once (unless the country entry is blank).
"""
import csv, os, time, sys, json, requests
from collections import Counter
from pathlib import Path
import plotly.express as px
import pycountry
from geopy.geocoders import Nominatim
# ---- Configuration ----------------------------------------------------------
# Repository slug (expected "owner/repo") and API token, provided by the
# workflow through environment variables; each is None when unset.
REPO = os.environ.get("REPO")
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")

# The lookup cache and the rendered map are both stored under `.github/`.
CSV_PATH = Path(".github", "stargazer_countries.csv")
PNG_PATH = Path(".github", "stargazer_map.png")

# Headers attached to every GitHub REST request made by this script.
HEADERS = {
    "Authorization": f"token {GITHUB_TOKEN}",
    "Accept": "application/vnd.github.v3+json",
}

# One shared Nominatim client, reused for every location lookup.
GEOL = Nominatim(user_agent="gh-stargazer-map")
# -----------------------------------------------------------------------------
def github_paginated(url, timeout=30):
    """Yield every item of a paginated GitHub REST collection.

    Pages of up to 100 items are requested one after another, starting at
    page 1, until the API answers with an empty page.

    Args:
        url: Collection endpoint, without pagination parameters.
        timeout: Seconds to wait for each HTTP response before giving up.

    Yields:
        The decoded JSON items, in the order the API returns them.

    Raises:
        requests.HTTPError: If a page comes back with an error status.
        requests.Timeout: If a page does not answer within ``timeout``.
    """
    page = 1
    while True:
        # ``params`` keeps the URL well-formed even if ``url`` already has a
        # query string, and the timeout stops a stalled connection from
        # blocking the run indefinitely.
        resp = requests.get(
            url,
            params={"per_page": 100, "page": page},
            headers=HEADERS,
            timeout=timeout,
        )
        resp.raise_for_status()
        data = resp.json()
        if not data:
            break
        yield from data
        page += 1
def fetch_stargazer_usernames():
    """Return the login of every stargazer of ``REPO``, in API order."""
    logins = []
    for stargazer in github_paginated(
        f"https://api.github.com/repos/{REPO}/stargazers"
    ):
        logins.append(stargazer["login"])
    return logins
def load_cache():
    """Load the username -> country cache from ``CSV_PATH``.

    Returns:
        A dict mapping each cached username to its country name, with ``""``
        standing for "not known yet". An empty dict is returned when the
        cache file does not exist.

    Raises:
        KeyError: If the CSV lacks the ``username`` or ``country`` column.
    """
    if not CSV_PATH.exists():
        return {}
    with CSV_PATH.open(newline="", encoding="utf-8") as f:
        # ``csv.DictReader`` fills a missing trailing field with ``None``;
        # normalise it to "" so a short row reads as a blank entry for code
        # that compares the value against the empty string.
        return {
            row["username"]: row["country"] or ""
            for row in csv.DictReader(f)
        }
def save_cache(cache):
    """Write the username -> country mapping to ``CSV_PATH``.

    The parent directory is created when missing. The file starts with a
    ``username,country`` header, followed by one row per entry in sorted
    order; a falsy country is stored as an empty field.
    """
    CSV_PATH.parent.mkdir(parents=True, exist_ok=True)
    with CSV_PATH.open("w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["username", "country"])
        writer.writerows(
            [login, country or ""] for login, country in sorted(cache.items())
        )
def username_to_country(login, timeout=30):
    """Return readable country name or '' if unknown.

    The user's free-text profile location is fetched from the GitHub API and
    geocoded; the components of the geocoder's ``display_name`` are then
    scanned from last to first for one that ``pycountry`` recognises.

    Args:
        login: GitHub username to resolve.
        timeout: Seconds to wait for the GitHub profile response.

    Returns:
        The country's name, or ``""`` when the profile has no location, the
        geocoder fails or finds nothing, or no component names a country.

    Raises:
        requests.HTTPError: If the profile request returns an error status.
        requests.Timeout: If GitHub does not answer within ``timeout``.
    """
    # Without a timeout a stalled connection would block the run forever.
    resp = requests.get(
        f"https://api.github.com/users/{login}",
        headers=HEADERS,
        timeout=timeout,
    )
    resp.raise_for_status()
    loc = (resp.json() or {}).get("location") or ""
    if not loc.strip():
        return ""
    try:
        g = GEOL.geocode(loc, language="en", timeout=10)
    except Exception:
        # Geocoding is best effort: any geocoder failure means "unknown".
        return ""
    if not g or "display_name" not in g.raw:
        return ""
    # take the last comma-separated component that matches a country
    for part in reversed(g.raw["display_name"].split(",")):
        part = part.strip()
        try:
            return pycountry.countries.lookup(part).name
        except LookupError:
            continue
    return ""
def build_choropleth(percent_by_iso):
    """Render a world choropleth of stargazer share and save it as a PNG.

    Args:
        percent_by_iso: Mapping of ISO-3 country code to the numeric value
            used to colour that country. May be empty.

    The image is written to ``PNG_PATH`` at scale 2; the parent directory is
    created when missing.
    """
    # Split the mapping without ``zip(*...)``: unpacking an empty zip raises
    # ValueError, which made the empty-input fallback in ``range_color``
    # unreachable.
    iso = list(percent_by_iso)
    vals = list(percent_by_iso.values())
    fig = px.choropleth(
        locations=iso,
        locationmode="ISO-3",
        color=vals,
        color_continuous_scale="Greens",
        range_color=(0, max(vals) if vals else 1),
    )
    fig.update_layout(
        coloraxis_colorbar={"title": "% stargazers"},
        margin=dict(l=0, r=0, t=0, b=0),
    )
    PNG_PATH.parent.mkdir(parents=True, exist_ok=True)
    fig.write_image(str(PNG_PATH), scale=2)
def main():
    """Refresh the stargazer country cache and render the map.

    Steps: fetch the current stargazers, look up the country of every user
    whose cache entry is blank, save the cache, then compute each country's
    share among current stargazers with a known country and render the PNG.

    Exits with an error message when ``REPO`` or ``GITHUB_TOKEN`` is unset.
    """
    if not REPO or not GITHUB_TOKEN:
        sys.exit("REPO and GITHUB_TOKEN env vars are required")
    print("Fetching stargazer list…")
    users = fetch_stargazer_usernames()
    print(f"Total stargazers: {len(users)}")
    # De-duplicate while keeping order, so a login repeated across pages is
    # neither looked up nor counted twice.
    current = list(dict.fromkeys(users))
    cache = load_cache()
    # Ensure all stargazers are in cache (even those with blank location)
    for u in current:
        cache.setdefault(u, "")
    # Determine which usernames need a lookup (blank or missing country)
    to_lookup = [u for u in current if not cache[u]]
    print(f"Need geocode for {len(to_lookup)} users")
    try:
        for i, login in enumerate(to_lookup, 1):
            country = username_to_country(login)
            cache[login] = country
            print(f"{i}/{len(to_lookup)}: {login:<20} -> {country}")
            # Nominatim polite usage
            time.sleep(1)
    finally:
        # Persist whatever was resolved even if a lookup failed part-way, so
        # the next run does not repeat the lookups already done.
        save_cache(cache)
    # Build stats from current stargazers only: the cache may still hold
    # users who have since removed their star.
    countries = [cache[u] for u in current if cache[u]]
    counts = Counter(countries)
    total = sum(counts.values()) or 1
    pct_by_country = {c: v / total for c, v in counts.items()}
    # convert to ISO-3 for plotly
    pct_by_iso = {}
    for c, pct in pct_by_country.items():
        try:
            iso = pycountry.countries.lookup(c).alpha_3
        except LookupError:
            print("Skip unknown country:", c)
        else:
            pct_by_iso[iso] = pct * 100  # plotly wants numeric
    print("Rendering PNG map…")
    build_choropleth(pct_by_iso)
    print("Done files saved:",
          CSV_PATH.relative_to('.'),
          PNG_PATH.relative_to('.'), sep="\n")


if __name__ == "__main__":
    main()

View File

@@ -1,104 +0,0 @@
import os
import requests
import time
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
# Settings read from the environment; each is None when the variable is unset.
REPO = os.getenv("REPO")
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
HEADERS = {"Authorization": f"token {GITHUB_TOKEN}"}

# Every file this script reads or writes sits in the `.github` directory.
GITHUB_DIR = ".github"
COUNTRY_FILE, PNG_FILE, SHAPEFILE = (
    os.path.join(GITHUB_DIR, filename)
    for filename in (
        "stargazers_countries.csv",
        "stargazer_map.png",
        "ne_110m_admin_0_countries.shp",
    )
)
def get_stargazers(repo, timeout=30):
    """Return the logins of all stargazers of ``repo``.

    The stargazers endpoint is read 100 entries per page until an empty page
    is returned. The request uses the ``star+json`` media type, and each
    entry's login is read from ``entry["user"]["login"]``.

    Args:
        repo: Repository slug used in the API path.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        A list of login names in API order.

    Raises:
        RuntimeError: If a page answers with a status other than 200.
        requests.Timeout: If a page does not answer within ``timeout``.
    """
    headers = {**HEADERS, "Accept": "application/vnd.github.v3.star+json"}
    users = []
    page = 1
    while True:
        # The timeout keeps a stalled connection from hanging the job.
        r = requests.get(
            f"https://api.github.com/repos/{repo}/stargazers",
            params={"per_page": 100, "page": page},
            headers=headers,
            timeout=timeout,
        )
        if r.status_code != 200:
            raise RuntimeError(f"GitHub API error: {r.status_code}")
        data = r.json()
        if not data:
            break
        users.extend(entry["user"]["login"] for entry in data)
        page += 1
    return users
def get_user_country(login, timeout=30):
    """Return the country name for a GitHub user, or None when unknown.

    The profile's free-text location is geocoded with Nominatim, and the
    components of the resulting ``display_name`` are fuzzy-matched against
    ``pycountry`` from last to first.

    Args:
        login: GitHub username to resolve.
        timeout: Seconds to wait for the GitHub profile response.

    Returns:
        The matched country's name, or ``None`` when the profile has no
        location or the geocoding / matching step finds nothing or fails.

    Raises:
        ImportError: If ``pycountry`` or ``geopy`` is not installed.
        requests.Timeout: If GitHub does not answer within ``timeout``.
    """
    url = f"https://api.github.com/users/{login}"
    r = requests.get(url, headers=HEADERS, timeout=timeout)
    profile = r.json()
    location = profile.get('location')
    country = None
    if location:
        # Import outside the ``try`` so a missing dependency fails loudly
        # instead of being swallowed and returning None for every user.
        import pycountry
        from geopy.geocoders import Nominatim
        try:
            geolocator = Nominatim(user_agent="github-stargazer-map")
            geo = geolocator.geocode(location, language="en", timeout=10)
            if geo and geo.raw.get("display_name"):
                parts = geo.raw["display_name"].split(",")
                for part in reversed(parts):
                    try:
                        country_obj = pycountry.countries.search_fuzzy(part.strip())
                        country = country_obj[0].name
                        break
                    except LookupError:
                        continue
        except Exception:
            # Geocoding is best effort: any failure leaves the country unknown.
            pass
    # Pause after every call to pace requests to the external services.
    time.sleep(1)
    return country
def main():
    """Build the stargazer map from a cached or freshly fetched country list.

    When ``COUNTRY_FILE`` exists it is used as-is. Otherwise every stargazer
    of ``REPO`` is resolved to a country ("Unknown" when none is found) and
    the CSV is rewritten after each user. The per-country percentages are
    then joined onto the shapefile's country names and plotted to
    ``PNG_FILE``.

    Raises:
        FileNotFoundError: If ``SHAPEFILE`` is missing.
    """
    # NOTE(review): the source's indentation was lost; the per-user save is
    # assumed to sit inside the loop, as its "Save progress" comment suggests.
    columns = ["user", "country"]

    # Step 1: Load or create user-country cache
    if not os.path.exists(COUNTRY_FILE):
        users = get_stargazers(REPO)
        rows = []
        for i, user in enumerate(users):
            print(f"{i+1}/{len(users)}: {user}")
            country = get_user_country(user)
            print(f" => {country}")
            rows.append((user, country or "Unknown"))
            # Save progress
            os.makedirs(GITHUB_DIR, exist_ok=True)
            pd.DataFrame(rows, columns=columns).to_csv(COUNTRY_FILE, index=False)
        df = pd.DataFrame(rows, columns=columns)
    else:
        df = pd.read_csv(COUNTRY_FILE)

    # Step 2: Calculate stargazer percentages per country
    per_country = df['country'].value_counts()
    country_perc = (per_country / per_country.sum() * 100).to_dict()

    # Step 3: Plot map with colored countries
    if not os.path.exists(SHAPEFILE):
        raise FileNotFoundError(f"Shapefile {SHAPEFILE} not found! Download it first.")
    world = gpd.read_file(SHAPEFILE)
    # The name column is 'NAME' when present, otherwise 'name'.
    name_column = 'NAME' if 'NAME' in world.columns else 'name'
    world['country'] = world[name_column]
    # Countries without any stargazer get 0 %.
    world['stargazer_perc'] = world['country'].map(country_perc).fillna(0)

    fig, ax = plt.subplots(figsize=(18, 9))
    legend_options = {'label': "Stargazers per country (%)", 'shrink': 0.6}
    world.plot(
        column='stargazer_perc',
        ax=ax,
        cmap='Greens',
        linewidth=0.8,
        edgecolor='0.8',
        legend=True,
        legend_kwds=legend_options,
    )
    ax.set_title(f"GitHub Stargazers by Country: {REPO}", fontsize=18)
    ax.axis('off')
    plt.tight_layout()
    plt.savefig(PNG_FILE, dpi=200)
    print(f"Map saved to {PNG_FILE}")
    print(f"Countries saved to {COUNTRY_FILE}")


if __name__ == "__main__":
    main()