mirror of
https://github.com/alexbelgium/hassio-addons.git
synced 2026-01-10 09:51:02 +01:00
Update and rename generate_stargazer_map.py to generate_map.py
This commit is contained in:
149
.github/generate_map.py
vendored
Normal file
149
.github/generate_map.py
vendored
Normal file
@@ -0,0 +1,149 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Generate a static PNG world map colour-coded by the percentage of your
|
||||
stargazers that come from each country. The script maintains a CSV
|
||||
`.github/stargazer_countries.csv` cache so that locations are only looked
|
||||
up once (unless the country entry is blank).
|
||||
"""
|
||||
|
||||
import csv, os, time, sys, json, requests
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
import plotly.express as px
|
||||
import pycountry
|
||||
from geopy.geocoders import Nominatim
|
||||
|
||||
# ---- Configuration ----------------------------------------------------------
# Both values are read from the environment; main() refuses to run without them.
REPO = os.environ.get("REPO")  # expected "owner/repo"
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")  # provided by workflow

# Output artefacts: the username/country cache and the rendered map.
CSV_PATH = Path(".github/stargazer_countries.csv")
PNG_PATH = Path(".github/stargazer_map.png")

# Headers attached to every GitHub REST request made by this script.
HEADERS = {}
HEADERS["Authorization"] = f"token {GITHUB_TOKEN}"
HEADERS["Accept"] = "application/vnd.github.v3+json"

# Shared geocoder instance, identified to Nominatim by its user agent.
GEOL = Nominatim(user_agent="gh-stargazer-map")

# -----------------------------------------------------------------------------
|
||||
|
||||
def github_paginated(url):
    """Yield every item of a paginated GitHub REST collection.

    Pages of 100 items are requested, starting at page 1, until the API
    answers with an empty page.

    Args:
        url: Collection endpoint to page through.

    Yields:
        Each element of the decoded JSON response, in the order returned.

    Raises:
        requests.HTTPError: If a page responds with an error status.
        requests.Timeout: If a page does not respond within the timeout.
    """
    page = 1
    while True:
        # Passing the paging arguments through ``params`` keeps the request
        # valid even when ``url`` already carries a query string, and the
        # timeout stops a stalled connection from hanging the run forever.
        resp = requests.get(
            url,
            params={"per_page": 100, "page": page},
            headers=HEADERS,
            timeout=30,
        )
        resp.raise_for_status()
        data = resp.json()
        if not data:
            break
        yield from data
        page += 1
|
||||
|
||||
def fetch_stargazer_usernames():
    """Return the login of every stargazer of ``REPO``, in API order."""
    endpoint = f"https://api.github.com/repos/{REPO}/stargazers"
    logins = []
    for stargazer in github_paginated(endpoint):
        logins.append(stargazer["login"])
    return logins
|
||||
|
||||
def load_cache():
    """Load the username -> country cache stored at ``CSV_PATH``.

    Returns:
        A dict mapping each cached username to its country name. The value
        is ``""`` when no country is recorded. An empty dict is returned
        when the cache file does not exist.

    Raises:
        KeyError: If the file lacks a ``username`` or ``country`` column.
    """
    try:
        with CSV_PATH.open(newline="", encoding="utf-8") as f:
            # csv.DictReader fills fields missing from a short row with
            # None; normalise those to "" so the entry still counts as
            # "blank" and is looked up again by the caller.
            return {
                row["username"]: row["country"] or ""
                for row in csv.DictReader(f)
            }
    except FileNotFoundError:
        return {}
|
||||
|
||||
def save_cache(cache):
    """Write the username -> country mapping to ``CSV_PATH``.

    The parent directory is created if needed. Rows are written sorted by
    username under a ``username,country`` header, and a falsy country is
    stored as an empty field.

    Args:
        cache: Mapping of username to country name.
    """
    CSV_PATH.parent.mkdir(parents=True, exist_ok=True)
    with CSV_PATH.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.writer(handle)
        writer.writerow(["username", "country"])
        writer.writerows(
            [login, place or ""] for login, place in sorted(cache.items())
        )
|
||||
|
||||
def username_to_country(login):
    """Return readable country name or '' if unknown.

    The user's free-text profile location is fetched from the GitHub API
    and geocoded through the shared Nominatim client ``GEOL``.

    Args:
        login: GitHub username to resolve.

    Returns:
        The pycountry name of the resolved country, or ``""`` when the
        account cannot be found, has no location set, or the location
        cannot be mapped to a country.

    Raises:
        requests.HTTPError: For GitHub API error statuses other than 404.
    """
    resp = requests.get(
        f"https://api.github.com/users/{login}", headers=HEADERS, timeout=30
    )
    # An account that was deleted or renamed after the stargazer list was
    # fetched must not abort the whole run; treat it as "unknown".
    if resp.status_code == 404:
        return ""
    resp.raise_for_status()
    loc = (resp.json() or {}).get("location") or ""
    if not loc.strip():
        return ""
    try:
        g = GEOL.geocode(loc, language="en", timeout=10, addressdetails=True)
    except Exception:
        # Deliberately best-effort: any geocoder failure means "unknown".
        return ""
    if not g:
        return ""

    # Preferred: the ISO alpha-2 code from the structured address, which
    # does not depend on how the country name happens to be spelled.
    code = (g.raw.get("address") or {}).get("country_code")
    if code:
        try:
            match = pycountry.countries.get(alpha_2=code.upper())
        except LookupError:
            match = None
        if match is not None:
            return match.name

    # Fallback: take the last comma-separated component that matches a country
    for part in reversed(g.raw.get("display_name", "").split(",")):
        try:
            return pycountry.countries.lookup(part.strip()).name
        except LookupError:
            continue
    return ""
|
||||
|
||||
def build_choropleth(percent_by_iso):
    """Render a world choropleth of stargazer percentages to ``PNG_PATH``.

    Args:
        percent_by_iso: Mapping of ISO-3 country code to the numeric value
            used for colouring. May be empty.
    """
    # Split keys and values explicitly. ``iso, vals = zip(*items)`` raises
    # ValueError for an empty mapping, before the ``if vals else 1`` guard
    # below could ever apply.
    iso = list(percent_by_iso)
    vals = list(percent_by_iso.values())
    fig = px.choropleth(
        locations=iso,
        locationmode="ISO-3",
        color=vals,
        color_continuous_scale="Greens",
        range_color=(0, max(vals) if vals else 1),
    )
    fig.update_layout(
        coloraxis_colorbar={"title": "% stargazers"},
        margin=dict(l=0, r=0, t=0, b=0),
    )
    PNG_PATH.parent.mkdir(parents=True, exist_ok=True)
    fig.write_image(str(PNG_PATH), scale=2)
|
||||
|
||||
def main():
    """Refresh the stargazer country cache and render the world map.

    Steps: fetch the current stargazer list, look up the country of every
    user whose cache entry is missing or blank, save the cache, then plot
    the share of current stargazers per country.

    Exits with an error message when ``REPO`` or ``GITHUB_TOKEN`` is unset.
    """
    if not REPO or not GITHUB_TOKEN:
        sys.exit("REPO and GITHUB_TOKEN env vars are required")

    print("Fetching stargazer list…")
    users = fetch_stargazer_usernames()
    print(f"Total stargazers: {len(users)}")

    cache = load_cache()

    # Determine which usernames need a lookup
    to_lookup = [u for u in users if not cache.get(u)]
    print(f"Need geocode for {len(to_lookup)} users")

    try:
        for i, login in enumerate(to_lookup, 1):
            country = username_to_country(login)
            cache[login] = country
            print(f"{i}/{len(to_lookup)}: {login:<20} -> {country}")
            # Nominatim polite usage
            time.sleep(1)
    finally:
        # Save whatever was resolved even if a lookup raised, so a failure
        # late in the run does not throw away every earlier result.
        # Ensure all stargazers are in cache (even those with blank location)
        for u in users:
            cache.setdefault(u, "")
        save_cache(cache)

    # Build stats from current stargazers only; the cache may still hold
    # users who have since removed their star.
    countries = [cache[u] for u in users if cache[u]]
    counts = Counter(countries)
    total = sum(counts.values()) or 1
    pct_by_country = {c: v / total for c, v in counts.items()}

    # convert to ISO-3 for plotly
    pct_by_iso = {}
    for c, pct in pct_by_country.items():
        try:
            iso = pycountry.countries.lookup(c).alpha_3
        except LookupError:
            print("Skip unknown country:", c)
        else:
            pct_by_iso[iso] = pct * 100  # plotly wants numeric

    print("Rendering PNG map…")
    build_choropleth(pct_by_iso)
    print("Done – files saved:",
          CSV_PATH.relative_to('.'),
          PNG_PATH.relative_to('.'), sep="\n• ")


if __name__ == "__main__":
    main()
|
||||
104
.github/generate_stargazer_map.py
vendored
104
.github/generate_stargazer_map.py
vendored
@@ -1,104 +0,0 @@
|
||||
import os
|
||||
import requests
|
||||
import time
|
||||
import pandas as pd
|
||||
import geopandas as gpd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Repository slug ("owner/repo") and API token, both read from the environment.
REPO = os.environ.get("REPO")
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
# Only send an Authorization header when a token is configured; formatting
# an unset token would transmit the literal credential "token None".
HEADERS = {"Authorization": f"token {GITHUB_TOKEN}"} if GITHUB_TOKEN else {}

# All inputs and outputs live in the repository's .github directory.
GITHUB_DIR = ".github"
COUNTRY_FILE = os.path.join(GITHUB_DIR, "stargazers_countries.csv")
PNG_FILE = os.path.join(GITHUB_DIR, "stargazer_map.png")
SHAPEFILE = os.path.join(GITHUB_DIR, "ne_110m_admin_0_countries.shp")
|
||||
|
||||
def get_stargazers(repo):
    """Return the logins of all users who starred ``repo``.

    Pages of 100 entries are requested until an empty page is returned.

    Args:
        repo: Repository slug used in the API URL.

    Returns:
        List of login names in the order the API returned them.

    Raises:
        RuntimeError: If any page is answered with a non-200 status.
    """
    users = []
    page = 1
    # Built once rather than per page. With this media type each entry
    # nests the account under a "user" key, which is read below.
    headers = {**HEADERS, "Accept": "application/vnd.github.v3.star+json"}
    while True:
        url = f"https://api.github.com/repos/{repo}/stargazers?per_page=100&page={page}"
        # The timeout keeps a stalled connection from blocking forever.
        r = requests.get(url, headers=headers, timeout=30)
        if r.status_code != 200:
            raise RuntimeError(f"GitHub API error: {r.status_code}")
        data = r.json()
        if not data:
            break
        users.extend(entry["user"]["login"] for entry in data)
        page += 1
    return users
|
||||
|
||||
def get_user_country(login):
    """Resolve a GitHub user's profile location to a country name.

    Args:
        login: GitHub username.

    Returns:
        The pycountry name of the first fuzzy match found in the geocoded
        address, or ``None`` when the account is not found, has no
        location, or the location cannot be resolved.

    Raises:
        RuntimeError: If GitHub answers with a status other than 200 or 404.
    """
    url = f"https://api.github.com/users/{login}"
    r = requests.get(url, headers=HEADERS, timeout=30)
    if r.status_code == 404:
        # Account not found: nothing to resolve.
        return None
    if r.status_code != 200:
        # Without this check an error payload (for example a rate-limit
        # response) has no "location" key and would be recorded as an
        # unknown country instead of being reported.
        raise RuntimeError(f"GitHub API error: {r.status_code}")
    profile = r.json()
    location = profile.get('location')
    country = None
    if location:
        try:
            import pycountry
            from geopy.geocoders import Nominatim
            geolocator = Nominatim(user_agent="github-stargazer-map")
            geo = geolocator.geocode(location, language="en", timeout=10)
            if geo and geo.raw.get("display_name"):
                parts = geo.raw["display_name"].split(",")
                # Walk the address from its last component backwards and
                # keep the first one that matches a country.
                for part in reversed(parts):
                    try:
                        country_obj = pycountry.countries.search_fuzzy(part.strip())
                        country = country_obj[0].name
                        break
                    except LookupError:
                        continue
        except Exception as exc:
            # Best-effort: a geocoding failure leaves the country unknown,
            # but is reported instead of being swallowed silently.
            print(f"Geocoding failed for {location!r}: {exc}")
        # Pause after each geocoding attempt to throttle Nominatim requests.
        time.sleep(1)
    return country
|
||||
|
||||
def main():
    """Build or reuse the stargazer country table and plot it as a map.

    If ``COUNTRY_FILE`` exists it is used as is. Otherwise every stargazer
    of ``REPO`` is resolved to a country and the table is written to
    ``COUNTRY_FILE``. The per-country percentages are then drawn on the
    world shapefile and saved to ``PNG_FILE``.

    Raises:
        SystemExit: If the table must be built but ``REPO`` is unset.
        FileNotFoundError: If ``SHAPEFILE`` is missing.
    """
    # Step 1: Load or create user-country cache
    if os.path.exists(COUNTRY_FILE):
        df = pd.read_csv(COUNTRY_FILE)
    else:
        if not REPO:
            # Fail early instead of querying the API for "repos/None".
            raise SystemExit("REPO env var is required (expected 'owner/repo')")
        users = get_stargazers(REPO)
        countries = []
        for i, user in enumerate(users, 1):
            print(f"{i}/{len(users)}: {user}")
            country = get_user_country(user)
            print(f" => {country}")
            countries.append((user, country or "Unknown"))
        # Save only once the list is complete: an existing file is treated
        # as a finished cache on the next run, so a partial file would
        # hide the users that were never looked up.
        os.makedirs(GITHUB_DIR, exist_ok=True)
        df = pd.DataFrame(countries, columns=["user", "country"])
        df.to_csv(COUNTRY_FILE, index=False)

    # Step 2: Calculate stargazer percentages per country
    country_counts = df['country'].value_counts()
    total = country_counts.sum()
    country_perc = (country_counts / total * 100).to_dict()

    # Step 3: Plot map with colored countries
    if not os.path.exists(SHAPEFILE):
        raise FileNotFoundError(f"Shapefile {SHAPEFILE} not found! Download it first.")
    world = gpd.read_file(SHAPEFILE)
    # The name column is read from "NAME" when present, otherwise "name".
    world['country'] = world['NAME'] if 'NAME' in world.columns else world['name']
    # Countries without any stargazer get 0 so they are still drawn.
    world['stargazer_perc'] = world['country'].map(country_perc).fillna(0)

    fig, ax = plt.subplots(figsize=(18, 9))
    world.plot(
        column='stargazer_perc',
        ax=ax,
        cmap='Greens',
        linewidth=0.8,
        edgecolor='0.8',
        legend=True,
        legend_kwds={'label': "Stargazers per country (%)", 'shrink': 0.6},
    )

    ax.set_title(f"GitHub Stargazers by Country: {REPO}", fontsize=18)
    ax.axis('off')
    plt.tight_layout()
    plt.savefig(PNG_FILE, dpi=200)
    print(f"Map saved to {PNG_FILE}")
    print(f"Countries saved to {COUNTRY_FILE}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user