This commit is contained in:
Alexandre
2025-06-06 16:06:17 +02:00
committed by GitHub
parent ccc91670f2
commit c5e1cbeb5e

View File

@@ -1,17 +1,15 @@
import os import os
import requests import requests
import time import time
import folium import pandas as pd
import pycountry import geopandas as gpd
from geopy.geocoders import Nominatim import matplotlib.pyplot as plt
from collections import Counter, defaultdict
REPO = os.environ.get("REPO") REPO = os.environ.get("REPO")
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN") GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
HEADERS = {"Authorization": f"token {GITHUB_TOKEN}"} HEADERS = {"Authorization": f"token {GITHUB_TOKEN}"}
COUNTRY_FILE = "stargazers_countries.csv"
MAP_OUTPUT = "map/index.html" PNG_FILE = "stargazer_map.png"
os.makedirs("map", exist_ok=True)
def get_stargazers(repo): def get_stargazers(repo):
users = [] users = []
@@ -19,6 +17,8 @@ def get_stargazers(repo):
while True: while True:
url = f"https://api.github.com/repos/{repo}/stargazers?per_page=100&page={page}" url = f"https://api.github.com/repos/{repo}/stargazers?per_page=100&page={page}"
r = requests.get(url, headers={**HEADERS, "Accept": "application/vnd.github.v3.star+json"}) r = requests.get(url, headers={**HEADERS, "Accept": "application/vnd.github.v3.star+json"})
if r.status_code != 200:
raise RuntimeError(f"GitHub API error: {r.status_code}")
data = r.json() data = r.json()
if not data: if not data:
break break
@@ -26,20 +26,20 @@ def get_stargazers(repo):
page += 1 page += 1
return users return users
def get_user_country(login, loc_cache): def get_user_country(login):
if login in loc_cache: # Always use the latest info, but skip if empty
return loc_cache[login]
url = f"https://api.github.com/users/{login}" url = f"https://api.github.com/users/{login}"
r = requests.get(url, headers=HEADERS) r = requests.get(url, headers=HEADERS)
profile = r.json() profile = r.json()
location = profile.get('location') location = profile.get('location')
country = None country = None
if location: if location:
geolocator = Nominatim(user_agent="github-stargazer-map")
try: try:
import pycountry
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="github-stargazer-map")
geo = geolocator.geocode(location, language="en", timeout=10) geo = geolocator.geocode(location, language="en", timeout=10)
if geo and geo.raw.get("display_name"): if geo and geo.raw.get("display_name"):
# Try to extract country
parts = geo.raw["display_name"].split(",") parts = geo.raw["display_name"].split(",")
for part in reversed(parts): for part in reversed(parts):
try: try:
@@ -50,72 +50,50 @@ def get_user_country(login, loc_cache):
continue continue
except Exception: except Exception:
pass pass
time.sleep(1) # To avoid being rate-limited by Nominatim time.sleep(1)
loc_cache[login] = country
return country return country
def main(): def main():
print("Fetching stargazers…") # Step 1: Load or create user-country cache
users = get_stargazers(REPO) if os.path.exists(COUNTRY_FILE):
print(f"Found {len(users)} stargazers") df = pd.read_csv(COUNTRY_FILE)
# Caching location lookups
cache_path = ".github/loc_cache.json"
if os.path.exists(cache_path):
import json
with open(cache_path) as f:
loc_cache = json.load(f)
else: else:
loc_cache = {} users = get_stargazers(REPO)
countries = []
for i, user in enumerate(users):
print(f"{i+1}/{len(users)}: {user}")
country = get_user_country(user)
print(f" => {country}")
countries.append((user, country or "Unknown"))
# Save progress
pd.DataFrame(countries, columns=["user", "country"]).to_csv(COUNTRY_FILE, index=False)
df = pd.DataFrame(countries, columns=["user", "country"])
country_counts = Counter() # Step 2: Calculate stargazer percentages per country
for i, login in enumerate(users): country_counts = df['country'].value_counts()
country = get_user_country(login, loc_cache) total = country_counts.sum()
if country: country_perc = (country_counts / total * 100).to_dict()
country_counts[country] += 1
print(f"{i+1}/{len(users)}: {login} -> {country}")
# Save cache after each user (robust)
with open(cache_path, "w") as f:
import json; json.dump(loc_cache, f)
total = sum(country_counts.values()) # Step 3: Plot map with colored countries
percent_by_country = {k: v / total for k, v in country_counts.items()} world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
world['country'] = world['name']
world['stargazer_perc'] = world['country'].map(country_perc).fillna(0)
print("Generating map…") fig, ax = plt.subplots(figsize=(18, 9))
m = folium.Map(location=[20,0], zoom_start=2, tiles="cartodb positron") world.plot(column='stargazer_perc',
import branca.colormap as cm ax=ax,
cmap='Greens',
linewidth=0.8,
edgecolor='0.8',
legend=True,
legend_kwds={'label': "Stargazers per country (%)", 'shrink': 0.6})
# Prepare color map: 0 (white) to 1 (green) ax.set_title(f"GitHub Stargazers by Country: {REPO}", fontsize=18)
colormap = cm.linear.YlGn_09.scale(0, max(percent_by_country.values()) if percent_by_country else 1) ax.axis('off')
plt.tight_layout()
import json plt.savefig(PNG_FILE, dpi=200)
# Get country geometries from folium's world geojson print(f"Map saved to {PNG_FILE}")
world = requests.get("https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/world-countries.json").json() print(f"Countries saved to {COUNTRY_FILE}")
def country_fill(feature):
country = feature['properties']['name']
pct = percent_by_country.get(country, 0)
return {
"fillColor": colormap(pct),
"color": "black",
"weight": 0.5,
"fillOpacity": 0.8 if pct > 0 else 0,
}
folium.GeoJson(
world,
style_function=country_fill,
tooltip=folium.GeoJsonTooltip(fields=["name"]),
highlight_function=lambda f: {"weight": 2, "color": "black"}
).add_to(m)
# Add legend
colormap.caption = "Percentage of Stargazers"
m.add_child(colormap)
# Save map
m.save(MAP_OUTPUT)
print(f"Map saved to {MAP_OUTPUT}")
if __name__ == "__main__": if __name__ == "__main__":
main() main()