"""Summarise repository popularity and lifetime metrics from a CSV export."""

import csv
import sys
from datetime import datetime

import numpy as np

# Field positions follow the documented column order:
# repo_name,owner,stars,forks,language,created_at,last_commit,description
_STARS_COL = 2
_FORKS_COL = 3
_CREATED_COL = 5
_LAST_COMMIT_COL = 6
_DATE_FORMAT = '%Y-%m-%d'


def _parse_row(row):
    """Convert one CSV record into ``[stars, forks, active_days]``.

    Raises:
        ValueError: A count is not an integer or a date does not match
            ``_DATE_FORMAT``.
        IndexError: The record has fewer columns than expected.
    """
    stars = int(row[_STARS_COL])
    forks = int(row[_FORKS_COL])
    created_at = datetime.strptime(row[_CREATED_COL], _DATE_FORMAT)
    last_commit = datetime.strptime(row[_LAST_COMMIT_COL], _DATE_FORMAT)
    return [stars, forks, (last_commit - created_at).days]


def load_data(file_path):
    """Load repository data and calculate activity metrics.

    The first line of the file is treated as a header and skipped. Blank
    lines are ignored. A record that cannot be parsed is reported on stderr
    and skipped, so a single bad line no longer discards the whole dataset.

    Columns: repo_name,owner,stars,forks,language,created_at,last_commit,
    description

    Args:
        file_path: Path of the UTF-8 encoded CSV file to read.

    Returns:
        2D NumPy array of shape (repos, 3) containing
        [stars, forks, active_days], where active_days is the number of days
        between created_at and last_commit. When the file is missing,
        unreadable, or holds no valid record, an empty array of shape (0, 3)
        is returned and (for missing/unreadable files) a message is printed.
    """
    empty = np.empty((0, 3), dtype=int)
    rows = []
    try:
        # newline='' lets the csv module handle line endings itself.
        with open(file_path, 'r', encoding='utf-8', newline='') as file:
            reader = csv.reader(file)
            # A default avoids StopIteration on a completely empty file.
            next(reader, None)
            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines.
                    continue
                try:
                    rows.append(_parse_row(row))
                except (ValueError, IndexError) as exc:
                    # stderr keeps the formatted report on stdout clean.
                    print(
                        f"警告:跳过第 {reader.line_num} 行,数据无效:{exc}",
                        file=sys.stderr,
                    )
    except FileNotFoundError:
        print(f"错误:未找到文件 {file_path}")
        return empty
    except Exception as e:
        # Deliberately broad: this loader has never raised to its caller, and
        # the script below relies on that to fall back to all-zero statistics.
        print(f"发生未知错误:{e}")
        return empty
    if not rows:
        return empty
    return np.array(rows)


def calculate_statistics(data):
    """Calculate per-column statistics for the repository metrics.

    Args:
        data: Array-like of shape (repos, 3) holding
            [stars, forks, active_days] per repository.

    Returns:
        Dictionary containing {
            'means': [stars_mean, forks_mean, days_mean],
            'medians': [stars_median, forks_median, days_median],
            'variances': [stars_var, forks_var, days_var],
            'stds': [stars_std, forks_std, days_std]
        }
        For non-empty input each value is a NumPy array computed along
        axis 0 (np.var / np.std with their default ddof). For empty input
        each value is the list [0, 0, 0].
    """
    # Accept plain nested lists as well as arrays.
    data = np.asarray(data)
    if data.size == 0:
        return {
            'means': [0, 0, 0],
            'medians': [0, 0, 0],
            'variances': [0, 0, 0],
            'stds': [0, 0, 0]
        }
    return {
        'means': np.mean(data, axis=0),
        'medians': np.median(data, axis=0),
        'variances': np.var(data, axis=0),
        'stds': np.std(data, axis=0)
    }


def print_results(stats):
    """Print the statistics for each metric, one decimal place per value.

    Args:
        stats: Dictionary with 'means', 'medians', 'variances' and 'stds'
            entries, each holding one value per metric in the order
            Stars, Forks, Active Days.
    """
    metrics = ['Stars', 'Forks', 'Active Days']
    columns = zip(
        metrics,
        stats['means'],
        stats['medians'],
        stats['variances'],
        stats['stds'],
    )
    for metric, mean, med, var, std in columns:
        print(f"{metric}:")
        print(f" Average: {mean:.1f}")
        print(f" Median: {med:.1f}")
        print(f" Variance: {var:.1f}")
        print(f" Standard Deviation: {std:.1f}")


# NOTE(review): the patch switched this input from 'pakistan-repos.csv' to
# 'china-repos.csv' while the module is still named pakistan.py — confirm
# which dataset is intended.
repo_data = load_data('china-repos.csv')
stats = calculate_statistics(repo_data)
print_results(stats)