GitHub 公開項目爬取
轉載自: https://blog.csdn.net/qq_51780315/article/details/148192630?spm=1001.2100.3001.7377&utm_medium=distribute.pc_feed_blog.none-task-blog-hot-13-148192630-null-null.nonecase&depth_1-utm_source=distribute.pc_feed_blog.none-task-blog-hot-13-148192630-null-null.nonecase
import requests
def search_github_repositories(keyword, token=None, language=None, max_results=1000, timeout=10):
    """
    Search repositories through the GitHub search API, following pagination.

    Results are requested sorted by star count, descending. If a request
    fails, the error is printed and whatever was collected so far is returned.

    :param keyword: search keywords
    :param token: optional GitHub token, sent as a Bearer ``Authorization`` header
    :param language: optional language filter, appended as a ``language:`` qualifier
    :param max_results: maximum number of repositories to return
        (at most 10 pages of 100 results are ever requested)
    :param timeout: seconds to wait on each HTTP request before giving up
    :return: list of dicts with the keys ``name``, ``owner``, ``url``,
        ``clone_url``, ``description``, ``language`` and ``stars``
    """
    repositories = []
    if max_results <= 0:
        # Nothing was asked for, so do not spend a request on it.
        return repositories

    url = "https://api.github.com/search/repositories"
    headers = {"Accept": "application/vnd.github.v3+json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"

    query = keyword
    if language:
        query += f" language:{language}"

    max_pages = 10  # the original guard: never go past page 10
    # 100 is the largest page size used; ask for less when fewer results are
    # wanted. The size stays constant across pages so page numbers line up.
    per_page = min(100, max_results)

    for page in range(1, max_pages + 1):
        params = {
            "q": query,
            "sort": "stars",
            "order": "desc",
            "page": page,
            "per_page": per_page,
        }
        try:
            # The timeout keeps a stalled connection from blocking forever.
            response = requests.get(url, headers=headers, params=params, timeout=timeout)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            print(f"請(qǐng)求失敗: {e}")
            break

        items = data.get("items")
        if not items:
            # An empty page means there are no more results.
            break

        for item in items:
            repositories.append({
                "name": item["name"],
                "owner": item["owner"]["login"],
                "url": item["html_url"],
                "clone_url": item["clone_url"],
                "description": item["description"],
                "language": item["language"],
                "stars": item["stargazers_count"],
            })
            # Stop as soon as the caller's limit is reached.
            if len(repositories) >= max_results:
                return repositories

        if len(items) < per_page:
            # A short page is the last one; skip the extra request.
            break

    return repositories
def save_to_txt(results, filename="github_results.txt"):
    """
    Write one summary line per repository to a UTF-8 text file.

    Each line joins the owner/name, URL, language, star count and clone URL
    with `` | `` separators. The file is overwritten if it already exists,
    and a confirmation message is printed afterwards.

    :param results: list of repository dicts (keys ``owner``, ``name``,
        ``url``, ``language``, ``stars``, ``clone_url``)
    :param filename: path of the output file
    """
    def _render(entry):
        # Build the single-line, pipe-separated record for one repository.
        return "".join([
            f"倉庫: {entry['owner']}/{entry['name']} | ",
            f"URL: {entry['url']} | ",
            f"語言: {entry['language']} | ",
            f"星數(shù): {entry['stars']} | ",
            f"克隆地址: {entry['clone_url']}\n",
        ])

    with open(filename, "w", encoding="utf-8") as out:
        out.writelines(_render(entry) for entry in results)
    print(f"結(jié)果已保存至 {filename}")
# Example usage
if __name__ == "__main__":
    import os

    keyword = "Aerospace Control"
    language = "Python"
    # Read the token from the environment instead of embedding it in source:
    # a token committed to a file must be treated as leaked and revoked.
    # When GITHUB_TOKEN is unset the search runs without an Authorization header.
    token = os.environ.get("GITHUB_TOKEN")

    # Fetch up to 1000 results (the actual count depends on how many match).
    results = search_github_repositories(keyword, token, language=language, max_results=1000)

    if results:
        print(f"找到 {len(results)} 個(gè) {language} 相關(guān)倉庫:")
        for idx, repo in enumerate(results, 1):
            print(f"\n{idx}. {repo['owner']}/{repo['name']}")
            print(f" URL: {repo['url']}")
            print(f" 語言: {repo['language']}")  # show the language
            print(f" 克隆地址: {repo['clone_url']}")
        save_to_txt(results)
        print(f"實(shí)際獲取 {len(results)} 條結(jié)果")
    else:
        # Reported once; the original printed this message twice.
        print("未找到結(jié)果")
給大家推薦一個互聯網人的摸魚寶藏網站 【huahuashui.com】
全站功能永久免費 | 不收費!不套路!真白嫖! | 快樂摸魚每一天!

浙公網安備 33010602011771號