使用Python-GitLab实现仓库信息采集系统
系统需求
构建Git仓库数据采集系统,需收集以下信息至数据库供前端展示:
- 仓库基础信息
- 所有者与成员列表
- 提交记录与活跃状态
- 支持定时任务与API触发
技术方案
- GitLab REST API v4 接口
- Python-GitLab库(兼容GitLab 10.0+版本)
- Flask框架提供API接口
- Requests处理HTTP请求
- datetime/dateutil转换时间格式
- Docker容器化部署
核心实现
项目信息处理
class ProjectProcessor(DataHandler):
    """Collect one GitLab project's metadata and persist it.

    Constructing an instance does all the work: it reads the project's
    attributes, resolves the member usernames, builds ``self.project_data``
    and finally calls ``self._sync_to_database()``.

    ``log``, ``log_warning``, ``_get_attr`` and ``_sync_to_database`` are not
    defined here; they are expected to be provided by ``DataHandler``.
    """

    def __init__(self, project):
        """Process ``project`` immediately.

        Args:
            project: Object exposing ``id``, ``attributes`` and a ``members``
                manager (presumably a python-gitlab project -- confirm
                against the caller).
        """
        super().__init__()
        self.repo = project
        self.attrs = project.attributes
        self.log(f"处理项目: {self._get_attr('name')} ID:{project.id}")
        # Maps short internal keys to the field names used in project_data.
        self.FIELDS = {
            'id': 'identifier',
            'name': 'repo_name',
            'desc': 'description',
            'owner': 'maintainer',
        }
        self.members = []
        self.primary_user = None
        self._initialize_data()

    def _format_time(self, timestamp):
        """Format ``timestamp`` as ``YYYY-MM-DD HH:MM:SS``.

        Returns:
            The formatted string, or ``None`` when the value cannot be parsed
            (a warning is logged in that case).
        """
        try:
            return parser.parse(timestamp).strftime("%Y-%m-%d %H:%M:%S")
        except (TypeError, ValueError, OverflowError):
            # Best effort: a missing or malformed timestamp must not abort
            # processing of the whole project.
            self.log_warning(f"时间格式转换失败: {timestamp}")
            return None

    def _determine_activity(self, last_active):
        """Return 1 if ``last_active`` is within the last 30 days, else 0.

        A missing or unparseable timestamp counts as inactive (0) instead of
        raising, so one bad value cannot break construction.
        """
        if not last_active:
            return 0
        try:
            moment = parser.parse(last_active)
        except (TypeError, ValueError, OverflowError):
            return 0
        # datetime.now(tz) gives an aware "now" when the timestamp carries an
        # offset and a naive local "now" when it does not, so the comparison
        # never mixes aware and naive values.
        cutoff = datetime.now(moment.tzinfo) - timedelta(days=30)
        return 1 if moment > cutoff else 0

    def _initialize_data(self):
        """Build ``self.project_data`` and hand it to the database sync."""
        raw_last_active = self._get_attr('last_activity_at')
        last_active = self._format_time(raw_last_active)
        self.project_data = {
            self.FIELDS['id']: self.repo.id,
            self.FIELDS['name']: self._get_attr('name'),
            self.FIELDS['desc']: self._get_attr('description'),
            'last_active': last_active,
            'created': self._format_time(self._get_attr('created_at')),
            # Use the raw timestamp: the formatted string has lost its UTC
            # offset and may be None when formatting failed.
            'is_active': self._determine_activity(raw_last_active),
            'members': self._fetch_member_usernames(),
        }
        self._sync_to_database()

    def _fetch_members(self):
        """Return the project's members, fetching them on first use.

        The result is cached in ``self.members``; an empty result is not
        cached, so it is fetched again on the next call.
        """
        if not self.members:
            # NOTE(review): members.all(...) looks like the older python-gitlab
            # call for members including inherited ones; newer releases appear
            # to expose this as members_all.list(...) -- confirm against the
            # installed version.
            self.members = self.repo.members.list(all=True) or self.repo.members.all(all=True)
        return self.members

    def _fetch_member_usernames(self):
        """Return the ``username`` attribute of every member (None if absent)."""
        return [m.attributes.get('username') for m in self._fetch_members()]
成员数据处理
def _fetch_member_usernames(self):
    """Return the usernames of all members of ``self.repo``.

    Members are listed with ``all=True``. Each member's data is read from
    its ``attributes`` dict, the same accessor the class-level version of
    this method uses; a member without a ``username`` key yields ``None``.
    """
    member_records = self.repo.members.list(all=True)
    return [member.attributes.get('username') for member in member_records]
用户数据结构示例
用户属性示例:
{
'access_level': 50,
'username': 'dev_user',
'name': 'Developer',
'web_url': 'http://git.example.com/user'
}
系统集成
# Flask application object; the route below is registered on it.
app = Flask(__name__)
@app.route('/api/fetch-projects/<token>')
def get_projects(token):
    """Process every project visible to ``token`` and report a summary.

    Connects to the GitLab instance at ``API_URL`` with ``token`` as the
    private token, builds a ``ProjectProcessor`` for each listed project and
    returns JSON with the authenticated username and the number of projects
    processed.

    Returns:
        A JSON response with ``user`` and ``project_count``; or a JSON error
        with HTTP status 401 when GitLab rejects the token.
    """
    # NOTE(review): the token travels in the URL path, so it can end up in
    # access logs and browser history; consider accepting it in a header.
    gl = gitlab.Gitlab(API_URL, private_token=token)
    try:
        gl.auth()
    except gitlab.GitlabAuthenticationError:
        # A bad token is a client error, not a server failure.
        return jsonify({'error': 'authentication failed'}), 401

    processed = 0
    for project in gl.projects.list(all=True):
        # Instantiating the processor performs the collection and sync.
        ProjectProcessor(project)
        processed += 1

    return jsonify({
        'user': gl.user.username,
        'project_count': processed
    })
def create_http_pool():
    """Return a ``requests.Session`` backed by a bounded connection pool.

    One ``HTTPAdapter`` (5 pools, up to 10 connections each) is mounted for
    both ``http://`` and ``https://`` so the pool settings apply regardless
    of the scheme of the target URL.
    """
    session = requests.Session()
    adapter = HTTPAdapter(pool_connections=5, pool_maxsize=10)
    # Without the https mount, HTTPS requests would fall back to the
    # session's default adapter and ignore the pool sizes above.
    for scheme in ('http://', 'https://'):
        session.mount(scheme, adapter)
    return session