#!/usr/bin/env python3
"""Collect the history of many git repositories into SQLite for gource.

Sub-commands:

* ``gitjoin SOURCE OUTPUT`` walks SOURCE for directories whose name ends in
  ``.git``, runs ``git log`` in each, converts the output with
  ``gource --output-custom-log`` and stores the resulting change records in
  the SQLite database OUTPUT.
* ``select OUTPUT`` prints the stored records, optionally filtered, as
  ``time|user|action|path`` lines ordered by time.
* ``render`` currently only prints its parsed arguments.
"""
import multiprocessing
import os
import random
import re
import sqlite3
import subprocess
import sys
from argparse import ArgumentParser, Namespace, Action

# git log invocation whose output is fed to gource's "git" log parser.
GIT_CMD = [
    "git", "log", "--all",
    '--pretty=format:user:%aN%n%ct',
    "--reverse", "--raw", "--encoding=UTF-8",
    "--no-renames", "--no-show-signature",
]

# Statements executed in order by gitjoin() to create the database layout.
SCHEMA = [
    '''
    CREATE TABLE IF NOT EXISTS user (
        id INTEGER NOT NULL PRIMARY KEY,
        name TEXT UNIQUE
    );
    ''',
    '''
    CREATE TABLE IF NOT EXISTS repo (
        id INTEGER NOT NULL PRIMARY KEY,
        path TEXT UNIQUE
    );
    ''',
    '''
    CREATE TABLE IF NOT EXISTS file (
        id INTEGER NOT NULL PRIMARY KEY,
        path TEXT UNIQUE
    );
    ''',
    '''
    CREATE TABLE IF NOT EXISTS change (
        time DATETIME NOT NULL,
        user_id INTEGER NOT NULL,
        action TEXT NOT NULL,
        repo_id INTEGER NOT NULL,
        file_id INTEGER NOT NULL,
        UNIQUE(time,user_id,repo_id,file_id,action)
        FOREIGN KEY(user_id) REFERENCES user(id)
        FOREIGN KEY(repo_id) REFERENCES repo(id)
        FOREIGN KEY(file_id) REFERENCES file(id)
    );
    ''',
    '''
    CREATE TABLE IF NOT EXISTS log (
        id INTEGER NOT NULL PRIMARY KEY,
        type TEXT,
        time DATETIME DEFAULT CURRENT_TIMESTAMP,
        repo TEXT,
        message TEXT
    );
    ''',
]

# Base query of select(); filters and ORDER BY are appended at runtime.
STMT_SELECT = '''
SELECT time,user.name as user,action,repo.path as repo, file.path as file
FROM change
LEFT JOIN user ON user.id = change.user_id
LEFT JOIN repo ON repo.id = change.repo_id
LEFT JOIN file ON file.id = change.file_id
'''


def main():
    """Parse the command line and exit with the sub-command's result."""
    p = parser()
    args = p.parse_args()
    # A sub-command returning None (or any falsy value) exits with status 0.
    sys.exit(args.func(args) or 0)


def parser():
    """Build the argument parser with its three sub-commands.

    The selected handler is stored in ``func``; without a sub-command the
    default handler prints the help text.
    """
    p = ArgumentParser()
    p.add_argument('-d', '--debug', action='store_true')
    p.set_defaults(func=lambda x: p.print_help())
    sub = p.add_subparsers()

    p_render = sub.add_parser('render')
    p_render.set_defaults(func=render)
    p_render.add_argument('--gource', default='gource')

    p_join = sub.add_parser('gitjoin')
    p_join.set_defaults(func=gitjoin)
    p_join.add_argument('source')
    p_join.add_argument('output')
    p_join.add_argument('-p', '--prefix')

    p_select = sub.add_parser('select')
    p_select.add_argument('output')
    p_select.add_argument('--repo-like', default=None, type=str)
    p_select.add_argument('--file-like', default=None, type=str)
    p_select.add_argument('--user-like', default=None, type=str)
    p_select.add_argument('--user-regex', default=None, type=str)
    p_select.add_argument('--user-mapping', default={}, action=StoreDictKeyPair)
    p_select.add_argument('--user-file', default={}, action=YamlDict)
    p_select.set_defaults(func=select)
    return p


class StoreDictKeyPair(Action):
    """argparse action turning ``k1=v1,k2=v2`` into a dict."""

    def __call__(self, parser, namespace, values, option_string=None):
        my_dict = {}
        for kv in values.split(","):
            # Split on the first '=' only so values may contain '='.
            k, sep, v = kv.partition("=")
            if not sep:
                parser.error("%s: expected KEY=VALUE, got %r"
                             % (option_string or self.dest, kv))
            my_dict[k] = v
        setattr(namespace, self.dest, my_dict)


class YamlDict(Action):
    """argparse action loading a str-to-str mapping from a YAML file."""

    def __call__(self, parser, namespace, values, option_string=None):
        # Imported here so PyYAML is only required when the option is used.
        import yaml
        with open(values) as fd:
            my_dict = yaml.safe_load(fd)
        label = option_string or self.dest
        # Explicit checks instead of assert: asserts vanish under python -O.
        if not isinstance(my_dict, dict):
            parser.error("%s: %s does not contain a mapping" % (label, values))
        for k, v in my_dict.items():
            if not isinstance(k, str) or not isinstance(v, str):
                parser.error("%s: %s must map strings to strings"
                             % (label, values))
        setattr(namespace, self.dest, my_dict)


def gitjoin(args: Namespace):
    """Import every git repository found below ``args.source``.

    Repositories are processed in parallel by a worker pool; their records
    are written to the SQLite file ``args.output`` (created if missing),
    committing after every tenth repository and vacuuming at the end.
    """
    conn = sqlite3.connect(args.output)
    try:
        db = conn.cursor()
        for stmt in SCHEMA:
            db.execute(stmt)
        conn.commit()

        done = 0
        # The with-block makes sure the worker processes are torn down.
        with multiprocessing.Pool() as pool:
            results = pool.imap_unordered(get_gitlog, gitwalk_rec(args.source))
            for fp, gitlog in results:
                if fp is None:
                    continue
                done += 1
                print(done, fp)
                # Repository path relative to the source argument.
                p = fp[len(args.source):]
                for entry in gitlog:
                    sqlite_insert(entry, p=p, db=db, args=args)
                if done % 10 == 0:
                    conn.commit()

        conn.commit()
        db.execute('VACUUM')
        conn.commit()
    finally:
        conn.close()


def get_gitlog(fp: str):
    """Run ``git log`` in ``fp`` and convert its output with gource.

    Returns ``(fp, entries)``. ``entries`` is a list of 4-tuples and/or
    exception instances; a failing git call yields a single
    GitLogException carrying git's stdout.
    """
    proc = subprocess.run(GIT_CMD, cwd=fp, stdout=subprocess.PIPE)
    gitlog = proc.stdout.decode(errors='ignore')
    if proc.returncode != 0:
        return fp, [GitLogException(gitlog)]
    return fp, list(gource_format(gitlog))


class GitLogException(Exception):
    """``git log`` exited with a non-zero status."""


def _get_or_create_id(db, insert_sql, select_sql, value):
    """Insert ``value`` if it is new and return the id of its row."""
    db.execute(insert_sql, (value,))
    return db.execute(select_sql, (value,)).fetchone()[0]


def sqlite_insert(l: tuple, p=None, db=None, args=None):
    """Store one gitlog entry of repository ``p`` through cursor ``db``.

    ``l`` is either a ``(time, user, action, path)`` tuple or an exception
    instance. Exceptions are written to the ``log`` table when
    ``args.debug`` is set and dropped otherwise. Duplicate change rows are
    ignored.
    """
    if isinstance(l, BaseException):
        if args.debug:
            db.execute(
                'INSERT INTO log(type,repo,message) VALUES(?,?,?);',
                (l.__class__.__name__, p, str(l)))
        return

    fp = l[3].lstrip('/')
    # No RETURNING clause: the id is looked up by the following SELECT
    # anyway, and RETURNING needs SQLite 3.35 or newer.
    user_id = _get_or_create_id(
        db,
        'INSERT OR IGNORE INTO user(name) VALUES (?)',
        'SELECT id FROM user WHERE name=?',
        l[1])
    repo_id = _get_or_create_id(
        db,
        'INSERT OR IGNORE INTO repo(path) VALUES (?)',
        'SELECT id FROM repo WHERE path=?',
        p)
    file_id = _get_or_create_id(
        db,
        'INSERT OR IGNORE INTO file(path) VALUES (?)',
        'SELECT id FROM file WHERE path=?',
        fp)
    db.execute(
        'INSERT OR IGNORE INTO change(time,user_id,action,repo_id,file_id) '
        'VALUES (:time,:user_id,:action,:repo_id,:file_id)',
        {
            'time': int(l[0]),
            'user_id': user_id,
            'action': l[2],
            'repo_id': repo_id,
            'file_id': file_id,
        })


def gource_format(inp: str):
    """Convert ``git log`` output to gource custom-log records.

    Pipes ``inp`` through ``gource --output-custom-log`` and yields one
    4-tuple of strings per output line. Problems are yielded rather than
    raised: a GourceException when gource fails, a GourceDecodeException
    for every line that does not split into exactly four ``|`` fields.
    """
    GOURCE_CMD = ["gource", "--log-format", "git",
                  "--output-custom-log", "-", "-"]
    with subprocess.Popen(GOURCE_CMD, stdout=subprocess.PIPE,
                          stdin=subprocess.PIPE) as proc:
        out, _ = proc.communicate(input=inp.encode())
        if proc.returncode != 0:
            yield GourceException(inp)
            return
    for line in out.decode().splitlines():
        fields = line.split('|')
        if len(fields) != 4:
            yield GourceDecodeException(str(fields))
            continue
        yield tuple(fields)


class GourceException(Exception):
    """gource exited with a non-zero status."""


class GourceDecodeException(Exception):
    """A gource output line did not consist of four fields."""


def gitwalk_rec(root: str):
    """Yield every directory at or below ``root`` whose path ends in ``.git``.

    Matching directories are not descended into. Entries of each directory
    are visited in random order.
    """
    if not os.path.isdir(root):
        return
    if root.endswith('.git'):
        yield root
        return
    entries = os.listdir(root)
    random.shuffle(entries)
    for name in entries:
        yield from gitwalk_rec(os.path.join(root, name))


def select(args: Namespace):
    """Print stored changes as ``time|user|action|path`` lines.

    The optional filters ``user_regex``, ``user_like``, ``repo_like`` and
    ``file_like`` are combined with AND. User names are translated through
    ``args.user_mapping`` first and ``args.user_file`` second.
    """
    def regexp(pattern, value):
        # Backs SQL "value REGEXP pattern"; a NULL value never matches.
        if value is None:
            return 0
        return 1 if re.search(pattern, value, flags=re.IGNORECASE) else 0

    filters = (
        ("user.name REGEXP ?", args.user_regex),
        ("user.name LIKE ?", args.user_like),
        ("repo.path LIKE ?", args.repo_like),
        ("file.path LIKE ?", args.file_like),
    )
    clauses = [sql for sql, value in filters if value]
    params = [value for _, value in filters if value]

    stmt = STMT_SELECT
    if clauses:
        stmt += " WHERE " + " AND ".join(clauses)
    stmt += " ORDER BY time ASC"

    conn = sqlite3.connect(args.output)
    try:
        conn.create_function('regexp', 2, regexp)
        for timestamp, username, action, repo, file_path in conn.execute(stmt, params):
            username = args.user_mapping.get(username, username)
            username = args.user_file.get(username, username)
            path = os.path.join(repo, file_path)
            print('|'.join((str(timestamp), username, action, path)))
    finally:
        conn.close()


def render(args: Namespace):
    """Stub sub-command: print the parsed arguments."""
    print(args)


if __name__ == '__main__':
    main()