| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256 |
- #!/usr/bin/env python3
- from argparse import ArgumentParser,Namespace,Action
# `git log` invocation run inside each repository; its output is what gets
# piped into gource's git log parser.
GIT_CMD = [
    "git",
    "log",
    "--all",
    "--pretty=format:user:%aN%n%ct",
    "--reverse",
    "--raw",
    "--encoding=UTF-8",
    "--no-renames",
    "--no-show-signature",
]
# DDL statements executed in order when a database is opened for writing.
# Every statement is idempotent (CREATE TABLE IF NOT EXISTS).
SCHEMA = [
    # Lookup table: one row per distinct author name.
    '''
    CREATE TABLE IF NOT EXISTS user (
        id INTEGER NOT NULL PRIMARY KEY,
        name TEXT UNIQUE
    );
    ''',
    # Lookup table: one row per repository path.
    '''
    CREATE TABLE IF NOT EXISTS repo (
        id INTEGER NOT NULL PRIMARY KEY,
        path TEXT UNIQUE
    );
    ''',
    # Lookup table: one row per file path.
    '''
    CREATE TABLE IF NOT EXISTS file (
        id INTEGER NOT NULL PRIMARY KEY,
        path TEXT UNIQUE
    );
    ''',
    # One row per (time, user, action, repo, file) change. The table
    # constraints are comma-separated as in the documented CREATE TABLE
    # grammar rather than relying on SQLite accepting them without commas.
    '''
    CREATE TABLE IF NOT EXISTS change (
        time DATETIME NOT NULL,
        user_id INTEGER NOT NULL,
        action TEXT NOT NULL,
        repo_id INTEGER NOT NULL,
        file_id INTEGER NOT NULL,
        UNIQUE(time,user_id,repo_id,file_id,action),
        FOREIGN KEY(user_id) REFERENCES user(id),
        FOREIGN KEY(repo_id) REFERENCES repo(id),
        FOREIGN KEY(file_id) REFERENCES file(id)
    );
    ''',
    # Diagnostics: errors recorded while importing when --debug is set.
    '''
    CREATE TABLE IF NOT EXISTS log (
        id INTEGER NOT NULL PRIMARY KEY,
        type TEXT,
        time DATETIME DEFAULT CURRENT_TIMESTAMP,
        repo TEXT,
        message TEXT
    );
    ''',
]
# Base query resolving every change row to its user name, repo path and file
# path. Callers append their own WHERE / ORDER BY clauses to this text.
STMT_SELECT = (
    "\n"
    "SELECT time,user.name as user,action,repo.path as repo, file.path as file FROM change\n"
    "LEFT JOIN user ON user.id = change.user_id\n"
    "LEFT JOIN repo ON repo.id = change.repo_id\n"
    "LEFT JOIN file ON file.id = change.file_id\n"
)
def main():
    """Parse the command line, run the chosen sub-command and exit.

    The handler's return value becomes the process exit status; a falsy
    return (including ``None``) exits with 0.
    """
    import sys

    args = parser().parse_args()
    status = args.func(args)
    sys.exit(status or 0)
def parser():
    """Build the argument parser with the render, gitjoin and select commands.

    Every sub-command stores its handler under ``func``; when no sub-command
    is given, ``func`` prints the top-level help instead.
    """
    root = ArgumentParser()
    root.add_argument('-d', '--debug', action='store_true')
    root.set_defaults(func=lambda _args: root.print_help())
    commands = root.add_subparsers()

    # render
    render_cmd = commands.add_parser('render')
    render_cmd.set_defaults(func=render)
    render_cmd.add_argument('--gource', default='gource')

    # gitjoin SOURCE OUTPUT
    join_cmd = commands.add_parser('gitjoin')
    join_cmd.set_defaults(func=gitjoin)
    for positional in ('source', 'output'):
        join_cmd.add_argument(positional)
    join_cmd.add_argument('-p', '--prefix')

    # select OUTPUT [filters] [user renaming]
    select_cmd = commands.add_parser('select')
    select_cmd.add_argument('output')
    for column in ('repo', 'file', 'user'):
        select_cmd.add_argument('--%s-like' % column, default=None, type=str)
    select_cmd.add_argument('--user-regex', default=None, type=str)
    select_cmd.add_argument('--user-mapping', default={}, action=StoreDictKeyPair)
    select_cmd.add_argument('--user-file', default={}, action=YamlDict)
    select_cmd.set_defaults(func=select)

    return root
class StoreDictKeyPair(Action):
    """argparse action that parses ``k1=v1,k2=v2`` into a dict on the namespace."""

    def __call__(self, parser, namespace, values, option_string=None):
        """Parse *values* and store the resulting dict under ``self.dest``.

        Items are separated by ``,``; each item is split on its first ``=``
        so that values may themselves contain ``=``.

        Raises:
            ArgumentError: if an item contains no ``=`` separator.
        """
        from argparse import ArgumentError

        mapping = {}
        for item in values.split(","):
            key, separator, value = item.partition("=")
            if not separator:
                # Report through argparse instead of leaking a raw ValueError.
                raise ArgumentError(self, "expected KEY=VALUE, got %r" % item)
            mapping[key] = value
        setattr(namespace, self.dest, mapping)
class YamlDict(Action):
    """argparse action that loads a YAML file holding a str -> str mapping."""

    def __call__(self, parser, namespace, values, option_string=None):
        """Load the YAML file named by *values* and store it under ``self.dest``.

        Raises:
            ArgumentError: if the document is not a mapping, or if any key or
                value in it is not a string.
        """
        from argparse import ArgumentError

        import yaml

        with open(values, encoding="utf-8") as fd:
            mapping = yaml.safe_load(fd)

        # Explicit checks rather than ``assert`` so the validation still runs
        # under ``python -O`` and is reported as a normal argparse error.
        if not isinstance(mapping, dict):
            raise ArgumentError(
                self, "%s: expected a YAML mapping at the top level" % values)
        for key, value in mapping.items():
            if not isinstance(key, str) or not isinstance(value, str):
                raise ArgumentError(
                    self,
                    "%s: keys and values must be strings, got %r: %r"
                    % (values, key, value))
        setattr(namespace, self.dest, mapping)
def gitjoin(args: Namespace):
    """Import the history of every git repository below ``args.source``.

    Repositories are discovered with ``gitwalk_rec`` and their logs extracted
    in worker processes via ``get_gitlog``; the resulting entries are written
    to the SQLite database at ``args.output`` with ``sqlite_insert``. Each
    repository is stored under its path with the ``args.source`` prefix cut
    off. Work is committed every 10 repositories and the database is
    vacuumed at the end.

    NOTE(review): ``args.prefix`` is accepted by the CLI but not used here.
    """
    import multiprocessing
    import sqlite3
    from contextlib import closing

    # The pool is created before the connection (as before) so that workers
    # never inherit an open database handle. Both are context-managed so the
    # worker processes and the connection are released on every exit path;
    # a bare ``with sqlite3.connect(...)`` only commits/rolls back and never
    # closes the connection.
    with multiprocessing.Pool() as pool, \
            closing(sqlite3.connect(args.output)) as conn:
        cursor = conn.cursor()
        for statement in SCHEMA:
            cursor.execute(statement)
        conn.commit()

        count = 0
        results = pool.imap_unordered(get_gitlog, gitwalk_rec(args.source))
        for fp, gitlog in results:
            if fp is None:
                continue
            count += 1
            print(count, fp)
            repo = fp[len(args.source):]
            for entry in gitlog:
                sqlite_insert(entry, p=repo, db=cursor, args=args)
            # Commit in batches to bound the size of the open transaction.
            if count % 10 == 0:
                conn.commit()

        conn.commit()
        cursor.execute('VACUUM')
        conn.commit()
def get_gitlog(fp: str):
    """Run ``GIT_CMD`` inside *fp* and return ``(fp, entries)``.

    On success ``entries`` is the list produced by ``gource_format`` from
    git's stdout. If git exits non-zero, ``entries`` is a one-element list
    holding a ``GitLogException`` that carries whatever git wrote to stdout.
    Undecodable bytes in the output are dropped.
    """
    import subprocess

    result = subprocess.run(GIT_CMD, cwd=fp, stdout=subprocess.PIPE)
    text = result.stdout.decode(errors='ignore')
    if result.returncode == 0:
        return fp, list(gource_format(text))
    return fp, [GitLogException(text)]
class GitLogException(Exception):
    """Marks a repository whose ``git log`` run exited with a non-zero status."""
def _row_id(db, insert_sql, select_sql, value):
    """Insert *value* unless it already exists and return the id of its row."""
    db.execute(insert_sql, (value,))
    return db.execute(select_sql, (value,)).fetchone()[0]


def sqlite_insert(l: tuple, p=None, db=None, args=None):
    """Store one parsed log entry, or record an error, through cursor *db*.

    Args:
        l: either a ``(time, user, action, file)`` tuple of strings, or an
            exception instance produced while reading the repository.
        p: repository path the entry belongs to.
        db: SQLite cursor (or connection) used to execute the statements.
        args: parsed CLI namespace; exceptions are written to the ``log``
            table only when ``args.debug`` is true, otherwise they are
            dropped.

    Duplicate users, repos, files and changes are silently ignored
    (``INSERT OR IGNORE``).
    """
    if isinstance(l, BaseException):
        if getattr(args, 'debug', False):
            db.execute(
                'INSERT INTO log(type,repo,message) VALUES(?,?,?);',
                (l.__class__.__name__, p, str(l)))
        return

    time, user, action, path = l[0], l[1], l[2], l[3].lstrip('/')

    # No RETURNING clause: INSERT OR IGNORE yields no row when the value
    # already exists, so the id is always read back with a SELECT. Dropping
    # the clause also removes the SQLite >= 3.35 requirement.
    user_id = _row_id(
        db,
        'INSERT OR IGNORE INTO user(name) VALUES (?)',
        'SELECT id FROM user WHERE name=?',
        user)
    repo_id = _row_id(
        db,
        'INSERT OR IGNORE INTO repo(path) VALUES (?)',
        'SELECT id FROM repo WHERE path=?',
        p)
    file_id = _row_id(
        db,
        'INSERT OR IGNORE INTO file(path) VALUES (?)',
        'SELECT id FROM file WHERE path=?',
        path)

    # Columns are named explicitly so the insert does not depend on the
    # column order of the table definition.
    db.execute(
        'INSERT OR IGNORE INTO change(time,user_id,action,repo_id,file_id) '
        'VALUES (:time,:user_id,:action,:repo_id,:file_id)',
        {
            'time': int(time),
            'user_id': user_id,
            'action': action,
            'repo_id': repo_id,
            'file_id': file_id,
        })
def gource_format(inp: str):
    """Convert git log text into entries by piping it through gource.

    *inp* is sent to ``gource --log-format git --output-custom-log - -`` on
    stdin. For every output line this generator yields a tuple of its four
    ``|``-separated fields; a line with any other field count yields a
    ``GourceDecodeException`` instead. If gource exits non-zero, a single
    ``GourceException`` carrying *inp* is yielded and iteration stops.
    Errors are yielded, never raised.
    """
    import subprocess

    command = ["gource", "--log-format", "git", "--output-custom-log", "-", "-"]
    finished = subprocess.run(command, input=inp.encode(), stdout=subprocess.PIPE)
    if finished.returncode != 0:
        yield GourceException(inp)
        return

    for line in finished.stdout.decode().splitlines():
        fields = line.split('|')
        if len(fields) == 4:
            yield tuple(fields)
        else:
            yield GourceDecodeException(str(fields))
# Shell pipeline kept for reference (presumably what this function was derived from):
# sed "s, \([ACDMRTU]\)\t, \1\t$REL/," | gource --log-format git --output-custom-log - -
class GourceException(Exception):
    """Marks a gource run that exited with a non-zero status."""
class GourceDecodeException(Exception):
    """Marks a gource output line that did not split into exactly four fields."""
def gitwalk_rec(root: str):
    """Recursively yield every directory at or below *root* ending in ``.git``.

    A path that is not a directory yields nothing. A directory whose path
    ends in ``.git`` is yielded and not descended into. The entries of every
    other directory are visited in random order. Directories that cannot be
    listed are reported on stderr and skipped, so one unreadable directory
    does not abort the whole walk.
    """
    import os
    import random
    import sys

    if not os.path.isdir(root):
        return
    if root.endswith('.git'):
        yield root
        return

    try:
        names = os.listdir(root)
    except OSError as exc:
        # Covers permission errors and directories removed since the isdir
        # check above.
        print("skipping unreadable directory %s: %s" % (root, exc),
              file=sys.stderr)
        return

    random.shuffle(names)
    for name in names:
        yield from gitwalk_rec(os.path.join(root, name))
def select(args: Namespace):
    """Print the stored changes as ``time|user|action|path`` lines.

    Rows are read from the SQLite database at ``args.output`` in ascending
    time order, optionally filtered by ``args.user_regex`` (case-insensitive
    regular expression search) and by the SQL ``LIKE`` patterns
    ``args.user_like``, ``args.repo_like`` and ``args.file_like``; all given
    filters must match. User names are renamed first through
    ``args.user_mapping`` and then through ``args.user_file``. The printed
    path is the repo path joined with the file path.
    """
    import os
    import re
    import sqlite3
    from contextlib import closing

    def regexp(pattern, value, search=re.search):
        # SQLite evaluates ``X REGEXP Y`` as regexp(Y, X): pattern comes first.
        return 1 if search(pattern, value, flags=re.IGNORECASE) else 0

    filters = (
        ("user.name REGEXP ?", args.user_regex),
        ("user.name LIKE ?", args.user_like),
        ("repo.path LIKE ?", args.repo_like),
        ("file.path LIKE ?", args.file_like),
    )
    active = [(clause, value) for clause, value in filters if value]

    statement = STMT_SELECT
    if active:
        statement += " WHERE " + " AND ".join(clause for clause, _ in active)
    statement += " ORDER BY time ASC"
    params = [value for _, value in active]

    # ``closing`` releases the connection; sqlite3's own context manager only
    # commits or rolls back and leaves the connection open.
    with closing(sqlite3.connect(args.output)) as db:
        db.create_function('regexp', 2, regexp)
        for time, username, action, repo, file_path in db.execute(statement, params):
            # Direct dict lookups; the second mapping sees the result of the
            # first, exactly as two successive renames.
            username = args.user_mapping.get(username, username)
            username = args.user_file.get(username, username)
            path = os.path.join(repo, file_path)
            print('|'.join((str(time), username, action, path)))
def render(args: Namespace) -> None:
    """Handler for the ``render`` sub-command; currently only prints *args*."""
    print("{}".format(args))
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|