|
|
@@ -6,15 +6,15 @@ import re
|
|
|
GIT_CMD = ["git", "log", "--all", '--pretty=format:user:%aN%n%ct', "--reverse", "--raw", "--encoding=UTF-8", "--no-renames"]
|
|
|
STMT_CREATE='''
|
|
|
CREATE TABLE IF NOT EXISTS gourcelog (
|
|
|
+ id INTEGER NOT NULL PRIMARY KEY,
|
|
|
time DATETIME NOT NULL,
|
|
|
user text,
|
|
|
action text,
|
|
|
- path text,
|
|
|
- UNIQUE(time,user,action,path)
|
|
|
+ path text
|
|
|
);
|
|
|
'''
|
|
|
STMT_INSERT='''
|
|
|
-INSERT OR IGNORE INTO gourcelog VALUES (@time,@user,@action,@path)
|
|
|
+INSERT OR IGNORE INTO gourcelog VALUES (@id,@time,@user,@action,@path)
|
|
|
'''
|
|
|
STMT_SELECT='''
|
|
|
SELECT time,user,action,path FROM gourcelog ORDER BY time ASC
|
|
|
@@ -44,27 +44,33 @@ def parser():
|
|
|
return p
|
|
|
|
|
|
def gitjoin(args: Namespace):
    """Collect the git history of every repository under a root into SQLite.

    Repositories found by ``gitwalk_rec(args.source)`` are read in parallel
    by ``get_gitlog``; each resulting log is converted with
    ``gource_format`` and stored row by row through ``sqlite_insert``.

    Args:
        args: Parsed command-line arguments. ``args.source`` is the root
            directory to scan and ``args.output`` the SQLite database file.
    """
    import multiprocessing
    import sqlite3
    from contextlib import closing

    # Both resources are released on exit, including on error: the pool's
    # context manager terminates the workers, and ``closing`` closes the
    # connection (sqlite3's own ``with`` support only commits/rolls back).
    with multiprocessing.Pool() as pool, \
            closing(sqlite3.connect(args.output)) as conn:
        db = conn.cursor()
        db.execute(STMT_CREATE)
        conn.commit()

        done = 0
        # Results arrive in completion order, not in walk order.
        for fp, gitlog in pool.imap_unordered(get_gitlog, gitwalk_rec(args.source)):
            # get_gitlog returns (None, None) for a repository it gave up on.
            if fp is None:
                continue
            done += 1
            print(fp)
            # Repository path relative to the scanned root; used as the
            # prefix of every file path stored for this repository.
            p = fp[len(args.source):]

            for row in gource_format(gitlog):
                sqlite_insert(row, p=p, db=db)
            # Commit per repository; VACUUM cannot run inside a transaction.
            conn.commit()
            if done % 200 == 0:
                print('VACUUM')
                db.execute('VACUUM')
        db.execute('VACUUM')
|
|
|
|
|
|
-def get_gitlog(fp):
|
|
|
+def get_gitlog(fp: str):
|
|
|
import subprocess
|
|
|
proc = subprocess.run(GIT_CMD, cwd=fp, stdout=subprocess.PIPE)
|
|
|
if not proc.returncode == 0:
|
|
|
@@ -76,16 +82,16 @@ def get_gitlog(fp):
|
|
|
return None,None
|
|
|
return fp,gitlog
|
|
|
|
|
|
def sqlite_insert(l: tuple, p=None, db=None):
    """Insert one gource log row into the ``gourcelog`` table.

    Args:
        l: The four fields ``(time, user, action, path)`` of one log line,
            all strings; ``time`` must be parseable by ``int``.
        p: Prefix placed in front of the row's path (the repository's
            location). ``None`` is treated as an empty prefix.
        db: Object exposing a sqlite3-style ``execute(sql, params)``
            (a connection or a cursor).
    """
    import hashlib
    import os

    path = os.path.join(p or '', l[3].lstrip('/'))
    stamp = int(l[0])

    # The row id is the primary key and STMT_INSERT is an INSERT OR IGNORE,
    # so the id decides which rows count as duplicates. It is derived from
    # the values actually stored (including the prefixed path) so the same
    # change in two repositories stays distinct. A real digest is used
    # rather than built-in hash(), whose string hashing is salted per
    # process and therefore differs between runs.
    # NUL separates the fields because it cannot occur inside a file path.
    key = '\0'.join((str(stamp), l[1], l[2], path))
    digest = hashlib.blake2b(
        key.encode('utf-8', 'surrogatepass'), digest_size=8
    ).digest()

    db.execute(STMT_INSERT, {
        # 8 bytes read as signed: fits SQLite's 64-bit INTEGER exactly.
        'id': int.from_bytes(digest, 'big', signed=True),
        'time': stamp,
        'user': l[1],
        'action': l[2],
        'path': path,
    })
|
|
|
|
|
|
def gource_format(inp: str):
|
|
|
import subprocess
|
|
|
@@ -98,17 +104,19 @@ def gource_format(inp: str):
|
|
|
ls = l.split('|')
|
|
|
if len(ls) != 4:
|
|
|
continue
|
|
|
- yield ls
|
|
|
+ yield tuple(ls)
|
|
|
# sed "s, \([ACDMRTU]\)\t, \1\t$REL/," | gource --log-format git --output-custom-log - -
|
|
|
|
|
|
def gitwalk_rec(root: str):
|
|
|
import os
|
|
|
+ import random
|
|
|
if not os.path.isdir(root):
|
|
|
return
|
|
|
if root.endswith('.git'):
|
|
|
yield root
|
|
|
else:
|
|
|
ls = os.listdir(root)
|
|
|
+ random.shuffle(ls)
|
|
|
for d in ls:
|
|
|
dj = os.path.join(root,d)
|
|
|
for p in gitwalk_rec(dj):
|
|
|
@@ -117,8 +125,8 @@ def gitwalk_rec(root: str):
|
|
|
def select(args: Namespace):
    """Print every stored log row as a ``time|user|action|path`` line.

    Rows come from ``STMT_SELECT`` run against the database file
    ``args.output`` and are written to standard output, one per line.

    Args:
        args: Parsed command-line arguments; only ``args.output`` is read.
    """
    import sqlite3
    from contextlib import closing

    # ``closing`` is needed to release the file: a sqlite3 connection used
    # directly as a context manager only commits/rolls back on exit.
    with closing(sqlite3.connect(args.output)) as db:
        for row in db.execute(STMT_SELECT):
            # user/action/path are nullable columns; a NULL becomes an
            # empty field instead of making str.join raise TypeError.
            print('|'.join('' if col is None else str(col) for col in row))
|
|
|
|
|
|
def render(args: Namespace):
|
|
|
print(args)
|