Tobias Simetsreiter 2 سال پیش
والد
کامیت
e78c302287
1 فایل تغییر یافته به همراه 24 افزوده شده و 16 حذف شده
  1. 24 16
      multigource.py

+ 24 - 16
multigource.py

@@ -6,15 +6,15 @@ import re
 GIT_CMD = ["git", "log", "--all", '--pretty=format:user:%aN%n%ct', "--reverse", "--raw", "--encoding=UTF-8", "--no-renames"]
 STMT_CREATE='''
 CREATE TABLE IF NOT EXISTS gourcelog (
+    id INTEGER NOT NULL PRIMARY KEY,
     time DATETIME NOT NULL,
    	user text,
 	action text,
-	path text,
-    UNIQUE(time,user,action,path)
+	path text
 );
 '''
 STMT_INSERT='''
-INSERT OR IGNORE INTO gourcelog VALUES (@time,@user,@action,@path)
+INSERT OR IGNORE INTO gourcelog VALUES (@id,@time,@user,@action,@path)
 '''
 STMT_SELECT='''
 SELECT time,user,action,path FROM gourcelog ORDER BY time ASC
@@ -44,27 +44,33 @@ def parser():
     return p
 
 def gitjoin(args: Namespace):
-    print(args)
     import sqlite3
     import multiprocessing
     from functools import partial
 
     pool = multiprocessing.Pool()
 
-    with sqlite3.connect(args.output) as db:
+    with sqlite3.connect(args.output) as conn:
+        db = conn.cursor()
         db.execute(STMT_CREATE)
-        db.commit()
+        conn.commit()
 
-        for fp,gitlog in pool.imap(get_gitlog, gitwalk_rec(args.source)):
+        i = 0
+        for fp,gitlog in pool.imap_unordered(get_gitlog, gitwalk_rec(args.source)):
             if fp == None:
                 continue
+            i += 1
             print(fp)
             p = fp[len(args.source):]
 
             list(map(partial(sqlite_insert, p=p, db=db), gource_format(gitlog)))
-            db.commit()
+            conn.commit()
+            if (i%200) == 0:
+                print('VACUUM')
+                db.execute('VACUUM')
+        db.execute('VACUUM')
 
-def get_gitlog(fp):
+def get_gitlog(fp: str):
     import subprocess
     proc = subprocess.run(GIT_CMD, cwd=fp, stdout=subprocess.PIPE)
     if not proc.returncode == 0:
@@ -76,16 +82,16 @@ def get_gitlog(fp):
         return None,None
     return fp,gitlog
 
-def sqlite_insert(l, p=None, db=None):
+def sqlite_insert(l: tuple, p=None, db=None):
     import os
-    l[3] = os.path.join(p, l[3].lstrip('/'))
+    path = os.path.join(p, l[3].lstrip('/'))
     db.execute(STMT_INSERT, {
+        'id': hash(l),
         'time': int(l[0]),
         'user': l[1],
         'action': l[2],
-        'path': l[3],
+        'path': path,
     })
-    # print('|'.join(l))
 
 def gource_format(inp: str):
     import subprocess
@@ -98,17 +104,19 @@ def gource_format(inp: str):
         ls = l.split('|')
         if len(ls) != 4:
             continue
-        yield ls
+        yield tuple(ls)
     # sed "s, \([ACDMRTU]\)\t, \1\t$REL/," | gource  --log-format git --output-custom-log - -
 
 def gitwalk_rec(root: str):
     import os
+    import random
     if not os.path.isdir(root):
         return
     if root.endswith('.git'):
         yield root
     else:
         ls = os.listdir(root)
+        random.shuffle(ls)
         for d in ls:
             dj = os.path.join(root,d)
             for p in gitwalk_rec(dj):
@@ -117,8 +125,8 @@ def gitwalk_rec(root: str):
 def select(args: Namespace):
     import sqlite3
     with sqlite3.connect(args.output) as db:
-        for it in db.execute(STMT_SELECT):
-            print('|'.join([str(i) for i in it]))
+        for i in db.execute(STMT_SELECT):
+            print('|'.join((str(i[0]),i[1],i[2],i[3])))
 
 def render(args: Namespace):
     print(args)