Tobias Simetsreiter 2 жил өмнө
parent
commit
0ccbefe885
1 өөрчлөгдсөн 90 нэмэгдсэн, 32 устгасан
  1. + 90 - 32
      multigource.py

+ 90 - 32
multigource.py

@@ -1,23 +1,57 @@
 #!/usr/bin/env python3
 
 from argparse import ArgumentParser,Namespace
-import re
 
 GIT_CMD = ["git", "log", "--all", '--pretty=format:user:%aN%n%ct', "--reverse", "--raw", "--encoding=UTF-8", "--no-renames"]
-STMT_CREATE='''
-CREATE TABLE IF NOT EXISTS gourcelog (
+SCHEMA=[
+'''
+CREATE TABLE IF NOT EXISTS user (
     id INTEGER NOT NULL PRIMARY KEY,
-    time DATETIME NOT NULL,
-   	user text,
-	action text,
-	path text
+    name TEXT UNIQUE
+);
+''',
+'''
+CREATE TABLE IF NOT EXISTS repo (
+    id INTEGER NOT NULL PRIMARY KEY,
+    path TEXT UNIQUE
+);
+''',
+'''
+CREATE TABLE IF NOT EXISTS file (
+    id INTEGER NOT NULL PRIMARY KEY,
+    path TEXT UNIQUE
 );
+''',
 '''
-STMT_INSERT='''
-INSERT OR IGNORE INTO gourcelog VALUES (@id,@time,@user,@action,@path)
+CREATE TABLE IF NOT EXISTS change (
+    time DATETIME NOT NULL,
+   	user_id INTEGER NOT NULL,
+	action TEXT NOT NULL,
+	repo_id INTEGER NOT NULL,
+    file_id INTEGER NOT NULL,
+    UNIQUE(time,user_id,repo_id,file_id,action)
+    FOREIGN KEY(user_id) REFERENCES user(id)
+    FOREIGN KEY(repo_id) REFERENCES repo(id)
+    FOREIGN KEY(file_id) REFERENCES file(id)
+);
+''',
 '''
+CREATE TABLE IF NOT EXISTS log (
+    id INTEGER NOT NULL PRIMARY KEY,
+    type TEXT,
+    time DATETIME DEFAULT CURRENT_TIMESTAMP,
+    repo TEXT,
+    message TEXT
+);
+''',
+]
+
 STMT_SELECT='''
-SELECT time,user,action,path FROM gourcelog ORDER BY time ASC
+SELECT time,user.name as user,action,repo.path as repo, file.path as file FROM change
+LEFT JOIN user ON user.id = change.user_id
+LEFT JOIN repo ON repo.id = change.repo_id
+LEFT JOIN file ON file.id = change.file_id
+ORDER BY time ASC
 '''
 
 def main():
@@ -28,6 +62,7 @@ def main():
 
 def parser():
     p = ArgumentParser()
+    p.add_argument('-d','--debug', action='store_true')
     p.set_defaults(func=lambda x: p.print_help())
     sub = p.add_subparsers()
     p_render = sub.add_parser('render')
@@ -52,7 +87,8 @@ def gitjoin(args: Namespace):
 
     with sqlite3.connect(args.output) as conn:
         db = conn.cursor()
-        db.execute(STMT_CREATE)
+        for STMT in SCHEMA:
+            db.execute(STMT)
         conn.commit()
 
         i = 0
@@ -63,36 +99,47 @@ def gitjoin(args: Namespace):
             print(i,fp)
             p = fp[len(args.source):]
 
-            list(map(partial(sqlite_insert, p=p, db=db), gource_format(gitlog)))
+            list(map(partial(sqlite_insert, p=p, db=db, args=args), gitlog))
             if (i%10) == 0:
                 conn.commit()
-            if (i%200) == 0:
-                conn.commit()
-                print('VACUUM')
-                db.execute('VACUUM')
+            # return  
+        conn.commit()
         db.execute('VACUUM')
+        conn.commit()
 
 def get_gitlog(fp: str):
     import subprocess
     proc = subprocess.run(GIT_CMD, cwd=fp, stdout=subprocess.PIPE)
+    gitlog = proc.stdout.decode(errors='ignore')
     if not proc.returncode == 0:
-        return None,None
-    try:
-        gitlog = proc.stdout.decode(errors='ignore')
-    except Exception as ex:
-        print(ex)
-        return None,None
-    return fp,gitlog
-
-def sqlite_insert(l: tuple, p=None, db=None):
-    import os
-    path = os.path.join(p, l[3].lstrip('/'))
-    db.execute(STMT_INSERT, {
-        'id': hash(l),
+        return fp,[GitLogException(gitlog)]
+    return fp,list(gource_format(gitlog))
+
+class GitLogException(Exception):
+    pass
+
+def sqlite_insert(l: tuple, p=None, db=None, args=None):
+    if isinstance(l, BaseException):
+        if args.debug:
+            db.execute('INSERT INTO log(type,repo,message) VALUES(?,?,?);', (l.__class__.__name__,p,str(l)))
+        return
+
+    fp = l[3].lstrip('/')
+    db.execute('INSERT OR IGNORE INTO user(name) VALUES (?) RETURNING id', (l[1],))
+    user_id = db.execute('SELECT id FROM user WHERE name=?', (l[1],)).fetchone()[0]
+    # print("user:", user_id)
+    db.execute('INSERT OR IGNORE INTO repo(path) VALUES (?) RETURNING id', (p,))
+    repo_id = db.execute('SELECT id FROM repo WHERE path=?', (p,)).fetchone()[0]
+    # print("repo:",repo_id)
+    db.execute('INSERT OR IGNORE INTO file(path) VALUES (?) RETURNING id', (fp,))
+    file_id = db.execute('SELECT id FROM file WHERE path=?', (fp,)).fetchone()[0]
+    # print("file:",file_id)
+    db.execute("INSERT OR IGNORE INTO change VALUES (@time,@user_id,@action,@repo_id,@file_id)", {
         'time': int(l[0]),
-        'user': l[1],
+        'user_id': user_id,
         'action': l[2],
-        'path': path,
+        'repo_id': repo_id,
+        'file_id': file_id,
     })
 
 def gource_format(inp: str):
@@ -101,14 +148,22 @@ def gource_format(inp: str):
     with subprocess.Popen(GOURCE_CMD, stdout=subprocess.PIPE, stdin=subprocess.PIPE) as proc:
         out, err = proc.communicate(input=inp.encode())
         if proc.returncode != 0:
+            yield GourceException(inp)
             return
     for l in out.decode().splitlines():
         ls = l.split('|')
         if len(ls) != 4:
+            yield GourceDecodeException(str(ls))
             continue
         yield tuple(ls)
     # sed "s, \([ACDMRTU]\)\t, \1\t$REL/," | gource  --log-format git --output-custom-log - -
 
+class GourceException(Exception):
+    pass
+
+class GourceDecodeException(Exception):
+    pass
+
 def gitwalk_rec(root: str):
     import os
     import random
@@ -126,9 +181,12 @@ def gitwalk_rec(root: str):
 
 def select(args: Namespace):
     import sqlite3
+    import os
     with sqlite3.connect(args.output) as db:
         for i in db.execute(STMT_SELECT):
-            print('|'.join((str(i[0]),i[1],i[2],i[3])))
+            # print(i)
+            path = os.path.join(i[3],i[4])
+            print('|'.join((str(i[0]),i[1],i[2],path)))
 
 def render(args: Namespace):
     print(args)