multigource.py 7.8 KB


  1. #!/usr/bin/env python3
from argparse import Action, ArgumentError, ArgumentParser, Namespace
  3. GIT_CMD = ["git", "log",
  4. "--all", '--pretty=format:user:%aN%n%ct',
  5. "--reverse", "--raw", "--encoding=UTF-8",
  6. "--no-renames", "--no-show-signature"]
  7. SCHEMA=[
  8. '''
  9. CREATE TABLE IF NOT EXISTS user (
  10. id INTEGER NOT NULL PRIMARY KEY,
  11. name TEXT UNIQUE
  12. );
  13. ''',
  14. '''
  15. CREATE TABLE IF NOT EXISTS repo (
  16. id INTEGER NOT NULL PRIMARY KEY,
  17. path TEXT UNIQUE
  18. );
  19. ''',
  20. '''
  21. CREATE TABLE IF NOT EXISTS file (
  22. id INTEGER NOT NULL PRIMARY KEY,
  23. path TEXT UNIQUE
  24. );
  25. ''',
  26. '''
  27. CREATE TABLE IF NOT EXISTS change (
  28. time DATETIME NOT NULL,
  29. user_id INTEGER NOT NULL,
  30. action TEXT NOT NULL,
  31. repo_id INTEGER NOT NULL,
  32. file_id INTEGER NOT NULL,
  33. UNIQUE(time,user_id,repo_id,file_id,action)
  34. FOREIGN KEY(user_id) REFERENCES user(id)
  35. FOREIGN KEY(repo_id) REFERENCES repo(id)
  36. FOREIGN KEY(file_id) REFERENCES file(id)
  37. );
  38. ''',
  39. '''
  40. CREATE TABLE IF NOT EXISTS log (
  41. id INTEGER NOT NULL PRIMARY KEY,
  42. type TEXT,
  43. time DATETIME DEFAULT CURRENT_TIMESTAMP,
  44. repo TEXT,
  45. message TEXT
  46. );
  47. ''',
  48. ]
  49. STMT_SELECT='''
  50. SELECT time,user.name as user,action,repo.path as repo, file.path as file FROM change
  51. LEFT JOIN user ON user.id = change.user_id
  52. LEFT JOIN repo ON repo.id = change.repo_id
  53. LEFT JOIN file ON file.id = change.file_id
  54. '''
  55. def main():
  56. p = parser()
  57. args = p.parse_args()
  58. import sys
  59. sys.exit(args.func(args) or 0)
  60. def parser():
  61. p = ArgumentParser()
  62. p.add_argument('-d','--debug', action='store_true')
  63. p.set_defaults(func=lambda x: p.print_help())
  64. sub = p.add_subparsers()
  65. p_render = sub.add_parser('render')
  66. p_render.set_defaults(func=render)
  67. p_render.add_argument('--gource', default='gource')
  68. p_join = sub.add_parser('gitjoin')
  69. p_join.set_defaults(func=gitjoin)
  70. p_join.add_argument('source')
  71. p_join.add_argument('output')
  72. p_join.add_argument('-p','--prefix')
  73. p_select = sub.add_parser('select')
  74. p_select.add_argument('output')
  75. p_select.add_argument('--repo-like', default=None, type=str)
  76. p_select.add_argument('--file-like', default=None, type=str)
  77. p_select.add_argument('--user-like', default=None, type=str)
  78. p_select.add_argument('--user-regex', default=None, type=str)
  79. p_select.add_argument('--user-mapping', default={}, action=StoreDictKeyPair)
  80. p_select.add_argument('--user-file', default={}, action=YamlDict)
  81. p_select.set_defaults(func=select)
  82. return p
  83. class StoreDictKeyPair(Action):
  84. def __call__(self, parser, namespace, values, option_string=None):
  85. my_dict = {}
  86. for kv in values.split(","):
  87. k,v = kv.split("=")
  88. my_dict[k] = v
  89. setattr(namespace, self.dest, my_dict)
  90. class YamlDict(Action):
  91. def __call__(self, parser, namespace, values, option_string=None):
  92. import yaml
  93. with open(values) as fd:
  94. my_dict = yaml.safe_load(fd)
  95. assert isinstance(my_dict, dict)
  96. for k,v in my_dict.items():
  97. assert isinstance(k, str)
  98. assert isinstance(v, str)
  99. setattr(namespace, self.dest, my_dict)
  100. def gitjoin(args: Namespace):
  101. import sqlite3
  102. import multiprocessing
  103. from functools import partial
  104. pool = multiprocessing.Pool()
  105. with sqlite3.connect(args.output) as conn:
  106. db = conn.cursor()
  107. for STMT in SCHEMA:
  108. db.execute(STMT)
  109. conn.commit()
  110. i = 0
  111. for fp,gitlog in pool.imap_unordered(get_gitlog, gitwalk_rec(args.source)):
  112. if fp == None:
  113. continue
  114. i += 1
  115. print(i,fp)
  116. p = fp[len(args.source):]
  117. list(map(partial(sqlite_insert, p=p, db=db, args=args), gitlog))
  118. if (i%10) == 0:
  119. conn.commit()
  120. # return
  121. conn.commit()
  122. db.execute('VACUUM')
  123. conn.commit()
  124. def get_gitlog(fp: str):
  125. import subprocess
  126. proc = subprocess.run(GIT_CMD, cwd=fp, stdout=subprocess.PIPE)
  127. gitlog = proc.stdout.decode(errors='ignore')
  128. if not proc.returncode == 0:
  129. return fp,[GitLogException(gitlog)]
  130. return fp,list(gource_format(gitlog))
  131. class GitLogException(Exception):
  132. pass
  133. def sqlite_insert(l: tuple, p=None, db=None, args=None):
  134. if isinstance(l, BaseException):
  135. if args.debug:
  136. db.execute('INSERT INTO log(type,repo,message) VALUES(?,?,?);', (l.__class__.__name__,p,str(l)))
  137. return
  138. fp = l[3].lstrip('/')
  139. db.execute('INSERT OR IGNORE INTO user(name) VALUES (?) RETURNING id', (l[1],))
  140. user_id = db.execute('SELECT id FROM user WHERE name=?', (l[1],)).fetchone()[0]
  141. # print("user:", user_id)
  142. db.execute('INSERT OR IGNORE INTO repo(path) VALUES (?) RETURNING id', (p,))
  143. repo_id = db.execute('SELECT id FROM repo WHERE path=?', (p,)).fetchone()[0]
  144. # print("repo:",repo_id)
  145. db.execute('INSERT OR IGNORE INTO file(path) VALUES (?) RETURNING id', (fp,))
  146. file_id = db.execute('SELECT id FROM file WHERE path=?', (fp,)).fetchone()[0]
  147. # print("file:",file_id)
  148. db.execute("INSERT OR IGNORE INTO change VALUES (@time,@user_id,@action,@repo_id,@file_id)", {
  149. 'time': int(l[0]),
  150. 'user_id': user_id,
  151. 'action': l[2],
  152. 'repo_id': repo_id,
  153. 'file_id': file_id,
  154. })
  155. def gource_format(inp: str):
  156. import subprocess
  157. GOURCE_CMD = ["gource", "--log-format", "git", "--output-custom-log", "-", "-"]
  158. with subprocess.Popen(GOURCE_CMD, stdout=subprocess.PIPE, stdin=subprocess.PIPE) as proc:
  159. out, err = proc.communicate(input=inp.encode())
  160. if proc.returncode != 0:
  161. yield GourceException(inp)
  162. return
  163. for l in out.decode().splitlines():
  164. ls = l.split('|')
  165. if len(ls) != 4:
  166. yield GourceDecodeException(str(ls))
  167. continue
  168. yield tuple(ls)
  169. # sed "s, \([ACDMRTU]\)\t, \1\t$REL/," | gource --log-format git --output-custom-log - -
  170. class GourceException(Exception):
  171. pass
  172. class GourceDecodeException(Exception):
  173. pass
  174. def gitwalk_rec(root: str):
  175. import os
  176. import random
  177. if not os.path.isdir(root):
  178. return
  179. if root.endswith('.git'):
  180. yield root
  181. else:
  182. ls = os.listdir(root)
  183. random.shuffle(ls)
  184. for d in ls:
  185. dj = os.path.join(root,d)
  186. for p in gitwalk_rec(dj):
  187. yield p
  188. def select(args: Namespace):
  189. import sqlite3
  190. import os
  191. with sqlite3.connect(args.output) as db:
  192. import re
  193. def regexp(y, x, search=re.search):
  194. return 1 if search(y, x, flags=re.IGNORECASE) else 0
  195. db.create_function('regexp', 2, regexp)
  196. STMT = STMT_SELECT
  197. ARGS = []
  198. WHERE_VERB = "WHERE"
  199. if args.user_regex:
  200. STMT += " "+WHERE_VERB+" user.name REGEXP ? "
  201. ARGS.append(args.user_regex)
  202. WHERE_VERB = "AND"
  203. if args.user_like:
  204. STMT += " "+WHERE_VERB+" user.name LIKE ? "
  205. ARGS.append(args.user_like)
  206. WHERE_VERB = "AND"
  207. if args.repo_like:
  208. STMT += " "+WHERE_VERB+" repo.path LIKE ? "
  209. ARGS.append(args.repo_like)
  210. WHERE_VERB = "AND"
  211. if args.file_like:
  212. STMT += " "+WHERE_VERB+" file.path LIKE ? "
  213. ARGS.append(args.file_like)
  214. WHERE_VERB = "AND"
  215. for i in db.execute(STMT + " ORDER BY time ASC", ARGS):
  216. # print(i)
  217. username = i[1]
  218. for k,v in args.user_mapping.items():
  219. if username == k:
  220. username = v
  221. break
  222. for k,v in args.user_file.items():
  223. if username == k:
  224. username = v
  225. break
  226. path = os.path.join(i[3],i[4])
  227. print('|'.join((str(i[0]),username,i[2],path)))
  228. def render(args: Namespace):
  229. print(args)
  230. if __name__=='__main__':
  231. main()