VOOZH about

URL: https://dev.to/meroline_lizlent/gitpython-2li8

⇱ GitPython - DEV Community


Have you ever written a deployment script with subprocess.run(["git", "pull"]) and felt there must be a better way? There is! GitPython gives you a real Python API for almost anything Git can do: clone, commit, create branches, compute diffs, read history, and more.

Installation

pip install gitpython

GitPython requires git to be installed on your system. It shells out to the git executable behind the scenes and translates the results into Python objects as cleanly as possible.

Opening a Repository

from git import Repo, InvalidGitRepositoryError

# Open an existing repo
repo = Repo("/path/to/your/project")

# Open the repo at the current working directory
repo = Repo(".")

# Open from any subdirectory (search_parent_directories=True)
repo = Repo(".", search_parent_directories=True)

# Safe opening with error handling
from git import NoSuchPathError

try:
    repo = Repo("/some/path")
except NoSuchPathError:
    # Raised when the path itself does not exist on disk.
    print("Path does not exist")
except InvalidGitRepositoryError:
    # Raised when the path exists but is not a git repository.
    print("Not a git repository")

Repository Basics

from git import Repo

repo = Repo(".")

# Basic info
print(repo.working_dir) # /home/user/myproject
print(repo.git_dir) # /home/user/myproject/.git
print(repo.is_dirty()) # True if there are uncommitted changes
print(repo.untracked_files) # ['new_file.py', 'notes.txt']
print(repo.active_branch.name) # main

# Check if repo has any commits
print(repo.head.is_valid()) # False if repo is empty

Cloning a Repository

from git import Repo

# Clone a public repo
repo = Repo.clone_from(
 "https://github.com/user/myproject.git",
 "/tmp/myproject"
)

# Clone a specific branch
repo = Repo.clone_from(
 "https://github.com/user/myproject.git",
 "/tmp/myproject",
 branch="develop"
)

# Clone with depth (shallow clone — faster for CI)
repo = Repo.clone_from(
 "https://github.com/user/myproject.git",
 "/tmp/myproject",
 depth=1
)

# Clone with SSH
repo = Repo.clone_from(
 "git@github.com:user/myproject.git",
 "/tmp/myproject"
)

Staging and Committing

from git import Repo
import os

repo = Repo(".")

# Stage specific files
repo.index.add(["README.md", "src/main.py"])

# Stage all changes (like `git add .`)
repo.git.add(A=True)

# Or using the index directly
changed_files = [item.a_path for item in repo.index.diff(None)]
untracked = repo.untracked_files
repo.index.add(changed_files + untracked)

# Commit the staged changes. The explicit dates pin the author and commit
# timestamps to the given ISO 8601 values.
commit = repo.index.commit(
    "feat: add user authentication module",
    author_date="2024-01-15T10:00:00",
    commit_date="2024-01-15T10:00:00",
)
# Keep a space between the short hash and the message so they do not run together.
print(f"Committed: {commit.hexsha[:7]} {commit.message.strip()}")

Commit with Custom Author

from git import Repo, Actor

repo = Repo(".")

author = Actor("Alice", "alice@example.com")
committer = Actor("CI Bot", "ci@example.com")

repo.index.add(["deploy.yaml"])
repo.index.commit(
 "chore: update deployment config",
 author=author,
 committer=committer,
)

Branches

from git import Repo

repo = Repo(".")

# List all branches
for branch in repo.branches:
 print(branch.name)

# List remote branches
for ref in repo.remotes.origin.refs:
 print(ref.name)

# Create a new branch
new_branch = repo.create_head("feature/login")

# Switch to a branch (checkout)
new_branch.checkout()

# Or one-liner
repo.git.checkout("-b", "feature/signup")

# Delete a branch
repo.delete_head("feature/old-stuff", force=True)

# Check current branch
print(repo.active_branch.name) # feature/signup (the branch checked out last by the one-liner above)

Working with Remotes

from git import Repo

repo = Repo(".")

# List remotes
for remote in repo.remotes:
 print(f"{remote.name}: {remote.url}")

# Fetch
repo.remotes.origin.fetch()

# Pull
repo.remotes.origin.pull()

# Push
repo.remotes.origin.push()

# Push a specific branch
repo.remotes.origin.push(refspec="feature/login:feature/login")

# Add a new remote
repo.create_remote("upstream", "https://github.com/original/repo.git")

# Remove a remote
repo.delete_remote("upstream")

Reading Commit History

from git import Repo

repo = Repo(".")

# Iterate over commit history (the 10 most recent commits reachable from main)
for commit in repo.iter_commits("main", max_count=10):
    # Header line: short hash, author timestamp, author name.
    print(f"{commit.hexsha[:7]} {commit.authored_datetime} {commit.author.name}")
    # Commit message on its own indented line, then a blank separator line.
    print(f"  {commit.message.strip()}")
    print()
a3f1c22 2024-01-15 10:30:00 Alice
 feat: add payment gateway

7b2d891 2024-01-14 16:45:00 Bob
 fix: correct validation logic

Filter by Author, Path, or Date

from datetime import datetime

# Commits by a specific author
for commit in repo.iter_commits("main", author="Alice"):
    print(commit.message.strip())

# Commits touching a specific file
for commit in repo.iter_commits("main", paths="src/auth.py"):
    print(f"{commit.hexsha[:7]} {commit.message.strip()}")

# Commits since a date
since = datetime(2024, 1, 1)
# Let git filter by date (passed through as `--since`, which compares against
# the committer date). Stopping a Python loop at the first "too old" commit is
# unreliable: rebased or cherry-picked commits can carry author dates that are
# out of order relative to their position in history.
for commit in repo.iter_commits("main", since=since.isoformat()):
    print(commit.message.strip())

Diffs: What Changed?

from git import Repo

repo = Repo(".")

# Diff between working directory and index (unstaged changes)
for diff in repo.index.diff(None):
 print(f"Modified: {diff.a_path}")

# Diff between index and HEAD (staged changes)
for diff in repo.index.diff("HEAD"):
 print(f"Staged: {diff.a_path}")

# Diff between two commits
commits = list(repo.iter_commits("main", max_count=2))
# A repository with a single commit has nothing to compare against.
if len(commits) == 2:
    newer, older = commits
    # create_patch=True asks git for the patch text; without it diff.diff
    # is empty and nothing would be printed below.
    diffs = older.diff(newer, create_patch=True)

    for diff in diffs:
        # a_path is optional in GitPython's API; fall back to b_path if unset.
        print(f"Changed: {diff.a_path or diff.b_path}")
        if diff.diff:
            patch = diff.diff
            if isinstance(patch, bytes):
                # Patches may contain non-UTF-8 bytes; do not crash on them.
                patch = patch.decode("utf-8", errors="replace")
            print(patch)

Tags

from git import Repo

repo = Repo(".")

# List all tags
for tag in repo.tags:
    print(f"{tag.name} {tag.commit.hexsha[:7]}")

# Create a lightweight tag
repo.create_tag("v1.0.0")

# Create an annotated tag. A tag name can only be created once, so this uses
# a different name from the lightweight tag above.
repo.create_tag(
    "v1.1.0",
    message="Release version 1.1.0",
    ref="main",
)

# Delete a tag
repo.delete_tag("v0.9.0")

# Push tags to remote
repo.remotes.origin.push(tags=True)

Reading File Contents from Git

You can read file contents from any commit without touching the filesystem:

from git import Repo

repo = Repo(".")

# Read a file at HEAD
blob = repo.head.commit.tree["README.md"]
content = blob.data_stream.read().decode("utf-8")
print(content)

# Read from a specific commit
commit = repo.commit("a3f1c22")
blob = commit.tree["src/main.py"]
print(blob.data_stream.read().decode("utf-8"))

# Navigate into subdirectories
blob = repo.head.commit.tree["src"]["auth"]["jwt.py"]
print(blob.data_stream.read().decode("utf-8"))

Submodules

from git import Repo

repo = Repo(".")

# List submodules
for submodule in repo.submodules:
 print(f"{submodule.name}: {submodule.url}")

# Add a submodule
repo.create_submodule("mylib", "libs/mylib", url="https://github.com/user/mylib.git")

# Update all submodules
for submodule in repo.submodules:
 submodule.update(init=True)

Real-World Patterns

Auto-Commit Changed Files

from git import Repo, Actor
from datetime import datetime

def auto_commit(repo_path: str, message: "str | None" = None):
    """Stage and commit every pending change in a repository.

    Args:
        repo_path: Path to the repository's working tree.
        message: Commit message. When omitted (or empty), a timestamped
            ``auto: update YYYY-MM-DD HH:MM`` message is generated.

    Returns:
        The newly created commit, or ``None`` when there was nothing to
        commit.
    """
    repo = Repo(repo_path)

    # untracked_files=True makes brand-new files count as pending changes.
    if not repo.is_dirty(untracked_files=True):
        print("Nothing to commit")
        return None

    # Stage everything
    repo.git.add(A=True)

    msg = message or f"auto: update {datetime.now().strftime('%Y-%m-%d %H:%M')}"
    commit = repo.index.commit(msg, author=Actor("AutoBot", "bot@example.com"))
    print(f"Committed: {commit.hexsha[:7]}")
    return commit

auto_commit(".", "chore: automated sync")

Generate a Changelog

from git import Repo
from collections import defaultdict

def _changelog_category(subject: str) -> str:
    """Map a commit subject line to its changelog section heading."""
    if subject.startswith("feat"):
        return "Features"
    if subject.startswith("fix"):
        return "Bug Fixes"
    if subject.startswith(("chore", "ci")):
        return "Maintenance"
    return "Other"


def generate_changelog(repo_path: str, from_tag: str, to_tag: str = "HEAD") -> str:
    """Build a Markdown changelog for the commits in ``from_tag..to_tag``.

    Commits are grouped by the prefix of their subject line: ``feat`` goes
    under Features, ``fix`` under Bug Fixes, ``chore``/``ci`` under
    Maintenance, and everything else under Other.

    Args:
        repo_path: Path to the repository.
        from_tag: Revision that marks the (exclusive) start of the range.
        to_tag: Revision that marks the (inclusive) end of the range.

    Returns:
        The changelog as a single Markdown string.
    """
    repo = Repo(repo_path)

    categories = defaultdict(list)
    for commit in repo.iter_commits(f"{from_tag}..{to_tag}"):
        # Only the subject (first line of the message) goes into the changelog.
        subject = commit.message.strip().split("\n")[0]
        categories[_changelog_category(subject)].append(subject)

    # Title shows the revision range, with the two tags clearly separated.
    lines = [f"# Changelog: {from_tag}..{to_tag}"]
    for category, items in categories.items():
        lines.append("")  # one blank line before each section heading
        lines.append(f"## {category}")
        lines.extend(f"- {item}" for item in items)

    return "\n".join(lines)

print(generate_changelog(".", "v1.0.0", "v1.1.0"))

Find Who Last Modified a Line (git blame)

from git import Repo

def blame_file(repo_path: str, file_path: str):
    """Print who last modified each line of a file as of HEAD.

    Each output line shows the short commit hash, the author name padded to
    20 columns, and the line's content.

    Args:
        repo_path: Path to the repository.
        file_path: Path of the file to blame, relative to the repository root.
    """
    repo = Repo(repo_path)
    blame = repo.blame("HEAD", file_path)

    for commit, lines in blame or []:
        for line in lines:
            # Per the GitPython API reference, blame lines may be str or
            # bytes; calling .decode() unconditionally fails on str lines.
            if isinstance(line, bytes):
                line = line.decode("utf-8", errors="replace")
            # Strip any trailing newline and let print() supply exactly one,
            # so consecutive lines never run together.
            print(f"{commit.hexsha[:7]} {commit.author.name:<20} {line.rstrip()}")

blame_file(".", "src/auth.py")

Check if Branch Is Behind Remote

from git import Repo

def check_sync_status(repo_path: str):
    """Report how far the active branch is ahead of / behind its upstream.

    Fetches from the ``origin`` remote first so the comparison uses
    up-to-date remote-tracking refs, then prints the number of commits the
    active branch is ahead of and behind its tracking branch.

    Args:
        repo_path: Path to the repository.
    """
    repo = Repo(repo_path)
    origin = repo.remotes.origin
    origin.fetch()

    try:
        branch = repo.active_branch
    except TypeError:
        # GitPython raises TypeError here when HEAD is detached, i.e. when
        # no branch is currently checked out.
        print("HEAD is detached; no branch to compare")
        return

    tracking = branch.tracking_branch()
    if tracking is None:
        print("Branch has no remote tracking")
        return

    # "A..B" selects the commits reachable from B but not from A.
    ahead = list(repo.iter_commits(f"{tracking.name}..{branch.name}"))
    behind = list(repo.iter_commits(f"{branch.name}..{tracking.name}"))

    print(f"Branch '{branch.name}':")
    print(f" Ahead by: {len(ahead)} commit(s)")
    print(f" Behind by: {len(behind)} commit(s)")

check_sync_status(".")

Using the Raw Git Interface

For executing Git commands not exposed through GitPython's API, call repo.git with any git command you want to run:

from git import Repo

repo = Repo(".")

# Any git command as a method call
output = repo.git.log("--oneline", "-5")
print(output)

# git stash
repo.git.stash("save", "work in progress")
repo.git.stash("pop")

# git cherry-pick
repo.git.cherry_pick("a3f1c22")

# git rebase
repo.git.rebase("main")

Summarizing

GitPython elevates Git from a command-line tool you script through the shell to a full-fledged Python API. Whether you are writing deployment tools, code analysis apps, changelog generators, or scripts that massage repository data, GitPython has you covered. No more cutting and parsing subprocess output.

Ideally suited to automation: anything you would normally do by hand, with a series of git commands, can be turned into a neat, testable Python function.