
Github Repo Reader

If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙.

%pip install llama-index-readers-github
%pip install llama-index
# The GithubRepositoryReader runs its fetches with asyncio's
# run_until_complete. Since the Jupyter kernel itself runs on an event
# loop, we need nest_asyncio to allow the loops to nest.
import nest_asyncio
nest_asyncio.apply()
%env OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
from llama_index.core import VectorStoreIndex
from llama_index.readers.github import GithubRepositoryReader, GithubClient
from IPython.display import Markdown, display
import os
%env GITHUB_TOKEN=github_pat_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
github_token = os.environ.get("GITHUB_TOKEN")
owner = "jerryjliu"
repo = "llama_index"
branch = "main"
github_client = GithubClient(github_token=github_token, verbose=True)
documents = GithubRepositoryReader(
    github_client=github_client,
    owner=owner,
    repo=repo,
    use_parser=False,
    verbose=False,
    # Only index files under the docs/ directory
    filter_directories=(
        ["docs"],
        GithubRepositoryReader.FilterType.INCLUDE,
    ),
    # Skip binary assets as well as JSON and notebook files
    filter_file_extensions=(
        [
            ".png",
            ".jpg",
            ".jpeg",
            ".gif",
            ".svg",
            ".ico",
            ".json",
            ".ipynb",
        ],
        GithubRepositoryReader.FilterType.EXCLUDE,
    ),
).load_data(branch=branch)
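
At this point every file that passed the filters has been turned into a Document. As a quick sanity check (not part of the original notebook), you can print how many documents were loaded and peek at the metadata attached to the first one:

# Quick sanity check on the loaded documents: count them and look at the
# metadata the reader attached to the first one (e.g. the source file path).
print(f"Loaded {len(documents)} documents")
if documents:
    print(documents[0].metadata)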
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
response = query_engine.query(
    "What is the difference between VectorStoreIndex and SummaryIndex?"
)
display(Markdown(f"<b>{response}</b>"))
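
Rebuilding the index means re-fetching and re-embedding the repository on every run. As an optional extension (not in the original notebook), the index can be persisted to disk and reloaded later; the ./storage directory name here is just an example:

# Optional: persist the index so the repository does not have to be
# re-fetched and re-embedded on every run. The "./storage" path is arbitrary.
index.storage_context.persist(persist_dir="./storage")

# Later, reload the persisted index instead of rebuilding it:
from llama_index.core import StorageContext, load_index_from_storage

storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context)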