Github Repo Reader
If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙.
%pip install llama-index-readers-github
!pip install llama-index
# The reader used below drives an asyncio event loop synchronously
# (`loop.run_until_complete`). Since the Jupyter kernel itself already runs
# on an event loop, nested event loops must be enabled first.
# NOTE(review): the original comment named DiscordReader, presumably copied
# from another notebook; this notebook uses GithubRepositoryReader.
import nest_asyncio

nest_asyncio.apply()
%env OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxfrom llama_index.core import VectorStoreIndexfrom llama_index.readers.github import GithubRepositoryReader, GithubClientfrom IPython.display import Markdown, displayimport os
env: OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
%env GITHUB_TOKEN=github_pat_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxgithub_token = os.environ.get("GITHUB_TOKEN")owner = "jerryjliu"repo = "llama_index"branch = "main"
# Authenticated client used by the repository reader.
github_client = GithubClient(github_token=github_token, verbose=True)

# Load documents from the repository: only the "docs" directory is included,
# and image, JSON and notebook files are excluded by extension.
documents = GithubRepositoryReader(
    github_client=github_client,
    owner=owner,
    repo=repo,
    use_parser=False,
    verbose=False,
    filter_directories=(
        ["docs"],
        GithubRepositoryReader.FilterType.INCLUDE,
    ),
    filter_file_extensions=(
        [
            ".png",
            ".jpg",
            ".jpeg",
            ".gif",
            ".svg",
            ".ico",
            # Leading dot added for consistency with the other entries; the
            # original "json" presumably never matched a file extension.
            ".json",
            ".ipynb",
        ],
        GithubRepositoryReader.FilterType.EXCLUDE,
    ),
).load_data(branch=branch)
# Build a vector index over the loaded documents.
index = VectorStoreIndex.from_documents(documents)

# Create a query engine from the index and ask a question.
query_engine = index.as_query_engine()
# NOTE(review): `verbose=True` is passed through unchanged from the original;
# confirm the installed query engine's `query()` accepts this keyword.
response = query_engine.query(
    "What is the difference between VectorStoreIndex and SummaryIndex?",
    verbose=True,
)

# Render the response in bold in the notebook output.
display(Markdown(f"<b>{response}</b>"))