Unlocking the power of unstructured data with RAG - The GitHub Blog<\/title>\n<meta name=\"description\" content=\"Unstructured data holds valuable information about codebases, organizational best practices, and customer feedback. Here are some ways you can leverage it with RAG, or retrieval-augmented generation.\" \/>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Unlocking the power of unstructured data with RAG\" \/>\n<meta property=\"og:description\" content=\"Unstructured data holds valuable information about codebases, organizational best practices, and customer feedback. Here are some ways you can leverage it with RAG, or retrieval-augmented generation.\" \/>\n<meta property=\"og:url\" content=\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/\" \/>\n<meta property=\"og:site_name\" content=\"The GitHub Blog\" \/>\n<meta property=\"article:published_time\" content=\"2024-06-13T16:00:28+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2024-07-23T13:14:10+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630\" \/>\n\t<meta property=\"og:image:width\" content=\"1200\" \/>\n\t<meta property=\"og:image:height\" content=\"630\" \/>\n\t<meta property=\"og:image:type\" content=\"image\/png\" \/>\n<meta name=\"author\" content=\"Nicole Choi\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:image\" content=\"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630\" \/>\n<meta name=\"twitter:label1\" content=\"Written by\" 
\/>\n\t<meta name=\"twitter:data1\" content=\"Nicole Choi\" \/>\n\t<meta name=\"twitter:label2\" content=\"Est. reading time\" \/>\n\t<meta name=\"twitter:data2\" content=\"10 minutes\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"Article\",\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#article\",\"isPartOf\":{\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/\"},\"author\":{\"name\":\"Nicole Choi\",\"@id\":\"https:\/\/github.blog\/#\/schema\/person\/8a8cb984893a6f3fa8f80dcbe2afff20\"},\"headline\":\"Unlocking the power of unstructured data with RAG\",\"datePublished\":\"2024-06-13T16:00:28+00:00\",\"dateModified\":\"2024-07-23T13:14:10+00:00\",\"mainEntityOfPage\":{\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/\"},\"wordCount\":2070,\"image\":{\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#primaryimage\"},\"thumbnailUrl\":\"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630\",\"keywords\":[\"AI Insights\",\"GitHub Copilot\"],\"articleSection\":[\"AI & ML\",\"LLMs\"],\"inLanguage\":\"en-US\"},{\"@type\":\"WebPage\",\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/\",\"url\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/\",\"name\":\"Unlocking the power of unstructured data with RAG - The GitHub 
Blog\",\"isPartOf\":{\"@id\":\"https:\/\/github.blog\/#website\"},\"primaryImageOfPage\":{\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#primaryimage\"},\"image\":{\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#primaryimage\"},\"thumbnailUrl\":\"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630\",\"datePublished\":\"2024-06-13T16:00:28+00:00\",\"dateModified\":\"2024-07-23T13:14:10+00:00\",\"author\":{\"@id\":\"https:\/\/github.blog\/#\/schema\/person\/8a8cb984893a6f3fa8f80dcbe2afff20\"},\"description\":\"Unstructured data holds valuable information about codebases, organizational best practices, and customer feedback. Here are some ways you can leverage it with RAG, or retrieval-augmented generation.\",\"breadcrumb\":{\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#breadcrumb\"},\"inLanguage\":\"en-US\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/\"]}]},{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#primaryimage\",\"url\":\"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630\",\"contentUrl\":\"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630\",\"width\":1200,\"height\":630},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"Home\",\"item\":\"https:\/\/github.blog\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"AI & 
ML\",\"item\":\"https:\/\/github.blog\/ai-and-ml\/\"},{\"@type\":\"ListItem\",\"position\":3,\"name\":\"LLMs\",\"item\":\"https:\/\/github.blog\/ai-and-ml\/llms\/\"},{\"@type\":\"ListItem\",\"position\":4,\"name\":\"Unlocking the power of unstructured data with RAG\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/github.blog\/#website\",\"url\":\"https:\/\/github.blog\/\",\"name\":\"The GitHub Blog\",\"description\":\"Updates, ideas, and inspiration from GitHub to help developers build and design software.\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/github.blog\/?s={search_term_string}\"},\"query-input\":{\"@type\":\"PropertyValueSpecification\",\"valueRequired\":true,\"valueName\":\"search_term_string\"}}],\"inLanguage\":\"en-US\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/github.blog\/#\/schema\/person\/8a8cb984893a6f3fa8f80dcbe2afff20\",\"name\":\"Nicole Choi\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\/\/github.blog\/#\/schema\/person\/image\/cabf0f5bcb7699cf6311cda62f32cd74\",\"url\":\"https:\/\/secure.gravatar.com\/avatar\/119fcc9cb84bbe63c1822706312e4272564f304917839735f4f45d53ac06e2f1?s=96&d=mm&r=g\",\"contentUrl\":\"https:\/\/secure.gravatar.com\/avatar\/119fcc9cb84bbe63c1822706312e4272564f304917839735f4f45d53ac06e2f1?s=96&d=mm&r=g\",\"caption\":\"Nicole Choi\"},\"url\":\"https:\/\/github.blog\/author\/nicchoi29\/\"}]}<\/script>\n","yoast_head_json":{"title":"Unlocking the power of unstructured data with RAG - The GitHub Blog","description":"Unstructured data holds valuable information about codebases, organizational best practices, and customer feedback. 
Here are some ways you can leverage it with RAG, or retrieval-augmented generation.","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/","og_locale":"en_US","og_type":"article","og_title":"Unlocking the power of unstructured data with RAG","og_description":"Unstructured data holds valuable information about codebases, organizational best practices, and customer feedback. Here are some ways you can leverage it with RAG, or retrieval-augmented generation.","og_url":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/","og_site_name":"The GitHub Blog","article_published_time":"2024-06-13T16:00:28+00:00","article_modified_time":"2024-07-23T13:14:10+00:00","og_image":[{"width":1200,"height":630,"url":"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630","type":"image\/png"}],"author":"Nicole Choi","twitter_card":"summary_large_image","twitter_image":"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630","twitter_misc":{"Written by":"Nicole Choi","Est. 
reading time":"10 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#article","isPartOf":{"@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/"},"author":{"name":"Nicole Choi","@id":"https:\/\/github.blog\/#\/schema\/person\/8a8cb984893a6f3fa8f80dcbe2afff20"},"headline":"Unlocking the power of unstructured data with RAG","datePublished":"2024-06-13T16:00:28+00:00","dateModified":"2024-07-23T13:14:10+00:00","mainEntityOfPage":{"@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/"},"wordCount":2070,"image":{"@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#primaryimage"},"thumbnailUrl":"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630","keywords":["AI Insights","GitHub Copilot"],"articleSection":["AI & ML","LLMs"],"inLanguage":"en-US"},{"@type":"WebPage","@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/","url":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/","name":"Unlocking the power of unstructured data with RAG - The GitHub Blog","isPartOf":{"@id":"https:\/\/github.blog\/#website"},"primaryImageOfPage":{"@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#primaryimage"},"image":{"@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#primaryimage"},"thumbnailUrl":"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630","datePublished":"2024-06-13T16:00:28+00:00","dateModified":"2024-07-23T13:14:10+00:00","author":{"@id":"https:\/\/github.blog\/#\/schema\/person\/8a8cb984893a6f3fa8f80dcbe2afff20"},"description":"Unstructured data holds valuable 
information about codebases, organizational best practices, and customer feedback. Here are some ways you can leverage it with RAG, or retrieval-augmented generation.","breadcrumb":{"@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/"]}]},{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#primaryimage","url":"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630","contentUrl":"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630","width":1200,"height":630},{"@type":"BreadcrumbList","@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https:\/\/github.blog\/"},{"@type":"ListItem","position":2,"name":"AI & ML","item":"https:\/\/github.blog\/ai-and-ml\/"},{"@type":"ListItem","position":3,"name":"LLMs","item":"https:\/\/github.blog\/ai-and-ml\/llms\/"},{"@type":"ListItem","position":4,"name":"Unlocking the power of unstructured data with RAG"}]},{"@type":"WebSite","@id":"https:\/\/github.blog\/#website","url":"https:\/\/github.blog\/","name":"The GitHub Blog","description":"Updates, ideas, and inspiration from GitHub to help developers build and design software.","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/github.blog\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":"Person","@id":"https:\/\/github.blog\/#\/schema\/person\/8a8cb984893a6f3fa8f80dcbe2afff20","name":"Nicole 
Choi","image":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/github.blog\/#\/schema\/person\/image\/cabf0f5bcb7699cf6311cda62f32cd74","url":"https:\/\/secure.gravatar.com\/avatar\/119fcc9cb84bbe63c1822706312e4272564f304917839735f4f45d53ac06e2f1?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/119fcc9cb84bbe63c1822706312e4272564f304917839735f4f45d53ac06e2f1?s=96&d=mm&r=g","caption":"Nicole Choi"},"url":"https:\/\/github.blog\/author\/nicchoi29\/"}]}},"jetpack_publicize_connections":[],"jetpack_featured_media_url":"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630","jetpack_shortlink":"https:\/\/wp.me\/pamS32-koe","jetpack_sharing_enabled":true,"_links":{"self":[{"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/posts\/78382","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/users\/2123"}],"replies":[{"embeddable":true,"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/comments?post=78382"}],"version-history":[{"count":12,"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/posts\/78382\/revisions"}],"predecessor-version":[{"id":78706,"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/posts\/78382\/revisions\/78706"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/media\/78383"}],"wp:attachment":[{"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/media?parent=78382"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/categories?post=78382"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/tags?post=78382"},{"taxonomy":"author","embeddable":true,"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/coauthors?post=78382"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated"
:true}]}}

The same features that make unstructured data valuable also make it hard to analyze.<\/p>\n

Unstructured data lacks inherent organization, as it often consists of free-form text, images, or multimedia content.<\/p>\n

“Without clear boundaries or predefined formats, extracting meaningful information from unstructured data becomes very challenging,” Guo says.<\/p>\n

But LLMs can help to identify complex patterns in unstructured data<\/strong>—especially text. Though not all unstructured data is text, a lot of text is unstructured. And LLMs can help you to analyze it.<\/p>\n

“When dealing with ambiguous, semi-structured or unstructured data, LLMs dramatically excel at identifying patterns, sentiments, entities, and topics within text data and uncover valuable insights that might otherwise remain hidden,” Guo explains.<\/p>\n

\n\n\n
Need a refresher on LLMs? Check out our AI explainers, guides, and best practices ><\/a><\/td>\n<\/tr>\n<\/tbody>\n<\/table><\/content-table-wrap><\/div>\n
Here are a few reasons why developers and IT leaders might consider using RAG-powered LLMs<\/strong> to leverage unstructured data:<\/p>\n
\n
Surface organizational best practices and establish consistency<\/strong>. Through RAG, an LLM can receive a prompt with additional context pulled from an organization’s repositories and documents. So, instead of sifting through and piecing together documents, developers can quickly receive answers from an LLM that align with their organization’s knowledge and best practices.\n<\/li>\n
\n
Accelerate and deepen understanding of an existing codebase<\/strong>—including its conventions, functions, common issues, and bugs. Understanding and familiarizing yourself with code written by another developer is a persisting challenge for several reasons, including but not limited to: code complexity, use of different coding styles, a lack of documentation, use of legacy code or deprecated libraries and APIs, and the buildup of technical debt from quick fixes and workarounds.<\/p>\n<\/li>\n<\/ul>\n
RAG can help to mitigate these pain points by enabling developers to ask and receive answers in natural language about a specific codebase. It can also guide developers to relevant documentation or existing solutions.<\/p>\n
Accelerated and<\/em> deepened understanding of a codebase enables junior developers to contribute their first pull request with less onboarding time and senior developers to mitigate live site incidents, even when they’re unfamiliar with the service that’s failing. It also means that legacy code suffering from “code rot” and natural aging can be more quickly modernized and easily maintained.<\/p>\n
Unstructured data doesn’t just help to improve development processes. It can also improve product decisions by surfacing user pain points.<\/strong><\/p>\n
Moriarty says, “Structured data might show a user’s decision to upgrade or renew a subscription, or how frequently they use a product or not. While those decisions represent the user’s attitude and feelings toward the product, it’s not a complete representation. Unstructured data allows for more nuanced and qualitative feedback, making for a more complete picture.”<\/p>\n
A lot of information and feedback is shared during informal discussions, whether those discussions happen on a call, over email, on social platforms, or in an instant message. From these discussions, decision makers and builders can find helpful feedback to improve a service or product, and understand general public and user sentiment.<\/p>\n
What about structured data?<\/span><\/a><\/h3>\n
Contrary to unstructured data, structured data—like relational databases, Protobuf files, and configuration files—follows a specific and predefined format.<\/p>\n
We’re not saying unstructured data is more valuable than structured. But the processes for analyzing structured data are more straightforward: you can use SQL functions to modify the data and traditional statistical methods to understand the relationship between different variables.<\/p>\n
That’s not to say AI isn’t used for structured data analysis. “There’s a reason that machine learning, given its predictive power, is and continues to be widespread across industries that use data,” according to Moriarty.<\/p>\n
However, “Structured data is often numeric, and numbers are simply easier to analyze for patterns than words are,” Moriarty says. Not to mention that methods for analyzing structured data have been around longer than those for analyzing unstructured data: “A longer history with more focus just means there are more established approaches, and more people are familiar with it,” she explains.<\/p>\n
That’s why the demand to enhance structured data might seem less urgent, according to Guo. “The potential for transformative impact is significantly greater when applied to unstructured data,” she says.<\/p>\n
How does RAG extract value from unstructured data?<\/span><\/a><\/h2>\n
With RAG, an LLM can use data sources beyond its training data to generate an output.<\/p>\n
RAG is a prompting method that uses retrieval—a process for searching for and accessing information—to add more context to a prompt that generates an LLM response.<\/p>\n
This method is designed to improve the quality and relevance of an LLM’s outputs. Additional data sources include a vector database, traditional database, or search engine. So, developers who use an enterprise AI tool equipped with RAG can receive AI outputs customized to their organization’s best practices, knowledge, and proprietary data.<\/p>\n
We break down these data sources in our RAG explainer,<\/a> but here’s a quick summary:<\/p>\n
\n
Vector databases<\/strong>. While you code in your IDE, algorithms create embeddings for your code snippets, which are stored in a vector database. An AI coding tool can search that database to find snippets from across your codebase that are similar to the code you’re currently writing and generate a suggestion.<\/li>\n<\/ul>\n
And when you’re engaging with GitHub Copilot Chat on GitHub.com or in the IDE, your query or code is transformed into an embedding. Our retrieval service then fetches relevant embeddings from the vector database for the repository you’ve indexed. These embeddings are turned back into text and code when they’re added to the prompt as additional context for the LLM. This entire process leverages unstructured data, even though the retrieval system uses embeddings internally.<\/p>\n
\n
General text search<\/strong>. When developers engage with GitHub Copilot Chat under a GitHub Copilot Enterprise plan, they can index repositories—specifically code and documentation. So, when a developer on GitHub.com or in the IDE asks GitHub Copilot Chat a question about an indexed repository, the AI coding tool can retrieve data from all of those indexed, unstructured data sources. And on GitHub.com, GitHub Copilot Chat can tap into a collection of unstructured data in Markdown files from across repositories, which we call knowledge bases<\/a>.<\/li>\n<\/ul>\n
Learn about GitHub Copilot Enterprise features ><\/strong><\/em><\/a><\/p>\n
But wait, why is Markdown considered unstructured data? Though you can use Markdown to format a file, the file itself can contain essentially any kind of data. Think about it this way: how would you put the contents of a Markdown file in a table?<\/p>\n
\n
External or internal search engine<\/strong>. The retrieval method searches and pulls information from a wide range of sources from the public web or your internal platforms and websites. That information is used for RAG, which means the AI model now has data from additional files—like text, image, video, and audio—to answer your questions.<\/li>\n<\/ul>\n
Retrieval also taps into internal search engines. So, if a developer wants to ask a question about a specific repository, they can index the repository and then send their question to GitHub Copilot Chat on GitHub.com. Retrieval uses our internal search engine<\/a> to find relevant code or text from the indexed files, which are then used by RAG to prompt the LLM for a contextually relevant response.<\/p>\n
Stay smart<\/strong>: LLMs can do things they weren’t trained to do<\/a>, so it’s important to always evaluate and verify their outputs.<\/p>\n
RAG and GitHub Copilot Enterprise<\/p>
Powered by RAG, GitHub Copilot Enterprise can help developers and leaders at all levels receive natural language answers to questions about specific repositories. GitHub Copilot can also use content in commits, issues, and discussions to provide contextually relevant responses.<\/p>\n
In fact, by asking GitHub Copilot questions, developers actually provide GitHub Copilot with more details about the context in which information is being used, which then helps the AI coding tool provide more accurate responses tailored to an organization’s unique codebase.<\/p>\n
Learn more about the use cases and benefits of GitHub Copilot Enterprise.<\/a><\/p>\n<\/aside>\n<\/p>
Use RAG to unlock insights from unstructured data<\/span><\/a><\/h2>\n
As developers improve their productivity and write more code with AI tools like GitHub Copilot<\/a>, there’ll be even more unstructured data. Not just in the code itself, but also in the information used to build, contextualize, maintain, and improve that code.<\/p>\n
That means even more data containing rich insights that organizations can surface and leverage, or let sink and disappear.<\/p>\n
Developers and IT leaders can use RAG as a tool to help improve their productivity, produce high-quality and consistent code at greater speed, preserve and share information, and increase their understanding of existing codebases, which can reduce onboarding time.<\/p>\n
With a RAG-powered AI tool, developers and IT leaders can quickly discover, analyze, and evaluate a wealth of unstructured data—simply by asking a question.<\/p>\n
A RAG reading list 📚<\/span><\/a><\/h2>\n
\n
What is retrieval-augmented generation, and what does it do for generative AI?<\/a><\/li>\n
Customizing and fine-tuning LLMs: What you need to know<\/a><\/li>\n
How we’re experimenting with LLMs to evolve GitHub Copilot <\/a><\/li>\n
How GitHub Copilot is getting better at understanding your code<\/a><\/li>\n<\/ul>\n<\/body><\/html>\n","protected":false},"excerpt":{"rendered":"
Unstructured data holds valuable information about codebases, organizational best practices, and customer feedback. Here are some ways you can leverage it with RAG, or retrieval-augmented generation.<\/p>\n","protected":false},"author":2123,"featured_media":78383,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"_gh_post_show_toc":"yes","_gh_post_is_no_robots":"no","_gh_post_is_featured":"no","_gh_post_is_excluded":"no","_gh_post_is_unlisted":"no","_gh_post_related_link_1":"","_gh_post_related_link_2":"","_gh_post_related_link_3":"","_gh_post_sq_img":"","_gh_post_sq_img_id":"","_gh_post_cta_title":"","_gh_post_cta_text":"","_gh_post_cta_link":"","_gh_post_cta_button":"Click Here to Learn More","_gh_post_recirc_hide":"no","_gh_post_recirc_col_1":"gh-auto-select","_gh_post_recirc_col_2":"77524","_gh_post_recirc_col_3":"65303","_gh_post_recirc_col_4":"65316","_featured_video":"","_gh_post_additional_query_params":"","_jetpack_memberships_contains_paid_content":false,"footnotes":"","jetpack_publicize_message":"","jetpack_publicize_feature_enabled":true,"jetpack_social_post_already_shared":true,"jetpack_social_options":{"image_generator_settings":{"template":"highway","default_image_id":0,"font":"","enabled":false},"version":2},"_wpas_customize_per_network":false},"categories":[3293,3296],"tags":[3241,2535],"coauthors":[3118],"class_list":["post-78382","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-ai-and-ml","category-llms","tag-ai-insights","tag-github-copilot"],"yoast_head":"\nUnlocking the power of unstructured data with RAG - The GitHub Blog<\/title>\n<meta name=\"description\" content=\"Unstructured data holds valuable information about codebases, organizational best practices, and customer feedback. 
Here are some ways you can leverage it with RAG, or retrieval-augmented generation.\" \/>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Unlocking the power of unstructured data with RAG\" \/>\n<meta property=\"og:description\" content=\"Unstructured data holds valuable information about codebases, organizational best practices, and customer feedback. Here are some ways you can leverage it with RAG, or retrieval-augmented generation.\" \/>\n<meta property=\"og:url\" content=\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/\" \/>\n<meta property=\"og:site_name\" content=\"The GitHub Blog\" \/>\n<meta property=\"article:published_time\" content=\"2024-06-13T16:00:28+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2024-07-23T13:14:10+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630\" \/>\n\t<meta property=\"og:image:width\" content=\"1200\" \/>\n\t<meta property=\"og:image:height\" content=\"630\" \/>\n\t<meta property=\"og:image:type\" content=\"image\/png\" \/>\n<meta name=\"author\" content=\"Nicole Choi\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:image\" content=\"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630\" \/>\n<meta name=\"twitter:label1\" content=\"Written by\" \/>\n\t<meta name=\"twitter:data1\" content=\"Nicole Choi\" \/>\n\t<meta name=\"twitter:label2\" content=\"Est. 
reading time\" \/>\n\t<meta name=\"twitter:data2\" content=\"10 minutes\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"Article\",\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#article\",\"isPartOf\":{\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/\"},\"author\":{\"name\":\"Nicole Choi\",\"@id\":\"https:\/\/github.blog\/#\/schema\/person\/8a8cb984893a6f3fa8f80dcbe2afff20\"},\"headline\":\"Unlocking the power of unstructured data with RAG\",\"datePublished\":\"2024-06-13T16:00:28+00:00\",\"dateModified\":\"2024-07-23T13:14:10+00:00\",\"mainEntityOfPage\":{\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/\"},\"wordCount\":2070,\"image\":{\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#primaryimage\"},\"thumbnailUrl\":\"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630\",\"keywords\":[\"AI Insights\",\"GitHub Copilot\"],\"articleSection\":[\"AI & ML\",\"LLMs\"],\"inLanguage\":\"en-US\"},{\"@type\":\"WebPage\",\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/\",\"url\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/\",\"name\":\"Unlocking the power of unstructured data with RAG - The GitHub 
Blog\",\"isPartOf\":{\"@id\":\"https:\/\/github.blog\/#website\"},\"primaryImageOfPage\":{\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#primaryimage\"},\"image\":{\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#primaryimage\"},\"thumbnailUrl\":\"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630\",\"datePublished\":\"2024-06-13T16:00:28+00:00\",\"dateModified\":\"2024-07-23T13:14:10+00:00\",\"author\":{\"@id\":\"https:\/\/github.blog\/#\/schema\/person\/8a8cb984893a6f3fa8f80dcbe2afff20\"},\"description\":\"Unstructured data holds valuable information about codebases, organizational best practices, and customer feedback. Here are some ways you can leverage it with RAG, or retrieval-augmented generation.\",\"breadcrumb\":{\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#breadcrumb\"},\"inLanguage\":\"en-US\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/\"]}]},{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#primaryimage\",\"url\":\"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630\",\"contentUrl\":\"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630\",\"width\":1200,\"height\":630},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"Home\",\"item\":\"https:\/\/github.blog\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"AI & 
ML\",\"item\":\"https:\/\/github.blog\/ai-and-ml\/\"},{\"@type\":\"ListItem\",\"position\":3,\"name\":\"LLMs\",\"item\":\"https:\/\/github.blog\/ai-and-ml\/llms\/\"},{\"@type\":\"ListItem\",\"position\":4,\"name\":\"Unlocking the power of unstructured data with RAG\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/github.blog\/#website\",\"url\":\"https:\/\/github.blog\/\",\"name\":\"The GitHub Blog\",\"description\":\"Updates, ideas, and inspiration from GitHub to help developers build and design software.\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/github.blog\/?s={search_term_string}\"},\"query-input\":{\"@type\":\"PropertyValueSpecification\",\"valueRequired\":true,\"valueName\":\"search_term_string\"}}],\"inLanguage\":\"en-US\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/github.blog\/#\/schema\/person\/8a8cb984893a6f3fa8f80dcbe2afff20\",\"name\":\"Nicole Choi\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\/\/github.blog\/#\/schema\/person\/image\/cabf0f5bcb7699cf6311cda62f32cd74\",\"url\":\"https:\/\/secure.gravatar.com\/avatar\/119fcc9cb84bbe63c1822706312e4272564f304917839735f4f45d53ac06e2f1?s=96&d=mm&r=g\",\"contentUrl\":\"https:\/\/secure.gravatar.com\/avatar\/119fcc9cb84bbe63c1822706312e4272564f304917839735f4f45d53ac06e2f1?s=96&d=mm&r=g\",\"caption\":\"Nicole Choi\"},\"url\":\"https:\/\/github.blog\/author\/nicchoi29\/\"}]}<\/script>\n","yoast_head_json":{"title":"Unlocking the power of unstructured data with RAG - The GitHub Blog","description":"Unstructured data holds valuable information about codebases, organizational best practices, and customer feedback. 
Here are some ways you can leverage it with RAG, or retrieval-augmented generation.","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/","og_locale":"en_US","og_type":"article","og_title":"Unlocking the power of unstructured data with RAG","og_description":"Unstructured data holds valuable information about codebases, organizational best practices, and customer feedback. Here are some ways you can leverage it with RAG, or retrieval-augmented generation.","og_url":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/","og_site_name":"The GitHub Blog","article_published_time":"2024-06-13T16:00:28+00:00","article_modified_time":"2024-07-23T13:14:10+00:00","og_image":[{"width":1200,"height":630,"url":"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630","type":"image\/png"}],"author":"Nicole Choi","twitter_card":"summary_large_image","twitter_image":"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630","twitter_misc":{"Written by":"Nicole Choi","Est. 
reading time":"10 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#article","isPartOf":{"@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/"},"author":{"name":"Nicole Choi","@id":"https:\/\/github.blog\/#\/schema\/person\/8a8cb984893a6f3fa8f80dcbe2afff20"},"headline":"Unlocking the power of unstructured data with RAG","datePublished":"2024-06-13T16:00:28+00:00","dateModified":"2024-07-23T13:14:10+00:00","mainEntityOfPage":{"@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/"},"wordCount":2070,"image":{"@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#primaryimage"},"thumbnailUrl":"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630","keywords":["AI Insights","GitHub Copilot"],"articleSection":["AI & ML","LLMs"],"inLanguage":"en-US"},{"@type":"WebPage","@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/","url":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/","name":"Unlocking the power of unstructured data with RAG - The GitHub Blog","isPartOf":{"@id":"https:\/\/github.blog\/#website"},"primaryImageOfPage":{"@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#primaryimage"},"image":{"@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#primaryimage"},"thumbnailUrl":"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630","datePublished":"2024-06-13T16:00:28+00:00","dateModified":"2024-07-23T13:14:10+00:00","author":{"@id":"https:\/\/github.blog\/#\/schema\/person\/8a8cb984893a6f3fa8f80dcbe2afff20"},"description":"Unstructured data holds valuable 
information about codebases, organizational best practices, and customer feedback. Here are some ways you can leverage it with RAG, or retrieval-augmented generation.","breadcrumb":{"@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/"]}]},{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#primaryimage","url":"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630","contentUrl":"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630","width":1200,"height":630},{"@type":"BreadcrumbList","@id":"https:\/\/github.blog\/ai-and-ml\/llms\/unlocking-the-power-of-unstructured-data-with-rag\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https:\/\/github.blog\/"},{"@type":"ListItem","position":2,"name":"AI & ML","item":"https:\/\/github.blog\/ai-and-ml\/"},{"@type":"ListItem","position":3,"name":"LLMs","item":"https:\/\/github.blog\/ai-and-ml\/llms\/"},{"@type":"ListItem","position":4,"name":"Unlocking the power of unstructured data with RAG"}]},{"@type":"WebSite","@id":"https:\/\/github.blog\/#website","url":"https:\/\/github.blog\/","name":"The GitHub Blog","description":"Updates, ideas, and inspiration from GitHub to help developers build and design software.","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/github.blog\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":"Person","@id":"https:\/\/github.blog\/#\/schema\/person\/8a8cb984893a6f3fa8f80dcbe2afff20","name":"Nicole 
Choi","image":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/github.blog\/#\/schema\/person\/image\/cabf0f5bcb7699cf6311cda62f32cd74","url":"https:\/\/secure.gravatar.com\/avatar\/119fcc9cb84bbe63c1822706312e4272564f304917839735f4f45d53ac06e2f1?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/119fcc9cb84bbe63c1822706312e4272564f304917839735f4f45d53ac06e2f1?s=96&d=mm&r=g","caption":"Nicole Choi"},"url":"https:\/\/github.blog\/author\/nicchoi29\/"}]}},"jetpack_publicize_connections":[],"jetpack_featured_media_url":"https:\/\/github.blog\/wp-content\/uploads\/2024\/06\/AI-DarkMode-4.png?fit=1200%2C630","jetpack_shortlink":"https:\/\/wp.me\/pamS32-koe","jetpack_sharing_enabled":true,"_links":{"self":[{"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/posts\/78382","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/users\/2123"}],"replies":[{"embeddable":true,"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/comments?post=78382"}],"version-history":[{"count":12,"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/posts\/78382\/revisions"}],"predecessor-version":[{"id":78706,"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/posts\/78382\/revisions\/78706"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/media\/78383"}],"wp:attachment":[{"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/media?parent=78382"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/categories?post=78382"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/tags?post=78382"},{"taxonomy":"author","embeddable":true,"href":"https:\/\/github.blog\/wp-json\/wp\/v2\/coauthors?post=78382"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated"
:true}]}}