
# Workflows

## combine_answers(answers, research_question, use_gpt4=False, temperature=0.1)

Combines a list of answers into a concise literature review using the OpenAI API.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `answers` | `list` | A list of answers to combine. | *required* |
| `research_question` | `str` | The research question to use in the literature review. | *required* |
| `use_gpt4` | `bool` | Whether to use GPT-4 for the literature review. | `False` |
| `temperature` | `float` | The temperature to use for the OpenAI API. | `0.1` |

Returns:

| Type | Description |
| --- | --- |
| `str` | The literature review. |

Examples:

```python
>>> answers = ["Answer 1", "Answer 2"]
>>> research_question = "What is the impact of AI on society?"
>>> combine_answers(answers, research_question)
"The impact of AI on society is significant. Answer 1...Answer 2..."
```
Source code in `autoresearcher/workflows/literature_review/combine_answers.py`:

```python
def combine_answers(answers, research_question, use_gpt4=False, temperature=0.1):
    """
    Combines a list of answers into a concise literature review using the OpenAI API.
    Args:
      answers (list): A list of answers to combine.
      research_question (str): The research question to use in the literature review.
      use_gpt4 (bool, optional): Whether to use GPT-4 for the literature review. Defaults to False.
      temperature (float, optional): The temperature to use for the OpenAI API. Defaults to 0.1.
    Returns:
      str: The literature review.
    Examples:
      >>> answers = ["Answer 1", "Answer 2"]
      >>> research_question = "What is the impact of AI on society?"
      >>> combine_answers(answers, research_question)
      "The impact of AI on society is significant. Answer 1...Answer 2..."
    """
    answer_list = "\n\n".join(answers)
    prompt = literature_review_prompt.format(
        research_question=research_question, answer_list=answer_list
    )

    # Calculate the tokens in the input
    input_tokens = count_tokens(prompt)

    # Calculate the remaining tokens for the response
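    # (4080 presumably leaves a small margin under gpt-3.5-turbo's 4096-token
    # window; note it is applied even when use_gpt4=True)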
    remaining_tokens = 4080 - input_tokens
    max_tokens = max(remaining_tokens, 0)
    literature_review = openai_call(
        prompt, use_gpt4=use_gpt4, temperature=temperature, max_tokens=max_tokens
    )

    return literature_review
```
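
The subtraction above budgets the completion so that prompt plus response stay inside the model's context window. A minimal sketch of the same pattern, assuming `tiktoken` is installed and that the 4080 figure targets gpt-3.5-turbo's 4,096-token window:

```python
import tiktoken

def count_tokens(text: str, model: str = "gpt-3.5-turbo") -> int:
    # Tokenize with the model's own encoding and count the tokens
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(text))

CONTEXT_WINDOW = 4096  # assumed model limit; 4080 above leaves a small margin

prompt = "Write a literature review answering: ...\n\nAnswer 1\n\nAnswer 2"
input_tokens = count_tokens(prompt)

# Never request a negative completion budget
max_tokens = max(CONTEXT_WINDOW - input_tokens, 0)
```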

## literature_review(research_question, output_file=None)

Generates an academic literature review for a given research question.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `research_question` | `str` | The research question to generate a literature review for. | *required* |
| `output_file` | `str` | The file path to save the literature review to. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `str` | The generated literature review. |

Examples:

```python
>>> literature_review('What is the impact of AI on healthcare?')
Research question: What is the impact of AI on healthcare?
Auto Researcher initiated!
Generating keyword combinations...
Keyword combinations generated!
Fetching top 20 papers...
Top 20 papers fetched!
Extracting research findings from papers...
Research findings extracted!
Synthesizing answers...
Literature review generated!
Academic Literature Review: ...
References:
1. ...
Keyword combinations used to search for papers: 1. AI healthcare, 2. impact AI healthcare
```
Source code in `autoresearcher/workflows/literature_review/literature_review.py`:

```python
def literature_review(research_question, output_file=None):
    """
    Generates an academic literature review for a given research question.
    Args:
      research_question (str): The research question to generate a literature review for.
      output_file (str, optional): The file path to save the literature review to.
    Returns:
      str: The generated literature review.
    Examples:
      >>> literature_review('What is the impact of AI on healthcare?')
      Research question: What is the impact of AI on healthcare?
      Auto Researcher initiated!
      Generating keyword combinations...
      Keyword combinations generated!
      Fetching top 20 papers...
      Top 20 papers fetched!
      Extracting research findings from papers...
      Research findings extracted!
      Synthesizing answers...
      Literature review generated!
      Academic Literature Review: ...
      References:
      1. ...
      Keyword combinations used to search for papers: 1. AI healthcare, 2. impact AI healthcare
    """
    semantic_scholar = SemanticScholarLoader()

    print(
        colored(
            f"Research question: {research_question}", "yellow", attrs=["bold", "blink"]
        )
    )
    print(colored("Auto Researcher initiated!", "yellow"))

    # Generate keyword combinations
    print(colored("Generating keyword combinations...", "yellow"))
    keyword_combinations = generate_keyword_combinations(research_question)
    print(colored("Keyword combinations generated!", "green"))

    # Fetch the top 20 papers for the research question
    search_query = research_question
    print(colored("Fetching top 20 papers...", "yellow"))
    top_papers = semantic_scholar.fetch_and_sort_papers(
        search_query, keyword_combinations=keyword_combinations, year_range="2000-2023"
    )
    print(colored("Top 20 papers fetched!", "green"))

    # Extract answers from the top 20 papers
    print(colored("Extracting research findings from papers...", "yellow"))
    answers = extract_answers_from_papers(top_papers, research_question)
    print(colored("Research findings extracted!", "green"))

    # Combine answers into a concise academic literature review
    print(colored("Synthesizing answers...", "yellow"))
    literature_review = combine_answers(answers, research_question)
    print(colored("Literature review generated!", "green"))

    # Extract citations from answers and append a references list to the literature review
    citations = extract_citations(answers)
    references_list = "\n".join(
        [f"{idx + 1}. {citation}" for idx, citation in enumerate(citations)]
    )
    literature_review += "\n\nReferences:\n" + references_list

    # Append the keyword combinations to the literature review
    literature_review += "\n\nKeyword combinations used to search for papers: "
    literature_review += ", ".join(
        [f"{i+1}. {combination}" for i, combination in enumerate(keyword_combinations)]
    )

    # Print the academic literature review
    print(colored("Academic Literature Review:", "cyan"), literature_review, "\n")

    # Save the literature review to a file if the output_file argument is provided
    if output_file:
        with open(output_file, "w") as f:
            f.write(literature_review)
        print(colored(f"Literature review saved to {output_file}", "green"))

    return literature_review
```
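
A quick usage sketch, assuming the package is importable under the path shown above and that the required API keys (e.g. `OPENAI_API_KEY`) are set in the environment:

```python
from autoresearcher.workflows.literature_review.literature_review import (
    literature_review,
)

# Generate the review and also persist it; output_file is optional
review = literature_review(
    "What is the impact of AI on healthcare?",
    output_file="ai_healthcare_review.txt",
)
print(review)
```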

## extract_answers_from_papers(papers, research_question, use_gpt4=False, temperature=0, max_tokens=150)

Extracts answers from paper abstracts.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `papers` | `list` | A list of papers. | *required* |
| `research_question` | `str` | The research question to answer. | *required* |
| `use_gpt4` | `bool` | Whether to use GPT-4 for answer extraction. | `False` |
| `temperature` | `float` | The temperature for answer extraction. | `0` |
| `max_tokens` | `int` | The maximum number of tokens for answer extraction. | `150` |

Returns:

| Type | Description |
| --- | --- |
| `list` | A list of answers extracted from the paper abstracts. |

Examples:

```python
>>> extract_answers_from_papers(papers, research_question)
['Answer 1 SOURCE: Citation 1', 'Answer 2 SOURCE: Citation 2']
```
Source code in `autoresearcher/workflows/literature_review/extract_answers_from_papers.py`:

```python
def extract_answers_from_papers(
    papers, research_question, use_gpt4=False, temperature=0, max_tokens=150
):
    """
    Extracts answers from paper abstracts.
    Args:
      papers (list): A list of papers.
      research_question (str): The research question to answer.
      use_gpt4 (bool, optional): Whether to use GPT-4 for answer extraction. Defaults to False.
      temperature (float, optional): The temperature for GPT-4 answer extraction. Defaults to 0.
      max_tokens (int, optional): The maximum number of tokens for GPT-4 answer extraction. Defaults to 150.
    Returns:
      list: A list of answers extracted from the paper abstracts.
    Examples:
      >>> extract_answers_from_papers(papers, research_question)
      ['Answer 1 SOURCE: Citation 1', 'Answer 2 SOURCE: Citation 2']
    """
    answers = []
    default_answer = "No answer found."

    for paper in papers:
        abstract = paper.get("abstract", "")
        title = colored(paper.get("title", ""), "magenta", attrs=["bold"])
        if "externalIds" in paper and "DOI" in paper["externalIds"]:
            citation = get_citation_by_doi(paper["externalIds"]["DOI"])
        else:
            citation = paper["url"]
        prompt = extract_answer_prompt.format(
            research_question=research_question, abstract=abstract
        )
        answer = openai_call(
            prompt, use_gpt4=use_gpt4, temperature=temperature, max_tokens=max_tokens
        )

        print(f"Processing paper: {title}")

        if answer != default_answer:
            # Tag the answer with its citation so sources survive later synthesis
            answer_with_citation = f"{answer} SOURCE: {citation}"
            answers.append(answer_with_citation)
            print(colored("Answer found!", "green"))
            print(colored(answer_with_citation, "cyan"))

    return answers
```
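
The function expects Semantic Scholar-style paper dicts. A minimal sketch of the input shape, with made-up values (the field names follow the code above):

```python
papers = [
    {
        "title": "AI in Diagnostics",
        "abstract": "We examine how machine learning assists clinical diagnosis...",
        "url": "https://www.semanticscholar.org/paper/xxxx",
        # When a DOI is present, it is used to fetch a formatted citation
        "externalIds": {"DOI": "10.1000/example.doi"},
    },
]

answers = extract_answers_from_papers(
    papers, "What is the impact of AI on healthcare?"
)
```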

## extract_citations(answers)

Extracts bibliographical citations from a list of answers.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `answers` | `list` | A list of strings containing answers. | *required* |

Returns:

| Type | Description |
| --- | --- |
| `list` | A list of strings containing bibliographical citations. |

Examples:

```python
>>> answers = ["This is an answer. SOURCE: Smith, J. (2020).",
...            "This is another answer. SOURCE: Jones, A. (2021)."]
>>> extract_citations(answers)
['Smith, J. (2020).', 'Jones, A. (2021).']
```
Source code in `autoresearcher/workflows/literature_review/extract_citations.py`:

```python
def extract_citations(answers):
    """
    Extracts bibliographical citations from a list of answers.
    Args:
      answers (list): A list of strings containing answers.
    Returns:
      list: A list of strings containing bibliographical citations.
    Examples:
      >>> answers = ["This is an answer. SOURCE: Smith, J. (2020).",
      ...            "This is another answer. SOURCE: Jones, A. (2021)."]
      >>> extract_citations(answers)
      ["Smith, J. (2020)", "Jones, A. (2021)"]
    """
    citations = []
    for answer in answers:
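        # Use rfind so only the text after the last "SOURCE: " marker is taken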
        citation_start = answer.rfind("SOURCE: ")
        if citation_start != -1:
            citation = answer[citation_start + len("SOURCE: ") :]
            citations.append(citation)
    return citations
```
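
Answers without a `SOURCE:` marker are skipped rather than producing an empty entry; a quick sketch:

```python
answers = [
    "Finding one. SOURCE: Smith, J. (2020).",
    "Finding two with no citation attached.",
]
print(extract_citations(answers))
# ['Smith, J. (2020).']
```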