import os

from dotenv import load_dotenv


def init_env_file(
    env_path=".env",
    default_content='OPENAI_API_KEY="1234567 (replace with your own)"\nBASE_URL="example.chat (replace with your own)"',
):
    # Write a template .env file if one does not exist yet
    if not os.path.exists(env_path):
        with open(env_path, "w", encoding="utf-8") as f:
            f.write(default_content)
        print("WARNING: remember to use your own API secret key")


def load_api_key():
    # Make sure the .env template exists before reading it
    init_env_file()
    # Load the variables from .env into the environment and return them
    # (assumes the python-dotenv package)
    load_dotenv()
    return os.getenv("OPENAI_API_KEY"), os.getenv("BASE_URL")
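On its first run, init_env_file writes the following template next to the script; both placeholder values must be replaced with real credentials before the online path will work:

OPENAI_API_KEY="1234567 (replace with your own)"
BASE_URL="example.chat (replace with your own)"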
import os
import json

import requests

from utils import load_api_key
def get_completions_online(prompt):
    from openai import OpenAI

    OPENAI_API_KEY, BASE_URL = load_api_key()

    # Force the model to end with a parseable <answer>...</answer> tag
    added_prompts = "For the final answer you have given, please return: <answer>Your final answer</answer> at the end of your response. Remember you are only allowed to return a float number! For example, 18 or 18.0 is allowed but $18 is not allowed."

    # Standard OpenAI-SDK chat completion; the model name below is a placeholder,
    # use whichever model your BASE_URL provider serves
    client = OpenAI(api_key=OPENAI_API_KEY, base_url=BASE_URL)
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt + added_prompts}],
    )
    return response.choices[0].message.content
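A quick smoke test for the online path, run as a separate snippet (assuming .env now holds a valid key and base URL):

reply = get_completions_online("What is 6 * 7?")
print(reply)  # should end with something like <answer>42</answer>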
def get_completions_offline(prompt):
    """Get completions from the locally hosted LLM.

    Args:
        prompt (str): prompt sent to the LLM.

    Returns:
        str: final response from the model.
    """
    headers = {"Content-Type": "application/json"}
    max_length = 100  # max length of a single query

    # Force the model to end with a parseable <answer>...</answer> tag
    added_prompts = "For the final answer you have given, please return: <answer>Your final answer</answer> at the end of your response. Remember you are only allowed to return a float number! For example, 18 or 18.0 is allowed but $18 is not allowed."

    # POST to the FastAPI service from section 2; the port must match the one
    # uvicorn serves on (6000 here)
    data = {"prompt": prompt + added_prompts, "max_length": max_length}
    response = requests.post(
        "http://localhost:6000", headers=headers, data=json.dumps(data)
    )
    return response.json()["response"]
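Equivalently, you can exercise the endpoint with raw requests to inspect the full JSON the server returns (a sketch; the field names come from the section-2 server code, and the port must match the one uvicorn serves on):

import json

import requests

payload = {"prompt": "Hello, introduce yourself!", "max_length": 100}
resp = requests.post(
    "http://localhost:6000",
    headers={"Content-Type": "application/json"},
    data=json.dumps(payload),
)
print(resp.json())  # e.g. {"response": "...", "status": 200, "time": "2025-01-01 12:00:00"}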
import re

from tqdm import tqdm

from app import get_completions
def test_evaluate(dataset, status):
    """A small test function for checking the LLM's response.

    Args:
        dataset (List): the dataset loaded by load_local_dataset.
        status: flag selecting the online or offline backend.
    """
    get_completions("Hello, introduce yourself!", status)
    test_results = []
    single_evaluate(test_results, dataset[0], status)
    print(test_results)
def single_evaluate(results: list, item, status):
    """Run a single evaluation for a single query.

    Args:
        results (list): result list to append to.
        item (dict): a single query from the dataset.
        status: flag selecting the online or offline backend.
    """
    prompt: str = item["question"]
    expected_answer: str = item["answer"]
    # get the LLM's response
    llm_response: str = get_completions(prompt, status)
    match = re.search(r"<answer>(.*?)</answer>(.*)", llm_response)
    # exception handling: the model may ignore the required output format
    if match is None:
        results.append(
            {
                "prompt": prompt,
                "expected_answer": expected_answer,
                "model_response": llm_response,
                "final_answer": "LLM fails to respond",
                # None means the LLM failed to output in the right format
                "correct": None,
            }
        )
    else:
        try:
            final_answer = float(match.group(1).replace(",", ""))
            reference = float(
                expected_answer.split("####")[-1].strip().replace(",", "")
            )
            correct = final_answer == reference
        except ValueError:
            # the tag content was not a parseable float
            final_answer = match.group(1)
            correct = None
        results.append(
            {
                "prompt": prompt,
                "expected_answer": expected_answer,
                "model_response": llm_response,
                "final_answer": final_answer,
                "correct": correct,
            }
        )
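The comma stripping on both sides is what keeps GSM8K-style answers such as "#### 1,248" comparable to the model's tagged output; here is a standalone check of the parsing logic (the sample strings are illustrative):

import re

expected_answer = "She earned 52 * 24 = 1,248 dollars.\n#### 1,248"
llm_response = "Step by step... so the total is <answer>1,248.0</answer>"

match = re.search(r"<answer>(.*?)</answer>(.*)", llm_response)
predicted = float(match.group(1).replace(",", ""))                             # 1248.0
reference = float(expected_answer.split("####")[-1].strip().replace(",", ""))  # 1248.0
print(predicted == reference)  # True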
# run tests and evaluate models
def evaluate(datasets, status):
    results = []
    for item in tqdm(datasets, desc="Evaluating"):
        single_evaluate(results=results, item=item, status=status)
    return results
def calculate_metrics(results):
    total = len(results)
    correct = sum(1 for result in results if result["correct"] is True)
    no_answer = sum(1 for result in results if result["correct"] is None)
    accuracy = correct / total if total > 0 else 0
    no_answer_rate = no_answer / total if total > 0 else 0
    return {
        "total": total,
        "correct": correct,
        "accuracy": accuracy,
        "no_answer_rate": no_answer_rate,
    }
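Putting the pieces together, a full evaluation run might look like this (a sketch: load_local_dataset and the "offline" status value are assumptions based on the docstrings above):

if __name__ == "__main__":
    dataset = load_local_dataset()  # hypothetical loader named in the docstrings
    results = evaluate(dataset, status="offline")
    metrics = calculate_metrics(results)
    print(f"Accuracy: {metrics['accuracy']:.2%}, "
          f"no-answer rate: {metrics['no_answer_rate']:.2%}")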
# section 2: activate LLMs
# ! let's just skip that part
# model_dir = download_model()
# ! please run the model on a certain port, see https://github.com/datawhalechina/self-llm/blob/master/models/LLaMA3/01-LLaMA3-8B-Instruct%20FastApi%20%E9%83%A8%E7%BD%B2%E8%B0%83%E7%94%A8.md for reference
# ! we assume that your model is available at localhost:6000 (the port uvicorn serves on below)
import datetime
import json
import os

import torch
import uvicorn
from fastapi import FastAPI, Request
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
os.environ["CUDA_VISIBLE_DEVICES"] = "3,4,5,6"

# Set device parameters
DEVICE = "cuda"  # Use CUDA
DEVICE_ID = "0"  # CUDA device ID, leave empty if not set
CUDA_DEVICE = f"{DEVICE}:{DEVICE_ID}" if DEVICE_ID else DEVICE  # Combine CUDA device info
# Function to clear GPU memory
def torch_gc():
    if torch.cuda.is_available():  # Check if CUDA is available
        with torch.cuda.device(CUDA_DEVICE):  # Specify CUDA device
            torch.cuda.empty_cache()  # Clear CUDA cache
            torch.cuda.ipc_collect()  # Collect CUDA memory fragments
# Create FastAPI app
app = FastAPI()
# Endpoint to handle POST requests
@app.post("/")
async def create_item(request: Request):
    global model, tokenizer  # Use the globally loaded model and tokenizer
    json_post_raw = await request.json()  # Get JSON data from the POST request
    prompt = json_post_raw.get("prompt")  # Get prompt from request
    max_length = json_post_raw.get("max_length")  # Get max_length from request

    # Generate the response (the usual chat-template flow for deepseek-moe-16b-chat;
    # treat the exact generation arguments as an assumption)
    messages = [{"role": "user", "content": prompt}]
    input_tensor = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    )
    outputs = model.generate(input_tensor.to(model.device), max_new_tokens=max_length)
    result = tokenizer.decode(
        outputs[0][input_tensor.shape[1]:], skip_special_tokens=True
    )
    now = datetime.datetime.now()  # Get current time
    time = now.strftime("%Y-%m-%d %H:%M:%S")  # Format time as string
    # Build response JSON
    answer = {"response": result, "status": 200, "time": time}
    # Build and print log information
    log = "[" + time + '] prompt:"' + prompt + '", response:' + repr(result)
    print(log)
    torch_gc()  # Free GPU memory after each request
    return answer  # Return response
# Main entry point
if __name__ == "__main__":
    model_name_or_path = (
        "/GPFS/rhome/xiyuanyang/local_LLM/deepseek-ai/deepseek-moe-16b-chat"
    )

    # Load pretrained tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(
        model_name_or_path, trust_remote_code=True
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    model.generation_config = GenerationConfig.from_pretrained(model_name_or_path)
    model.generation_config.pad_token_id = model.generation_config.eos_token_id
    model.eval()  # Set model to evaluation mode

    # Start the FastAPI app on port 6000; map this port to your local machine
    # (e.g. via ssh port forwarding on autodl) so the API can be used locally
    uvicorn.run(app, host="0.0.0.0", port=6000, workers=1)
Then you will see:
Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████| 7/7 [00:19<00:00,  2.73s/it]
INFO:     Started server process [525302]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:6000 (Press CTRL+C to quit)
if __name__ == "__main__":
    prompt = input()
    content: str = get_completion(prompt, 10000000)
    contents = content.strip().split("\n")
    for con in contents:
        print(con)
Please input your prompts.😘
I am a student learning computer science and artificial intelligence. I want to learn the basics of front-end and back-end development in the summer vacation and become an excellent developer in the future. Can you arrange a plan for 40 days to improve my developing skills? I want to improve myself in how to conduct product research and task planning, basic knowledge of front-end and back-end development, and software engineering. Generate a whole report for me. Thank you!

Sure, I can help you create a plan to improve your front-end and back-end development skills and gain knowledge in product research and task planning. Here's a 4-step plan:
1. Learn the basics of front-end and back-end development:
   * Front-end development: HTML, CSS, JavaScript, and frameworks like React, Angular, or Vue.js.
   * Back-end development: Python, Java, or Ruby on Rails, and databases like MySQL, MongoDB, or PostgreSQL.
   * Learn how to use version control systems like Git and GitHub.
2. Conduct product research and task planning:
   * Learn how to conduct market research and user research to understand your target audience and their needs.
   * Learn how to create user personas, user stories, and user journeys to understand user behavior and create a user-centered design.
   * Learn how to create a project roadmap, including milestones, deadlines, and tasks.
3. Learn software engineering principles:
   * Learn how to write clean, maintainable, and scalable code.
   * Learn how to use design patterns and best practices for software development.
   * Learn how to use agile methodologies like Scrum or Kanban for project management.
4. Generate a whole report:
   * Create a report that summarizes your learning and skills gained during the summer vacation.
   * Include a section on front-end and back-end development, including the technologies and frameworks you learned.
   * Include a section on product research and task planning, including the methods and tools you used.
   * Include a section on software engineering principles, including the design patterns and best practices you learned.
   * Include a conclusion that summarizes your learning and future goals.
I hope this plan helps you improve your front-end and back-end development skills and gain knowledge in product research and task planning. Good luck with your summer vacation!